1
1

Update to the latest version provided by Guillaume.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
George Bosilca 2017-06-13 22:15:09 -04:00
родитель fc21ffadc9
Коммит 2c00c4209a
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 09C926752C9F09B1
39 изменённых файлов: 5196 добавлений и 2728 удалений

Просмотреть файл

@ -13,20 +13,25 @@
if topo_treematch_local
extra_treematch_files = treematch/tm_bucket.h \
treematch/tm_hwloc.h treematch/tm_mapping.h \
treematch/tm_mapping.h \
treematch/tm_timings.h treematch/tm_tree.h \
treematch/tm_kpartitioning.h treematch/uthash.h\
treematch/IntConstantInitializedVector.h \
treematch/tm_mt.h \
treematch/tm_mt.h treematch/fibo.h \
treematch/tm_thread_pool.h treematch/tm_verbose.h \
treematch/tm_malloc.h \
treematch/tm_malloc.h treematch/k-partitioning.h\
treematch/tm_solution.h treematch/tm_topology.h\
treematch/PriorityQueue.h \
treematch/IntConstantInitializedVector.c \
treematch/tm_mt.c \
treematch/tm_mt.c treematch/fibo.c \
treematch/tm_thread_pool.c treematch/tm_verbose.c \
treematch/tm_malloc.c \
treematch/tm_malloc.c treematch/treematch.h \
treematch/tm_mapping.c treematch/tm_timings.c \
treematch/tm_bucket.c treematch/tm_tree.c \
treematch/tm_hwloc.c treematch/tm_kpartitioning.c
treematch/tm_topology.c treematch/tm_kpartitioning.c \
treematch/tm_solution.c treematch/k-partitioning.c \
treematch/PriorityQueue.c
EXTRA_DIST = treematch/COPYING treematch/LICENSE
endif
sources = \

Просмотреть файл

@ -70,7 +70,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* module,
int n, const int nodes[],
const int degrees[], const int targets[],
const int weights[],
struct opal_info_t *info, int reorder,
struct ompi_info_t *info, int reorder,
ompi_communicator_t **newcomm);
/*
* ******************************************************************

Просмотреть файл

@ -62,6 +62,9 @@ mca_topo_treematch_component_2_2_0_t mca_topo_treematch_component =
static int init_query(bool enable_progress_threads, bool enable_mpi_threads)
{
if(NULL == opal_hwloc_topology) {
return OPAL_ERR_NOT_SUPPORTED;
}
return OMPI_SUCCESS;
}
@ -95,3 +98,4 @@ static int mca_topo_treematch_component_register(void)
MCA_BASE_VAR_SCOPE_READONLY, &mca_topo_treematch_component.reorder_mode);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -3,8 +3,8 @@
* Copyright (c) 2011-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2015 INRIA. All rights reserved.
* Copyright (c) 2012-2015 Bordeaux Poytechnic Institute
* Copyright (c) 2011-2016 INRIA. All rights reserved.
* Copyright (c) 2012-2017 Bordeaux Poytechnic Institute
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -25,6 +25,7 @@
#include "opal/mca/hwloc/hwloc-internal.h"
#include "ompi/mca/topo/treematch/topo_treematch.h"
#include "ompi/mca/topo/treematch/treematch/treematch.h"
#include "ompi/mca/topo/treematch/treematch/tm_mapping.h"
#include "ompi/mca/topo/base/base.h"
@ -46,6 +47,7 @@
#define FALLBACK() \
do { free(nodes_roots); \
free(lindex_to_grank); \
if( NULL != set) hwloc_bitmap_free(set); \
goto fallback; } \
while(0);
@ -92,8 +94,8 @@ static void dump_int_array( char* prolog, char* line_prolog, int* array, size_t
size_t i;
fprintf(stdout,"%s : ", prolog);
for(i = 0; i < num_procs_in_node ; i++)
fprintf(stdout,"[$s%i:%i] ", line_prolog, i, array[i]);
for(i = 0; i < length ; i++)
fprintf(stdout,"%s [%lu:%i] ", line_prolog, i, array[i]);
fprintf(stdout,"\n");
}
static void dump_double_array( char* prolog, char* line_prolog, double* array, size_t length )
@ -101,8 +103,8 @@ static void dump_double_array( char* prolog, char* line_prolog, double* array, s
size_t i;
fprintf(stdout,"%s : ", prolog);
for(i = 0; i < num_procs_in_node ; i++)
fprintf(stdout,"%s [%i:%i] ", line_prolog, i, array[i]);
for(i = 0; i < length ; i++)
fprintf(stdout,"%s [%lu:%lf] ", line_prolog, i, array[i]);
fprintf(stdout,"\n");
}
#endif
@ -112,7 +114,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
int n, const int nodes[],
const int degrees[], const int targets[],
const int weights[],
struct opal_info_t *info, int reorder,
struct ompi_info_t *info, int reorder,
ompi_communicator_t **newcomm)
{
int err;
@ -155,6 +157,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
int num_nodes = 0;
int num_procs_in_node = 0;
int rank, size;
int *k = NULL;
int newrank = -1;
int hwloc_err;
int oversubscribing_objs = 0, oversubscribed_pus = 0;
int i, j, idx;
@ -250,6 +254,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
* all the calls that involve collective communications, so we have to lay the logic
* accordingly.
*/
if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){ /* processes are not bound on the machine */
#ifdef __DEBUG__
if (0 == rank)
@ -291,6 +296,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
num_objs_in_node,num_procs_in_node,
nodes_roots,lindex_to_grank,comm_old);
}
if (!oversubscribed_pus) {
/* Update the data used to compute the correct binding */
if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){ /* processes are not bound on the machine */
@ -306,17 +312,17 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
if( !oversubscribing_objs && !oversubscribed_pus ) {
if( hwloc_bitmap_isincluded(root_obj->cpuset,set) ) { /* processes are not bound on the machine */
obj_rank = ompi_process_info.my_local_rank%num_objs_in_node;
effective_depth = depth;
object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,obj_rank);
if( NULL == object) FALLBACK();
obj_rank = ompi_process_info.my_local_rank%num_objs_in_node;
effective_depth = depth;
object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,obj_rank);
if( NULL == object) FALLBACK();
hwloc_bitmap_copy(set,object->cpuset);
hwloc_bitmap_singlify(set); /* we don't want the process to move */
hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0);
if( -1 == hwloc_err) FALLBACK();
hwloc_bitmap_copy(set,object->cpuset);
hwloc_bitmap_singlify(set); /* we don't want the process to move */
hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0);
if( -1 == hwloc_err) FALLBACK();
#ifdef __DEBUG__
fprintf(stdout,"Process not bound : binding on OBJ#%i \n",obj_rank);
fprintf(stdout,"Process not bound : binding on OBJ#%i \n",obj_rank);
#endif
} else {
#ifdef __DEBUG__
@ -385,7 +391,6 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
if (0 == mca_topo_treematch_component.reorder_mode) {
int *k = NULL;
int *obj_mapping = NULL;
int newrank = -1;
int num_objs_total = 0;
/* Gather comm pattern
@ -419,7 +424,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
tm_topology_t *tm_opt_topology = NULL;
int *obj_to_rank_in_comm = NULL;
int *hierarchies = NULL;
int hierarchy[MAX_LEVELS+1];
int hierarchy[TM_MAX_LEVELS+1];
int min;
/* create a table that derives the rank in comm_old from the object number */
@ -489,27 +494,27 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
free(obj_to_rank_in_comm);
hierarchy[0] = numlevels;
assert(numlevels < MAX_LEVELS);
assert(numlevels < TM_MAX_LEVELS);
for(i = 0 ; i < hierarchy[0]; i++)
hierarchy[i+1] = tracker[i]->arity;
for(; i < (MAX_LEVELS+1); i++) /* fill up everything else with -1 */
for(; i < (TM_MAX_LEVELS+1); i++) /* fill up everything else with -1 */
hierarchy[i] = -1;
if( 0 == rank ) {
hierarchies = (int *)malloc(num_nodes*(MAX_LEVELS+1)*sizeof(int));
memcpy(hierarchies, hierarchy, (MAX_LEVELS+1)*sizeof(int));
hierarchies = (int *)malloc(num_nodes*(TM_MAX_LEVELS+1)*sizeof(int));
memcpy(hierarchies, hierarchy, (TM_MAX_LEVELS+1)*sizeof(int));
}
/* gather hierarchies iff more than 1 node! */
if ( num_nodes > 1 ) {
if( rank != 0 ) {
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(hierarchy,(MAX_LEVELS+1), MPI_INT, 0,
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(hierarchy,(TM_MAX_LEVELS+1), MPI_INT, 0,
111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
ERR_EXIT(err);
} else {
for(i = 1; i < num_nodes ; i++)
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(hierarchies+i*(MAX_LEVELS+1), (MAX_LEVELS+1), MPI_INT,
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(hierarchies+i*(TM_MAX_LEVELS+1), (TM_MAX_LEVELS+1), MPI_INT,
nodes_roots[i], 111, comm_old, &reqs[i-1])))){
free(hierarchies);
ERR_EXIT(err);
@ -524,23 +529,25 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
}
if ( 0 == rank ) {
tree_t *comm_tree = NULL;
tm_tree_t *comm_tree = NULL;
tm_solution_t *sol = NULL;
tm_affinity_mat_t *aff_mat = NULL;
double **comm_pattern = NULL;
int *matching = NULL;
#ifdef __DEBUG__
dump_int_array("hierarchies : ", "", hierarchies, num_nodes*(MAX_LEVELS+1));
dump_int_array("hierarchies : ", "", hierarchies, num_nodes*(TM_MAX_LEVELS+1));
#endif
tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t));
tm_topology->nb_levels = hierarchies[0];
/* extract min depth */
for(i = 1 ; i < num_nodes ; i++)
if (hierarchies[i*(MAX_LEVELS+1)] < tm_topology->nb_levels)
tm_topology->nb_levels = hierarchies[i*(MAX_LEVELS+1)];
if (hierarchies[i*(TM_MAX_LEVELS+1)] < tm_topology->nb_levels)
tm_topology->nb_levels = hierarchies[i*(TM_MAX_LEVELS+1)];
/* Crush levels in hierarchies too long (ie > tm_topology->nb_levels)*/
for(i = 0; i < num_nodes ; i++) {
int *base_ptr = hierarchies + i*(MAX_LEVELS+1);
int *base_ptr = hierarchies + i*(TM_MAX_LEVELS+1);
int suppl = *base_ptr - tm_topology->nb_levels;
for(j = 1 ; j <= suppl ; j++)
*(base_ptr + tm_topology->nb_levels) *= *(base_ptr + tm_topology->nb_levels + j);
@ -553,8 +560,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
for(i = 1; i < tm_topology->nb_levels; i++) { /* compute the minimum for each level */
min = hierarchies[i];
for(j = 1; j < num_nodes ; j++)
if( hierarchies[j*(MAX_LEVELS+1) + i] < min)
min = hierarchies[j*(MAX_LEVELS+1) + i];
if( hierarchies[j*(TM_MAX_LEVELS+1) + i] < min)
min = hierarchies[j*(TM_MAX_LEVELS+1) + i];
tm_topology->arity[i] = min;
}
} else {
@ -568,24 +575,58 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
fprintf(stdout,"topo_arity[%i] = %i\n", i, tm_topology->arity[i]);
#endif
/* compute the number of processing elements */
tm_topology->nb_nodes = (int *)calloc(tm_topology->nb_levels, sizeof(int));
tm_topology->nb_nodes = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t));
tm_topology->nb_nodes[0] = 1;
for(i = 1 ; i < tm_topology->nb_levels; i++)
tm_topology->nb_nodes[i] = tm_topology->nb_nodes[i-1] * tm_topology->arity[i-1];
/* Build process id tab */
tm_topology->node_id = (int **)calloc(tm_topology->nb_levels, sizeof(int*));
tm_topology->node_rank = (int **)malloc(sizeof(int *) * tm_topology->nb_levels);
for(i = 0; i < tm_topology->nb_levels; i++) {
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int));
for (j = 0; j < tm_topology->nb_nodes[i]; j++)
tm_topology->node_id[i][j] = obj_mapping[j];
tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int));
/*note : we make the hypothesis that logical indexes in hwloc range from
0 to N, are contiguous and crescent. */
for( j = 0 ; j < tm_topology->nb_nodes[i] ; j++ ){
tm_topology->node_id[i][j] = j;
tm_topology->node_rank[i][j] = j;
/* Should use object->logical_index */
/* obj = hwloc_get_obj_by_depth(topo,i,j%num_objs_in_node);
id = obj->logical_index + (num_objs_in_node)*(j/num_obj_in_node)*/
/*
int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
topology->node_id[i][j] = id;
 topology->node_rank[i][id] = j;
*/
}
}
/* unused for now*/
tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
tm_topology->nb_proc_units = num_objs_total;
tm_topology->nb_constraints = 0;
for(i = 0; i < tm_topology->nb_proc_units ; i++)
if (obj_mapping[i] != -1)
tm_topology->nb_constraints++;
tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
for(idx = 0,i = 0; i < tm_topology->nb_proc_units ; i++)
if (obj_mapping[i] != -1)
tm_topology->constraints[idx++] = obj_mapping[i];
tm_topology->oversub_fact = 1;
#ifdef __DEBUG__
assert(num_objs_total == tm_topology->nb_nodes[tm_topology->nb_levels-1]);
for(i = 0; i < tm_topology->nb_levels ; i++) {
fprintf(stdout,"tm topo node_id for level [%i] : ",i);
dump_int_array("", "", obj_mapping, tm_topology->nb_nodes[i]);
}
display_topology(tm_topology);
tm_display_topology(tm_topology);
#endif
comm_pattern = (double **)malloc(size*sizeof(double *));
@ -600,32 +641,31 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
#ifdef __DEBUG__
fprintf(stdout,"==== COMM PATTERN ====\n");
for( i = 0 ; i < size ; i++) {
dump_double_array("", "", comm_pattern, size);
dump_double_array("", "", comm_pattern[i], size);
}
#endif
k = (int *)calloc(num_objs_total, sizeof(int));
matching = (int *)calloc(size, sizeof(int));
tm_optimize_topology(&tm_topology);
aff_mat = tm_build_affinity_mat(comm_pattern,size);
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
sol = tm_compute_mapping(tm_topology, comm_tree);
k = (int *)calloc(sol->k_length, sizeof(int));
for(idx = 0 ; idx < sol->k_length ; idx++)
k[idx] = sol->k[idx][0];
tm_opt_topology = optimize_topology(tm_topology);
comm_tree = build_tree_from_topology(tm_opt_topology, comm_pattern, size, NULL, NULL);
map_topology_simple(tm_opt_topology, comm_tree, matching, size, k);
#ifdef __DEBUG__
fprintf(stdout,"====> nb levels : %i\n",tm_topology->nb_levels);
dump_int_array("Rank permutation sigma/k : ", "", k, num_objs_total);
dump_int_array("Matching : ", "", matching, size);
assert(size == sol->sigma_length);
dump_int_array("Matching : ", "",sol->sigma, sol->sigma_length);
#endif
free(comm_pattern);
free(comm_tree);
free(matching);
free(obj_mapping);
for(i = 0 ; i < tm_topology->nb_levels ; i++)
free(tm_topology->node_id[i]);
free(tm_topology->node_id);
free(tm_topology->nb_nodes);
free(tm_topology->arity);
free(tm_topology);
FREE_topology(tm_opt_topology);
free(comm_pattern);
free(aff_mat->sum_row);
free(aff_mat);
tm_free_solution(sol);
tm_free_tree(comm_tree);
tm_free_topology(tm_topology);
}
}
@ -648,15 +688,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
(*newcomm)->c_topo = topo_module;
(*newcomm)->c_topo->reorder = reorder;
} else { /* partially distributed reordering */
ompi_communicator_t *localcomm = NULL;
int *matching = (int *)calloc(num_procs_in_node,sizeof(int));
int *lrank_to_grank = (int *)calloc(num_procs_in_node,sizeof(int));
int *grank_to_lrank = (int *)calloc(size,sizeof(int));
hwloc_obj_t object;
opal_hwloc_locality_t locality;
char set_as_string[64];
opal_value_t kv;
if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old, colors[rank], rank,
&localcomm, false)))
@ -696,8 +733,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
/* The root has now the entire information, so let's crunch it */
if (rank == lindex_to_grank[0]) {
tm_topology_t *tm_topology = NULL;
tm_topology_t *tm_opt_topology = NULL;
tree_t *comm_tree = NULL;
tm_tree_t *comm_tree = NULL;
tm_solution_t *sol = NULL;
tm_affinity_mat_t *aff_mat = NULL;
double **comm_pattern = NULL;
comm_pattern = (double **)malloc(num_procs_in_node*sizeof(double *));
@ -717,7 +755,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
fprintf(stdout,"========== COMM PATTERN ============= \n");
for(i = 0 ; i < num_procs_in_node ; i++){
fprintf(stdout," %i : ",i);
dump_double_array("", "", comm_pattern, num_procs_in_node);
dump_double_array("", "", comm_pattern[i], num_procs_in_node);
}
fprintf(stdout,"======================= \n");
#endif
@ -725,92 +763,92 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t));
tm_topology->nb_levels = numlevels;
tm_topology->arity = (int *)calloc(tm_topology->nb_levels, sizeof(int));
tm_topology->nb_nodes = (int *)calloc(tm_topology->nb_levels, sizeof(int));
tm_topology->nb_nodes = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t));
tm_topology->node_id = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
tm_topology->node_rank = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
for(i = 0 ; i < tm_topology->nb_levels ; i++){
int nb_objs = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, tracker[i]->depth);
tm_topology->nb_nodes[i] = nb_objs;
tm_topology->arity[i] = tracker[i]->arity;
tm_topology->node_id[i] = (int*)malloc(sizeof(int)*nb_objs);
for(j = 0; j < num_procs_in_node; j++)
tm_topology->node_id[i][j] = localrank_to_objnum[j];
for(; j < nb_objs; tm_topology->node_id[i][j] = -1, j++); /* complete with empty */
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int));
tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int));
for(j = 0; j < tm_topology->nb_nodes[i] ; j++){
tm_topology->node_id[i][j] = j;
tm_topology->node_rank[i][j] = j;
}
}
/* unused for now*/
tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
tm_topology->nb_proc_units = num_objs_in_node;
//tm_topology->nb_proc_units = num_procs_in_node;
tm_topology->nb_constraints = 0;
for(i = 0; i < num_procs_in_node ; i++)
if (localrank_to_objnum[i] != -1)
tm_topology->nb_constraints++;
tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
for(idx = 0,i = 0; i < num_procs_in_node ; i++)
if (localrank_to_objnum[i] != -1)
tm_topology->constraints[idx++] = localrank_to_objnum[i];
tm_topology->oversub_fact = 1;
#ifdef __DEBUG__
assert(num_objs_in_node == tm_topology->nb_nodes[tm_topology->nb_levels-1]);
fprintf(stdout,"Levels in topo : %i | num procs in node : %i\n",tm_topology->nb_levels,num_procs_in_node);
for(i = 0; i < tm_topology->nb_levels ; i++){
fprintf(stdout,"Nb objs for level %i : %i | arity %i\n ",i,tm_topology->nb_nodes[i],tm_topology->arity[i]);
dump_int_array("", "Obj id ", tm_topology->node_id[i], tm_topology->nb_nodes[i]);
}
display_topology(tm_topology);
tm_display_topology(tm_topology);
#endif
tm_optimize_topology(&tm_topology);
aff_mat = tm_build_affinity_mat(comm_pattern,num_procs_in_node);
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
sol = tm_compute_mapping(tm_topology, comm_tree);
tm_opt_topology = optimize_topology(tm_topology);
comm_tree = build_tree_from_topology(tm_opt_topology, comm_pattern, num_procs_in_node, NULL, NULL);
map_topology_simple(tm_opt_topology, comm_tree, matching, num_procs_in_node, NULL);
k = (int *)calloc(sol->k_length, sizeof(int));
for(idx = 0 ; idx < sol->k_length ; idx++)
k[idx] = sol->k[idx][0];
#ifdef __DEBUG__
dump_int_array("Matching:", "", matching, num_procs_in_node);
fprintf(stdout,"====> nb levels : %i\n",tm_topology->nb_levels);
dump_int_array("Rank permutation sigma/k : ", "", k, num_procs_in_node);
assert(num_procs_in_node == sol->sigma_length);
dump_int_array("Matching : ", "",sol->sigma, sol->sigma_length);
#endif
free(aff_mat->sum_row);
free(aff_mat);
free(comm_pattern);
for(i = 0; i < tm_topology->nb_levels; i++)
free(tm_topology->node_id[i]);
free(tm_topology->node_id);
free(tm_topology->nb_nodes);
free(tm_topology->arity);
free(tm_topology);
FREE_topology(tm_opt_topology);
tm_free_solution(sol);
tm_free_tree(comm_tree);
tm_free_topology(tm_topology);
}
/* Todo : Bcast + group creation */
/* scatter the ranks */
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_bcast(matching, num_procs_in_node,
MPI_INT,0,localcomm,
localcomm->c_coll->coll_bcast_module)))
MPI_INT,0,localcomm,
localcomm->c_coll->coll_bcast_module)))
ERR_EXIT(err);
object = hwloc_get_obj_by_depth(opal_hwloc_topology,
effective_depth, matching[ompi_process_info.my_local_rank]);
if( NULL == object) goto fallback;
hwloc_bitmap_copy(set, object->cpuset);
hwloc_bitmap_singlify(set);
err = hwloc_set_cpubind(opal_hwloc_topology,set,0);
if( -1 == err) goto fallback;
if ( 0 == rank )
free(k);
/* Report new binding to ORTE/OPAL */
/* hwloc_bitmap_list_asprintf(&orte_process_info.cpuset,set); */
err = hwloc_bitmap_snprintf(set_as_string, 64, set);
#ifdef __DEBUG__
fprintf(stdout,"Bitmap str size : %i\n", err);
#endif
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_PMIX_CPUSET);
kv.type = OPAL_STRING;
kv.data.string = strdup(set_as_string);
(void)opal_pmix.store_local((opal_process_name_t*)OMPI_PROC_MY_NAME, &kv);
OBJ_DESTRUCT(&kv);
locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
ompi_process_info.cpuset,set_as_string);
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_PMIX_LOCALITY);
kv.type = OPAL_UINT16;
kv.data.uint16 = locality;
(void)opal_pmix.store_local((opal_process_name_t*)OMPI_PROC_MY_NAME, &kv);
OBJ_DESTRUCT(&kv);
if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old,
comm_old->c_local_group,
newcomm))) {
/* this needs to be optimized but will do for now */
if (OMPI_SUCCESS != (err = ompi_comm_split(localcomm, 0, newrank, newcomm, false)))
ERR_EXIT(err);
} else {
/* Attach the dist_graph to the newly created communicator */
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
(*newcomm)->c_topo = topo_module;
(*newcomm)->c_topo->reorder = reorder;
}
/* end of TODO */
/* Attach the dist_graph to the newly created communicator */
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
(*newcomm)->c_topo = topo_module;
(*newcomm)->c_topo->reorder = reorder;
free(matching);
free(grank_to_lrank);
free(lrank_to_grank);

Просмотреть файл

@ -2,13 +2,12 @@
#include <stdio.h>
#include "IntConstantInitializedVector.h"
int intCIV_isInitialized(int_CIVector * v, int i)
{
if(v->top == 0)
return 0;
if(v->from[i] >= 0)
if(v->from[i] < v->top && v->to[v->from[i]] == i)
if(v->from[i] < v->top && v->to[v->from[i]] == i)
return 1;
return 0;
}
@ -45,7 +44,7 @@ int intCIV_set(int_CIVector * v, int i, int val)
v->top++;
}
v->vec[i] = val;
return 0;
return 0;
}
int intCIV_get(int_CIVector * v, int i)

Просмотреть файл

@ -12,5 +12,4 @@ void intCIV_exit(int_CIVector * v);
int intCIV_set(int_CIVector * v, int i, int val);
int intCIV_get(int_CIVector * v, int i);
#endif /*INTEGER_CONSTANT_INITIALIZED_VECTOR*/

Просмотреть файл

@ -0,0 +1,174 @@
#include <stdlib.h>
#include "PriorityQueue.h"
/*
This comparison function is used to sort elements in key descending order.
*/
int compfunc(const FiboNode * const, const FiboNode * const);
int compFunc(const FiboNode * const node1, const FiboNode * const node2)
{
return
( ( ((QueueElement*)(node1))->key > ((QueueElement*)(node2))->key ) ? -1 : 1);
}
/* Initialize a PriorityQueue of capacity `size` (values must lie in [0, size)).
 * The PriorityQueue struct itself must already be allocated by the caller.
 * Returns 0 on success, non-zero on failure (allocation or tree init). */
int PQ_init(PriorityQueue * const q, int size)
{
    q->size = size;
    /* calloc zero-fills (every slot starts NULL) and checks the
     * count * size multiplication for overflow; the original malloc
     * result was used unchecked. */
    q->elements = calloc((size_t)size, sizeof(QueueElement *));
    if (q->elements == NULL)
        return 1; /* contract: non-zero on failure */
    return fiboTreeInit((FiboTree *)q, compFunc);
}
/* Destroy the queue: free every stored element, the element vector, and the
 * underlying tree's resources. The queue is unusable until PQ_init again.
 * The PriorityQueue struct itself is not freed. */
void PQ_exit(PriorityQueue * const q)
{
    int i;
    /* Guard the vector itself: if PQ_init failed, q->elements may be NULL
     * and the original loop would have dereferenced it. */
    if (q->elements != NULL)
    {
        for (i = 0; i < q->size; i++)
            free(q->elements[i]); /* free(NULL) is a no-op, no per-slot guard needed */
        free(q->elements);
        q->elements = NULL; /* defend against double PQ_exit */
    }
    fiboTreeExit((FiboTree *)q);
}
/* Empty the queue, freeing the stored elements, but keep the queue usable
 * (the element vector and tree structure are retained). */
void PQ_free(PriorityQueue * const q)
{
    int i;
    if (q->elements != NULL) /* tolerate a queue whose init failed */
    {
        for (i = 0; i < q->size; i++)
        {
            free(q->elements[i]);
            /* Fix: the original left freed pointers in the vector; a later
             * PQ_delete/PQ_adjustKey on the same value would have been a
             * use-after-free / double free. */
            q->elements[i] = NULL;
        }
    }
    fiboTreeFree((FiboTree *)q);
}
/* Return 1 when the queue holds no element, 0 otherwise. */
int PQ_isEmpty(PriorityQueue * const q)
{
    FiboTree *tree = (FiboTree *)q;
    /* An empty Fibonacci tree is represented by the root node being
     * linked back onto itself. */
    return (tree->rootdat.linkdat.nextptr == &tree->rootdat) ? 1 : 0;
}
/* Insert a caller-owned QueueElement into the queue.
 * Elements whose value cannot index the vector are silently ignored. */
void PQ_insertElement(PriorityQueue * const q, QueueElement * const e)
{
    if (e->value < 0 || e->value >= q->size)
        return;
    fiboTreeAdd((FiboTree *)q, (FiboNode *)e);
    q->elements[e->value] = e;
    e->isInQueue = 1;
}
/* Remove the given element from the queue; the element itself is not freed. */
void PQ_deleteElement(PriorityQueue * const q, QueueElement * const e)
{
    /* Unlink from the tree, then clear the per-value bookkeeping. */
    fiboTreeDel((FiboTree *)q, (FiboNode *)e);
    e->isInQueue = 0;
    q->elements[e->value] = NULL;
}
/* Allocate and insert a new element with the given value and key.
 * Out-of-range values are silently ignored (matching PQ_insertElement). */
void PQ_insert(PriorityQueue * const q, int val, double key)
{
    QueueElement *e;

    if (val < 0 || val >= q->size)
        return;
    e = malloc(sizeof *e);
    if (e == NULL)
        return; /* fix: the original used the malloc result unchecked */
    e->value = val;
    e->key = key;
    PQ_insertElement(q, e); /* sets isInQueue and the vector slot */
}
/* Remove and free the element stored under `val`, if any. */
void PQ_delete(PriorityQueue * const q, int val)
{
    QueueElement *e;

    /* Fix: the original neither bounds-checked val nor handled an absent
     * value; PQ_deleteElement then dereferenced a NULL element. */
    if (val < 0 || val >= q->size)
        return;
    e = q->elements[val];
    if (e == NULL)
        return;
    PQ_deleteElement(q, e);
    free(e);
}
/* Return the element with the greatest key, or NULL if the queue is empty. */
QueueElement * PQ_findMaxElement(PriorityQueue * const q)
{
    /* compFunc orders keys descending, so the tree "minimum" is the max key. */
    return (QueueElement *)fiboTreeMin((FiboTree *)q);
}
/* Pop the element with the greatest key (ownership passes to the caller).
 * Returns NULL on an empty queue. */
QueueElement * PQ_deleteMaxElement(PriorityQueue * const q)
{
    QueueElement *top = (QueueElement *)fiboTreeMin((FiboTree *)q);
    if (top == NULL)
        return NULL;
    PQ_deleteElement(q, top);
    return top;
}
/* Return the greatest key in the queue; an empty queue reports 0. */
double PQ_findMaxKey(PriorityQueue * const q)
{
    const QueueElement *top = PQ_findMaxElement(q);
    return (top == NULL) ? 0 : top->key;
}
/* Pop the element with the greatest key, free it, and return its value.
 * Returns -1 when the queue is empty. */
int PQ_deleteMax(PriorityQueue * const q)
{
    int res = -1;
    QueueElement *top = PQ_deleteMaxElement(q);
    if (top != NULL)
    {
        res = top->value;
        free(top);
    }
    return res;
}
/* Add i to the key of e. Re-keying is done by delete + re-insert so the
 * heap property is restored; elements not in the queue are left alone. */
void PQ_increaseElementKey(PriorityQueue * const q, QueueElement * const e, double i)
{
    if (!e->isInQueue)
        return;
    PQ_deleteElement(q, e);
    e->key += i;
    PQ_insertElement(q, e);
}
/* Subtract i from the key of e (delete + re-insert keeps the heap valid). */
void PQ_decreaseElementKey(PriorityQueue * const q, QueueElement * const e, double i)
{
    if (!e->isInQueue)
        return;
    PQ_deleteElement(q, e);
    e->key -= i;
    PQ_insertElement(q, e);
}
/* Set the key of e to i (delete + re-insert keeps the heap valid). */
void PQ_adjustElementKey(PriorityQueue * const q, QueueElement * const e, double i)
{
    if (!e->isInQueue)
        return;
    PQ_deleteElement(q, e);
    e->key = i;
    PQ_insertElement(q, e);
}
/* Add i to the key of the element stored under `val`, if present. */
void PQ_increaseKey(PriorityQueue * const q, int val, double i)
{
    /* Fix: guard val before indexing — the original read q->elements[val]
     * out of bounds for values outside [0, size). */
    if (val < 0 || val >= q->size)
        return;
    if (q->elements[val] != NULL)
        PQ_increaseElementKey(q, q->elements[val], i);
}
/* Subtract i from the key of the element stored under `val`, if present. */
void PQ_decreaseKey(PriorityQueue * const q, int val, double i)
{
    /* Fix: guard val before indexing — the original read q->elements[val]
     * out of bounds for values outside [0, size). */
    if (val < 0 || val >= q->size)
        return;
    if (q->elements[val] != NULL)
        PQ_decreaseElementKey(q, q->elements[val], i);
}
/* Set to i the key of the element stored under `val`, if present. */
void PQ_adjustKey(PriorityQueue * const q, int val, double i)
{
    /* Fix: guard val before indexing — the original read q->elements[val]
     * out of bounds for values outside [0, size). */
    if (val < 0 || val >= q->size)
        return;
    if (q->elements[val] != NULL)
        PQ_adjustElementKey(q, q->elements[val], i);
}

Просмотреть файл

@ -0,0 +1,108 @@
#ifndef PRIORITY_QUEUE
#define PRIORITY_QUEUE
#include "fibo.h"
/*
  Element stored in a PriorityQueue.
  The FiboNode is the FIRST member, so a QueueElement pointer and a FiboNode
  pointer can be converted into one another with a simple cast.
*/
typedef struct QueueElement_
{
FiboNode node; /* node used to link the element into the FiboTree */
double key; /* sort key; elements are ordered by descending key */
int value;
int isInQueue;
} QueueElement;
typedef struct PriorityQueue_
{
FiboTree tree;
QueueElement ** elements; /* vector indexed by element value, so an element can be retrieved from its value in O(1) */
int size; /* capacity allocated to the elements vector */
} PriorityQueue;
/*
  PQ_init initializes a PriorityQueue with the size given in argument and sets
  compFunc as comparison function. Note that the PriorityQueue pointer itself
  must already be allocated before calling this function.
  Returns :
  0 if success
  !0 if failed
  PQ_free simply empties the PriorityQueue but does not free the memory used by its elements.
  PQ_exit destroys the PriorityQueue without freeing elements. The PriorityQueue is no longer usable until PQ_init is called again.
  Note that the PriorityQueue pointer is not deallocated.
*/
int PQ_init(PriorityQueue * const, int size);
void PQ_free(PriorityQueue * const);
void PQ_exit(PriorityQueue * const);
/*
  PQ_isEmpty returns 1 if the PriorityQueue is empty, 0 otherwise.
*/
int PQ_isEmpty(PriorityQueue * const);
/*
  PQ_insertElement inserts the given QueueElement into the given PriorityQueue.
*/
void PQ_insertElement(PriorityQueue * const, QueueElement * const);
/*
  PQ_deleteElement deletes the element given in argument from the PriorityQueue.
*/
void PQ_deleteElement(PriorityQueue * const, QueueElement * const);
/*
  PQ_insert inserts an element in the PriorityQueue with the value and key given in argument.
*/
void PQ_insert(PriorityQueue * const, int val, double key);
/*
  PQ_delete removes the first element found with the value given in argument and frees it.
*/
void PQ_delete(PriorityQueue * const, int val);
/*
  PQ_findMaxElement returns the QueueElement with the greatest key in the given PriorityQueue.
*/
QueueElement * PQ_findMaxElement(PriorityQueue * const);
/*
  PQ_deleteMaxElement returns the QueueElement with the greatest key in the given PriorityQueue and removes it from the queue.
*/
QueueElement * PQ_deleteMaxElement(PriorityQueue * const);
/*
  PQ_findMaxKey returns the key of the element with the greatest key in the given PriorityQueue.
*/
double PQ_findMaxKey(PriorityQueue * const);
/*
  PQ_deleteMax returns the value of the element with the greatest key in the given PriorityQueue and removes it from the queue.
*/
int PQ_deleteMax(PriorityQueue * const);
/*
  PQ_increaseElementKey adds the value of i to the key of the given QueueElement.
*/
void PQ_increaseElementKey(PriorityQueue * const, QueueElement * const, double i);
/*
  PQ_decreaseElementKey subtracts the value of i from the key of the given QueueElement.
*/
void PQ_decreaseElementKey(PriorityQueue * const, QueueElement * const, double i);
/*
  PQ_adjustElementKey sets to i the key of the given QueueElement.
*/
void PQ_adjustElementKey(PriorityQueue * const, QueueElement * const, double i);
/*
  PQ_increaseKey adds i to the key of the first element found with a value equal to val in the PriorityQueue.
*/
void PQ_increaseKey(PriorityQueue * const, int val, double i);
/*
  PQ_decreaseKey subtracts i from the key of the first element found with a value equal to val in the PriorityQueue.
*/
void PQ_decreaseKey(PriorityQueue * const, int val, double i);
/*
  PQ_adjustKey sets to i the key of the first element found with a value equal to val in the PriorityQueue.
*/
void PQ_adjustKey(PriorityQueue * const, int val, double i);
#endif /*PRIORITY_QUEUE*/

372
ompi/mca/topo/treematch/treematch/fibo.c Обычный файл
Просмотреть файл

@ -0,0 +1,372 @@
/* Copyright 2010 IPB, INRIA & CNRS
**
** This file originally comes from the Scotch software package for
** static mapping, graph partitioning and sparse matrix ordering.
**
** This software is governed by the CeCILL-B license under French law
** and abiding by the rules of distribution of free software. You can
** use, modify and/or redistribute the software under the terms of the
** CeCILL-B license as circulated by CEA, CNRS and INRIA at the following
** URL: "http://www.cecill.info".
**
** As a counterpart to the access to the source code and rights to copy,
** modify and redistribute granted by the license, users are provided
** only with a limited warranty and the software's author, the holder of
** the economic rights, and the successive licensors have only limited
** liability.
**
** In this respect, the user's attention is drawn to the risks associated
** with loading, using, modifying and/or developing or reproducing the
** software by the user in light of its specific status of free software,
** that may mean that it is complicated to manipulate, and that also
** therefore means that it is reserved for developers and experienced
** professionals having in-depth computer knowledge. Users are therefore
** encouraged to load and test the software's suitability as regards
** their requirements in conditions enabling the security of their
** systems and/or data to be ensured and, more generally, to use and
** operate it in the same conditions as regards security.
**
** The fact that you are presently reading this means that you have had
** knowledge of the CeCILL-B license and that you accept its terms.
*/
/************************************************************/
/** **/
/** NAME : fibo.c **/
/** **/
/** AUTHOR : Francois PELLEGRINI **/
/** **/
/** FUNCTION : This module handles Fibonacci trees. **/
/** **/
/** DATES : # Version 1.0 : from : 01 may 2010 **/
/** to 12 may 2010 **/
/** **/
/************************************************************/
/*
** The defines and includes.
*/
#define FIBO
#include <stdlib.h>
#include <memory.h>
#include <stdio.h>
#include "fibo.h"
/* Helper macros which can be redefined at compile time. */
#ifndef INT
#define INT int /* "long long" can be used on 64-bit systems */
#endif /* INT */
#ifndef errorPrint
#define errorPrint(s) fprintf (stderr, s)
#endif /* errorPrint */
#ifndef memAlloc
#define memAlloc malloc
#define memSet memset
#define memFree free
#endif /* memAlloc */
/*********************************************/
/* */
/* These routines deal with Fibonacci trees. */
/* */
/*********************************************/
/* This routine initializes a Fibonacci
** tree structure.
** It returns:
** - 0 : in case of success.
** - !0 : on error.
*/
int
fiboTreeInit (
FiboTree * const treeptr,
int (* cmpfptr) (const FiboNode * const, const FiboNode * const))
{
  size_t tablsiz;

  /* The consolidation array needs one cell per possible degree,
  ** i.e. as many cells as there are bits in an INT. */
  tablsiz = (sizeof (INT) << 3) * sizeof (FiboNode *);
  treeptr->degrtab = (FiboNode **) memAlloc (tablsiz);
  if (treeptr->degrtab == NULL)
    return (1);
  memSet (treeptr->degrtab, 0, tablsiz);    /* All cells NULL: ready for consolidation */

  treeptr->rootdat.linkdat.nextptr = &treeptr->rootdat; /* Root list starts as a self-loop on the dummy root */
  treeptr->rootdat.linkdat.prevptr = &treeptr->rootdat;
  treeptr->cmpfptr = cmpfptr;               /* Keep user comparison routine */

  return (0);
}
/* This routine flushes the contents of
** the given Fibonacci tree.
** It returns:
** - VOID : in all cases.
*/
void
fiboTreeExit (
FiboTree * const treeptr)
{
  /* Release the consolidation array if it was ever allocated. */
  if (treeptr->degrtab == NULL)
    return;
  memFree (treeptr->degrtab);
}
/* This routine flushes the contents of
** the given Fibonacci tree. It does not
** free any of its contents, but instead
** makes the tree structure look empty again.
** It returns:
** - VOID : in all cases.
*/
void
fiboTreeFree (
FiboTree * const treeptr)
{
  FiboNode * rootptr;

  /* Re-close the root list on the dummy root: the tree now looks empty.
  ** Node contents themselves are deliberately not freed. */
  rootptr = &treeptr->rootdat;
  rootptr->linkdat.nextptr = rootptr;
  rootptr->linkdat.prevptr = rootptr;
}
/* This routine performs the consolidation
** of roots per degree: roots of equal degree
** are merged until all remaining roots have
** distinct degrees. It returns the best
** element found because this element is not
** recorded in the data structure itself.
** It returns:
** - !NULL : pointer to best element found.
** - NULL : Fibonacci tree is empty.
*/
FiboNode *
fiboTreeConsolidate (
FiboTree * const treeptr)
{
FiboNode ** restrict degrtab;
int degrmax;
int degrval;
FiboNode * rootptr;
FiboNode * nextptr;
FiboNode * bestptr;
degrtab = treeptr->degrtab;
/* Pass 1: walk the root list, linking together roots of equal degree. */
for (rootptr = treeptr->rootdat.linkdat.nextptr, nextptr = rootptr->linkdat.nextptr, degrmax = 0; /* For all roots in root list */
rootptr != &treeptr->rootdat; ) {
degrval = rootptr->deflval >> 1; /* Get degree, getting rid of flag part */
#ifdef FIBO_DEBUG
if (degrval >= (sizeof (INT) << 3))
errorPrint ("fiboTreeConsolidate: invalid node degree");
#endif /* FIBO_DEBUG */
if (degrtab[degrval] == NULL) { /* If no tree with same degree already found */
if (degrval > degrmax) /* Record highest degree found */
degrmax = degrval;
degrtab[degrval] = rootptr; /* Record tree as first tree with this degree */
rootptr = nextptr; /* Process next root in list during next iteration */
nextptr = rootptr->linkdat.nextptr;
}
else {
FiboNode * oldrptr; /* Root which will no longer be a root */
FiboNode * chldptr;
oldrptr = degrtab[degrval]; /* Assume old root is worse */
if (treeptr->cmpfptr (oldrptr, rootptr) <= 0) { /* If old root is still better */
oldrptr = rootptr; /* This root will be linked to it */
rootptr = degrtab[degrval]; /* We will go on processing this root */
}
degrtab[degrval] = NULL; /* Remaining root changes degree so leaves this cell */
fiboTreeUnlink (oldrptr); /* Old root is no longer a root */
oldrptr->deflval &= ~1; /* Whatever old root flag was, it is reset to 0 */
oldrptr->pareptr = rootptr; /* Remaining root is now father of old root */
chldptr = rootptr->chldptr; /* Get first child of remaining root */
if (chldptr != NULL) { /* If remaining root had already some children, link old root with them */
rootptr->deflval += 2; /* Increase degree by 1, that is, by 2 with left shift in deflval */
fiboTreeLinkAfter (chldptr, oldrptr);
}
else { /* Old root becomes first child of remaining root */
rootptr->deflval = 2; /* Real degree set to 1, and flag set to 0 */
rootptr->chldptr = oldrptr;
oldrptr->linkdat.prevptr = /* Chain old root to oneself as only child */
oldrptr->linkdat.nextptr = oldrptr;
}
} /* Process again remaining root as its degree has changed */
}
/* Pass 2: scan the degree array for the best remaining root,
** resetting every used cell back to NULL for the next call. */
bestptr = NULL;
for (degrval = 0; degrval <= degrmax; degrval ++) {
if (degrtab[degrval] != NULL) { /* If some tree is found */
bestptr = degrtab[degrval]; /* Record it as potential best */
degrtab[degrval] = NULL; /* Clean-up used part of array */
degrval ++; /* Go on at next cell in next loop */
break;
}
}
for ( ; degrval <= degrmax; degrval ++) { /* For remaining roots once a potential best root has been found */
if (degrtab[degrval] != NULL) {
if (treeptr->cmpfptr (degrtab[degrval], bestptr) < 0) /* If new root is better */
bestptr = degrtab[degrval]; /* Record new root as best root */
degrtab[degrval] = NULL; /* Clean-up used part of array */
}
}
return (bestptr);
}
/* This routine returns the node of minimum
** key in the given tree. The node is searched
** for each time this routine is called, so this
** information should be recorded if needed.
** This is the non-macro version, for testing
** and setting up breakpoints.
** (fiboTreeMinMacro expands to fiboTreeConsolidate,
** see fibo.h, so calling this also consolidates
** the root list as a side effect.)
** It returns:
** - !NULL : pointer to best element found.
** - NULL : Fibonacci tree is empty.
*/
#ifndef fiboTreeMin
FiboNode *
fiboTreeMin (
FiboTree * const treeptr)
{
FiboNode * bestptr;
bestptr = fiboTreeMinMacro (treeptr);
#ifdef FIBO_DEBUG
fiboTreeCheck (treeptr);
#endif /* FIBO_DEBUG */
return (bestptr);
}
#endif /* fiboTreeMin */
/* This routine adds the given node to the
** given tree. This is the non-macro version,
** for testing and setting up breakpoints.
** (Note: fibo.h defines fiboTreeAdd as
** fiboTreeAddMacro by default, so this function
** body is normally compiled out.)
** It returns:
** - void : in all cases.
*/
#ifndef fiboTreeAdd
void
fiboTreeAdd (
FiboTree * const treeptr,
FiboNode * const nodeptr)
{
fiboTreeAddMacro (treeptr, nodeptr);
#ifdef FIBO_DEBUG
fiboTreeCheck (treeptr);
#endif /* FIBO_DEBUG */
}
#endif /* fiboTreeAdd */
/* This routine deletes the given node from
** the given tree, whatever this node is (root
** or non root). This is the non-macro version,
** for testing and setting up breakpoints.
** It returns:
** - void : in all cases.
*/
#ifndef fiboTreeDel
void
fiboTreeDel (
FiboTree * const treeptr,
FiboNode * const nodeptr)
{
fiboTreeDelMacro (treeptr, nodeptr);
#ifdef FIBO_DEBUG
/* Scrub the removed node's pointers so that any
** use-after-delete is caught early in debug builds. */
nodeptr->pareptr =
nodeptr->chldptr =
nodeptr->linkdat.prevptr =
nodeptr->linkdat.nextptr = NULL;
fiboTreeCheck (treeptr);
#endif /* FIBO_DEBUG */
}
#endif /* fiboTreeDel */
/* These routines check the consistency of a
** Fibonacci tree: sibling list linkage, parent
** pointers, and recorded node degrees.
** They return:
** - 0 : if the structure is consistent.
** - !0 : on error.
*/
#ifdef FIBO_DEBUG
/* Recursively check one node's child ring and subtrees. */
static
int
fiboTreeCheck2 (
const FiboNode * const nodeptr)
{
FiboNode * chldptr;
int degrval;
degrval = 0;
chldptr = nodeptr->chldptr;
if (chldptr != NULL) {
do {
if (chldptr->linkdat.nextptr->linkdat.prevptr != chldptr) {
errorPrint ("fiboTreeCheck: bad child linked list");
return (1);
}
if (chldptr->pareptr != nodeptr) {
errorPrint ("fiboTreeCheck: bad child parent");
return (1);
}
if (fiboTreeCheck2 (chldptr) != 0)
return (1);
degrval ++;
chldptr = chldptr->linkdat.nextptr;
} while (chldptr != nodeptr->chldptr);
}
if (degrval != (nodeptr->deflval >> 1)) { /* Real node degree is obtained by discarding lowest bit */
errorPrint ("fiboTreeCheck2: invalid child information");
return (1);
}
return (0);
}
/* Check every root in the tree's root list. */
int
fiboTreeCheck (
const FiboTree * const treeptr)
{
FiboNode * nodeptr;
for (nodeptr = treeptr->rootdat.linkdat.nextptr;
nodeptr != &treeptr->rootdat; nodeptr = nodeptr->linkdat.nextptr) {
if (nodeptr->linkdat.nextptr->linkdat.prevptr != nodeptr) {
errorPrint ("fiboTreeCheck: bad root linked list");
return (1);
}
if (nodeptr->pareptr != NULL) {
errorPrint ("fiboTreeCheck: bad root parent");
return (1);
}
if (fiboTreeCheck2 (nodeptr) != 0)
return (1);
}
return (0);
}
#endif /* FIBO_DEBUG */

205
ompi/mca/topo/treematch/treematch/fibo.h Обычный файл
Просмотреть файл

@ -0,0 +1,205 @@
/* Copyright 2010 IPB, INRIA & CNRS
**
** This file originally comes from the Scotch software package for
** static mapping, graph partitioning and sparse matrix ordering.
**
** This software is governed by the CeCILL-B license under French law
** and abiding by the rules of distribution of free software. You can
** use, modify and/or redistribute the software under the terms of the
** CeCILL-B license as circulated by CEA, CNRS and INRIA at the following
** URL: "http://www.cecill.info".
**
** As a counterpart to the access to the source code and rights to copy,
** modify and redistribute granted by the license, users are provided
** only with a limited warranty and the software's author, the holder of
** the economic rights, and the successive licensors have only limited
** liability.
**
** In this respect, the user's attention is drawn to the risks associated
** with loading, using, modifying and/or developing or reproducing the
** software by the user in light of its specific status of free software,
** that may mean that it is complicated to manipulate, and that also
** therefore means that it is reserved for developers and experienced
** professionals having in-depth computer knowledge. Users are therefore
** encouraged to load and test the software's suitability as regards
** their requirements in conditions enabling the security of their
** systems and/or data to be ensured and, more generally, to use and
** operate it in the same conditions as regards security.
**
** The fact that you are presently reading this means that you have had
** knowledge of the CeCILL-B license and that you accept its terms.
*/
/************************************************************/
/** **/
/** NAME : fibo.h **/
/** **/
/** AUTHOR : Francois PELLEGRINI **/
/** **/
/** FUNCTION : This module contains the definitions of **/
/** the generic Fibonacci trees. **/
/** **/
/** DATES : # Version 1.0 : from : 01 may 2010 **/
/** to 12 may 2010 **/
/** **/
/** NOTES : # Since this module has originally been **/
/** designed as a gain keeping data **/
/** structure for local optimization **/
/** algorithms, the computation of the **/
/** best node is only done when actually **/
/** searching for it. **/
/** This is most useful when many **/
/** insertions and deletions can take **/
/** place in the mean time. This is why **/
/** this data structure does not keep **/
/** track of the best node, unlike most **/
/** implementations do. **/
/** **/
/************************************************************/
/*
** The type and structure definitions.
*/
/* The doubly linked list structure (circular sibling ring). */
typedef struct FiboLink_ {
struct FiboNode_ * prevptr; /*+ Pointer to previous sibling element +*/
struct FiboNode_ * nextptr; /*+ Pointer to next sibling element +*/
} FiboLink;
/* The tree node data structure. The deflval
variable merges degree and flag variables.
The degree of a node is smaller than
"bitsizeof (INT)", so it can be held in an
"int". The flag value is stored in the
lowest bit of the value. */
typedef struct FiboNode_ {
struct FiboNode_ * pareptr; /*+ Pointer to parent element, if any +*/
struct FiboNode_ * chldptr; /*+ Pointer to first child element, if any +*/
FiboLink linkdat; /*+ Pointers to sibling elements +*/
int deflval; /*+ Lowest bit: flag value; other bits: degree value +*/
} FiboNode;
/* The tree data structure. The fake dummy node aims
at handling root node insertion without any test.
This is important as many insertions have to be
performed. */
typedef struct FiboTree_ {
FiboNode rootdat; /*+ Dummy node for fast root insertion +*/
FiboNode ** restrict degrtab; /*+ Consolidation array of size "bitsizeof (INT)" +*/
int (* cmpfptr) (const FiboNode * const, const FiboNode * const); /*+ Comparison routine +*/
} FiboTree;
/*
** The macro definitions.
*/
/* This is the core of the module. All of
the algorithms have been de-recursived
and written as macros. */
/* Insert node (n) right after node (o) in its sibling ring. */
#define fiboTreeLinkAfter(o,n) do { \
FiboNode * nextptr; \
nextptr = (o)->linkdat.nextptr; \
(n)->linkdat.nextptr = nextptr; \
(n)->linkdat.prevptr = (o); \
nextptr->linkdat.prevptr = (n); \
(o)->linkdat.nextptr = (n); \
} while (0)
/* Remove node (n) from its sibling ring (its own pointers are left dangling). */
#define fiboTreeUnlink(n) do { \
(n)->linkdat.prevptr->linkdat.nextptr = (n)->linkdat.nextptr; \
(n)->linkdat.nextptr->linkdat.prevptr = (n)->linkdat.prevptr; \
} while (0)
/* Add node (n) to tree (t) as a fresh root of degree 0 with flag cleared. */
#define fiboTreeAddMacro(t,n) do { \
(n)->pareptr = NULL; \
(n)->chldptr = NULL; \
(n)->deflval = 0; \
fiboTreeLinkAfter (&((t)->rootdat), (n)); \
} while (0)
/* Finding the minimum requires a consolidation pass (see NOTES in the header). */
#define fiboTreeMinMacro(t) (fiboTreeConsolidate (t))
/* Promote every child of node (n) to a root of tree (t). */
#define fiboTreeCutChildren(t,n) do { \
FiboNode * chldptr; \
chldptr = (n)->chldptr; \
if (chldptr != NULL) { \
FiboNode * cendptr; \
cendptr = chldptr; \
do { \
FiboNode * nextptr; \
nextptr = chldptr->linkdat.nextptr; \
chldptr->pareptr = NULL; \
fiboTreeLinkAfter (&((t)->rootdat), chldptr); \
chldptr = nextptr; \
} while (chldptr != cendptr); \
} \
} while (0)
/* Delete node (n) from tree (t): unlink it, promote its children,
then perform cascading cuts up the ancestor chain using the flag
bit (lowest bit of deflval) to decide where to stop. */
#define fiboTreeDelMacro(t,n) do { \
FiboNode * pareptr; \
FiboNode * rghtptr; \
pareptr = (n)->pareptr; \
fiboTreeUnlink (n); \
fiboTreeCutChildren ((t), (n)); \
if (pareptr == NULL) \
break; \
rghtptr = (n)->linkdat.nextptr; \
while (1) { \
FiboNode * gdpaptr; \
int deflval; \
deflval = pareptr->deflval - 2; \
pareptr->deflval = deflval | 1; \
gdpaptr = pareptr->pareptr; \
pareptr->chldptr = (deflval <= 1) ? NULL : rghtptr; \
if (((deflval & 1) == 0) || (gdpaptr == NULL)) \
break; \
rghtptr = pareptr->linkdat.nextptr; \
fiboTreeUnlink (pareptr); \
pareptr->pareptr = NULL; \
fiboTreeLinkAfter (&((t)->rootdat), pareptr); \
pareptr = gdpaptr; \
} \
} while (0)
/*
** The function prototypes.
*/
/* This set of definitions allows the user
to specify whether he prefers to use
the fibonacci routines as macros or as
regular functions, for instance for
debugging. */
#define fiboTreeAdd fiboTreeAddMacro
/* #define fiboTreeDel fiboTreeDelMacro */
/* #define fiboTreeMin fiboTreeMinMacro */
/* NOTE(review): when this header is included without FIBO defined
(i.e. outside fibo.c), "static" is temporarily defined to nothing so
the fiboTreeCheck2 prototype below stays valid; presumably intentional
Scotch idiom, but it shadows a keyword — confirm before reusing. */
#ifndef FIBO
#define static
#endif
int fiboTreeInit (FiboTree * const, int (*) (const FiboNode * const, const FiboNode * const));
void fiboTreeExit (FiboTree * const);
void fiboTreeFree (FiboTree * const);
FiboNode * fiboTreeConsolidate (FiboTree * const);
#ifndef fiboTreeAdd
void fiboTreeAdd (FiboTree * const, FiboNode * const);
#endif /* fiboTreeAdd */
#ifndef fiboTreeDel
void fiboTreeDel (FiboTree * const, FiboNode * const);
#endif /* fiboTreeDel */
#ifndef fiboTreeMin
FiboNode * fiboTreeMin (FiboTree * const);
#endif /* fiboTreeMin */
#ifdef FIBO_DEBUG
int fiboTreeCheck (const FiboTree * const);
static int fiboTreeCheck2 (const FiboNode * const);
#endif /* FIBO_DEBUG */
#undef static

Просмотреть файл

@ -0,0 +1,339 @@
#include <stdlib.h>
#include <stdio.h>
#include "k-partitioning.h"
#include "tm_mt.h"
#include "tm_verbose.h"
void memory_allocation(PriorityQueue ** Q, PriorityQueue ** Qinst, double *** D, int n, int k);
void initialization(int * const part, double ** const matrice, PriorityQueue * const Qpart, PriorityQueue * const Q, PriorityQueue * const Qinst, double ** const D, int n, int k, int * const deficit, int * const surplus);
void algo(int * const part, double ** const matrice, PriorityQueue * const Qpart, PriorityQueue * const Q, PriorityQueue * const Qinst, double ** const D, int n, int * const deficit, int * const surplus);
double nextGain(PriorityQueue * const Qpart, PriorityQueue * const Q, int * const deficit, int * const surplus);
void balancing(int n, int deficit, int surplus, double ** const D, int * const part);
void destruction(PriorityQueue * Qpart, PriorityQueue * Q, PriorityQueue * Qinst, double ** D, int n, int k);
void allocate_vertex2(int u, int *res, double **comm, int n, int *size, int max_size);
double eval_cost2(int *,int,double **);
int *kpartition_greedy2(int k, double **comm, int n, int nb_try_max, int *constraints, int nb_constraints);
int* build_p_vector(double **comm, int n, int k, int greedy_trials, int * constraints, int nb_constraints);
/* Entry point of the k-partitioning refinement: builds an initial partition
** of the n vertices of comm into k subsets, then repeatedly applies gain-based
** moves until no positive-gain move remains, and finally rebalances.
** Returns a freshly allocated partition vector of size n (caller frees). */
int* kPartitioning(double ** comm, int n, int k, int * constraints, int nb_constraints, int greedy_trials)
{
  PriorityQueue part_queue;                /* best achievable gain per subset */
  PriorityQueue *vertex_queues = NULL;     /* per subset: its vertices, keyed by gain */
  PriorityQueue *gain_queues = NULL;       /* per vertex: D(i,j) for each subset j */
  double **gain_mat = NULL;                /* the D(i,j) matrix */
  int *partition = NULL;
  int deficit = 0, surplus = 0;
  int nb_real = n - nb_constraints;        /* "real" vertices, excluding dumb ones */

  /* Initial partition vector (greedy or cyclic, depending on greedy_trials) */
  partition = build_p_vector(comm, n, k, greedy_trials, constraints, nb_constraints);

  memory_allocation(&vertex_queues, &gain_queues, &gain_mat, nb_real, k);
  initialization(partition, comm, &part_queue, vertex_queues, gain_queues, gain_mat, nb_real, k, &deficit, &surplus);

  /* Main refinement loop: keep moving vertices while a positive gain exists */
  while (nextGain(&part_queue, vertex_queues, &deficit, &surplus) > 0)
    algo(partition, comm, &part_queue, vertex_queues, gain_queues, gain_mat, nb_real, &deficit, &surplus);

  /* If the partition ended up unbalanced, make one last compensating move */
  balancing(nb_real, deficit, surplus, gain_mat, partition);

  destruction(&part_queue, vertex_queues, gain_queues, gain_mat, nb_real, k);
  return partition;
}
/* Allocates the work structures used by kPartitioning:
** - *Q     : one PriorityQueue per subset (k of them)
** - *Qinst : one PriorityQueue per vertex (n of them)
** - *D     : an n x k matrix of doubles, zero-initialized.
** The original code left calloc/malloc results unchecked, so an OOM
** crashed later on a NULL dereference; we now fail fast instead. */
void memory_allocation(PriorityQueue ** Q, PriorityQueue ** Qinst, double *** D, int n, int k)
{
  int i;
  *Q = calloc(k, sizeof(PriorityQueue));     /* one Q for each partition */
  *Qinst = calloc(n, sizeof(PriorityQueue)); /* one Qinst for each vertex */
  *D = malloc(sizeof(double *) * n);         /* D's size is n * k */
  if ((*Q == NULL) || (*Qinst == NULL) || (*D == NULL)) {
    fprintf(stderr, "%s: memory allocation failed\n", __func__);
    exit(-1); /* matches the error style used in algo() */
  }
  for (i = 0; i < n; ++i) {
    (*D)[i] = calloc(k, sizeof(double));
    if ((*D)[i] == NULL) {
      fprintf(stderr, "%s: memory allocation failed\n", __func__);
      exit(-1);
    }
  }
}
/* Builds the initial state of the refinement: fills the D(i,j) gain matrix
** (D[i][j] = total communication of vertex i with the vertices currently in
** subset j) and loads the three queue families from it. deficit/surplus are
** both reset to 0, meaning "the partition is currently balanced". */
void initialization(int * const part, double ** const matrice, PriorityQueue * const Qpart, PriorityQueue * const Q, PriorityQueue * const Qinst, double ** const D, int n, int k, int * const deficit, int * const surplus)
{
int i,j;
/* ##### PriorityQueue initializations ##### */
/* We initialize Qpart with a size of k because it contains the subsets' indexes. */
PQ_init(Qpart, k);
/* We initialize each Q[i] with a size of n because each vertex is in one of these queues at any time. */
/* However we could set a size of (n/k)+1 as this is the maximum size of a subset when the partition is not balanced. */
for(i=0; i<k; ++i)
PQ_init(&Q[i], n);
/* We initialize each Qinst[i] with a size of k because for each vertex i, Qinst[i] contains the D(i,j) values for j = 0...(k-1) */
for(i=0; i<n; ++i)
PQ_init(&Qinst[i], k);
/* ##### Computing the D(i,j) values ##### */
for(i=0; i < n; ++i) /*for each vertex i*/
{
for(j=0; j < n; ++j) /*and for each vertex j*/
{
D[i][part[j]] += matrice[i][j];
}
}
/* ##### Filling up the queues ##### */
/* ### Qinst ### */
for(i=0; i < n; ++i) /*for each vertex i*/
for(j=0; j < k; ++j) /*and for each subset j*/
PQ_insert(&Qinst[i], j, D[i][j]); /*we insert the corresponding D(i,j) value in Qinst[i]*/
/* ### Q ### */
for(i=0; i<n; ++i) /*for each vertex i*/
PQ_insert(&Q[part[i]], i, PQ_findMaxKey(&Qinst[i])-D[i][part[i]]); /*we insert in Q[part[i]] the vertex i with its highest possible gain*/
/* ### Qpart ### */
for(i=0; i < k; ++i) /*for each subset i*/
PQ_insert(Qpart, i, PQ_findMaxKey(&Q[i])); /*we insert it in Qpart with the highest possible gain by one of its vertex as key*/
/* ##### Initialization of deficit/surplus ##### */
*surplus = *deficit = 0;
}
/* One refinement step: picks the vertex u with the highest possible gain
** (from the whole partition if balanced, from the surplus subset otherwise),
** moves it to its best destination subset, and incrementally updates the
** D matrix and all three queue families. Exact update order matters:
** u is removed from its queues BEFORE the neighbour scan below. */
void algo(int * const part, double ** const matrice, PriorityQueue * const Qpart, PriorityQueue * const Q, PriorityQueue * const Qinst, double ** const D, int n, int * const deficit, int * const surplus)
{
int p,u,v,j;
double d;
if(*deficit == *surplus) /*if the current partition is balanced*/
{
p = PQ_deleteMax(Qpart); /*we get the subset with the highest possible gain in p and remove it from Qpart*/
u = PQ_deleteMax(&Q[p]); /*then we get the vertex with this highest possible gain in u and remove it from Q[p] */
*deficit = part[u]; /*p becomes the deficit */
}
else /*the current partition is not balanced*/
{
u = PQ_deleteMax(&Q[*surplus]); /*we get the vertex with the highest possible gain in surplus and remove it from Q[surplus] */
PQ_delete(Qpart, part[u]); /*then we remove surplus from Qpart (note that u is from surplus so part[u] is surplus) */
}
d = PQ_findMaxKey(&Q[part[u]]); /*we get the next highest possible gain in part[u] (without taking u in account as we already removed it from Q[part[u])*/
PQ_insert(Qpart, part[u], d); /*we put part[u] back in Qpart with its new highest possible gain*/
j = PQ_deleteMax(&Qinst[u]); /*we get from Qinst[u] the subset in which we have to move u to get the highest gain.*/
if ( j < 0){ /* a negative index means the queue was empty/corrupt: fatal */
if(tm_get_verbose_level() >= CRITICAL)
fprintf(stderr,"Error Max element in priority queue negative!\n");
exit(-1);
}
*surplus = j; /*this subset becomes surplus*/
for(v=0; v < n; ++v) /*we scan though all edges (u,v) */
{
j = part[u]; /*we set j to the starting subset */
D[v][j]= D[v][j] - matrice[u][v]; /*we compute the new D[v, i] (here j has the value of the starting subset of u, that's why we say i) */
PQ_adjustKey(&Qinst[v], j, D[v][j]); /*we update this gain in Qinst[v]*/
j = *surplus; /*we put back the arrival subset in j*/
D[v][j] = D[v][j] + matrice[u][v]; /*matrice[u][v]; we compute the new D[v, j]*/
PQ_adjustKey(&Qinst[v], j, D[v][j]);/*we update this gain in Qinst[v]*/
d = PQ_findMaxKey(&Qinst[v]) - D[v][part[v]]; /*we compute v's new highest possible gain*/
PQ_adjustKey(&Q[part[v]], v, d); /*we update it in Q[p[v]]*/
d = PQ_findMaxKey(&Q[part[v]]); /*we get the highest possible gain in v's subset*/
PQ_adjustKey(Qpart, part[v], d); /*we update it in Qpart*/
}
part[u] = *surplus; /*we move u from i to j (here surplus has the value of j the arrival subset)*/
d = PQ_findMaxKey(&Qinst[u]) - D[u][part[u]]; /*we compute the new u's highest possible gain*/
if(!PQ_isEmpty(&Qinst[u])) /*if at least one more move of u is possible*/
PQ_insert(&Q[part[u]], u, d); /*we insert u in the Q queue of its new subset*/
PQ_adjustKey(Qpart, part[u], d); /*we update the new highest possible gain in u's subset*/
}
/* Returns the gain of the best currently available move:
** - balanced partition (deficit == surplus): the best gain over all subsets;
** - unbalanced partition: the best gain from the surplus subset only. */
double nextGain(PriorityQueue * const Qpart, PriorityQueue * const Q, int * const deficit, int * const surplus)
{
  return (*deficit == *surplus)
       ? PQ_findMaxKey(Qpart)
       : PQ_findMaxKey(&Q[*surplus]);
}
/* Final balancing pass: when the partition ended unbalanced, move the single
** best vertex from the surplus subset to the deficit subset. No-op when the
** partition is already balanced (surplus == deficit). */
void balancing(int n, int deficit, int surplus, double ** const D, int * const part)
{
  int v;
  PriorityQueue candidates; /* possible moves from surplus to deficit */

  if (surplus == deficit) /* already balanced: nothing to do */
    return;

  PQ_init(&candidates, n);
  for (v = 0; v < n; ++v) {
    if (part[v] == surplus) /* key = gain of moving v from surplus to deficit */
      PQ_insert(&candidates, v, D[v][deficit] - D[v][surplus]);
  }
  part[PQ_deleteMax(&candidates)] = deficit; /* apply the best move */
  PQ_exit(&candidates);
}
/* Releases every structure built by memory_allocation/initialization.
** Qpart itself lives on the caller's stack, so only its contents are freed. */
void destruction(PriorityQueue * Qpart, PriorityQueue * Q, PriorityQueue * Qinst, double ** D, int n, int k)
{
  int idx;

  PQ_exit(Qpart);
  for (idx = 0; idx < k; ++idx)
    PQ_exit(&Q[idx]);
  free(Q);
  for (idx = 0; idx < n; ++idx)
    PQ_exit(&Qinst[idx]);
  free(Qinst);
  for (idx = 0; idx < n; ++idx)
    free(D[idx]);
  free(D);
}
/* Randomized greedy construction of an initial k-partition, repeated
** nb_try_max times; the cheapest result (eval_cost2) is kept and returned
** as a freshly allocated vector of size n (caller frees). Dumb vertices
** (constraints) are pinned first, then k random seeds, then every remaining
** vertex goes to the partition maximizing its communication.
** NOTE(review): allocations are unchecked, and the seeding do/while relies
** on finding unassigned vertices — fine while k*max_size <= n, confirm. */
int *kpartition_greedy2(int k, double **comm, int n, int nb_try_max, int *constraints, int nb_constraints)
{
int *res = NULL, *best_res=NULL, *size = NULL;
int i,j,nb_trials;
int max_size;
double cost, best_cost = -1;
for( nb_trials = 0 ; nb_trials < nb_try_max ; nb_trials++ ){
res = (int *)malloc(sizeof(int)*n);
for ( i = 0 ; i < n ; ++i )
res[i] = -1; /* -1 marks "not yet assigned" */
size = (int *)calloc(k,sizeof(int));
max_size = n/k;
/* put "dumb" vertices in the correct partition if there are any*/
if (nb_constraints){ /*if there are at least one constraint*/
int nb_real_nodes = n-nb_constraints; /*this is the number of "real" nodes by opposition to the dumb ones*/
for(i=0; i<nb_constraints; ++i) /*for each constraint*/
{
int i_part = constraints[i]/max_size; /*we compute its partition*/
res[nb_real_nodes+i] = i_part; /*and we set it in partition vector*/
size[i_part]++; /*we update the partition's size*/
}
}
/* choose k initial "true" vertices at random and put them in a different partition */
for ( i = 0 ; i < k ; ++i ){
/* if the partition is full of dumb vertices go to next partition*/
if(size[i] >= max_size)
continue;
/* find a vertex not already partitionned*/
do{
/* call the mersenne twister PRNG of tm_mt.c*/
j = genrand_int32() % n;
} while ( res[j] != -1 );
/* allocate and update size of partition*/
res[j] = i;
/* printf("random: %d -> %d\n",j,i); */
size[i]++;
}
/* allocate each unallocated vertices in the partition that maximize the communication*/
for( i = 0 ; i < n ; ++i )
if( res[i] == -1)
allocate_vertex2(i, res, comm, n-nb_constraints, size, max_size);
cost = eval_cost2(res,n-nb_constraints,comm);
/*print_1D_tab(res,n);
printf("cost=%.2f\n",cost);*/
if((cost<best_cost) || (best_cost == -1)){ /* keep the cheapest trial so far */
best_cost=cost;
free(best_res);
best_res=res; /* ownership of res moves to best_res */
}else
free(res);
free(size);
}
/*print_1D_tab(best_res,n);
printf("best_cost=%.2f\n",best_cost);
*/
return best_res;
}
/* Assigns the unallocated vertex u to the partition (among the partitions of
** already-placed vertices with room left, i.e. size < max_size) that maximizes
** comm[u][i] with some placed vertex i.
** Fix: the original unconditionally executed res[u] = best_part and
** size[best_part]++ even when no candidate was found, writing size[-1]
** (out-of-bounds, undefined behavior). We now guard that case and leave
** res[u] unassigned (-1). */
void allocate_vertex2(int u, int *res, double **comm, int n, int *size, int max_size)
{
  int i, best_part = -1;
  double cost, best_cost = -1;

  for (i = 0; i < n; ++i) {
    if ((res[i] != -1) && (size[res[i]] < max_size)) {
      cost = comm[u][i];
      if (cost > best_cost) {
        best_cost = cost;
        best_part = res[i];
      }
    }
  }
  if (best_part == -1) { /* no placed vertex with room: avoid size[-1] UB */
    fprintf(stderr, "%s: no available partition for vertex %d\n", __func__, u);
    return; /* res[u] stays -1 */
  }
  res[u] = best_part;
  size[best_part]++;
}
/* Cost of a partition: the total communication weight carried by cut edges,
** i.e. the sum of comm[i][j] over all pairs placed in different subsets. */
double eval_cost2(int *partition, int n, double **comm)
{
  double total = 0;
  int row, col;

  for (row = 0; row < n; ++row) {
    for (col = row + 1; col < n; ++col) { /* each unordered pair once */
      if (partition[row] == partition[col])
        continue; /* intra-subset edges are free */
      total += comm[row][col];
    }
  }
  return total;
}
/* Builds the initial partition vector of size n:
** - greedy_trials > 0 : randomized greedy construction (kpartition_greedy2);
** - greedy_trials == 0: deterministic cyclic fill, with dumb (constrained)
**   vertices pinned first at indexes nb_real_nodes..n-1.
** Returns a freshly allocated vector (caller frees). */
int* build_p_vector(double **comm, int n, int k, int greedy_trials, int * constraints, int nb_constraints)
{
int * part = NULL;
if(greedy_trials>0) /*if greedy_trials > 0 then we use kpartition_greedy with greedy_trials trials*/
{
part = kpartition_greedy2(k, comm, n, greedy_trials, constraints, nb_constraints);
}
else
{
int * size = calloc(k, sizeof(int));
int i,j;
int nodes_per_part = n/k;
int nb_real_nodes = n-nb_constraints;
part = malloc(sizeof(int) * n);
for(i=0; i<nb_constraints; i++) /*for each constraint*/
{
int i_part = constraints[i]/nodes_per_part; /*we compute the partition where we have to put this constraint*/
part[nb_real_nodes+i] = i_part;
size[i_part]++;
}
j=0;
/* now we have to fill the partitions with the "real" nodes */
for(i=0; i<nb_real_nodes; i++) /*for each node*/
{
if(size[j] < nodes_per_part) /*if j partition isn't full*/
{
size[j]++;
part[i] = j; /*then we put the node in this part*/
}
else /*otherwise we decrement i to get the same node in the next loop*/
{
i--; /* retry the same vertex against the next partition */
}
j = (j+1)%k; /*and we change j to the next partition*/
}
free(size);
}
return part;
}

Просмотреть файл

@ -0,0 +1,20 @@
#ifndef K_PARTITIONING
#define K_PARTITIONING
#include "PriorityQueue.h"
/*
kPartitioning : function to call the k-partitioning algorithm
- comm : the communication matrix
- n : the number of vertices (including dumb vertices)
- k : the number of partitions
- constraints : the list of constraints
- nb_constraints : the number of constraints
- greedy_trials : the number of trials to build the partition vector with kpartition_greedy
- 0 : cyclic distribution of vertices
- > 0 : use of kpartition_greedy with greedy_trials number of trials
*/
int* kPartitioning(double ** comm, int n, int k, int * const constraints, int nb_constraints, int greedy_trials);
#endif /*K_PARTITIONING*/

Просмотреть файл

@ -1,56 +0,0 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
//#include "tm_hwloc.h"
#include "tm_tree.h"
#include "tm_mapping.h"
#include "tm_timings.h"
/* Standalone test driver (removed file in this commit): loads an architecture
** description and a communication pattern, runs the TreeMatch mapping, and
** prints the resulting process placement and its timing.
** NOTE(review): the stray ';' after '{' is an empty statement — harmless. */
int main(int argc, char**argv){;
tree_t *comm_tree=NULL;
double **comm,**arch;
tm_topology_t *topology;
int nb_processes,nb_cores;
int *sol,*k;
if(argc<3){
fprintf(stderr,"Usage: %s <Architecture tgt> <communication partern file>\n",argv[0]);
return -1;
}
topology=tgt_to_tm(argv[1],&arch);
optimize_topology(&topology);
nb_processes=build_comm(argv[2],&comm);
sol=(int*)MALLOC(sizeof(int)*nb_processes);
nb_cores=nb_processing_units(topology);
k=(int*)MALLOC(sizeof(int)*nb_cores);
// TreeMatchMapping(nb_processes,nb_cores,comm,sol);
/* mapping is impossible when there are more processes than cores */
if(nb_processes>nb_cores){
fprintf(stderr,"Error: to many processes (%d) for this topology (%d nodes)\n",nb_processes,nb_cores);
exit(-1);
}
TIC;
comm_tree=build_tree_from_topology(topology,comm,nb_processes,NULL,NULL);
map_topology_simple(topology,comm_tree,sol,k);
double duration=TOC;
printf("mapping duration: %f\n",duration);
printf("TreeMatch: ");
print_sol_inv(nb_processes,sol,comm,arch);
//print_1D_tab(k,nb_cores);
// display_other_heuristics(topology,nb_processes,comm,arch);
//display_tab(arch,nb_cores);
FREE_topology(topology);
//FREE_tree(comm_tree);
FREE(sol);
FREE(comm);
FREE(arch);
return 0;
}

Просмотреть файл

@ -1,31 +0,0 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "tm_hwloc.h"
#include "tm_tree.h"
#include "tm_mapping.h"
#include "tm_timings.h"
/* Standalone driver (removed file in this commit): loads an architecture
** description file and prints its distance/affinity matrix. */
int main(int argc, char**argv){;
tm_topology_t *topology;
int nb_cores;
double **arch;
if(argc<2){
fprintf(stderr,"Usage: %s <Architecture tgt>\n",argv[0]);
return -1;
}
topology=tgt_to_tm(argv[1],&arch);
nb_cores=nb_nodes(topology);
display_tab(arch,nb_cores);
FREE_topology(topology);
FREE(arch);
return 0;
}

Просмотреть файл

@ -31,7 +31,7 @@ static int ilog2(int val)
static int verbose_level = ERROR;
bucket_list_t global_bl = {0};
bucket_list_t global_bl;
int tab_cmp(const void*,const void*);
int old_bucket_id(int,int,bucket_list_t);
@ -47,12 +47,12 @@ void fill_buckets(bucket_list_t);
int is_power_of_2(int);
void partial_sort(bucket_list_t *,double **,int);
void next_bucket_elem(bucket_list_t,int *,int *);
int add_edge_3(tree_t *,tree_t *,int,int,int *);
void FREE_bucket(bucket_t *);
void FREE_tab_bucket(bucket_t **,int);
void FREE_bucket_list(bucket_list_t);
void partial_update_val (int nb_args, void **args);
int add_edge_3(tm_tree_t *,tm_tree_t *,int,int,int *);
void free_bucket(bucket_t *);
void free_tab_bucket(bucket_t **,int);
void free_bucket_list(bucket_list_t);
void partial_update_val (int nb_args, void **args, int thread_id);
double bucket_grouping(tm_affinity_mat_t *,tm_tree_t *, tm_tree_t *, int ,int);
int tab_cmp(const void* x1,const void* x2)
{
int *e1 = NULL,*e2 = NULL,i1,i2,j1,j2;
@ -146,7 +146,7 @@ void check_bucket(bucket_t *b,double **tab,double inf, double sup)
j = b->bucket[k].j;
if((tab[i][j] < inf) || (tab[i][j] > sup)){
if(verbose_level >= CRITICAL)
printf("[%d] (%d,%d):%f not in [%f,%f]\n",k,i,j,tab[i][j],inf,sup);
fprintf(stderr,"[%d] (%d,%d):%f not in [%f,%f]\n",k,i,j,tab[i][j],inf,sup);
exit(-1);
}
}
@ -197,15 +197,20 @@ void add_to_bucket(int id,int i,int j,bucket_list_t bucket_list)
n = bucket_list->nb_buckets;
size = N*N/n;
/* display_bucket(bucket);*/
bucket->bucket = (coord*)realloc(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len));
if(verbose_level >= DEBUG){
printf("Extending bucket %d (%p) from size %d to size %d!\n",
id,bucket->bucket, bucket->nb_elem, bucket->nb_elem+size);
}
bucket->bucket = (coord*)REALLOC(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len));
bucket->bucket_len += size;
if(verbose_level >= DEBUG){
printf("MALLOC/realloc: %d\n",id);
printf("(%d,%d)\n",i,j);
display_bucket(bucket);
printf("\n");
}
/* if(verbose_level >= DEBUG){ */
/* printf("MALLOC/realloc: %d\n",id); */
/* printf("(%d,%d)\n",i,j); */
/* display_bucket(bucket); */
/* printf("\n"); */
/* } */
}
@ -289,7 +294,13 @@ void partial_sort(bucket_list_t *bl,double **tab,int N)
bucket_list_t bucket_list;
int nb_buckets, nb_bits;
/* after these operations, nb_bucket is a power of 2 interger close to log2(N)*/
if( N <= 0){
if(verbose_level >= ERROR )
fprintf(stderr,"Error: tryng to group a matrix of size %d<=0!\n",N);
return;
}
/* after these operations, nb_buckets is a power of 2 interger close to log2(N)*/
nb_buckets = (int)floor(CmiLog2(N));
@ -404,7 +415,7 @@ void next_bucket_elem(bucket_list_t bucket_list,int *i,int *j)
}
int add_edge_3(tree_t *tab_node, tree_t *parent,int i,int j,int *nb_groups)
int add_edge_3(tm_tree_t *tab_node, tm_tree_t *parent,int i,int j,int *nb_groups)
{
/* printf("%d <-> %d ?\n",tab_node[i].id,tab_node[j].id); */
if((!tab_node[i].parent) && (!tab_node[j].parent)){
@ -453,7 +464,7 @@ int add_edge_3(tree_t *tab_node, tree_t *parent,int i,int j,int *nb_groups)
return 0;
}
int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_groups)
int try_add_edge(tm_tree_t *tab_node, tm_tree_t *parent,int arity,int i,int j,int *nb_groups)
{
assert( i != j );
@ -481,40 +492,40 @@ int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_
}
}
void FREE_bucket(bucket_t *bucket)
void free_bucket(bucket_t *bucket)
{
FREE(bucket->bucket);
FREE(bucket);
}
void FREE_tab_bucket(bucket_t **bucket_tab,int N)
void free_tab_bucket(bucket_t **bucket_tab,int N)
{
int i;
for( i = 0 ; i < N ; i++ )
FREE_bucket(bucket_tab[i]);
free_bucket(bucket_tab[i]);
FREE(bucket_tab);
}
void FREE_bucket_list(bucket_list_t bucket_list)
void free_bucket_list(bucket_list_t bucket_list)
{
/* Do not FREE the tab field it is used elsewhere */
FREE_tab_bucket(bucket_list->bucket_tab,bucket_list->nb_buckets);
/* Do not free the tab field it is used elsewhere */
free_tab_bucket(bucket_list->bucket_tab,bucket_list->nb_buckets);
FREE(bucket_list->pivot);
FREE(bucket_list->pivot_tree);
FREE(bucket_list);
}
void partial_update_val (int nb_args, void **args){
void partial_update_val (int nb_args, void **args, int thread_id){
int inf = *(int*)args[0];
int sup = *(int*)args[1];
affinity_mat_t *aff_mat = (affinity_mat_t*)args[2];
tree_t *new_tab_node = (tree_t*)args[3];
tm_affinity_mat_t *aff_mat = (tm_affinity_mat_t*)args[2];
tm_tree_t *new_tab_node = (tm_tree_t*)args[3];
double *res=(double*)args[4];
int l;
if(nb_args != 6){
if(nb_args != 5){
if(verbose_level >= ERROR)
fprintf(stderr,"Wrong number of args in %s: %d\n",__func__, nb_args);
fprintf(stderr,"(Thread: %d) Wrong number of args in %s: %d\n",thread_id, __FUNCTION__, nb_args);
exit(-1);
}
@ -524,7 +535,7 @@ void partial_update_val (int nb_args, void **args){
}
}
void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node,
double bucket_grouping(tm_affinity_mat_t *aff_mat,tm_tree_t *tab_node, tm_tree_t *new_tab_node,
int arity,int M)
{
bucket_list_t bucket_list;
@ -536,10 +547,12 @@ void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_n
int N = aff_mat->order;
double **mat = aff_mat->mat;
verbose_level = get_verbose_level();
verbose_level = tm_get_verbose_level();
if(verbose_level >= INFO )
printf("starting sort of N=%d elements\n",N);
TIC;
partial_sort(&bucket_list,mat,N);
duration = TOC;
@ -662,8 +675,8 @@ void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_n
printf("Bucket: %d, indice:%d\n",bucket_list->cur_bucket,bucket_list->bucket_indice);
printf("val=%f\n",val);
}
FREE_bucket_list(bucket_list);
free_bucket_list(bucket_list);
/* exit(-1); */
/* display_grouping(new_tab_node,M,arity,val); */
return val;
}

Просмотреть файл

@ -28,7 +28,8 @@ typedef struct{
typedef _bucket_list_t *bucket_list_t;
void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node,
int arity,int M);
int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_groups);
double bucket_grouping(tm_affinity_mat_t *aff_mat,tm_tree_t *tab_node, tm_tree_t *new_tab_node,
int arity,int M);
int try_add_edge(tm_tree_t *tab_node, tm_tree_t *parent,int arity,int i,int j,int *nb_groups);
#endif

Просмотреть файл

@ -1,286 +0,0 @@
#include "opal/mca/hwloc/hwloc-internal.h"
#include "tm_tree.h"
#include "tm_mapping.h"
#include <ctype.h>
#include "tm_verbose.h"
double ** tm_topology_to_arch(tm_topology_t *topology,double *cost);
tm_topology_t * tgt_to_tm(char *filename,double **pcost);
int topo_nb_proc(hwloc_topology_t topology,int N);
double ** topology_to_arch(hwloc_topology_t topology);
int symetric(hwloc_topology_t topology);
tm_topology_t* hwloc_to_tm(char *filename,double **pcost);
tm_topology_t* get_local_topo_with_hwloc(void);
/* transform a tgt scotch file into a topology file*/
/* Parse a Scotch "tleaf" target (.tgt) file into a TreeMatch topology.
 *
 * filename: path of the tgt file; the first line must contain the token
 *           "tleaf" followed by the number of levels and then, for each
 *           internal level, an (arity, cost) pair.
 * pcost:    output parameter; on return *pcost points to a freshly
 *           CALLOC'ed array of per-level costs, aggregated so that
 *           cost[i] is the total cost from level i down to the leaves.
 *           Ownership is transferred to the caller.
 *
 * On any error (unreadable file, missing "tleaf" token) the function
 * reports on stderr (at CRITICAL verbosity) and calls exit(-1).
 *
 * NOTE(review): the fgets() return value is not checked, and strtok()
 * results are fed straight to atoi() — a truncated/malformed header line
 * would dereference NULL. TODO: confirm inputs are trusted here.
 */
tm_topology_t * tgt_to_tm(char *filename, double **pcost)
{
tm_topology_t *topology = NULL;
FILE *pf = NULL;
char line[1024];
char *s = NULL;
double *cost = NULL;
int i;
pf = fopen(filename,"r");
if(!pf){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Cannot open %s\n",filename);
exit(-1);
}
if(get_verbose_level() >= INFO)
printf("Reading TGT file: %s\n",filename);
/* The whole description fits on the first line of the file. */
fgets(line,1024,pf);
s = strstr(line,"tleaf");
if(!s){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Syntax error! %s is not a tleaf file\n",filename);
exit(-1);
}
/* Skip the "tleaf" keyword (5 chars) and any following whitespace. */
s += 5;
while(isspace(*s))
s++;
topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
/* +1: the tgt file counts internal levels only; add the leaf level. */
topology->nb_levels = atoi(strtok(s," "))+1;
topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
/* Each internal level contributes an (arity, cost) pair. */
for( i = 0 ; i < topology->nb_levels-1 ; i++ ){
topology->arity[i] = atoi(strtok(NULL," "));
cost[i] = atoi(strtok(NULL," "));
}
/* Leaves have no children. */
topology->arity[topology->nb_levels-1] = 0;
/* cost[topology->nb_levels-1]=0; */
/*aggregate costs*/
/* Fold costs bottom-up so cost[i] is cumulative from level i to leaves. */
for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
cost[i] += cost[i+1];
/* Presumably numbers the processing units of the synthetic tree —
   defined elsewhere; verify against tm_mapping.c. */
build_synthetic_proc_id(topology);
*pcost = cost;
fclose(pf);
/*
topology->arity[0]=nb_proc;
topology->nb_levels=decompose((int)ceil((1.0*nb_obj)/nb_proc),1,topology->arity);
printf("levels=%d\n",topology->nb_levels);
*/
if(get_verbose_level() >= INFO)
printf("Topology built from %s!\n",filename);
return topology;
}
/* Count the processing units (PUs) reachable in an hwloc topology.
 * N is an upper bound used to size the scratch object table.
 * Returns 1 (the first PU) plus the number of its closest peers. */
int topo_nb_proc(hwloc_topology_t topology,int N)
{
int count;
hwloc_obj_t *pu_tab = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*N);

/* Seed with the first PU, then collect up to N-1 of its closest peers. */
pu_tab[0] = hwloc_get_next_obj_by_type(topology,HWLOC_OBJ_PU,NULL);
count = 1 + hwloc_get_closest_objs(topology,pu_tab[0],pu_tab+1,N-1);

FREE(pu_tab);
return count;
}
/* Build an architecture (communication-cost) matrix from an hwloc topology.
 *
 * Returns a freshly MALLOC'ed nb_proc x nb_proc matrix where entry
 * [p1][p2] is speed(d+1), d being the depth of the deepest common
 * ancestor of PUs p1 and p2.  Rows/columns are indexed by the PUs'
 * os_index, not by their logical hwloc index.  Ownership of the matrix
 * (and of every row) is transferred to the caller.
 */
double ** topology_to_arch(hwloc_topology_t topology)
{
int nb_proc,i,j;
hwloc_obj_t obj_proc1,obj_proc2,obj_res;
double **arch = NULL;
nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
arch = (double**)MALLOC(sizeof(double*)*nb_proc);
for( i = 0 ; i < nb_proc ; i++ ){
obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
/* Row is stored under the OS index of the PU, which assumes os_index
   values form a permutation of [0, nb_proc) — TODO confirm. */
arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc);
for( j = 0 ; j < nb_proc ; j++ ){
obj_proc2 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,j);
obj_res = hwloc_get_common_ancestor_obj(topology,obj_proc1,obj_proc2);
/* printf("arch[%d][%d] <- %ld\n",obj_proc1->os_index,obj_proc2->os_index,*((long int*)(obj_res->userdatab))); */
/* Cost is a function of how deep the common ancestor sits. */
arch[obj_proc1->os_index][obj_proc2->os_index]=speed(obj_res->depth+1);
}
}
return arch;
}
/* Return 1 when the hwloc topology is symmetric, i.e. at every depth all
 * objects share the same arity; return 0 at the first mismatch.
 * (The name keeps the historical spelling used by the callers.) */
int symetric(hwloc_topology_t topology)
{
int lvl, idx;
int nb_levels = hwloc_topology_get_depth(topology);
hwloc_obj_t cur;

/* The last level holds leaves; only internal levels need checking. */
for( lvl = 0 ; lvl < nb_levels-1 ; lvl++ ){
int nb_objs = hwloc_get_nbobjs_by_depth(topology, lvl);
unsigned int ref_arity;
cur = hwloc_get_next_obj_by_depth (topology,lvl,NULL);
/* First object at this depth fixes the reference arity. */
ref_arity = cur->arity;
for( idx = 1 ; idx < nb_objs ; idx++ ){
cur = hwloc_get_next_obj_by_depth (topology,lvl,cur);
if( cur->arity != ref_arity )
return 0; /* arity differs: not symmetric */
}
}
return 1;
}
/* Build a TreeMatch topology from an hwloc XML topology file.
 *
 * filename: path to an hwloc XML export.
 * pcost:    output parameter; on return *pcost points to a CALLOC'ed
 *           array of per-level costs, cost[i] = speed(i).  Ownership is
 *           transferred to the caller.
 *
 * The hwloc topology must be symmetric (same arity for all objects of a
 * level); otherwise the function reports on stderr and exits.  The
 * returned tm_topology_t has, per depth: the node count, the arity, and
 * the os_index of every object.  Exits via exit(-1) on a bad XML file.
 */
tm_topology_t* hwloc_to_tm(char *filename,double **pcost)
{
hwloc_topology_t topology;
tm_topology_t *res = NULL;
hwloc_obj_t *objs = NULL;
unsigned topodepth,depth;
int nb_nodes,i;
double *cost;
int err;
/* Build the topology */
hwloc_topology_init(&topology);
err = hwloc_topology_set_xml(topology,filename);
if(err == -1){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename);
exit(-1);
}
/* Collapse single-child levels so the tree keeps only structure
   (hwloc v1 API only; the v2 equivalent is still missing). */
#if HWLOC_API_VERSION < 0x20000
hwloc_topology_ignore_all_keep_structure(topology);
#else
#warning FIXME hwloc v2
#endif
hwloc_topology_load(topology);
/* Test if symetric */
if(!symetric(topology)){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"%s not symetric!\n",filename);
exit(-1);
}
/* work on depth */
topodepth = hwloc_topology_get_depth(topology);
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
res->nb_levels = topodepth;
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels);
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
if(get_verbose_level() >= INFO)
printf("topodepth = %d\n",topodepth);
/* Build TreeMatch topology */
for( depth = 0 ; depth < topodepth ; depth++ ){
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
res->nb_nodes[depth] = nb_nodes;
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
/* Fill objs[1..] with the remaining objects of this depth. */
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
/* Symmetry was checked above, so the first object's arity stands
   for the whole level. */
res->arity[depth] = objs[0]->arity;
if(get_verbose_level() >= INFO)
printf("%d(%d):",res->arity[depth],nb_nodes);
/* Build process id tab */
for (i = 0; i < nb_nodes; i++){
res->node_id[depth][i] = objs[i]->os_index;
/* if(depth==topodepth-1) */
}
FREE(objs);
}
/* Per-level cost vector, returned to the caller through pcost. */
cost = (double*)CALLOC(res->nb_levels,sizeof(double));
for(i=0; i<res->nb_levels; i++){
cost[i] = speed(i);
}
*pcost = cost;
/* Destroy topology object. */
hwloc_topology_destroy(topology);
if(get_verbose_level() >= INFO)
printf("\n");
return res;
}
/* Build a TreeMatch topology by discovering the local machine with hwloc.
 *
 * Same construction as hwloc_to_tm() but without an XML file and without
 * a cost vector: the topology is loaded from the running host.  The
 * machine topology must be symmetric, otherwise the function reports on
 * stderr (at CRITICAL verbosity) and exits.  Ownership of the returned
 * tm_topology_t is transferred to the caller.
 */
tm_topology_t* get_local_topo_with_hwloc(void)
{
hwloc_topology_t topology;
tm_topology_t *res = NULL;
hwloc_obj_t *objs = NULL;
unsigned topodepth,depth;
int nb_nodes,i;
/* Build the topology */
hwloc_topology_init(&topology);
/* Collapse single-child levels (hwloc v1 API; v2 port pending). */
#if HWLOC_API_VERSION < 0x20000
hwloc_topology_ignore_all_keep_structure(topology);
#else
#warning FIXME hwloc v2
#endif
hwloc_topology_load(topology);
/* Test if symetric */
if(!symetric(topology)){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Local toplogy not symetric!\n");
exit(-1);
}
/* work on depth */
topodepth = hwloc_topology_get_depth(topology);
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
res->nb_levels = topodepth;
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels);
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
/* Build TreeMatch topology */
for( depth = 0 ; depth < topodepth ; depth++ ){
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
res->nb_nodes[depth] = nb_nodes;
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
/* Fill objs[1..] with the remaining objects of this depth. */
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
/* Symmetry was checked above: one object's arity represents the level. */
res->arity[depth] = objs[0]->arity;
/* printf("%d:",res->arity[depth]); */
/* Build process id tab */
for (i = 0; i < nb_nodes; i++){
res->node_id[depth][i] = objs[i]->os_index;
/* if(depth==topodepth-1) */
}
FREE(objs);
}
/* Destroy HWLOC topology object. */
hwloc_topology_destroy(topology);
/* printf("\n"); */
return res;
}

Просмотреть файл

@ -1,7 +0,0 @@
#include "opal/mca/hwloc/hwloc-internal.h"
#include "tm_tree.h"
void hwloc_topology_tag(hwloc_topology_t topology);
tm_topology_t* hwloc_to_tm(char *filename,double **pcost);
tm_topology_t * tgt_to_tm(char *filename,double **pcost);
tm_topology_t* get_local_topo_with_hwloc(void);

Просмотреть файл

@ -1,13 +1,12 @@
#include "tm_mapping.h"
#include "tm_mt.h"
#include "tm_kpartitioning.h"
#include "k-partitioning.h"
#include <stdlib.h>
#include <stdio.h>
#include "config.h"
#define USE_KL_KPART 0
#if USE_KL_KPART
#include "k-partitioning.h"
#endif /* USE_KL_KPART */
#define KL_KPART_GREEDY_TRIALS 0
static int verbose_level = ERROR;
@ -15,25 +14,23 @@ static int verbose_level = ERROR;
#define MAX_TRIALS 10
#define USE_KL_STRATEGY 1
#if !defined(MIN)
#define MIN(a,b) ((a)<(b)?(a):(b))
#endif
int fill_tab(int **,int *,int,int,int,int);
void complete_com_mat(double ***,int,int);
void complete_obj_weight(double **,int,int);
void allocate_vertex(int,int *,com_mat_t *,int,int *,int);
double eval_cost(int *, com_mat_t *);
int *kpartition_greedy(int, com_mat_t *,int,int *,int);
constraint_t *split_constraints (int *,int,int,tm_topology_t *,int);
constraint_t *split_constraints (int *,int,int,tm_topology_t *,int, int);
com_mat_t **split_com_mat(com_mat_t *,int,int,int *);
int **split_vertices(int *,int,int,int *);
void FREE_tab_com_mat(com_mat_t **,int);
void FREE_tab_local_vertices(int **,int);
void FREE_const_tab(constraint_t *,int);
void kpartition_build_level_topology(tree_t *,com_mat_t *,int,int,tm_topology_t *,
void free_tab_com_mat(com_mat_t **,int);
void free_tab_local_vertices(int **,int);
void free_const_tab(constraint_t *,int);
void kpartition_build_level_topology(tm_tree_t *,com_mat_t *,int,int,tm_topology_t *,
int *,int *,int,double *,double *);
@ -51,10 +48,14 @@ void allocate_vertex(int u, int *res, com_mat_t *com_mat, int n, int *size, int
best_part = res[i];
break;
}
}else{
for( i = 0 ; i < n ; i++){
if (( res[i] != -1 ) && ( size[res[i]] < max_size )){
cost = (((i)<com_mat->n)) ?com_mat->comm[u][i]:0;
/* if((n<=16) && (u==8)){ */
/* printf("u=%d, i=%d: %f\n",u, i, cost); */
/* } */
if (( cost > best_cost)){
best_cost = cost;
best_part = res[i];
@ -62,8 +63,10 @@ void allocate_vertex(int u, int *res, com_mat_t *com_mat, int n, int *size, int
}
}
}
/* printf("size[%d]: %d\n",best_part, size[best_part]);*/
/* printf("putting(%.2f): %d -> %d\n",best_cost, u, best_part); */
/* if(n<=16){ */
/* printf("size[%d]: %d\n",best_part, size[best_part]); */
/* printf("putting(%.2f): %d -> %d\n",best_cost, u, best_part); */
/* } */
res[u] = best_part;
size[best_part]++;
@ -84,25 +87,45 @@ double eval_cost(int *partition, com_mat_t *com_mat)
int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints)
{
int *res = NULL, *best_res=NULL, *size = NULL;
int *partition = NULL, *best_partition=NULL, *size = NULL;
int i,j,nb_trials;
int max_size, max_val;
double cost, best_cost = -1;
int start, end;
int dumb_id, nb_dumb;
int vl = tm_get_verbose_level();
if(nb_constraints > n){
if(vl >= ERROR){
fprintf(stderr,"Error more constraints (%d) than the problem size (%d)!\n",nb_constraints, n);
}
return NULL;
}
max_size = n/k;
if(vl >= DEBUG){
printf("max_size = %d (n=%d,k=%d)\ncom_mat->n-1=%d\n",max_size,n,k,com_mat->n-1);
printf("nb_constraints = %d\n",nb_constraints);
if(n<=16){
printf("Constraints: ");print_1D_tab(constraints,nb_constraints);
}
}
/* if(com_mat->n){ */
/* printf ("val [n-1][0]= %f\n",com_mat->comm[com_mat->n-1][0]); */
/* } */
for( nb_trials = 0 ; nb_trials < MAX_TRIALS ; nb_trials++ ){
res = (int *)MALLOC(sizeof(int)*n);
partition = (int *)MALLOC(sizeof(int)*n);
for ( i = 0 ; i < n ; i ++ )
res[i] = -1;
partition[i] = -1;
size = (int *)CALLOC(k,sizeof(int));
max_size = n/k;
/*printf("Constraints: ");print_1D_tab(constraints,nb_constraints);*/
/* put "dumb" vertices in the correct partition if there are any*/
if (nb_constraints){
@ -121,12 +144,13 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int
number of leaves of the subtree (n/k) and the number of constraints
*/
nb_dumb = n/k - (end-start);
/*printf("max_val: %d, nb_dumb=%d, start=%d, end=%d, size=%d\n",max_val, nb_dumb, start, end, n/k);*/
/* if(n<=16){ */
/* printf("max_val: %d, nb_dumb=%d, start=%d, end=%d, size=%d\n",max_val, nb_dumb, start, end, n/k); */
/* } */
/* dumb vertices are the one with highest indices:
put them in the ith partitions*/
for( j = 0; j < nb_dumb; j ++ ){
res[dumb_id] = i;
partition[dumb_id] = i;
dumb_id--;
}
/* increase the size of the ith partition accordingly*/
@ -134,7 +158,10 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int
start=end;
}
}
/*printf("After dumb vertices mapping: ");print_1D_tab(res,n);*/
/* if(n<=16){ */
/* printf("After dumb vertices mapping: ");print_1D_tab(partition,n); */
/* } */
/* choose k initial "true" vertices at random and put them in a different partition */
for ( i = 0 ; i < k ; i ++ ){
@ -145,35 +172,39 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int
do{
/* call the mersenne twister PRNG of tm_mt.c*/
j = genrand_int32() % n;
} while ( res[j] != -1 );
} while ( partition[j] != -1 );
/* allocate and update size of partition*/
res[j] = i;
/* printf("random: %d -> %d\n",j,i); */
partition[j] = i;
/* if(n<=16){ */
/* printf("random: %d -> %d\n",j,i); */
/* } */
size[i]++;
}
/* allocate each unaloacted vertices in the partition that maximize the communication*/
for( i = 0 ; i < n ; i ++)
if( res[i] == -1)
allocate_vertex(i, res, com_mat, n, size, max_size);
if( partition[i] == -1)
allocate_vertex(i, partition, com_mat, n, size, max_size);
cost = eval_cost(res,com_mat);
/*print_1D_tab(res,n);
printf("cost=%.2f\n",cost);*/
cost = eval_cost(partition,com_mat);
/* if(n<=16){ */
/* print_1D_tab(partition,n); */
/* printf("cost=%.2f\n",cost); */
/* } */
if((cost<best_cost) || (best_cost == -1)){
best_cost=cost;
FREE(best_res);
best_res=res;
FREE(best_partition);
best_partition=partition;
}else
FREE(res);
FREE(partition);
FREE(size);
}
/*print_1D_tab(best_res,n);
/*print_1D_tab(best_partition,n);
printf("best_cost=%.2f\n",best_cost);
*/
return best_res;
return best_partition;
}
int *kpartition(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints)
@ -189,16 +220,24 @@ int *kpartition(int k, com_mat_t *com_mat, int n, int *constraints, int nb_const
/* if(USE_KL_KPART) */
/* res = kPartitioning(comm, n, k, constraints, nb_constraints, KL_KPART_GREEDY_TRIALS); */
/* else */
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
#if HAVE_LIBSCOTCH
printf("Using Scotch\n");
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
#else
printf("Using default\n");
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
#endif
return res;
}
constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm_topology_t *topology, int depth)
constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm_topology_t *topology, int depth, int N)
{
constraint_t *const_tab = NULL;
int nb_leaves, start, end;
int i;
int vl = tm_get_verbose_level();
const_tab = (constraint_t *)CALLOC(k,sizeof(constraint_t));
@ -211,11 +250,27 @@ constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm
each sub-contraints 'i' contains constraints of value in [i*nb_leaves,(i+1)*nb_leaves[
*/
start = 0;
for( i = 0; i < k; i++ ){
/*returns the indice in contsraints that contains the smallest value not copied
/*returns the indice in constraints that contains the smallest value not copied
end is used to compute the number of copied elements (end-size) and is used as the next staring indices*/
end = fill_tab(&(const_tab[i].constraints), constraints, nb_constraints,start, (i+1) * nb_leaves, i * nb_leaves);
const_tab[i].length = end-start;
if(vl>=DEBUG){
printf("Step %d\n",i);
printf("\tConstraint: "); print_1D_tab(constraints, nb_constraints);
printf("\tSub constraint: "); print_1D_tab(const_tab[i].constraints, end-start);
}
if(end-start > N/k){
if(vl >= ERROR){
fprintf(stderr, "Error in spliting constraint at step %d. N=%d k= %d, length = %d\n", i, N, k, end-start);
}
FREE(const_tab);
return NULL;
}
const_tab[i].id = i;
start = end;
}
@ -224,6 +279,7 @@ constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm
}
/* split the com_mat of order n in k partiton according to parmutition table*/
com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
{
com_mat_t **res = NULL, *sub_com_mat;
@ -237,6 +293,8 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
if(verbose_level >= DEBUG){
printf("Partition: "); print_1D_tab(partition,n);
display_tab(com_mat->comm,com_mat->n);
printf("m=%d,n=%d,k=%d\n",m,n,k);
printf("perm=%p\n",perm);
}
perm = (int*)MALLOC(sizeof(int)*m);
@ -244,10 +302,22 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
/* build perm such that submat[i][j] correspond to com_mat[perm[i]][perm[j]] according to the partition*/
s = 0;
for( j = 0; j < com_mat->n; j ++) /* check only non zero element of of com_mat*/
/* The partition is of size n. n can be larger than the communication matrix order
as only the input problem are in the communication matrix while n is of the size
of all the element (including the added one where it is possible to map computation) :
we can have more compute units than processes*/
for( j = 0; j < com_mat->n; j ++)
if ( partition[j] == cur_part )
perm[s++] = j;
if(s>m){
if(verbose_level >= CRITICAL){
fprintf(stderr,"Partition: "); print_1D_tab(partition,n);
display_tab(com_mat->comm,com_mat->n);
fprintf(stderr,"too many elements of the partition for the permuation (s=%d>%d=m). n=%d, k=%d, cur_part= %d\n",s,m,n,k, cur_part);
}
exit(-1);
}
/* s is now the size of the non zero sub matrix for this partition*/
/* built a sub-matrix for partition cur_part*/
sub_mat = (double **) MALLOC(sizeof(double *) * s);
@ -264,7 +334,7 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
}
}
sub_com_mat = (com_mat_t *)malloc(sizeof(com_mat_t));
sub_com_mat = (com_mat_t *)MALLOC(sizeof(com_mat_t));
sub_com_mat -> n = s;
sub_com_mat -> comm = sub_mat;
@ -275,7 +345,7 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
res[cur_part] = sub_com_mat;
}
FREE(perm);
FREE(perm);
return res;
}
@ -311,7 +381,7 @@ int **split_vertices( int *vertices, int n, int k, int *partition)
return res;
}
void FREE_tab_com_mat(com_mat_t **mat,int k)
void free_tab_com_mat(com_mat_t **mat,int k)
{
int i,j;
if( !mat )
@ -321,11 +391,13 @@ void FREE_tab_com_mat(com_mat_t **mat,int k)
for ( j = 0 ; j < mat[i]->n ; j ++)
FREE( mat[i]->comm[j] );
FREE( mat[i]->comm );
FREE(mat[i]);
}
FREE(mat);
}
void FREE_tab_local_vertices(int **mat, int k)
void free_tab_local_vertices(int **mat, int k)
{
int i; /* m=n/k; */
if( !mat )
@ -338,7 +410,7 @@ void FREE_tab_local_vertices(int **mat, int k)
}
void FREE_const_tab(constraint_t *const_tab, int k)
void free_const_tab(constraint_t *const_tab, int k)
{
int i;
@ -353,19 +425,33 @@ void FREE_const_tab(constraint_t *const_tab, int k)
FREE(const_tab);
}
void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N, int depth,
/* Sanity check a communication matrix: every entry must be non-negative.
 * On the first negative entry, report its coordinates and value, then
 * abort the run with exit(-1).  Debug helper; has no effect on a valid
 * matrix. */
void check_com_mat(com_mat_t *com_mat){
  int i,j;

  for( i = 0 ; i < com_mat->n ; i++ )
    for( j = 0 ; j < com_mat->n ; j++ )
      if(com_mat->comm[i][j]<0){
        /* Diagnostics go to stderr (not stdout), matching the error
           reporting convention used throughout this file. */
        fprintf(stderr,"com_mat->comm[%d][%d]= %f\n",i,j,com_mat->comm[i][j]);
        exit(-1);
      }
}
void kpartition_build_level_topology(tm_tree_t *cur_node, com_mat_t *com_mat, int N, int depth,
tm_topology_t *topology, int *local_vertices,
int *constraints, int nb_constraints,
double *obj_weight, double *comm_speed)
{
com_mat_t **tab_com_mat = NULL; /* table of comunication matrix. We will have k of such comunication matrix, one for each subtree */
int k = topology->arity[depth];
tree_t **tab_child = NULL;
tm_tree_t **tab_child = NULL;
int *partition = NULL;
int **tab_local_vertices = NULL;
constraint_t *const_tab = NULL;
int i;
verbose_level = get_verbose_level();
verbose_level = tm_get_verbose_level();
/* if we are at the bottom of the tree set cur_node
and return*/
@ -377,8 +463,14 @@ void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N
}
if(verbose_level >= DEBUG){
printf("Partitionning Matrix of size %d (problem size= %d) in %d partitions\n", com_mat->n, N, k);
}
/* check_com_mat(com_mat); */
/* partition the com_matrix in k partitions*/
partition = kpartition(topology->arity[depth], com_mat, N, constraints, nb_constraints);
partition = kpartition(k, com_mat, N, constraints, nb_constraints);
/* split the communication matrix in k parts according to the partition just found above */
tab_com_mat = split_com_mat( com_mat, N, k, partition);
@ -387,12 +479,12 @@ void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N
tab_local_vertices = split_vertices( local_vertices, N, k, partition);
/* construct a tab of constraints of size k: one for each partitions*/
const_tab = split_constraints (constraints, nb_constraints, k, topology, depth);
const_tab = split_constraints (constraints, nb_constraints, k, topology, depth, N);
/* create the table of k nodes of the resulting sub-tree */
tab_child = (tree_t **) CALLOC (k,sizeof(tree_t*));
tab_child = (tm_tree_t **) CALLOC (k,sizeof(tm_tree_t*));
for( i = 0 ; i < k ; i++){
tab_child[i] = (tree_t *) MALLOC(sizeof(tree_t));
tab_child[i] = (tm_tree_t *) MALLOC(sizeof(tm_tree_t));
}
/* for each child, proceeed recursively*/
@ -408,28 +500,30 @@ void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N
/* link the node with its child */
set_node( cur_node, tab_child, k, NULL, cur_node->id, 0, NULL, depth);
/* FREE local data*/
/* free local data*/
FREE(partition);
FREE_tab_com_mat(tab_com_mat,k);
FREE_tab_local_vertices(tab_local_vertices,k);
FREE_const_tab(const_tab,k);
free_tab_com_mat(tab_com_mat,k);
free_tab_local_vertices(tab_local_vertices,k);
free_const_tab(const_tab,k);
}
tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **comm,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed)
tm_tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **comm,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed)
{
int depth,i, K;
tree_t *root = NULL;
tm_tree_t *root = NULL;
int *local_vertices = NULL;
int nb_cores;
com_mat_t com_mat;
verbose_level = get_verbose_level();
verbose_level = tm_get_verbose_level();
nb_cores=nb_processing_units(topology)*topology->oversub_fact;
if(verbose_level>=INFO)
printf("Number of constraints: %d, N=%d\n", nb_constraints, N);
nb_cores=nb_processing_units(topology);
printf("Number of constraints: %d, N=%d, nb_cores = %d, K=%d\n", nb_constraints, N, nb_cores, nb_cores-N);
if((constraints == NULL) && (nb_constraints != 0)){
if(verbose_level>=ERROR)
@ -449,7 +543,6 @@ tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **com
if((K=nb_cores - N)>0){
/* add K element to the object weight*/
complete_obj_weight(&obj_weight,N,K);
/* display_tab(tab,N+K);*/
} else if( K < 0){
if(verbose_level>=ERROR)
fprintf(stderr,"Not enough cores!\n");
@ -463,7 +556,7 @@ tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **com
local_vertices is the array of vertices that can be used
the min(N,nb_contraints) 1st element are number from 0 to N
the last ones have value -1
the value of this array will be used to number the leaves of the tree_t tree
the value of this array will be used to number the leaves of the tm_tree_t tree
that start at "root"
min(N,nb_contraints) is used to takle the case where thre is less processes than constraints
@ -479,18 +572,20 @@ tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **com
/* we assume all objects have the same arity*/
/* assign the root of the tree*/
root = (tree_t*) MALLOC (sizeof(tree_t));
root->id = 0;
root = (tm_tree_t*) MALLOC (sizeof(tm_tree_t));
root -> id = 0;
/*build the tree downward from the root*/
kpartition_build_level_topology(root, &com_mat, N+K, depth, topology, local_vertices,
constraints, nb_constraints, obj_weight, com_speed);
constraints, nb_constraints, obj_weight, com_speed);
/*print_1D_tab(local_vertices,K+N);*/
if(verbose_level>=INFO)
printf("Build (bottom-up) tree done!\n");
FREE(local_vertices);

Просмотреть файл

@ -1,9 +1,9 @@
typedef struct _com_mat_t{
double **comm;
double **comm;
int n; /*comm is of size n by n the other element are zeroes*/
} com_mat_t;
int *kpartition(int, com_mat_t*, int, int *, int);
tree_t * kpartition_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed);
tm_tree_t * kpartition_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed);

Просмотреть файл

@ -1,35 +1,60 @@
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <stdint.h>
#include "uthash.h"
#include <stdio.h>
#include "tm_verbose.h"
#include "tm_malloc.h"
#include "opal/util/alfg.h"
#include "tm_tree.h"
#include "tm_mt.h"
#define MIN(a,b) ((a)<(b)?(a):(b))
#define EXTRA_BYTE 100
typedef signed char byte;
typedef uint8_t byte;
/* static int verbose_level = ERROR;*/
typedef struct _hash_t {
void *key; /* we'll use this field as the key */
size_t size;
UT_hash_handle hh; /* makes this structure hashable */
void *key; /* we'll use this field as the key */
size_t size;
char *file;
int line;
UT_hash_handle hh; /* makes this structure hashable */
}hash_t;
static hash_t *size_hash = NULL;
static char extra_data[EXTRA_BYTE];
static void save_size(void *ptr, size_t size);
static void save_ptr(void *ptr, size_t size, char *file, int line);
static size_t retreive_size(void *someaddr);
static void init_extra_data(void);
void save_size(void *ptr, size_t size) {
/* Portable strdup() replacement (strdup is POSIX, not ISO C).
 * Returns a freshly malloc'ed copy of `string` including its trailing
 * '\0', or NULL when the allocation fails.  The caller owns the copy.
 * Fix: the length is kept in a size_t — the original stored strlen()'s
 * size_t result in an int, which truncates for very large strings. */
char *my_strdup(char* string){
  size_t size = 1 + strlen(string); /* +1 for the terminating '\0' */
  char *res = (char*)malloc(size);
  if(res)
    memcpy(res, string, size);
  return res;
}
void save_ptr(void *ptr, size_t size, char *file, int line) {
hash_t *elem;
elem = (hash_t*) malloc(sizeof(hash_t));
elem -> key = ptr;
elem -> key = ptr;
elem -> size = size;
if(get_verbose_level() >= DEBUG)
elem -> line = line;
elem -> file = my_strdup(file);
if(tm_get_verbose_level() >= DEBUG)
printf("Storing (%p,%ld)\n",ptr,size);
HASH_ADD_PTR( size_hash, key, elem );
}
@ -40,72 +65,76 @@ size_t retreive_size(void *someaddr){
hash_t *elem = NULL;
HASH_FIND_PTR(size_hash, &someaddr, elem);
if(!elem){
fprintf(stderr,"cannot find ptr %p to free!\n",someaddr);
if(tm_get_verbose_level() >= CRITICAL)
fprintf(stderr,"Cannot find ptr %p to free!\n",someaddr);
abort();
return 0;
}
res = elem->size;
if(get_verbose_level()>=DEBUG)
if(tm_get_verbose_level()>=DEBUG)
printf("Retreiving (%p,%ld)\n",someaddr, res);
free(elem->file);
HASH_DEL( size_hash, elem);
return res;
}
void my_mem_check(void){
void tm_mem_check(void){
#ifdef __DEBUG_TM_MALLOC__
hash_t *s;
int nb_errors = 0;
for(s=size_hash; s != NULL; s=s->hh.next) {
if(get_verbose_level() >= ERROR) {
printf("pointer %p of size %ld has not been freed!\n", s->key, s->size);
}
nb_errors ++;
if(tm_get_verbose_level()>=ERROR)
printf("pointer %p of size %ld (%s: %d) has not been freed!\n", s->key, s->size, s->file, s->line);
nb_errors ++;
}
if(get_verbose_level() >= INFO)
if(tm_get_verbose_level() >= INFO)
printf ("Number of errors in managing memory: %d\n",nb_errors);
#endif
}
void init_extra_data(void){
static int done = 0;
opal_rng_buff_t rng;
int i;
if(done)
return;
opal_srand(&rng,0);
init_genrand(0);
for( i = 0 ; i < EXTRA_BYTE; i++)
extra_data[i] = (char) opal_rand(&rng) % 256;
extra_data[i] = (char) genrand_int32() % 256;
done = 1;
}
void *my_malloc(size_t size, char *file, int line){
void *tm_malloc(size_t size, char *file, int line){
byte *ptr;
init_extra_data();
size+=2*EXTRA_BYTE;
ptr = malloc(size);
if(get_verbose_level()>=DEBUG)
printf("my_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE,(void*)ptr,file,line);
if(tm_get_verbose_level()>=DEBUG)
printf("tm_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE,ptr,file,line);
save_size(ptr,size);
save_ptr(ptr, size, file, line);
memcpy(ptr, extra_data, EXTRA_BYTE);
memcpy(ptr + size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
if(get_verbose_level()>=DEBUG)
printf("my_malloc returning: %p\n",(void*)(ptr+EXTRA_BYTE));
if(tm_get_verbose_level()>=DEBUG)
printf("tm_malloc returning: %p\n",ptr+EXTRA_BYTE);
return (void *)(ptr + EXTRA_BYTE);
}
void *my_calloc(size_t count, size_t size, char *file, int line){
void *tm_calloc(size_t count, size_t size, char *file, int line){
byte *ptr;
size_t full_size;
@ -115,22 +144,72 @@ void *my_calloc(size_t count, size_t size, char *file, int line){
ptr = malloc(full_size);
bzero(ptr,full_size);
save_size(ptr, full_size);
save_ptr(ptr, full_size, file, line);
if(get_verbose_level()>=DEBUG)
printf("my_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,(void*)ptr, file, line);
if(tm_get_verbose_level()>=DEBUG)
printf("tm_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,ptr, file, line);
memcpy(ptr, extra_data, EXTRA_BYTE);
memcpy(ptr + full_size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
if(get_verbose_level()>=DEBUG)
printf("my_calloc returning: %p\n",(void*)(ptr+EXTRA_BYTE));
if(tm_get_verbose_level()>=DEBUG)
printf("tm_calloc returning: %p\n",ptr+EXTRA_BYTE);
return (void *)(ptr+EXTRA_BYTE);
}
void my_free(void *ptr){
void *tm_realloc(void *old_ptr, size_t size, char *file, int line){
byte *ptr;
size_t full_size;
init_extra_data();
full_size = size + 2 * EXTRA_BYTE;
ptr = malloc(full_size);
save_ptr(ptr, full_size, file, line);
if(tm_get_verbose_level()>=DEBUG)
printf("tm_realloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,ptr, file, line);
memcpy(ptr, extra_data, EXTRA_BYTE);
memcpy(ptr + full_size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
if(old_ptr){
byte *original_ptr = ((byte *)old_ptr) - EXTRA_BYTE;
size_t old_ptr_size = retreive_size(original_ptr);
memcpy(ptr + EXTRA_BYTE, old_ptr, MIN(old_ptr_size - 2 * EXTRA_BYTE, size));
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
fprintf(stderr,"Realloc: cannot find special string ***before*** %p!\n", original_ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if((bcmp(original_ptr + old_ptr_size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
fprintf(stderr,"Realloc: cannot find special string ***after*** %p!\n", original_ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if(tm_get_verbose_level()>=DEBUG)
printf("tm_free freeing: %p\n",original_ptr);
free(original_ptr);
}
if(tm_get_verbose_level()>=DEBUG)
printf("tm_realloc returning: %p (----- %p)\n",ptr+EXTRA_BYTE, ((byte *)ptr) - EXTRA_BYTE);
return (void *)(ptr+EXTRA_BYTE);
}
void tm_free(void *ptr){
byte *original_ptr = ((byte *)ptr) - EXTRA_BYTE;
size_t size;
@ -139,18 +218,18 @@ void my_free(void *ptr){
size = retreive_size(original_ptr);
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((get_verbose_level()>=ERROR))){
fprintf(stderr,"cannot find special string ***before*** %p!\n",ptr);
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
fprintf(stderr,"Free: cannot find special string ***before*** %p!\n", original_ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if((bcmp(original_ptr + size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((get_verbose_level()>=ERROR))){
fprintf(stderr,"cannot find special string ***after*** %p!\n",ptr);
if((bcmp(original_ptr + size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
fprintf(stderr,"Free: cannot find special string ***after*** %p!\n", original_ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if(get_verbose_level()>=DEBUG)
printf("my_free freeing: %p\n",(void*)original_ptr);
if(tm_get_verbose_level()>=DEBUG)
printf("tm_free freeing: %p\n",original_ptr);
free(original_ptr);

Просмотреть файл

@ -1,5 +1,29 @@
#ifndef _TM_MALLOC_H_
#define _TM_MALLOC_H_
#include <stdlib.h>
void *my_malloc(size_t size, char *, int);
void *my_calloc(size_t count, size_t size, char *, int);
void my_free(void *ptr);
void my_mem_check(void);
void *tm_malloc(size_t size, char *, int);
void *tm_calloc(size_t count, size_t size, char *, int);
void *tm_realloc(void *ptr, size_t size, char *, int);
void tm_free(void *ptr);
void tm_mem_check(void);
/* for debugging malloc */
/* #define __DEBUG_TM_MALLOC__ */
#undef __DEBUG_TM_MALLOC__
#ifdef __DEBUG_TM_MALLOC__
#define MALLOC(x) tm_malloc(x,__FILE__,__LINE__)
#define CALLOC(x,y) tm_calloc(x,y,__FILE__,__LINE__)
#define REALLOC(x,y) tm_realloc(x,y,__FILE__,__LINE__)
#define FREE tm_free
#define MEM_CHECK tm_mem_check
#else
#define MALLOC malloc
#define CALLOC calloc
#define FREE free
#define REALLOC realloc
#define MEM_CHECK tm_mem_check
#endif
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,43 +1,34 @@
#ifndef __TM_MAPPING_H__
#define __TM_MAPPING_H__
#include "tm_tree.h"
#include "tm_hwloc.h"
#include "tm_topology.h"
#include "tm_timings.h"
#include "tm_verbose.h"
int build_comm(char *filename,double ***pcomm);
void TreeMatchMapping(int nb_obj, int nb_proc,double **comm_mat, double * obj_weigth, double *com_speed, int d, int *sol);
/*Map topology to cores:
sigma_i is such that process i is mapped on core sigma_i
k_i is such that core i exectutes process k_i
size of sigma is the number of process (nb_objs)
size of k is the number of cores/nodes (nb_proc)
We must have numbe of process<=number of cores
k_i =-1 if no process is mapped on core i
*/
void map_topology_simple(tm_topology_t *topology,tree_t *comm_tree, int *sigma, int nb_processes, int *k);
int nb_processing_units(tm_topology_t *topology);
void free_topology(tm_topology_t *topology);
void display_other_heuristics(tm_topology_t *topology,int N,double **comm,int TGT_flag, int *constraints, double *cost);
void print_1D_tab(int *tab,int N);
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order);
void build_synthetic_proc_id(tm_topology_t *topology);
void display_topology(tm_topology_t *topology);
tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_node);
tm_topology_t *optimize_topology(tm_topology_t *topology);
double print_sol_inv(int N,int *Value,double **comm, double *cost, tm_topology_t *topology);
double print_sol(int N,int *Value,double **comm, double *cost, tm_topology_t *topology);
int build_binding_constraints(char *filename, int **ptab);
void canonize_constraints(tm_topology_t *topology, int *constraints, int **canonical, int n, int **perm, int *m);
tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_nodes);
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology);
void FREE_topology(tm_topology_t *);
void depth_first(tm_tree_t *comm_tree, int *proc_list,int *i);
int fill_tab(int **new_tab,int *tab, int n, int start, int max_val, int shift);
void init_mat(char *filename,int N, double **mat, double *sum_row);
void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level,
int *sigma, int nb_processes, int **k, int nb_compute_units);
int nb_leaves(tm_tree_t *comm_tree);
int nb_lines(char *filename);
int nb_processing_units(tm_topology_t *topology);
void print_1D_tab(int *tab,int N);
tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree);
void tm_finalize();
void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
tm_affinity_mat_t *tm_load_aff_mat(char *filename);
void update_comm_speed(double **comm_speed,int old_size,int new_size);
/* use to split a constaint into subconstraint according the tree*/
typedef struct _constraint{
typedef struct{
int *constraints; /* the subconstraints*/
int length; /*length of *constraints*/
int id; /* id of the corresponding subtree*/
}constraint_t;
#endif

Просмотреть файл

@ -2,8 +2,7 @@ void init_genrand(unsigned long s);
void init_by_array(unsigned long init_key[], int key_length);
/* generates a random number on the interval [0,0x7fffffff] */
unsigned long genrand_int32(void);
unsigned long genrand_int32(void);
long genrand_int31(void);
double genrand_real1(void);
double genrand_real2(void);

Просмотреть файл

@ -0,0 +1,525 @@
#include <ctype.h>
#include <float.h>
#include "tm_solution.h"
#include "tm_mt.h"
#include "tm_mapping.h"
typedef struct {
int val;
long key;
} hash_t;
void tm_free_solution(tm_solution_t *sol);
int distance(tm_topology_t *topology,int i, int j);
double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma);
double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric);
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol,
tm_metric_t metric);
void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric);
int in_tab(int *tab, int n, int val);
void map_Packed(tm_topology_t *topology, int N, int *sigma);
void map_RR(tm_topology_t * topology, int N, int *sigma);
int hash_asc(const void* x1,const void* x2);
int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed);
double eval_sol(int *sol,int N,double **comm, double **arch);
void exchange(int *sol,int i,int j);
double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch);
void select_max(int *l,int *m,double **gain,int N,int *state);
void compute_gain(int *sol,int N,double **gain,double **comm, double **arch);
void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch);
/* Release a solution: every row of the k table, the table itself,
   the sigma array, and finally the solution structure. */
void tm_free_solution(tm_solution_t *sol){
  int i;
  int nb_units = sol->k_length;

  if(sol->k)
    for(i = 0 ; i < nb_units ; i++)
      FREE(sol->k[i]);

  FREE(sol->k);
  FREE(sol->sigma);
  FREE(sol);
}
/*
Compute the distance in the tree
between node i and j : the farther away node i and j, the
larger the returned value.
The algorithm looks at the largest level, starting from the top,
for which node i and j are still in the same subtree. This is done
by iteratively dividing their numbering by the arity of the levels
*/
int distance(tm_topology_t *topology,int i, int j)
{
  int level = 0;
  int arity;
  int f_i, f_j ;
  int vl = tm_get_verbose_level();
  int depth = topology->nb_levels-1;

  /* Start from the ranks of i and j at the deepest level (the leaves). */
  f_i = topology->node_rank[depth][i];
  f_j = topology->node_rank[depth][j];

  if(vl >= DEBUG)
    printf("i=%d, j=%d Level = %d f=(%d,%d)\n",i ,j, level, f_i, f_j);

  do{
    /* Climb one level: dividing a rank by the arity of the level gives
       the rank of the enclosing subtree.  The loop stops at the first
       level where both ranks coincide (common ancestor) or at the root. */
    level++;
    arity = topology->arity[level];
    /* an arity of 0 marks a leaf level: use 1 so the division is a no-op */
    if( arity == 0 )
      arity = 1;
    f_i = f_i/arity;
    f_j = f_j/arity;
  } while((f_i!=f_j) && (level < depth));

  if(vl >= DEBUG)
    printf("distance(%d,%d):%d\n",topology->node_rank[depth][i], topology->node_rank[depth][j], level);
  /* exit(-1); */
  return level;
}
/* Print sigma and return the SUM_COM metric: the sum over all process
   pairs of (affinity weight) * (cost of the tree level where they meet). */
double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
{
  double **mat = aff_mat->mat;
  double *cost = topology->cost;
  int N        = aff_mat->order;
  int depth    = topology->nb_levels - 1;
  double total = 0;
  int i,j;

  for( i = 0 ; i < N ; i++ ){
    for( j = i+1 ; j < N ; j++ ){
      double w = mat[i][j];
      /* The cost table is indexed from the root down: cost[0] is the
         cost of the longest distance, hence the depth-distance inversion. */
      double d = cost[depth-distance(topology,sigma[i],sigma[j])];
      if(tm_get_verbose_level() >= DEBUG)
        printf("T_%d_%d %f*%f=%f\n",i,j,w,d,w*d);
      total += w*d;
    }
  }

  for( i = 0 ; i < N ; i++ ){
    printf("%d", sigma[i]);
    if(i < N-1)
      printf(",");
  }
  printf(" : %g\n",total);

  return total;
}
/* Print sigma and return the MAX_COM metric: the largest value of
   (affinity weight) * (cost of the tree level where they meet)
   over all process pairs. */
double display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
{
  double **mat = aff_mat->mat;
  double *cost = topology->cost;
  int N        = aff_mat->order;
  int depth    = topology->nb_levels - 1;
  int vl       = tm_get_verbose_level();
  double worst = 0;
  int i,j;

  for( i = 0 ; i < N ; i++ ){
    for( j = i+1 ; j < N ; j++ ){
      double w = mat[i][j];
      /* cost[0] is the cost of the longest distance, hence the inversion */
      double d = cost[depth-distance(topology,sigma[i],sigma[j])];
      if(vl >= DEBUG)
        printf("T_%d_%d %f*%f=%f\n",i,j,w,d,w*d);
      if(w*d > worst)
        worst = w*d;
    }
  }

  for( i = 0 ; i < N ; i++ ){
    printf("%d", sigma[i]);
    if(i < N-1)
      printf(",");
  }
  printf(" : %g\n",worst);

  return worst;
}
/* Print sigma and return the HOP_BYTE metric: the sum over all process
   pairs of (bytes exchanged) * (hop count).  The hop count is twice the
   tree distance — presumably one traversal up and one down; confirm. */
double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
{
  double **mat = aff_mat->mat;
  int N        = aff_mat->order;
  double total = 0;
  int i,j;

  for( i = 0 ; i < N ; i++ ){
    for( j = i+1 ; j < N ; j++ ){
      double bytes = mat[i][j];
      int hops     = 2*distance(topology,sigma[i],sigma[j]);
      if(tm_get_verbose_level() >= DEBUG)
        printf("T_%d_%d %f*%d=%f\n",i,j,bytes,hops,bytes*hops);
      total += bytes*hops;
    }
  }

  for( i = 0 ; i < N ; i++ ){
    printf("%d", sigma[i]);
    if(i < N-1)
      printf(",");
  }
  printf(" : %g\n",total);

  return total;
}
/* Dispatch to the metric-specific display routine.
   Returns the metric value, or -1 for an unknown metric. */
double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric){
  switch (metric){
  case TM_METRIC_SUM_COM:
    return display_sol_sum_com(topology, aff_mat, sigma);
  case TM_METRIC_MAX_COM:
    return display_sol_max_com(topology, aff_mat, sigma);
  case TM_METRIC_HOP_BYTE:
    return display_sol_hop_byte(topology, aff_mat, sigma);
  default:
    if(tm_get_verbose_level() >= ERROR)
      fprintf(stderr,"Error printing solution: metric %d not implemented\n",metric);
    return -1;
  }
}
/* Display the solution for the given topology and affinity matrix and
   return its metric value.  In DEBUG mode also dump the k table: for
   each processing unit, the processes mapped onto it (at most
   oversub_fact entries, terminated by -1). */
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol,
                           tm_metric_t metric){
  int i,j;
  int **k = sol->k;

  if(tm_get_verbose_level() >= DEBUG){
    printf("k: \n");
    for( i = 0 ; i < nb_processing_units(topology) ; i++ ){
      /* k[i][0] == -1 means no process is mapped on unit i */
      if(k[i][0] != -1){
        printf("\tProcessing unit %d: ",i);
        for (j = 0 ; j<topology->oversub_fact; j++){
          if( k[i][j] == -1)  /* -1 terminates the list for this unit */
            break;
          printf("%d ",k[i][j]);
        }
        printf("\n");
      }
    }
  }
  return display_sol(topology, aff_mat, sol->sigma, metric);
}
/* Compute and print the placements produced by the baseline heuristics
   (Packed and Round-Robin) for the given affinity matrix, so they can be
   compared against the TreeMatch solution. */
void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric)
{
  int N = aff_mat->order;
  int *sigma = (int*)MALLOC(sizeof(int)*N);

  map_Packed(topology, N, sigma);
  printf("Packed: ");
  display_sol(topology, aff_mat, sigma, metric);

  map_RR(topology, N, sigma);
  printf("RR: ");
  display_sol(topology, aff_mat, sigma, metric);

  /* The timed MPIPP-1/MPIPP-5 comparisons (map_MPIPP) that used to run
     here are disabled; see the revision history to restore them. */

  FREE(sigma);
}
/* Return 1 if val appears among the first n entries of tab, 0 otherwise. */
int in_tab(int *tab, int n, int val){
  int idx;
  for( idx = 0 ; idx < n ; idx++ ){
    if(tab[idx] != val)
      continue;
    return 1;
  }
  return 0;
}
/* Packed placement: fill sigma with the first N leaf node ids, in order,
   keeping only ids allowed by the binding constraints when present. */
void map_Packed(tm_topology_t *topology, int N, int *sigma)
{
  size_t i;
  int j = 0,depth;
  int vl = tm_get_verbose_level();

  depth = topology->nb_levels-1;

  for( i = 0 ; i < topology->nb_nodes[depth] ; i++){
    /* printf ("%d -> %d\n",objs[i]->os_index,i); */
    /* skip leaves excluded by the constraint list (if any) */
    if((!topology->constraints) || (in_tab(topology->constraints, topology->nb_constraints, topology->node_id[depth][i]))){
      if(vl >= DEBUG)
        printf ("%lu: %d -> %d\n", i, j, topology->node_id[depth][i]);
      sigma[j++]=topology->node_id[depth][i];
      if(j == N)  /* all N processes have been placed */
        break;
    }
  }
}
/* Round-robin placement: process i goes to processing unit i modulo the
   number of units, or cycles through the constraint list when one is set. */
void map_RR(tm_topology_t *topology, int N,int *sigma)
{
  int vl = tm_get_verbose_level();
  int rank;

  for( rank = 0 ; rank < N ; rank++ ){
    if(topology->constraints)
      sigma[rank] = topology->constraints[rank % topology->nb_constraints];
    else
      sigma[rank] = rank % topology->nb_proc_units;

    if(vl >= DEBUG)
      printf ("%d -> %d (%d)\n",rank,sigma[rank],topology->nb_proc_units);
  }
}
/* qsort comparator: order hash_t entries by increasing key.
   The previous "(a < b) ? -1 : 1" form returned 1 for equal keys in both
   directions, which violates the consistent total order qsort requires
   (C11 7.22.5); return 0 for equal keys instead. */
int hash_asc(const void* x1,const void* x2)
{
  const hash_t *e1 = (const hash_t*)x1;
  const hash_t *e2 = (const hash_t*)x2;

  if(e1->key < e2->key)
    return -1;
  if(e1->key > e2->key)
    return 1;
  return 0;
}
/* Build a random permutation of the node ids of the given topology level:
   key each id with a Mersenne-Twister draw seeded with `seed`, then sort
   on the keys.  The caller owns (and FREEs) the returned array. */
int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed)
{
  int *nodes_id   = topology->node_id[level];
  hash_t *entries = (hash_t*)MALLOC(sizeof(hash_t)*N);
  int *perm       = (int*)MALLOC(sizeof(int)*N);
  int i;

  init_genrand(seed);

  for( i = 0 ; i < N ; i++ ){
    entries[i].val = nodes_id[i];
    entries[i].key = genrand_int32();
  }

  qsort(entries,N,sizeof(hash_t),hash_asc);

  for( i = 0 ; i < N ; i++ )
    perm[i] = entries[i].val;

  FREE(entries);
  return perm;
}
double eval_sol(int *sol,int N,double **comm, double **arch)
{
double a,c,res;
int i,j;
res = 0;
for ( i = 0 ; i < N ; i++ )
for ( j = i+1 ; j < N ; j++ ){
c = comm[i][j];
a = arch[sol[i]][sol[j]];
res += c/a;
}
return res;
}
/* Swap entries i and j of the placement array. */
void exchange(int *sol,int i,int j)
{
  int saved = sol[i];
  sol[i] = sol[j];
  sol[j] = saved;
}
/* Objective improvement obtained by swapping entries l and m of sol,
   relative to the known current value eval1 (positive = improvement).
   The swap is undone before returning, so sol is left unchanged. */
double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch)
{
  double swapped_eval;

  if( l == m )
    return 0;

  exchange(sol,l,m);
  swapped_eval = eval_sol(sol,N,comm,arch);
  exchange(sol,l,m);

  return eval1 - swapped_eval;
}
/* Find the pair (l,m) of distinct, non-frozen indices (state[.] == 0)
   with the largest gain and store it in *l and *m.  If every pair is
   frozen, *l and *m are left untouched. */
void select_max(int *l,int *m,double **gain,int N,int *state)
{
  double best = -DBL_MAX;
  int i,j;

  for( i = 0 ; i < N ; i++ ){
    if(state[i])
      continue;
    for( j = 0 ; j < N ; j++ ){
      if( (j == i) || state[j] )
        continue;
      if(gain[i][j] > best){
        best = gain[i][j];
        *l = i;
        *m = j;
      }
    }
  }
}
/* Fill the symmetric gain matrix: gain[i][j] is the objective improvement
   obtained by swapping entries i and j of sol. */
void compute_gain(int *sol,int N,double **gain,double **comm, double **arch)
{
  double current = eval_sol(sol,N,comm,arch);
  int i,j;

  for( i = 0 ; i < N ; i++ ){
    for( j = 0 ; j <= i ; j++ ){
      double g = gain_exchange(sol,i,j,current,N,comm,arch);
      gain[i][j] = g;
      gain[j][i] = g;
    }
  }
}
/* Randomized Algorithm of
Hu Chen, Wenguang Chen, Jian Huang ,Bob Robert,and H.Kuhn. Mpipp: an automatic profile-guided
parallel process placement toolset for smp clusters and multiclusters. In
Gregory K. Egan and Yoichi Muraoka, editors, ICS, pages 353-360. ACM, 2006.
*/
void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch)
{
  int *sol = NULL;       /* current working placement */
  int *state = NULL;     /* state[i]==1: index i frozen for the current pass */
  double **gain = NULL;  /* gain[i][j]: objective gain of swapping i and j */
  int **history = NULL;  /* history[s][1..2]: the pair swapped at step s */
  double *temp = NULL;   /* temp[s]: gain recorded at step s */
  int i,j,t,l=0,m=0,seed=0;
  double max,sum,best_eval,eval;

  gain = (double**)MALLOC(sizeof(double*)*N);
  history = (int**)MALLOC(sizeof(int*)*N);
  for( i = 0 ; i < N ; i++){
    gain[i] = (double*)MALLOC(sizeof(double)*N);
    history[i] = (int*)MALLOC(sizeof(int)*3);
  }
  state = (int*)MALLOC(sizeof(int)*N);
  temp = (double*)MALLOC(sizeof(double)*N);

  /* start from a random placement; best_eval tracks the best seen so far */
  sol = generate_random_sol(topology,N,topology->nb_levels-1,seed++);
  for( i = 0 ; i < N ; i++)
    sigma[i] = sol[i];
  best_eval = DBL_MAX;

  while(seed <= nb_seed){
    do{
      for( i = 0 ; i < N ; i++ ){
        state[i] = 0;
        /* printf("%d ",sol[i]); */
      }
      /* printf("\n"); */
      compute_gain(sol,N,gain,comm,arch);
      /*
        display_tab(gain,N);
        exit(-1);
      */
      /* Greedy pass: repeatedly apply the best remaining swap, freeze both
         indices, and record the swap and its gain (Kernighan-Lin style). */
      for( i = 0 ; i < N/2 ; i++ ){
        select_max(&l,&m,gain,N,state);
        /* printf("%d: %d <=> %d : %f\n",i,l,m,gain[l][m]); */
        state[l] = 1;
        state[m] = 1;
        exchange(sol,l,m);
        history[i][1] = l;
        history[i][2] = m;
        temp[i] = gain[l][m];
        compute_gain(sol,N,gain,comm,arch);
      }

      /* find the prefix of swaps with the best cumulated gain (t = last
         index of that prefix, -1 if no prefix has positive gain) */
      t = -1;
      max = 0;
      sum = 0;
      for(i = 0 ; i < N/2 ; i++ ){
        sum += temp[i];
        if( sum > max ){
          max = sum;
          t = i;
        }
      }

      /*for(j=0;j<=t;j++)
        printf("exchanging: %d with %d for gain: %f\n",history[j][1],history[j][2],temp[j]); */
      /* undo every swap past the best prefix */
      for( j = t+1 ; j < N/2 ; j++ ){
        exchange(sol,history[j][1],history[j][2]);
        /* printf("Undoing: %d with %d for gain: %f\n",history[j][1],history[j][2],temp[j]); */
      }
      /* printf("max=%f\n",max); */
      /*for(i=0;i<N;i++){
        printf("%d ",sol[i]);
      }
      printf("\n");*/

      /* keep this pass's result if it beats the best placement so far */
      eval = eval_sol(sol,N,comm,arch);
      if(eval < best_eval){
        best_eval = eval;
        for(i = 0 ; i < N ; i++)
          sigma[i] = sol[i];
        /* print_sol(N); */
      }
    }while( max > 0 );  /* iterate while a pass still improved the objective */

    /* restart from a fresh random placement with the next seed */
    FREE(sol);
    sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++);
  }

  FREE(sol);
  FREE(temp);
  FREE(state);
  for( i = 0 ; i < N ; i++){
    FREE(gain[i]);
    FREE(history[i]);
  }
  FREE(gain);
  FREE(history);
}

Просмотреть файл

@ -0,0 +1,26 @@
/* Entry points of tm_solution.c: evaluation and display of process
   placements (sigma / k tables) plus the baseline Packed, Round-Robin
   and MPIPP placement heuristics.
   NOTE(review): the guard macro is misspelled (TM_SOLUION_H, missing a T)
   — harmless since #ifndef/#define agree, but worth renaming. */
#ifndef TM_SOLUION_H
#define TM_SOLUION_H

#include "treematch.h"

void tm_free_solution(tm_solution_t *sol);
int distance(tm_topology_t *topology,int i, int j);
double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma);
double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric);
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol,
                           tm_metric_t metric);
void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric);
int in_tab(int *tab, int n, int val);
void map_Packed(tm_topology_t *topology, int N, int *sigma);
void map_RR(tm_topology_t *topology, int N, int *sigma);
int hash_asc(const void* x1,const void* x2);
int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed);
double eval_sol(int *sol,int N,double **comm, double **arch);
void exchange(int *sol,int i,int j);
double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch);
void select_max(int *l,int *m,double **gain,int N,int *state);
void compute_gain(int *sol,int N,double **gain,double **comm, double **arch);
void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch);

#endif

Просмотреть файл

@ -1,13 +1,18 @@
#include <pthread.h>
#include "tm_thread_pool.h"
#include "tm_verbose.h"
#include "opal/mca/hwloc/hwloc-internal.h"
#include <hwloc.h>
#include "tm_verbose.h"
#include "tm_tree.h"
#include <errno.h>
#include <limits.h>
typedef enum _mapping_policy {COMPACT, SCATTER} mapping_policy_t;
static mapping_policy_t mapping_policy = COMPACT;
static int verbose_level = ERROR;
static thread_pool_t *pool = NULL;
static unsigned int max_nb_threads = INT_MAX;
static thread_pool_t *get_thread_pool(void);
static void execute_work(work_t *work);
@ -16,39 +21,21 @@ static void *thread_loop(void *arg);
static void add_work(pthread_mutex_t *list_lock, pthread_cond_t *cond_var, work_t *working_list, work_t *work);
static thread_pool_t *create_threads(void);
static void f1 (int nb_args, void **args);
static void f2 (int nb_args, void **args);
static void f1 (int nb_args, void **args, int thread_id);
static void f2 (int nb_args, void **args, int thread_id);
static void destroy_work(work_t *work);
#define MIN(a, b) ((a)<(b)?(a):(b))
#define MAX(a, b) ((a)>(b)?(a):(b))
void f1 (int nb_args, void **args){
int a, b;
a = *(int*)args[0];
b = *(int*)args[1];
printf("nb_args=%d, a=%d, b=%d\n",nb_args,a,b);
void tm_set_max_nb_threads(unsigned int val){
max_nb_threads = val;
}
void f2 (int nb_args, void **args){
int n, *tab;
int *res;
int i,j;
n = *(int*)args[0];
tab = (int*)args[1];
res=(int*)args[2];
for(j=0;j<1000000;j++){
*res=0;
for (i=0;i<n;i++)
*res+=tab[i];
}
printf("done: %d!\n",nb_args);
}
void execute_work(work_t *work){
work->task(work->nb_args, work->args);
work->task(work->nb_args, work->args, work->thread_id);
}
int bind_myself_to_core(hwloc_topology_t topology, int id){
@ -57,10 +44,29 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){
char *str;
int binding_res;
int depth = hwloc_topology_get_depth(topology);
int nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1);
int my_core;
int nb_threads = get_nb_threads();
/* printf("depth=%d\n",depth); */
switch (mapping_policy){
case SCATTER:
my_core = id*(nb_cores/nb_threads);
break;
default:
if(verbose_level>=WARNING){
printf("Wrong scheduling policy. Using COMPACT\n");
}
case COMPACT:
my_core = id%nb_cores;
}
if(verbose_level>=INFO){
printf("Mapping thread %d on core %d\n",id,my_core);
}
/* Get my core. */
obj = hwloc_get_obj_by_depth(topology, depth-1, id);
obj = hwloc_get_obj_by_depth(topology, depth-1, my_core);
if (obj) {
/* Get a copy of its cpuset that we may modify. */
cpuset = hwloc_bitmap_dup(obj->cpuset);
@ -71,7 +77,7 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){
/*hwloc_bitmap_asprintf(&str, cpuset);
printf("Binding thread %d to cpuset %s\n", id,str);
printf("Binding thread %d to cpuset %s\n", my_core,str);
FREE(str);
*/
@ -81,8 +87,8 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){
int error = errno;
hwloc_bitmap_asprintf(&str, obj->cpuset);
if(verbose_level>=WARNING)
fprintf(stderr,"%d Couldn't bind to cpuset %s: %s\n", id, str, strerror(error));
FREE(str);
printf("Thread %d couldn't bind to cpuset %s: %s.\n This thread is not bound to any core...\n", my_core, str, strerror(error));
free(str); /* str is allocated by hlwoc, free it normally*/
return 0;
}
/* FREE our cpuset copy */
@ -90,7 +96,7 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){
return 1;
}else{
if(verbose_level>=WARNING)
fprintf(stderr,"No valid object for core id %d!\n",id);
printf("No valid object for core id %d!\n",my_core);
return 0;
}
}
@ -161,6 +167,7 @@ void wait_work_completion(work_t *work){
int submit_work(work_t *work, int thread_id){
if( (thread_id>=0) && (thread_id< pool->nb_threads)){
work->thread_id = thread_id;
add_work(&pool->list_lock[thread_id], &pool->cond_var[thread_id], &pool->working_list[thread_id], work);
return 1;
}
@ -171,11 +178,11 @@ thread_pool_t *create_threads(){
hwloc_topology_t topology;
int i;
local_thread_t *local;
int nb_cores;
int nb_threads;
unsigned int nb_cores;
int depth;
verbose_level = get_verbose_level();
verbose_level = tm_get_verbose_level();
/*Get number of cores: set 1 thread per core*/
/* Allocate and initialize topology object. */
@ -187,7 +194,7 @@ thread_pool_t *create_threads(){
depth = hwloc_topology_get_depth(topology);
if (depth == -1 ) {
if(verbose_level>=CRITICAL)
fprintf(stderr,"Error: topology with unknown depth\n");
fprintf(stderr,"Error: HWLOC unable to find the depth of the topology of this node!\n");
exit(-1);
}
@ -195,19 +202,23 @@ thread_pool_t *create_threads(){
/* at depth 'depth' it is necessary a PU/core where we can execute things*/
nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1);
nb_threads = MIN(nb_cores, max_nb_threads);
if(verbose_level>=INFO)
printf("nb_threads = %d\n",nb_threads);
pool = (thread_pool_t*) MALLOC(sizeof(thread_pool_t));
pool -> topology = topology;
pool -> nb_threads = nb_cores;
pool -> thread_list = (pthread_t*)MALLOC(sizeof(pthread_t)*nb_cores);
pool -> working_list = (work_t*)CALLOC(nb_cores,sizeof(work_t));
pool -> cond_var = (pthread_cond_t*)MALLOC(sizeof(pthread_cond_t)*nb_cores);
pool -> list_lock = (pthread_mutex_t*)MALLOC(sizeof(pthread_mutex_t)*nb_cores);
pool -> nb_threads = nb_threads;
pool -> thread_list = (pthread_t*)MALLOC(sizeof(pthread_t)*nb_threads);
pool -> working_list = (work_t*)CALLOC(nb_threads,sizeof(work_t));
pool -> cond_var = (pthread_cond_t*)MALLOC(sizeof(pthread_cond_t)*nb_threads);
pool -> list_lock = (pthread_mutex_t*)MALLOC(sizeof(pthread_mutex_t)*nb_threads);
local=(local_thread_t*)MALLOC(sizeof(local_thread_t)*nb_cores);
local=(local_thread_t*)MALLOC(sizeof(local_thread_t)*nb_threads);
pool->local = local;
for (i=0;i<nb_cores;i++){
for (i=0;i<nb_threads;i++){
local[i].topology = topology;
local[i].id = i;
local[i].working_list = &pool->working_list[i];
@ -245,11 +256,12 @@ void terminate_thread_pool(){
for (id=0;id<pool->nb_threads;id++){
pthread_join(pool->thread_list[id],(void **) &ret);
FREE(ret);
pthread_cond_destroy(pool->cond_var +id);
pthread_mutex_destroy(pool->list_lock +id);
if (pool->working_list[id].next != NULL)
if(verbose_level >= WARNING)
fprintf(stderr,"Working list of thread %d not empty!\n",id);
printf("Working list of thread %d not empty!\n",id);
}
hwloc_topology_destroy(pool->topology);
@ -272,7 +284,7 @@ int get_nb_threads(){
}
work_t *create_work(int nb_args, void **args, void (*task) (int, void **)){
work_t *create_work(int nb_args, void **args, void (*task) (int, void **, int)){
work_t *work;
work = MALLOC(sizeof(work_t));
work -> nb_args = nb_args;
@ -293,6 +305,34 @@ void destroy_work(work_t *work){
FREE(work);
}
/* CODE example 2 functions and test driver*/
void f1 (int nb_args, void **args, int thread_id){
int a, b;
a = *(int*)args[0];
b = *(int*)args[1];
printf("id: %d, nb_args=%d, a=%d, b=%d\n",thread_id, nb_args,a,b);
}
void f2 (int nb_args, void **args, int thread_id){
int n, *tab;
int *res;
int i,j;
n = *(int*)args[0];
tab = (int*)args[1];
res=(int*)args[2];
for(j=0;j<1000000;j++){
*res=0;
for (i=0;i<n;i++)
*res+=tab[i];
}
printf("id: %d, done: %d!\n",thread_id, nb_args);
}
int test_main(void){

Просмотреть файл

@ -2,17 +2,18 @@
#define THREAD_POOL_H
#include <pthread.h>
#include "opal/mca/hwloc/hwloc-internal.h"
#include <hwloc.h>
typedef struct _work_t{
int nb_args;
void (*task)(int nb_args, void **args);
void (*task)(int nb_args, void **args, int thread_id);
void **args;
struct _work_t *next;
pthread_cond_t work_done;
pthread_mutex_t mutex;
int done;
int thread_id;
}work_t;
typedef struct {
@ -38,8 +39,10 @@ int get_nb_threads(void);
int submit_work(work_t *work, int thread_id);
void wait_work_completion(work_t *work);
void terminate_thread_pool(void);
work_t *create_work(int nb_args, void **args, void (int, void **));
work_t *create_work(int nb_args, void **args, void (int, void **, int));
int test_main(void);
#endif /* THREAD_POOL_H */

Просмотреть файл

@ -12,6 +12,7 @@ void get_time(void)
CLOCK(time_tab[clock_num]);
}
double time_diff(void)
{
CLOCK_T t2,t1;
@ -22,7 +23,7 @@ double time_diff(void)
}
if(clock_num < 0){
return -1.0;
return -2.0;
}
CLOCK(t2);

Просмотреть файл

@ -1,4 +1,3 @@
#ifndef TIMINGS_H
#define TIMINGS_H
#include <stdio.h>

Просмотреть файл

@ -0,0 +1,842 @@
#include <hwloc.h>
#include <hwloc/helper.h>
#include "tm_tree.h"
#include "tm_mapping.h"
#include <ctype.h>
#include "tm_verbose.h"
#include "tm_solution.h"
tm_topology_t* get_local_topo_with_hwloc(void);
tm_topology_t* hwloc_to_tm(char *filename);
int int_cmp_inc(const void* x1,const void* x2);
void optimize_arity(int **arity, double **cost, int *nb_levels,int n);
int symetric(hwloc_topology_t topology);
tm_topology_t * tgt_to_tm(char *filename);
void tm_display_arity(tm_topology_t *topology);
void tm_display_topology(tm_topology_t *topology);
void tm_free_topology(tm_topology_t *topology);
tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type);
void tm_optimize_topology(tm_topology_t **topology);
int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology);
int topo_nb_proc(hwloc_topology_t topology,int N);
void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels);
void topology_constraints_cpy(tm_topology_t *topology,int **constraints,int *nb_constraints);
void topology_cost_cpy(tm_topology_t *topology,double **cost);
void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_nodes);
double ** topology_to_arch(hwloc_topology_t topology);
void build_synthetic_proc_id(tm_topology_t *topology);
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
#define LINE_SIZE (1000000)
/* transform a tgt scotch file into a topology file*/
/* Parse a Scotch "tleaf" architecture file and build the corresponding
   synthetic treematch topology.  The first line of the file must look like:
       tleaf <nb_levels> <arity_0> <cost_0> ... <arity_n-1> <cost_n-1>
   Costs are aggregated bottom-up so that cost[l] is the total cost of
   communicating across level l.  Exits on any I/O or syntax error.
   Fixes vs previous version: the fgets() result was unchecked (an empty
   file left `line` indeterminate before strstr — UB) and strtok() results
   were fed to atoi() without a NULL check. */
tm_topology_t * tgt_to_tm(char *filename)
{
  tm_topology_t *topology = NULL;
  FILE *pf = NULL;
  char line[1024];
  char *s = NULL;
  char *tok = NULL;
  double *cost = NULL;
  int i;

  pf = fopen(filename,"r");
  if(!pf){
    if(tm_get_verbose_level() >= CRITICAL)
      fprintf(stderr,"Cannot open %s\n",filename);
    exit(-1);
  }

  if(tm_get_verbose_level() >= INFO)
    printf("Reading TGT file: %s\n",filename);

  /* the whole tleaf description sits on the first line of the file */
  if(!fgets(line,1024,pf)){
    if(tm_get_verbose_level() >= CRITICAL)
      fprintf(stderr,"Cannot read from %s\n",filename);
    fclose(pf);
    exit(-1);
  }
  fclose(pf);

  s = strstr(line,"tleaf");
  if(!s){
    if(tm_get_verbose_level() >= CRITICAL)
      fprintf(stderr,"Syntax error! %s is not a tleaf file\n",filename);
    exit(-1);
  }

  s += 5;
  while(isspace(*s))
    s++;

  topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
  topology->nb_constraints = 0;
  topology->oversub_fact = 1;
  topology->constraints = NULL;

  tok = strtok(s," ");
  if(!tok){
    if(tm_get_verbose_level() >= CRITICAL)
      fprintf(stderr,"Syntax error! %s: missing level count\n",filename);
    exit(-1);
  }
  topology->nb_levels = atoi(tok)+1;
  topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
  cost = (double*)CALLOC(topology->nb_levels,sizeof(double));

  for( i = 0 ; i < topology->nb_levels-1 ; i++ ){
    char *arity_str = strtok(NULL," ");
    char *cost_str  = strtok(NULL," ");
    if(!arity_str || !cost_str){
      if(tm_get_verbose_level() >= CRITICAL)
        fprintf(stderr,"Syntax error! %s: truncated tleaf description\n",filename);
      exit(-1);
    }
    topology->arity[i] = atoi(arity_str);
    /* NOTE(review): costs are parsed with atoi, so fractional costs are
       truncated — confirm tleaf costs are always integral. */
    cost[i] = atoi(cost_str);
  }

  topology->arity[topology->nb_levels-1] = 0;
  /* cost[topology->nb_levels-1]=0; */

  /* aggregate costs: crossing level i also incurs the cost of all deeper levels */
  for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
    cost[i] += cost[i+1];

  build_synthetic_proc_id(topology);

  if(tm_get_verbose_level() >= INFO)
    printf("Topology built from %s!\n",filename);

  topology->cost=cost;
  return topology;
}
/* Return the number of processing units reachable from the first PU of
   the hwloc topology: 1 (the PU itself) plus the objects hwloc reports
   as closest to it, bounded by N. */
int topo_nb_proc(hwloc_topology_t topology,int N)
{
  hwloc_obj_t *objs = NULL;
  int nb_proc;

  objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*N);
  objs[0] = hwloc_get_next_obj_by_type(topology,HWLOC_OBJ_PU,NULL);
  /* hwloc fills objs[1..N-1] and returns how many it stored */
  nb_proc = 1 + hwloc_get_closest_objs(topology,objs[0],objs+1,N-1);
  FREE(objs);
  return nb_proc;
}
/* Cost weight of a link at the given depth in the tree: the deeper the
   link, the cheaper the communication (halving at each level).
   Fix vs previous version: the 11-entry table was indexed with the raw
   depth, so a topology deeper than the table (or a negative depth) read
   out of bounds; the index is now clamped to the table. */
double link_cost(int depth)
{
  /*
    Values used in earlier experiments:
    Bertha values
    double tab[5]={21,9,4.5,2.5,0.001};
    double tab[5]={1,1,1,1,1};
    double tab[6]={100000,10000,1000,500,100,10};
  */
  static const double tab[11] = {1024,512,256,128,64,32,16,8,4,2,1};
  const int last = (int)(sizeof(tab)/sizeof(tab[0])) - 1;

  if(depth < 0)
    depth = 0;       /* defensive: never index before the table */
  else if(depth > last)
    depth = last;    /* very deep levels all get the cheapest cost */

  return tab[depth];
  /*
    return 10*log(depth+2);
    return (depth+1);
    return (long int)pow(100,depth);
  */
}
/* Build an nb_proc x nb_proc architecture matrix from a hwloc topology:
   arch[p][q] is the link cost between PUs p and q (rows/columns indexed
   by os_index), derived from the depth of their common ancestor. */
double ** topology_to_arch(hwloc_topology_t topology)
{
  int nb_proc,i,j;
  hwloc_obj_t obj_proc1,obj_proc2,obj_res;
  double **arch = NULL;

  nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
  arch = (double**)MALLOC(sizeof(double*)*nb_proc);
  for( i = 0 ; i < nb_proc ; i++ ){
    obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
    /* rows are indexed by os_index, not by the hwloc logical index i */
    arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc);
    for( j = 0 ; j < nb_proc ; j++ ){
      obj_proc2 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,j);
      obj_res = hwloc_get_common_ancestor_obj(topology,obj_proc1,obj_proc2);
      /* printf("arch[%d][%d] <- %ld\n",obj_proc1->os_index,obj_proc2->os_index,*((long int*)(obj_res->userdatab))); */
      /* the deeper the common ancestor, the cheaper the link */
      arch[obj_proc1->os_index][obj_proc2->os_index]=link_cost(obj_res->depth+1);
    }
  }
  return arch;
}
/* Return 1 if, at every depth, all hwloc nodes have the same arity
   (i.e. the tree is balanced/symmetric), 0 otherwise. */
int symetric(hwloc_topology_t topology)
{
  int level, k;
  int max_depth = hwloc_topology_get_depth(topology);

  for (level = 0; level < max_depth - 1; level++) {
    int nb_objs = hwloc_get_nbobjs_by_depth(topology, level);
    hwloc_obj_t node = hwloc_get_next_obj_by_depth(topology, level, NULL);
    unsigned int ref_arity = node->arity;

    /* every remaining node of this level must match the first one */
    for (k = 1; k < nb_objs; k++) {
      node = hwloc_get_next_obj_by_depth(topology, level, node);
      if (node->arity != ref_arity)
        return 0;
    }
  }
  return 1;
}
/*
  Build a TreeMatch topology from an hwloc XML topology file.
  The hwloc tree must be symmetric (same arity for all nodes of a level),
  otherwise the program aborts.  The returned topology owns all its arrays
  and must be released with tm_free_topology().
*/
tm_topology_t* hwloc_to_tm(char *filename)
{
  hwloc_topology_t topology;
  tm_topology_t *res = NULL;
  hwloc_obj_t *objs = NULL;
  unsigned topodepth,depth;
  unsigned int nb_nodes;
  double *cost;
  int err, l;
  unsigned int i;
  int vl = tm_get_verbose_level();

  /* Build the hwloc topology from the XML file */
  hwloc_topology_init(&topology);
  err = hwloc_topology_set_xml(topology,filename);
  if(err == -1){
    if(vl >= CRITICAL)
      fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename);
    exit(-1);
  }
  hwloc_topology_ignore_all_keep_structure(topology);
  hwloc_topology_load(topology);

  /* TreeMatch only handles symmetric trees */
  if(!symetric(topology)){
    if(tm_get_verbose_level() >= CRITICAL)
      fprintf(stderr,"%s not symetric!\n",filename);
    exit(-1);
  }

  topodepth = hwloc_topology_get_depth(topology);

  res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
  res->oversub_fact = 1;      /* no oversubscribing by default */
  res->nb_constraints = 0;
  res->constraints = NULL;
  res->nb_levels = topodepth;
  res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
  res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels);
  res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
  res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);

  if(vl >= INFO)
    printf("topodepth = %d\n",topodepth);

  /* Build the TreeMatch levels, one per hwloc depth */
  for( depth = 0 ; depth < topodepth ; depth++ ){
    nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
    res->nb_nodes[depth] = nb_nodes;
    res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
    res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);

    objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
    objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
    hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
    res->arity[depth] = objs[0]->arity;

    if (depth == topodepth -1){
      res->nb_constraints = nb_nodes;
      res->nb_proc_units = nb_nodes;
    }

    if(vl >= DEBUG)
      printf("\n--%d(%d) **%d**:--\n",res->arity[depth],nb_nodes,res->arity[0]);

    /* Build the id <-> rank tables.
       node_rank[depth] holds nb_nodes entries and is indexed by os_index,
       so any os_index >= nb_nodes would write out of bounds.
       Bug fix: the original test used '>' which let os_index == nb_nodes
       through; use '>=' instead. */
    for (i = 0; i < nb_nodes; i++){
      if(objs[i]->os_index >= nb_nodes){
        if(vl >= CRITICAL){
          fprintf(stderr, "Index of object %d of level %d is %d and larger than number of nodes : %d\n",
                  i, depth, objs[i]->os_index, nb_nodes);
        }
        exit(-1);
      }
      res->node_id[depth][i] = objs[i]->os_index;
      res->node_rank[depth][objs[i]->os_index] = i;
    }
    FREE(objs);
  }

  /* Per-level costs come from the static link_cost table */
  cost = (double*)CALLOC(res->nb_levels,sizeof(double));
  for(l=0; l<res->nb_levels; l++){
    cost[l] = link_cost(l);
  }
  res->cost = cost;

  /* The hwloc object is no longer needed */
  hwloc_topology_destroy(topology);
  if(tm_get_verbose_level() >= INFO)
    printf("\n");
  return res;
}
tm_topology_t* get_local_topo_with_hwloc(void)
{
hwloc_topology_t topology;
tm_topology_t *res = NULL;
hwloc_obj_t *objs = NULL;
unsigned topodepth,depth;
int nb_nodes,i;
/* Build the topology */
hwloc_topology_init(&topology);
hwloc_topology_ignore_all_keep_structure(topology);
hwloc_topology_load(topology);
/* Test if symetric */
if(!symetric(topology)){
if(tm_get_verbose_level() >= CRITICAL)
fprintf(stderr,"Local toplogy not symetric!\n");
exit(-1);
}
/* work on depth */
topodepth = hwloc_topology_get_depth(topology);
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
res->nb_constraints = 0;
res->constraints = NULL;
res->nb_levels = topodepth;
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
/* Build TreeMatch topology */
for( depth = 0 ; depth < topodepth ; depth++ ){
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
res->nb_nodes[depth] = nb_nodes;
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
res->arity[depth] = objs[0]->arity;
if (depth == topodepth -1){
res->nb_constraints = nb_nodes;
res->nb_proc_units = nb_nodes;
}
/* printf("%d:",res->arity[depth]); */
/* Build process id tab */
for (i = 0; i < nb_nodes; i++){
res->node_id[depth][i] = objs[i]->os_index;
res->node_rank[depth][objs[i]->os_index] = i;
/* if(depth==topodepth-1) */
}
FREE(objs);
}
/* Destroy HWLOC topology object. */
hwloc_topology_destroy(topology);
/* printf("\n"); */
return res;
}
/* Release every array owned by a tm_topology_t, then the structure itself.
   FREE(NULL) is safe, so absent constraints need no special case. */
void tm_free_topology(tm_topology_t *topology)
{
  int level;

  for (level = 0; level < topology->nb_levels; level++) {
    FREE(topology->node_id[level]);
    FREE(topology->node_rank[level]);
  }

  FREE(topology->constraints);
  FREE(topology->node_id);
  FREE(topology->node_rank);
  FREE(topology->nb_nodes);
  FREE(topology->arity);
  FREE(topology->cost);
  FREE(topology);
}
/* Load a topology from a file, dispatching on the file type.
   Aborts on an unknown file type. */
tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type){
  if (arch_file_type == TM_FILE_TYPE_TGT)
    return tgt_to_tm(arch_filename);
  if (arch_file_type == TM_FILE_TYPE_XML)
    return hwloc_to_tm(arch_filename);

  if (tm_get_verbose_level() >= ERROR){
    fprintf(stderr,"Error loading topology. Filetype %d unknown\n", arch_file_type);
  }
  exit(-1);
}
/* Print the node ids of every level, the ranks of the last level, the
   constraints (if any) and a summary of the topology scalar fields. */
void tm_display_topology(tm_topology_t *topology)
{
  int depth;
  unsigned int k;
  unsigned long r, last_size;

  for (depth = 0; depth < topology->nb_levels; depth++) {
    printf("%d: ", depth);
    for (k = 0; k < topology->nb_nodes[depth]; k++)
      printf("%d ", topology->node_id[depth][k]);
    printf("\n");
  }

  /* with oversubscribing the last level repeats each unit oversub_fact
     times; only print each unit once */
  last_size = topology->nb_nodes[topology->nb_levels-1] / topology->oversub_fact;
  printf("Last level: ");
  for (r = 0; r < last_size; r++)
    printf("%d ", topology->node_rank[topology->nb_levels-1][r]);
  printf("\n");

  if (topology->constraints) {
    printf("Constraints: ");
    for (depth = 0; depth < topology->nb_constraints; depth++)
      printf("%d ", topology->constraints[depth]);
    printf("\n");
  }

  printf("\tnb_levels=%d\n\tnb_constraints=%d\n\toversub_fact=%d\n\tnb proc units=%d\n\n",
         topology->nb_levels, topology->nb_constraints, topology->oversub_fact, topology->nb_proc_units);
}
/* Print "arity(cost): " for every level of the topology on one line. */
void tm_display_arity(tm_topology_t *topology){
  int lvl = 0;
  while (lvl < topology->nb_levels) {
    printf("%d(%lf): ", topology->arity[lvl], topology->cost[lvl]);
    lvl++;
  }
  printf("\n");
}
/*
  qsort() comparator for ints in increasing order.
  Bug fix: the original returned 1 for equal keys, so cmp(a,b) and
  cmp(b,a) could both be positive — qsort requires an antisymmetric
  comparator (cmp(a,b) == -cmp(b,a)), and violating it is undefined
  behavior.  Return <0, 0 or >0 instead.
*/
int int_cmp_inc(const void* x1,const void* x2)
{
  int a = *(const int *)x1;
  int b = *(const int *)x2;
  return (a > b) - (a < b);
}
/*
  Check that every constraint id is a valid node id of the last level of
  the topology.
  Returns 1 when all constraints are valid, 0 otherwise (after logging the
  offending id when verbosity allows).
  Membership is tested with in_tab(), defined elsewhere in the library.
*/
int topo_check_constraints(tm_topology_t *topology){
  int n = topology->nb_constraints;
  int i;
  int depth = topology->nb_levels-1; /* constraints apply to the leaf level */
  for (i=0;i<n;i++){
    if(!in_tab(topology->node_id[depth], topology->nb_nodes[depth], topology->constraints[i])){
      if(tm_get_verbose_level() >= CRITICAL){
        fprintf(stderr,"Error! Incompatible constraint with the topology: rank %d in the constraints is not a valid id of any nodes of the topology.\n",topology->constraints[i]);
      }
      return 0;
    }
  }
  return 1;
}
/*
  Attach binding constraints to a topology.
  cpy_flag == 1: the array is duplicated (call from the application level).
  cpy_flag == 0: the topology takes ownership of the given array
                 (call from inside the library).
  Returns 1 if the constraints are compatible with the topology, 0 otherwise.
*/
int tm_topology_set_binding_constraints_cpy(int *constraints, int nb_constraints, tm_topology_t *topology, int cpy_flag){
  topology->nb_constraints = nb_constraints;
  if (!cpy_flag) {
    topology->constraints = constraints;
  } else {
    size_t nbytes = nb_constraints * sizeof(int);
    topology->constraints = (int*)MALLOC(nbytes);
    memcpy(topology->constraints, constraints, nbytes);
  }
  return topo_check_constraints(topology);
}
/* Public entry point: always duplicates the constraints array, so the
   caller keeps ownership of its buffer. */
int tm_topology_set_binding_constraints(int *constraints, int nb_constraints, tm_topology_t *topology){
  const int copy_array = 1;
  return tm_topology_set_binding_constraints_cpy(constraints, nb_constraints, topology, copy_array);
}
/*
  Read binding constraints from a file and attach them to the topology.
  The file is expected to hold a single line of whitespace-separated
  integer node ids; they are sorted increasingly before being stored.
  Returns the result of the constraint/topology compatibility check.
  Aborts the program on I/O or format errors.
*/
int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology)
{
  int *tab = NULL;
  FILE *pf = NULL;
  char line[LINE_SIZE],*l = NULL;
  char *ptr = NULL;
  int i,n;
  unsigned int vl = tm_get_verbose_level();

  if (!(pf = fopen(constraints_filename,"r"))) {
    if(vl >= CRITICAL)
      fprintf(stderr,"Cannot open %s\n",constraints_filename);
    exit(-1);
  }

  /* First pass: count the tokens so the array can be sized.
     Bug fix: the return value of fgets was ignored, so an empty file
     led to tokenizing an uninitialized buffer. */
  if (!fgets(line, LINE_SIZE, pf)) {
    if(vl >= CRITICAL)
      fprintf(stderr,"Cannot read %s\n",constraints_filename);
    exit(-1);
  }
  n = 0;
  l = line;
  while((ptr = strtok(l," \t"))){
    l = NULL; /* subsequent strtok calls continue on the same line */
    if((ptr[0] != '\n') && (!isspace(ptr[0])) && (*ptr))
      n++;
  }

  tab = (int*)MALLOC(n*sizeof(int));

  /* Second pass: strtok destroyed the buffer, so re-read the line. */
  rewind(pf);
  if (!fgets(line, LINE_SIZE, pf)) {
    if(vl >= CRITICAL)
      fprintf(stderr,"Cannot read %s\n",constraints_filename);
    exit(-1);
  }
  fclose(pf);

  l = line;
  i = 0;
  while((ptr = strtok(l," \t"))){
    l = NULL;
    if((ptr[0] != '\n') && (!isspace(ptr[0])) && (*ptr)){
      if(i < n)
        tab[i] = atoi(ptr);
      else{
        if(vl >= CRITICAL)
          fprintf(stderr, "More than %d entries in %s\n", n, constraints_filename);
        exit(-1);
      }
      i++;
    }
  }

  if( i != n ){
    if(vl >= CRITICAL)
      fprintf(stderr, "Read %d entries while expecting %d ones\n", i, n);
    exit(-1);
  }

  /* Sort the ids; the topology takes ownership of tab (cpy_flag = 0). */
  qsort(tab,n,sizeof(int),int_cmp_inc);
  return tm_topology_set_binding_constraints_cpy(tab, n, topology, 0);
}
/* Copy the node ids of the last level (the numbering of the computing
   units) into a freshly allocated array *numbering of size *nb_nodes. */
void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_nodes)
{
  int last = topology->nb_levels - 1;
  size_t nbytes;
  unsigned int vl = tm_get_verbose_level();

  *nb_nodes = topology->nb_nodes[last];
  if (vl >= INFO)
    printf("nb_nodes=%d\n", *nb_nodes);

  nbytes = sizeof(int) * (*nb_nodes);
  *numbering = (int*)MALLOC(nbytes);
  memcpy(*numbering, topology->node_id[last], nbytes);
}
/* Duplicate the arity array of the topology; also returns its length. */
void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels)
{
  size_t nbytes = sizeof(int) * topology->nb_levels;
  *nb_levels = topology->nb_levels;
  *arity = (int*)MALLOC(nbytes);
  memcpy(*arity, topology->arity, nbytes);
}
/* Duplicate the constraints array of the topology.
   *constraints is set to NULL when the topology has none. */
void topology_constraints_cpy(tm_topology_t *topology,int **constraints,int *nb_constraints)
{
  *nb_constraints = topology->nb_constraints;
  *constraints = NULL;
  if (topology->constraints) {
    size_t nbytes = sizeof(int) * (*nb_constraints);
    *constraints = (int*)MALLOC(nbytes);
    memcpy(*constraints, topology->constraints, nbytes);
  }
}
/* Duplicate the per-level cost array of the topology. */
void topology_cost_cpy(tm_topology_t *topology,double **cost)
{
  size_t nbytes = sizeof(double) * topology->nb_levels;
  *cost = (double*)MALLOC(nbytes);
  memcpy(*cost, topology->cost, nbytes);
}
/*
  Recursively decompose the arity of level n of a topology description.
  When the arity a of level n is a strict multiple of 3 (resp. 2), the
  level is split into a level of arity 3 (resp. 2) followed by a level of
  arity a/3 (resp. a/2), both inheriting the original level's cost; the
  arrays are reallocated one entry longer and *nb_levels is incremented.
  Processing walks from level n up to level 0 (n < 0 terminates).
  *arity and *cost are replaced in place; the old arrays are freed.
  NOTE(review): the two branches below are identical except for the
  constant 3 vs 2 — a candidate for factoring into one helper.
*/
void optimize_arity(int **arity, double **cost, int *nb_levels,int n)
{
  int a,i;
  int *new_arity = NULL;
  double *new_cost = NULL;

  if( n < 0 ) /* walked past the root: nothing left to decompose */
    return;
  a = (*arity)[n];
  if( (a%3 == 0) && (a > 3) ){
    /* arity divisible by 3 (and > 3): insert a new level */
    (*nb_levels)++;
    /* Build the enlarged arity and cost arrays */
    new_arity = (int*)MALLOC(sizeof(int)*(*nb_levels));
    new_cost = (double*)MALLOC(sizeof(double)*(*nb_levels));
    /* Copy the beginning of the old arrays (levels above n) */
    for( i = 0 ; i < n ; i++){
      new_arity[i] = (*arity)[i];
      new_cost[i] = (*cost)[i];
    }
    /* level n gets arity 3 ... */
    new_arity[n] = 3;
    /* ... and keeps the original level's cost */
    new_cost[n] = (*cost)[n];;
    /* level n+1 gets the remaining factor a/3 */
    new_arity[n+1] = a/3;
    /* duplicate the cost: both halves come from the same original level */
    new_cost[n+1] = (*cost)[n];
    /* Copy the tail; *nb_levels was already incremented, hence the i-1
       when reading the old arrays */
    for( i = n+2 ; i < *nb_levels ; i++){
      new_arity[i] = (*arity)[i-1];
      new_cost[i] = (*cost)[i-1];
    }
    FREE(*arity);
    FREE(*cost);
    /* if a/3 == 3, level n+1 needs no further split: resume at level n;
       otherwise keep splitting level n+1 (we just inserted a level) */
    if(new_arity[n+1] == 3)
      optimize_arity(&new_arity,&new_cost,nb_levels,n);
    else
      optimize_arity(&new_arity,&new_cost,nb_levels,n+1);
    *arity=new_arity;
    *cost=new_cost;
  }else if( (a%2==0) && (a>2) ){ /* same scheme for a factor of 2 */
    (*nb_levels)++;
    new_arity = (int*)MALLOC(sizeof(int)*(*nb_levels));
    new_cost = (double*)MALLOC(sizeof(double)*(*nb_levels));
    for( i = 0 ; i < n ; i++ ){
      new_arity[i] = (*arity)[i];
      new_cost[i] = (*cost)[i];
    }
    new_arity[n] = 2;
    new_cost[n] = (*cost)[n];;
    new_arity[n+1] = a/2;
    new_cost[n+1] = (*cost)[n];
    for( i = n+2 ; i < *nb_levels ; i++ ){
      new_arity[i] = (*arity)[i-1];
      new_cost[i] = (*cost)[i-1];
    }
    FREE(*arity);
    FREE(*cost);
    if(new_arity[n+1] == 2)
      optimize_arity(&new_arity, &new_cost, nb_levels, n);
    else
      optimize_arity(&new_arity, &new_cost, nb_levels, n+1);
    *arity = new_arity;
    *cost= new_cost;
  }else /* this level cannot be split: move one level up */
    optimize_arity(arity, cost, nb_levels,n-1);
}
/*
  Optimize a topology by recursively splitting levels whose arity is a
  multiple of 2 or 3 (see optimize_arity), then rebuilding a synthetic
  topology with the decomposed arities.
  The old topology is freed and *topology is replaced by the new one;
  constraints, costs, oversubscribing factor and processing-unit count
  are carried over.
*/
void tm_optimize_topology(tm_topology_t **topology){
  int *arity = NULL,nb_levels;
  int *numbering = NULL,nb_nodes;
  tm_topology_t *new_topo;
  double *cost;
  unsigned int vl = tm_get_verbose_level();
  int *constraints = NULL, nb_constraints;
  int i;

  if(vl >= DEBUG)
    tm_display_arity(*topology);

  /* Take private copies of the relevant arrays: they must survive the
     destruction of the old topology below. */
  topology_arity_cpy(*topology,&arity,&nb_levels);
  topology_numbering_cpy(*topology,&numbering,&nb_nodes);
  topology_constraints_cpy(*topology,&constraints,&nb_constraints);
  topology_cost_cpy(*topology,&cost);

  /* Decompose arities, starting from the last non-leaf level. */
  optimize_arity(&arity,&cost,&nb_levels,nb_levels-2);
  new_topo = tm_build_synthetic_topology(arity, NULL, nb_levels,numbering,nb_nodes);
  new_topo->cost = cost;               /* new_topo takes ownership of cost */
  new_topo->constraints = constraints; /* ... and of constraints */
  new_topo->nb_constraints = nb_constraints;
  new_topo->nb_proc_units = (*topology)->nb_proc_units;
  new_topo->oversub_fact = (*topology)->oversub_fact;

  if(vl >= DEBUG){
    if(constraints){
      printf("Constraints: ");
      for(i=0;i<nb_constraints;i++)
        printf("%d - ",constraints[i]);
      printf("\n");
    }
    tm_display_arity(new_topo);
  }
  FREE(arity);
  FREE(numbering);
  tm_free_topology(*topology);
  *topology = new_topo;
}
/*
  Build a synthetic balanced topology.
  arity            : arity of each of the nb_levels levels
  cost             : cost between consecutive levels (size nb_levels);
                     may be NULL, in which case no cost array is built
  core_numbering   : numbering of the cores by the system
                     (array of size nb_core_per_nodes)
  nb_core_per_nodes: number of cores of a given node (size of
                     core_numbering)
  Both arity and cost are copied.  Cores are numbered in round-robin
  fashion after a width-first traversal of the topology, e.g.:
    {0,1,2,3} becomes 0,1,2,3,4,5,6,7,...
    {0,2,1,3} becomes 0,2,1,3,4,6,5,7,...
*/
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes)
{
  tm_topology_t *topology = NULL;
  int i,j,n;

  topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
  topology->nb_constraints = 0;
  topology->oversub_fact = 1;
  topology->constraints = NULL;
  topology->nb_levels = nb_levels;
  topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
  topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
  topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
  topology->nb_nodes = (size_t *)MALLOC(sizeof(size_t)*topology->nb_levels);
  if(cost)
    topology->cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
  else
    topology->cost = NULL;

  memcpy(topology->arity, arity, sizeof(int)*nb_levels);
  if(cost)
    memcpy(topology->cost, cost, sizeof(double)*nb_levels);

  /* Level i holds n = prod(arity[0..i-1]) nodes */
  n = 1;
  for( i = 0 ; i < topology->nb_levels ; i++ ){
    topology->nb_nodes[i] = n;
    topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
    topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
    if( i < topology->nb_levels-1){
      /* inner levels: identity numbering */
      for( j = 0 ; j < n ; j++ ){
        topology->node_id[i][j] = j;
        topology->node_rank[i][j]=j;
      }
    }else{
      /* last level: apply the per-node core numbering, shifted by the
         node offset (round-robin over nb_core_per_nodes) */
      for( j = 0 ; j < n ; j++ ){
        int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
        topology->node_id[i][j] = id;
        topology->node_rank[i][id] = j;
      }
    }
    if (i == topology->nb_levels-1){
      topology->nb_constraints = n;
      topology->nb_proc_units = n;
    }
    n *= topology->arity[i];
  }
  if(cost){
    /* aggregate costs: each level's cost includes all levels below it */
    for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
      topology->cost[i] += topology->cost[i+1];
  }
  return topology;
}
/*
  Allocate and fill node_id / node_rank / nb_nodes for a synthetic
  (balanced) topology whose arity array is already set: level i holds
  prod(arity[0..i-1]) nodes numbered 0..n-1, so id and rank coincide.
  Also sets nb_constraints and nb_proc_units from the last level.
  Aborts on allocation failure.
*/
void build_synthetic_proc_id(tm_topology_t *topology)
{
  int i;
  size_t j,n = 1;

  topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
  topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
  topology->nb_nodes = (size_t*) MALLOC(sizeof(size_t)*topology->nb_levels);

  for( i = 0 ; i < topology->nb_levels ; i++ ){
    topology->nb_nodes[i] = n;
    /* bug fix: these arrays hold int but were allocated with
       sizeof(long int), over-allocating on LP64 platforms */
    topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
    topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
    /* check both allocations, not just node_id */
    if ( !topology->node_id[i] || !topology->node_rank[i] ){
      if(tm_get_verbose_level() >= CRITICAL)
        fprintf(stderr,"Cannot allocate level %d (of size %ld) of the topology\n", i, (unsigned long int)n);
      exit(-1);
    }
    if (i == topology->nb_levels-1){
      topology->nb_constraints = n;
      topology->nb_proc_units = n;
    }
    /* identity numbering: id and rank are equal on every level */
    for( j = 0 ; j < n ; j++ ){
      topology->node_id[i][j] = j;
      topology->node_rank[i][j] = j;
    }
    n *= topology->arity[i];
  }
}
/* Add one level at the bottom of the topology so that each former leaf
   hosts oversub_fact slots: the new level has arity oversub_fact, cost 0,
   and its slots reuse the id of the unit they belong to.
   A factor <= 1 is a no-op. */
void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact){
  int last, slot, nb_slots;

  if (oversub_fact <= 1)
    return;

  topology->nb_levels++;
  topology->arity     = (int*)    REALLOC(topology->arity,     sizeof(int)    * topology->nb_levels);
  topology->cost      = (double*) REALLOC(topology->cost,      sizeof(double) * topology->nb_levels);
  topology->node_id   = (int**)   REALLOC(topology->node_id,   sizeof(int*)   * topology->nb_levels);
  topology->node_rank = (int**)   REALLOC(topology->node_rank, sizeof(int*)   * topology->nb_levels);
  topology->nb_nodes  = (size_t*) REALLOC(topology->nb_nodes,  sizeof(size_t) * topology->nb_levels);
  topology->oversub_fact = oversub_fact;

  last = topology->nb_levels - 1;
  nb_slots = topology->nb_nodes[last-1] * oversub_fact;
  topology->arity[last-1] = oversub_fact;
  topology->cost[last-1] = 0;      /* intra-unit slots communicate at cost 0 */
  topology->node_id[last]   = (int*)MALLOC(sizeof(int)*nb_slots);
  topology->node_rank[last] = (int*)MALLOC(sizeof(int)*nb_slots);
  topology->nb_nodes[last] = nb_slots;

  for (slot = 0; slot < nb_slots; slot++) {
    int unit_id = topology->node_id[last-1][slot/oversub_fact];
    topology->node_id[last][slot] = unit_id;
    topology->node_rank[last][unit_id] = slot;
  }
}

Просмотреть файл

@ -0,0 +1,22 @@
#include <hwloc.h>
#include "tm_tree.h"
tm_topology_t* get_local_topo_with_hwloc(void);
tm_topology_t* hwloc_to_tm(char *filename);
int int_cmp_inc(const void* x1,const void* x2);
void optimize_arity(int **arity, double **cost, int *nb_levels,int n);
int symetric(hwloc_topology_t topology);
tm_topology_t * tgt_to_tm(char *filename);
void tm_display_arity(tm_topology_t *topology);
void tm_display_topology(tm_topology_t *topology);
void tm_free_topology(tm_topology_t *topology);
tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type);
void tm_optimize_topology(tm_topology_t **topology);
int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology);
int topo_nb_proc(hwloc_topology_t topology,int N);
void topology_arity(tm_topology_t *topology,int **arity,int *nb_levels);
void topology_constraints(tm_topology_t *topology,int **constraints,int *nb_constraints);
void topology_cost(tm_topology_t *topology,double **cost);
void topology_numbering(tm_topology_t *topology,int **numbering,int *nb_nodes);
double ** topology_to_arch(hwloc_topology_t topology);

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,69 +1,22 @@
#ifndef __TREE_H__
#define __TREE_H__
#ifndef __TM_TREE_H__
#define __TM_TREE_H__
#include <stdlib.h>
#include "treematch.h"
typedef struct _node_info_t{
int submit_date;
int job_id;
int finish_date;
} job_info_t;
typedef struct _tree_t{
int constraint; /* tells if the tree has been constructed with constraints on the nodes or not. usefull for freeing it. needs to be set on the root only*/
struct _tree_t **child;
struct _tree_t *parent;
struct _tree_t *tab_child; /*the pointer to be freed*/
double val;
int arity;
int depth;
int id;
int uniq;
int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately*/
job_info_t *job_info;
}tree_t;
/* Maximum number of levels in the tree*/
#define MAX_LEVELS 100
typedef struct {
int *arity; /* arity of the nodes of each level*/
int nb_levels; /*number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
int *nb_nodes; /*nb of nodes of each level*/
int *nb_free_nodes; /*nb of available nodes of each level*/
int **node_id; /*ID of the nodes of the tree for each level*/
int **free_nodes; /*ID of the nodes of the tree for each level*/
}tm_topology_t;
typedef struct {
double ** mat;
double * sum_row;
int order;
} affinity_mat_t;
tree_t * build_tree(double **tab,int N);
tree_t * build_tree_from_topology(tm_topology_t *topology,double **tab,int N, double *obj_weight, double *comm_speed);
void map_tree(tree_t *,tree_t*);
void update_val(tm_affinity_mat_t *aff_mat,tm_tree_t *parent);
void display_tab(double **tab,int N);
double speed(int depth);
void set_node(tree_t *node,tree_t ** child, int arity,tree_t *parent,int id,double val,tree_t *deb_tab_child, int depth);
void free_constraint_tree(tree_t *tree);
void free_tree(tree_t *tree);
void free_tab_double(double**tab,int N);
void free_tab_int(int**tab,int N);
void update_val(affinity_mat_t *aff_mat,tree_t *parent);
void FREE_tree(tree_t *tree);
void FREE_tab_double(double**,int);
void set_node(tm_tree_t *node,tm_tree_t ** child, int arity,tm_tree_t *parent,
int id,double val,tm_tree_t *tab_child,int depth);
typedef struct _group_list_t{
struct _group_list_t *next;
tree_t **tab;
tm_tree_t **tab;
double val;
double sum_neighbour;
double wg;
int id;
double *bound;
}group_list_t;
@ -74,21 +27,13 @@ typedef struct{
}adjacency_t;
/* for debugging malloc */
/* #define __DEBUG_MY_MALLOC__ */
#undef __DEBUG_MY_MALLOC__
#ifdef __DEBUG_MY_MALLOC__
#include "tm_malloc.h"
#define MALLOC(x) my_malloc(x,__FILE__,__LINE__)
#define CALLOC(x,y) my_calloc(x,y,__FILE__,__LINE__)
#define FREE my_free
#define MEM_CHECK my_mem_check
#else
#define MALLOC malloc
#define CALLOC calloc
#define FREE free
#define MEM_CHECK my_mem_check
#endif
typedef struct _work_unit_t{
int nb_groups;
int *tab_group;
int done;
int nb_work;
struct _work_unit_t *next;
}work_unit_t;
#endif

Просмотреть файл

@ -1,11 +1,34 @@
#include "tm_verbose.h"
#include <stdio.h>
static unsigned int verbose_level = ERROR;
static FILE *output = NULL;
void set_verbose_level(unsigned int level){
void tm_set_verbose_level(unsigned int level){
verbose_level = level;
}
unsigned int get_verbose_level(){
unsigned int tm_get_verbose_level(){
return verbose_level;
}
int tm_open_verbose_file(char *filename){
output = fopen(filename,"w");
if(output == NULL)
return 0;
else
return 1;
}
int tm_close_verbose_file(void){
if(output != NULL)
return fclose(output);
return 0;
}
FILE *tm_get_verbose_output(){
if(!output)
return stdout;
else
return output;
}

Просмотреть файл

@ -1,11 +1,22 @@
#include <stdio.h>
#define NONE 0
/* output in stderr*/
#define CRITICAL 1
#define ERROR 2
/* output in stdout*/
#define WARNING 3
#define INFO 4
#define DEBUG 5
void set_verbose_level(unsigned int level);
unsigned int get_verbose_level(void);
#define TIMING 4
#define INFO 5
#define DEBUG 6
/* return 0 on errror and 1 on success */
int tm_open_verbose_file(char *filename);
int tm_close_verbose_file(void);
void tm_set_verbose_level(unsigned int level);
unsigned int tm_get_verbose_level(void);
FILE * tm_get_verbose_output(void);
#define tm_verbose_printf(level, ...) level <= tm_get_verbose_level()?fprintf(tm_get_verbose_output(),__VA_ARGS__):0

188
ompi/mca/topo/treematch/treematch/treematch.h Обычный файл
Просмотреть файл

@ -0,0 +1,188 @@
#ifndef __TREEMATCH_H__
#define __TREEMATCH_H__
/* size_t definition */
#include <stddef.h>
#include "tm_verbose.h"
/********* TreeMatch Public Enum **********/
/*type of topology files that can be read*/
typedef enum{
TM_FILE_TYPE_UNDEF,
TM_FILE_TYPE_XML,
TM_FILE_TYPE_TGT
} tm_file_type_t;
/* different metrics to evaluate the solution */
typedef enum{
TM_METRIC_SUM_COM = 1,
TM_METRIC_MAX_COM = 2,
TM_METRIC_HOP_BYTE = 3
} tm_metric_t;
/********* TreeMatch Public Structures **********/
typedef struct _job_info_t{
int submit_date;
int job_id;
int finish_date;
} tm_job_info_t;
typedef struct _tree_t{
int constraint; /* tells if the tree has been constructed with constraints on the nodes or not.
Usefull for freeing it. needs to be set on the root only*/
struct _tree_t **child;
struct _tree_t *parent;
struct _tree_t *tab_child; /*the pointer to be freed*/
double val;
int arity;
int depth;
int id;
int uniq;
int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately*/
tm_job_info_t *job_info;
int nb_processes; /* number of grouped processes (i.e. the order of the affinity matrix). Set at the root only*/
}tm_tree_t; /* FT : changer le nom : tm_grouap_hierachy_t ?*/
/* Maximum number of levels in the tree*/
#define TM_MAX_LEVELS 100
typedef struct {
int *arity; /* arity of the nodes of each level*/
int nb_levels; /*number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
size_t *nb_nodes; /*nb of nodes of each level*/
int **node_id; /*ID of the nodes of the tree for each level*/
int **node_rank ; /*rank of the nodes of the tree for each level given its ID: this is the inverse tab of node_id*/
size_t *nb_free_nodes; /*nb of available nodes of each level*/
int **free_nodes; /*tab of node that are free: useful to simulate batch scheduler*/
double *cost; /*cost of the communication depending on the distance:
cost[i] is the cost for communicating at distance nb_levels-i*/
int *constraints; /* array of constraints: id of the nodes where it is possible to map processes */
int nb_constraints; /* Size of the above array */
int oversub_fact; /* maximum number of processes to be mapped on a given node */
int nb_proc_units; /* the real number of units used for computation */
}tm_topology_t;
typedef struct {
double ** mat;
double * sum_row;
int order;
} tm_affinity_mat_t;
/*
sigma_i is such that process i is mapped on core sigma_i
k_i is such that core i exectutes process k_i_j (0<=j<<=oversubscribing factor - 1)
size of sigma is the number of processes (nb_objs)
size of k is the number of cores/nodes (nb_compute_units)
size of k[i] is the number of process we can execute per nodes (1 if no oversubscribing)
We must have numbe of process<=number of cores
k[i] == NULL if no process is mapped on core i
*/
typedef struct {
int *sigma;
size_t sigma_length;
int **k;
size_t k_length;
int oversub_fact;
}tm_solution_t;
/************ TreeMatch Public API ************/
/* load XML or TGT topology */
tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type);
/*
Alternatively, build a synthetic balanced topology.
nb_levels : number of levels of the topology +1 (the last level must be of cost 0 and arity 0).
arity : array of arity of the first nb_level (of size nb_levels)
cost : array of costs between the levels (of size nb_levels)
core_numbering: numbering of the core by the system. Array of size nb_core_per_node
nb_core_per_nodes: number of cores of a given node. Size of the array core_numbering
both arity and cost are copied inside tm_build_synthetic_topology
The numbering of the cores is done in round robin fashion after a width traversal of the topology.
for example:
{0,1,2,3} becomes 0,1,2,3,4,5,6,7...
and
{0,2,1,3} becomes 0,2,1,3,4,6,5,7,...
Example of call to build the 128.tgt file: tleaf 4 16 500 2 100 2 50 2 10
double cost[5] = {500,100,50,10,0};
int arity[5] = {16,2,2,2,0};
int cn[5]={0,1};
topology = tm_build_synthetic_topology(arity,cost,5,cn,2);
*/
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
/* load affinity matrix */
tm_affinity_mat_t *tm_load_aff_mat(char *com_filename);
/*
Alternativelly, build the affinity matrix from a array of array of matrix of size order by order
For performance reason mat is not copied.
*/
tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order);
/* Add constraints to toplogy
Return 1 on success and 0 if the constari,ts id are not compatible withe nodes id */
int tm_topology_add_binding_constraints(char *bind_filename, tm_topology_t *topology);
/* Alternatively, set the constraints from an array.
Return 1 on success and 0 if the constari,ts id are not compatible withe nodes id
The array constraints is copied inside tm_topology_set_binding_constraints
*/
int tm_topology_set_binding_constraints(int *constraints, int nb_constraints, tm_topology_t *topology);
/* display arity of the topology */
void tm_display_arity(tm_topology_t *topology);
/* display the full topology */
void tm_display_topology(tm_topology_t *topology);
/* Optimize the topology by decomposing arities */
void tm_optimize_topology(tm_topology_t **topology);
/* Manage oversubscribing */
void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact);
/* core of the treematch: compute the solution tree */
tm_tree_t *tm_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, double *obj_weight, double *com_speed);
/* compute the mapping according to teh tree an dthe core numbering*/
tm_solution_t *tm_compute_mapping(tm_topology_t *topology, tm_tree_t *comm_tree);
/* display the solution*/
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol, tm_metric_t metric);
/* display RR, packed, MPIPP*/
void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric);
/* free TM strutures*/
void tm_free_topology(tm_topology_t *topology);
void tm_free_tree(tm_tree_t *comm_tree);
void tm_free_solution(tm_solution_t *sol);
void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
/* manage verbosity of TM*/
void tm_set_verbose_level(unsigned int level);
unsigned int tm_get_verbose_level(void);
/* finalize treematch :check memory if necessary, and free internal variables (thread pool)*/
void tm_finalize();
/*
Ask for exhaustive search: may be very long
new_val == 0 : no exhuative search
new_val != 0 : exhuative search
*/
void tm_set_exhaustive_search_flag(int new_val);
int tm_get_exhaustive_search_flag();
/* Setting the maximum number of threads you want to use in parallel parts of TreeMatch */
void tm_set_max_nb_threads(unsigned int val);
#include "tm_malloc.h"
#endif

Просмотреть файл

@ -22,7 +22,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UTHASH_H
#define UTHASH_H
#define UTHASH_H
#include <string.h> /* memcmp,strlen */
#include <stddef.h> /* ptrdiff_t */
@ -49,7 +49,7 @@ do {
char **_da_dst = (char**)(&(dst)); \
*_da_dst = (char*)(src); \
} while(0)
#else
#else
#define DECLTYPE_ASSIGN(dst,src) \
do { \
(dst) = DECLTYPE(dst)(src); \
@ -121,9 +121,9 @@ do {
HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
#else
#define HASH_BLOOM_MAKE(tbl)
#define HASH_BLOOM_FREE(tbl)
#define HASH_BLOOM_ADD(tbl,hashv)
#define HASH_BLOOM_MAKE(tbl)
#define HASH_BLOOM_FREE(tbl)
#define HASH_BLOOM_ADD(tbl,hashv)
#define HASH_BLOOM_TEST(tbl,hashv) (1)
#endif
@ -148,7 +148,7 @@ do {
#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
do { \
unsigned _ha_bkt; \
@ -300,10 +300,10 @@ do {
} \
} while (0)
#else
#define HASH_FSCK(hh,head)
#define HASH_FSCK(hh,head)
#endif
/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
* the descriptor to which this macro is defined for tuning the hash function.
* The app can #include <unistd.h> to get the prototype for write(2). */
#ifdef HASH_EMIT_KEYS
@ -313,12 +313,12 @@ do {
write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
write(HASH_EMIT_KEYS, keyptr, fieldlen); \
} while (0)
#else
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#else
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#endif
/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
#ifdef HASH_FUNCTION
#ifdef HASH_FUNCTION
#define HASH_FCN HASH_FUNCTION
#else
#define HASH_FCN HASH_JEN
@ -335,7 +335,7 @@ do {
} while (0)
/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
* http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
do { \
@ -356,7 +356,7 @@ do {
hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
bkt = hashv & (num_bkts-1); \
} while(0);
#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
do { \
unsigned _ho_i; \
@ -485,14 +485,14 @@ do {
#ifdef HASH_USING_NO_STRICT_ALIASING
/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
* For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
* MurmurHash uses the faster approach only on CPU's where we know it's safe.
* MurmurHash uses the faster approach only on CPU's where we know it's safe.
*
* Note the preprocessor built-in defines can be emitted using:
*
* gcc -m64 -dM -E - < /dev/null (on gcc)
* cc -## a.c (where a.c is a simple test file) (Sun Studio)
*/
#if (defined(__i386__) || defined(__x86_64__))
#if (defined(__i386__) || defined(__x86_64__))
#define MUR_GETBLOCK(p,i) p[i]
#else /* non intel */
#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
@ -562,7 +562,7 @@ do { \
#endif /* HASH_USING_NO_STRICT_ALIASING */
/* key comparison function; return 0 if keys equal */
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
/* iterate over items in a known bucket to find desired item */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
@ -603,36 +603,36 @@ do {
} \
if (hh_del->hh_next) { \
hh_del->hh_next->hh_prev = hh_del->hh_prev; \
}
}
/* Bucket expansion has the effect of doubling the number of buckets
* and redistributing the items into the new buckets. Ideally the
* items will distribute more or less evenly into the new buckets
* (the extent to which this is true is a measure of the quality of
* the hash function as it applies to the key domain).
*
* the hash function as it applies to the key domain).
*
* With the items distributed into more buckets, the chain length
* (item count) in each bucket is reduced. Thus by expanding buckets
* the hash keeps a bound on the chain length. This bounded chain
* the hash keeps a bound on the chain length. This bounded chain
* length is the essence of how a hash provides constant time lookup.
*
*
* The calculation of tbl->ideal_chain_maxlen below deserves some
* explanation. First, keep in mind that we're calculating the ideal
* maximum chain length based on the *new* (doubled) bucket count.
* In fractions this is just n/b (n=number of items,b=new num buckets).
* Since the ideal chain length is an integer, we want to calculate
* Since the ideal chain length is an integer, we want to calculate
* ceil(n/b). We don't depend on floating point arithmetic in this
* hash, so to calculate ceil(n/b) with integers we could write
*
*
* ceil(n/b) = (n/b) + ((n%b)?1:0)
*
*
* and in fact a previous version of this hash did just that.
* But now we have improved things a bit by recognizing that b is
* always a power of two. We keep its base 2 log handy (call it lb),
* so now we can write this with a bit shift and logical AND:
*
*
* ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
*
*
*/
#define HASH_EXPAND_BUCKETS(tbl) \
do { \
@ -684,7 +684,7 @@ do {
/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
/* Note that HASH_SORT assumes the hash handle name to be hh.
/* Note that HASH_SORT assumes the hash handle name to be hh.
* HASH_SRT was added to allow the hash handle name to be passed in. */
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
#define HASH_SRT(hh,head,cmpfcn) \
@ -766,10 +766,10 @@ do {
} \
} while (0)
/* This function selects items from one hash into another hash.
* The end result is that the selected items have dual presence
* in both hashes. There is no copy of the items made; rather
* they are added into the new hash through a secondary hash
/* This function selects items from one hash into another hash.
* The end result is that the selected items have dual presence
* in both hashes. There is no copy of the items made; rather
* they are added into the new hash through a secondary hash
* hash handle that must be present in the structure. */
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
do { \
@ -823,7 +823,7 @@ do {
#ifdef NO_DECLTYPE
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
#else
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
@ -831,7 +831,7 @@ for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL);
#endif
/* obtain a count of items in the hash */
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
typedef struct UT_hash_bucket {
@ -840,7 +840,7 @@ typedef struct UT_hash_bucket {
/* expand_mult is normally set to 0. In this situation, the max chain length
* threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
* the bucket's chain exceeds this length, bucket expansion is triggered).
* the bucket's chain exceeds this length, bucket expansion is triggered).
* However, setting expand_mult to a non-zero value delays bucket expansion
* (that would be triggered by additions to this particular bucket)
* until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
@ -848,7 +848,7 @@ typedef struct UT_hash_bucket {
* multiplier is to reduce bucket expansions, since they are expensive, in
* situations where we know that a particular bucket tends to be overused.
* It is better to let its chain length grow to a longer yet-still-bounded
* value, than to do an O(n) bucket expansion too often.
* value, than to do an O(n) bucket expansion too often.
*/
unsigned expand_mult;
@ -874,7 +874,7 @@ typedef struct UT_hash_table {
* hash distribution; reaching them in a chain traversal takes >ideal steps */
unsigned nonideal_items;
/* ineffective expands occur when a bucket doubling was performed, but
/* ineffective expands occur when a bucket doubling was performed, but
* afterward, more than half the items in the hash had nonideal chain
* positions. If this happens on two consecutive expansions we inhibit any
* further expansion, as it's not helping; this happens when the hash