diff --git a/ompi/mca/topo/treematch/Makefile.am b/ompi/mca/topo/treematch/Makefile.am index 6019a786e8..7411d30087 100644 --- a/ompi/mca/topo/treematch/Makefile.am +++ b/ompi/mca/topo/treematch/Makefile.am @@ -13,20 +13,25 @@ if topo_treematch_local extra_treematch_files = treematch/tm_bucket.h \ - treematch/tm_hwloc.h treematch/tm_mapping.h \ + treematch/tm_mapping.h \ treematch/tm_timings.h treematch/tm_tree.h \ treematch/tm_kpartitioning.h treematch/uthash.h\ treematch/IntConstantInitializedVector.h \ - treematch/tm_mt.h \ + treematch/tm_mt.h treematch/fibo.h \ treematch/tm_thread_pool.h treematch/tm_verbose.h \ - treematch/tm_malloc.h \ + treematch/tm_malloc.h treematch/k-partitioning.h\ + treematch/tm_solution.h treematch/tm_topology.h\ + treematch/PriorityQueue.h \ treematch/IntConstantInitializedVector.c \ - treematch/tm_mt.c \ + treematch/tm_mt.c treematch/fibo.c \ treematch/tm_thread_pool.c treematch/tm_verbose.c \ - treematch/tm_malloc.c \ + treematch/tm_malloc.c treematch/treematch.h \ treematch/tm_mapping.c treematch/tm_timings.c \ treematch/tm_bucket.c treematch/tm_tree.c \ - treematch/tm_hwloc.c treematch/tm_kpartitioning.c + treematch/tm_topology.c treematch/tm_kpartitioning.c \ + treematch/tm_solution.c treematch/k-partitioning.c \ + treematch/PriorityQueue.c +EXTRA_DIST = treematch/COPYING treematch/LICENSE endif sources = \ diff --git a/ompi/mca/topo/treematch/topo_treematch.h b/ompi/mca/topo/treematch/topo_treematch.h index bcc4d748bf..2a73118dec 100644 --- a/ompi/mca/topo/treematch/topo_treematch.h +++ b/ompi/mca/topo/treematch/topo_treematch.h @@ -70,7 +70,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* module, int n, const int nodes[], const int degrees[], const int targets[], const int weights[], - struct opal_info_t *info, int reorder, + struct ompi_info_t *info, int reorder, ompi_communicator_t **newcomm); /* * ****************************************************************** diff --git a/ompi/mca/topo/treematch/topo_treematch_component.c b/ompi/mca/topo/treematch/topo_treematch_component.c index 6062bf1ed3..80f631b830 100644 --- a/ompi/mca/topo/treematch/topo_treematch_component.c +++ b/ompi/mca/topo/treematch/topo_treematch_component.c @@ -62,6 +62,9 @@ mca_topo_treematch_component_2_2_0_t mca_topo_treematch_component = static int init_query(bool enable_progress_threads, bool enable_mpi_threads) { + if(NULL == opal_hwloc_topology) { + return OPAL_ERR_NOT_SUPPORTED; + } return OMPI_SUCCESS; } @@ -95,3 +98,4 @@ static int mca_topo_treematch_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_topo_treematch_component.reorder_mode); return OMPI_SUCCESS; } + diff --git a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c index 7f7574f4c9..daf406dc01 100644 --- a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c +++ b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c @@ -3,8 +3,8 @@ * Copyright (c) 2011-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2011-2015 INRIA. All rights reserved. - * Copyright (c) 2012-2015 Bordeaux Poytechnic Institute + * Copyright (c) 2011-2016 INRIA. All rights reserved. + * Copyright (c) 2012-2017 Bordeaux Poytechnic Institute * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
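Note on the hunks that follow: the reordering code in topo_treematch_dist_graph_create.c is ported from the old TreeMatch entry points (optimize_topology, build_tree_from_topology, map_topology_simple) to the new tm_-prefixed public API declared in treematch/treematch.h. Below is a minimal sketch of that new call sequence, using only functions and fields that appear in this patch; the helper name map_with_treematch is illustrative and the tm_topology_t construction (levels, arity, node_id/node_rank, constraints, oversub_fact) is elided, see the actual hunks for that setup.

/* Sketch only, not part of the patch. */
#include <stdlib.h>
#include "treematch.h"

static int *map_with_treematch(tm_topology_t *topology, double **comm_pattern, int size)
{
    tm_affinity_mat_t *aff_mat   = NULL;
    tm_tree_t         *comm_tree = NULL;
    tm_solution_t     *sol       = NULL;
    int *k, idx;

    tm_optimize_topology(&topology);                        /* same call order as the patch */
    aff_mat   = tm_build_affinity_mat(comm_pattern, size);  /* affinity matrix from the comm pattern */
    comm_tree = tm_build_tree_from_topology(topology, aff_mat, NULL, NULL);
    sol       = tm_compute_mapping(topology, comm_tree);

    /* sol->sigma (length sol->sigma_length) is the rank permutation;
     * sol->k gives, per processing unit, the ranks bound to it.
     * The patch keeps only the first column of k. */
    k = (int *)calloc(sol->k_length, sizeof(int));
    for (idx = 0; idx < sol->k_length; idx++)
        k[idx] = sol->k[idx][0];

    free(aff_mat->sum_row);
    free(aff_mat);
    tm_free_solution(sol);
    tm_free_tree(comm_tree);
    tm_free_topology(topology);
    return k;   /* caller frees it, as the patch does with free(k) on rank 0 */
}

One consequence visible in the hunks: the manual cleanup of node_id, nb_nodes and arity that the old code performed is now delegated to tm_free_topology/tm_free_solution/tm_free_tree.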
@@ -25,6 +25,7 @@ #include "opal/mca/hwloc/hwloc-internal.h" #include "ompi/mca/topo/treematch/topo_treematch.h" +#include "ompi/mca/topo/treematch/treematch/treematch.h" #include "ompi/mca/topo/treematch/treematch/tm_mapping.h" #include "ompi/mca/topo/base/base.h" @@ -46,6 +47,7 @@ #define FALLBACK() \ do { free(nodes_roots); \ + free(lindex_to_grank); \ if( NULL != set) hwloc_bitmap_free(set); \ goto fallback; } \ while(0); @@ -92,8 +94,8 @@ static void dump_int_array( char* prolog, char* line_prolog, int* array, size_t size_t i; fprintf(stdout,"%s : ", prolog); - for(i = 0; i < num_procs_in_node ; i++) - fprintf(stdout,"[$s%i:%i] ", line_prolog, i, array[i]); + for(i = 0; i < length ; i++) + fprintf(stdout,"%s [%lu:%i] ", line_prolog, i, array[i]); fprintf(stdout,"\n"); } static void dump_double_array( char* prolog, char* line_prolog, double* array, size_t length ) @@ -101,8 +103,8 @@ static void dump_double_array( char* prolog, char* line_prolog, double* array, s size_t i; fprintf(stdout,"%s : ", prolog); - for(i = 0; i < num_procs_in_node ; i++) - fprintf(stdout,"%s [%i:%i] ", line_prolog, i, array[i]); + for(i = 0; i < length ; i++) + fprintf(stdout,"%s [%lu:%lf] ", line_prolog, i, array[i]); fprintf(stdout,"\n"); } #endif @@ -112,7 +114,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, int n, const int nodes[], const int degrees[], const int targets[], const int weights[], - struct opal_info_t *info, int reorder, + struct ompi_info_t *info, int reorder, ompi_communicator_t **newcomm) { int err; @@ -155,6 +157,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, int num_nodes = 0; int num_procs_in_node = 0; int rank, size; + int *k = NULL; + int newrank = -1; int hwloc_err; int oversubscribing_objs = 0, oversubscribed_pus = 0; int i, j, idx; @@ -250,6 +254,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, * all the calls that involve collective communications, so we have to lay the logic * accordingly. 
*/ + if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){ /* processes are not bound on the machine */ #ifdef __DEBUG__ if (0 == rank) @@ -291,6 +296,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, num_objs_in_node,num_procs_in_node, nodes_roots,lindex_to_grank,comm_old); } + if (!oversubscribed_pus) { /* Update the data used to compute the correct binding */ if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){ /* processes are not bound on the machine */ @@ -306,17 +312,17 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, if( !oversubscribing_objs && !oversubscribed_pus ) { if( hwloc_bitmap_isincluded(root_obj->cpuset,set) ) { /* processes are not bound on the machine */ - obj_rank = ompi_process_info.my_local_rank%num_objs_in_node; - effective_depth = depth; - object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,obj_rank); - if( NULL == object) FALLBACK(); + obj_rank = ompi_process_info.my_local_rank%num_objs_in_node; + effective_depth = depth; + object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,obj_rank); + if( NULL == object) FALLBACK(); - hwloc_bitmap_copy(set,object->cpuset); - hwloc_bitmap_singlify(set); /* we don't want the process to move */ - hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0); - if( -1 == hwloc_err) FALLBACK(); + hwloc_bitmap_copy(set,object->cpuset); + hwloc_bitmap_singlify(set); /* we don't want the process to move */ + hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0); + if( -1 == hwloc_err) FALLBACK(); #ifdef __DEBUG__ - fprintf(stdout,"Process not bound : binding on OBJ#%i \n",obj_rank); + fprintf(stdout,"Process not bound : binding on OBJ#%i \n",obj_rank); #endif } else { #ifdef __DEBUG__ @@ -385,7 +391,6 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, if (0 == mca_topo_treematch_component.reorder_mode) { int *k = NULL; int *obj_mapping = NULL; - int newrank = -1; int num_objs_total = 0; /* Gather comm pattern @@ -419,7 +424,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, tm_topology_t *tm_opt_topology = NULL; int *obj_to_rank_in_comm = NULL; int *hierarchies = NULL; - int hierarchy[MAX_LEVELS+1]; + int hierarchy[TM_MAX_LEVELS+1]; int min; /* create a table that derives the rank in comm_old from the object number */ @@ -489,27 +494,27 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, free(obj_to_rank_in_comm); hierarchy[0] = numlevels; - assert(numlevels < MAX_LEVELS); + assert(numlevels < TM_MAX_LEVELS); for(i = 0 ; i < hierarchy[0]; i++) hierarchy[i+1] = tracker[i]->arity; - for(; i < (MAX_LEVELS+1); i++) /* fill up everything else with -1 */ + for(; i < (TM_MAX_LEVELS+1); i++) /* fill up everything else with -1 */ hierarchy[i] = -1; if( 0 == rank ) { - hierarchies = (int *)malloc(num_nodes*(MAX_LEVELS+1)*sizeof(int)); - memcpy(hierarchies, hierarchy, (MAX_LEVELS+1)*sizeof(int)); + hierarchies = (int *)malloc(num_nodes*(TM_MAX_LEVELS+1)*sizeof(int)); + memcpy(hierarchies, hierarchy, (TM_MAX_LEVELS+1)*sizeof(int)); } /* gather hierarchies iff more than 1 node! 
*/ if ( num_nodes > 1 ) { if( rank != 0 ) { - if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(hierarchy,(MAX_LEVELS+1), MPI_INT, 0, + if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(hierarchy,(TM_MAX_LEVELS+1), MPI_INT, 0, 111, MCA_PML_BASE_SEND_STANDARD, comm_old)))) ERR_EXIT(err); } else { for(i = 1; i < num_nodes ; i++) - if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(hierarchies+i*(MAX_LEVELS+1), (MAX_LEVELS+1), MPI_INT, + if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(hierarchies+i*(TM_MAX_LEVELS+1), (TM_MAX_LEVELS+1), MPI_INT, nodes_roots[i], 111, comm_old, &reqs[i-1])))){ free(hierarchies); ERR_EXIT(err); @@ -524,23 +529,25 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, } if ( 0 == rank ) { - tree_t *comm_tree = NULL; + tm_tree_t *comm_tree = NULL; + tm_solution_t *sol = NULL; + tm_affinity_mat_t *aff_mat = NULL; double **comm_pattern = NULL; - int *matching = NULL; #ifdef __DEBUG__ - dump_int_array("hierarchies : ", "", hierarchies, num_nodes*(MAX_LEVELS+1)); + dump_int_array("hierarchies : ", "", hierarchies, num_nodes*(TM_MAX_LEVELS+1)); #endif tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t)); tm_topology->nb_levels = hierarchies[0]; /* extract min depth */ for(i = 1 ; i < num_nodes ; i++) - if (hierarchies[i*(MAX_LEVELS+1)] < tm_topology->nb_levels) - tm_topology->nb_levels = hierarchies[i*(MAX_LEVELS+1)]; + if (hierarchies[i*(TM_MAX_LEVELS+1)] < tm_topology->nb_levels) + tm_topology->nb_levels = hierarchies[i*(TM_MAX_LEVELS+1)]; + /* Crush levels in hierarchies too long (ie > tm_topology->nb_levels)*/ for(i = 0; i < num_nodes ; i++) { - int *base_ptr = hierarchies + i*(MAX_LEVELS+1); + int *base_ptr = hierarchies + i*(TM_MAX_LEVELS+1); int suppl = *base_ptr - tm_topology->nb_levels; for(j = 1 ; j <= suppl ; j++) *(base_ptr + tm_topology->nb_levels) *= *(base_ptr + tm_topology->nb_levels + j); @@ -553,8 +560,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, for(i = 1; i < tm_topology->nb_levels; i++) { /* compute the minimum for each level */ min = hierarchies[i]; for(j = 1; j < num_nodes ; j++) - if( hierarchies[j*(MAX_LEVELS+1) + i] < min) - min = hierarchies[j*(MAX_LEVELS+1) + i]; + if( hierarchies[j*(TM_MAX_LEVELS+1) + i] < min) + min = hierarchies[j*(TM_MAX_LEVELS+1) + i]; tm_topology->arity[i] = min; } } else { @@ -568,24 +575,58 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, fprintf(stdout,"topo_arity[%i] = %i\n", i, tm_topology->arity[i]); #endif /* compute the number of processing elements */ - tm_topology->nb_nodes = (int *)calloc(tm_topology->nb_levels, sizeof(int)); + tm_topology->nb_nodes = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t)); tm_topology->nb_nodes[0] = 1; for(i = 1 ; i < tm_topology->nb_levels; i++) tm_topology->nb_nodes[i] = tm_topology->nb_nodes[i-1] * tm_topology->arity[i-1]; /* Build process id tab */ tm_topology->node_id = (int **)calloc(tm_topology->nb_levels, sizeof(int*)); + tm_topology->node_rank = (int **)malloc(sizeof(int *) * tm_topology->nb_levels); for(i = 0; i < tm_topology->nb_levels; i++) { tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int)); - for (j = 0; j < tm_topology->nb_nodes[i]; j++) - tm_topology->node_id[i][j] = obj_mapping[j]; + tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int)); + /*note : we make the hypothesis that logical indexes in hwloc range from + 0 to N, are contiguous and crescent. 
*/ + + for( j = 0 ; j < tm_topology->nb_nodes[i] ; j++ ){ + tm_topology->node_id[i][j] = j; + tm_topology->node_rank[i][j] = j; + + /* Should use object->logical_index */ + /* obj = hwloc_get_obj_by_depth(topo,i,j%num_objs_in_node); + id = obj->logical_index + (num_objs_in_node)*(j/num_obj_in_node)*/ + /* + int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes); + topology->node_id[i][j] = id; +  topology->node_rank[i][id] = j; + */ + } } + /* unused for now*/ + tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double)); + + tm_topology->nb_proc_units = num_objs_total; + + tm_topology->nb_constraints = 0; + for(i = 0; i < tm_topology->nb_proc_units ; i++) + if (obj_mapping[i] != -1) + tm_topology->nb_constraints++; + tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int)); + for(idx = 0,i = 0; i < tm_topology->nb_proc_units ; i++) + if (obj_mapping[i] != -1) + tm_topology->constraints[idx++] = obj_mapping[i]; + + tm_topology->oversub_fact = 1; + #ifdef __DEBUG__ + assert(num_objs_total == tm_topology->nb_nodes[tm_topology->nb_levels-1]); + for(i = 0; i < tm_topology->nb_levels ; i++) { fprintf(stdout,"tm topo node_id for level [%i] : ",i); dump_int_array("", "", obj_mapping, tm_topology->nb_nodes[i]); } - display_topology(tm_topology); + tm_display_topology(tm_topology); #endif comm_pattern = (double **)malloc(size*sizeof(double *)); @@ -600,32 +641,31 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, #ifdef __DEBUG__ fprintf(stdout,"==== COMM PATTERN ====\n"); for( i = 0 ; i < size ; i++) { - dump_double_array("", "", comm_pattern, size); + dump_double_array("", "", comm_pattern[i], size); } #endif - k = (int *)calloc(num_objs_total, sizeof(int)); - matching = (int *)calloc(size, sizeof(int)); + tm_optimize_topology(&tm_topology); + aff_mat = tm_build_affinity_mat(comm_pattern,size); + comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL); + sol = tm_compute_mapping(tm_topology, comm_tree); + + k = (int *)calloc(sol->k_length, sizeof(int)); + for(idx = 0 ; idx < sol->k_length ; idx++) + k[idx] = sol->k[idx][0]; - tm_opt_topology = optimize_topology(tm_topology); - comm_tree = build_tree_from_topology(tm_opt_topology, comm_pattern, size, NULL, NULL); - map_topology_simple(tm_opt_topology, comm_tree, matching, size, k); #ifdef __DEBUG__ - fprintf(stdout,"====> nb levels : %i\n",tm_topology->nb_levels); dump_int_array("Rank permutation sigma/k : ", "", k, num_objs_total); - dump_int_array("Matching : ", "", matching, size); + assert(size == sol->sigma_length); + dump_int_array("Matching : ", "",sol->sigma, sol->sigma_length); #endif - free(comm_pattern); - free(comm_tree); - free(matching); free(obj_mapping); - for(i = 0 ; i < tm_topology->nb_levels ; i++) - free(tm_topology->node_id[i]); - free(tm_topology->node_id); - free(tm_topology->nb_nodes); - free(tm_topology->arity); - free(tm_topology); - FREE_topology(tm_opt_topology); + free(comm_pattern); + free(aff_mat->sum_row); + free(aff_mat); + tm_free_solution(sol); + tm_free_tree(comm_tree); + tm_free_topology(tm_topology); } } @@ -648,15 +688,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, (*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH; (*newcomm)->c_topo = topo_module; (*newcomm)->c_topo->reorder = reorder; + } else { /* partially distributed reordering */ ompi_communicator_t *localcomm = NULL; int *matching = (int *)calloc(num_procs_in_node,sizeof(int)); int *lrank_to_grank = 
(int *)calloc(num_procs_in_node,sizeof(int)); int *grank_to_lrank = (int *)calloc(size,sizeof(int)); - hwloc_obj_t object; - opal_hwloc_locality_t locality; - char set_as_string[64]; - opal_value_t kv; if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old, colors[rank], rank, &localcomm, false))) @@ -696,8 +733,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, /* The root has now the entire information, so let's crunch it */ if (rank == lindex_to_grank[0]) { tm_topology_t *tm_topology = NULL; - tm_topology_t *tm_opt_topology = NULL; - tree_t *comm_tree = NULL; + tm_tree_t *comm_tree = NULL; + tm_solution_t *sol = NULL; + tm_affinity_mat_t *aff_mat = NULL; double **comm_pattern = NULL; comm_pattern = (double **)malloc(num_procs_in_node*sizeof(double *)); @@ -717,7 +755,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, fprintf(stdout,"========== COMM PATTERN ============= \n"); for(i = 0 ; i < num_procs_in_node ; i++){ fprintf(stdout," %i : ",i); - dump_double_array("", "", comm_pattern, num_procs_in_node); + dump_double_array("", "", comm_pattern[i], num_procs_in_node); } fprintf(stdout,"======================= \n"); #endif @@ -725,92 +763,92 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t)); tm_topology->nb_levels = numlevels; tm_topology->arity = (int *)calloc(tm_topology->nb_levels, sizeof(int)); - tm_topology->nb_nodes = (int *)calloc(tm_topology->nb_levels, sizeof(int)); + tm_topology->nb_nodes = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t)); tm_topology->node_id = (int **)malloc(tm_topology->nb_levels*sizeof(int *)); + tm_topology->node_rank = (int **)malloc(tm_topology->nb_levels*sizeof(int *)); + for(i = 0 ; i < tm_topology->nb_levels ; i++){ int nb_objs = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, tracker[i]->depth); tm_topology->nb_nodes[i] = nb_objs; tm_topology->arity[i] = tracker[i]->arity; - tm_topology->node_id[i] = (int*)malloc(sizeof(int)*nb_objs); - for(j = 0; j < num_procs_in_node; j++) - tm_topology->node_id[i][j] = localrank_to_objnum[j]; - for(; j < nb_objs; tm_topology->node_id[i][j] = -1, j++); /* complete with empty */ + tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int)); + tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int)); + for(j = 0; j < tm_topology->nb_nodes[i] ; j++){ + tm_topology->node_id[i][j] = j; + tm_topology->node_rank[i][j] = j; + } } + /* unused for now*/ + tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double)); + + tm_topology->nb_proc_units = num_objs_in_node; + //tm_topology->nb_proc_units = num_procs_in_node; + tm_topology->nb_constraints = 0; + for(i = 0; i < num_procs_in_node ; i++) + if (localrank_to_objnum[i] != -1) + tm_topology->nb_constraints++; + + tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int)); + for(idx = 0,i = 0; i < num_procs_in_node ; i++) + if (localrank_to_objnum[i] != -1) + tm_topology->constraints[idx++] = localrank_to_objnum[i]; + + tm_topology->oversub_fact = 1; + #ifdef __DEBUG__ + assert(num_objs_in_node == tm_topology->nb_nodes[tm_topology->nb_levels-1]); fprintf(stdout,"Levels in topo : %i | num procs in node : %i\n",tm_topology->nb_levels,num_procs_in_node); for(i = 0; i < tm_topology->nb_levels ; i++){ fprintf(stdout,"Nb objs for level %i : %i | arity %i\n ",i,tm_topology->nb_nodes[i],tm_topology->arity[i]); dump_int_array("", "Obj id ", 
tm_topology->node_id[i], tm_topology->nb_nodes[i]); } - display_topology(tm_topology); + tm_display_topology(tm_topology); #endif + tm_optimize_topology(&tm_topology); + aff_mat = tm_build_affinity_mat(comm_pattern,num_procs_in_node); + comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL); + sol = tm_compute_mapping(tm_topology, comm_tree); - tm_opt_topology = optimize_topology(tm_topology); - comm_tree = build_tree_from_topology(tm_opt_topology, comm_pattern, num_procs_in_node, NULL, NULL); - map_topology_simple(tm_opt_topology, comm_tree, matching, num_procs_in_node, NULL); + k = (int *)calloc(sol->k_length, sizeof(int)); + for(idx = 0 ; idx < sol->k_length ; idx++) + k[idx] = sol->k[idx][0]; #ifdef __DEBUG__ - dump_int_array("Matching:", "", matching, num_procs_in_node); + fprintf(stdout,"====> nb levels : %i\n",tm_topology->nb_levels); + dump_int_array("Rank permutation sigma/k : ", "", k, num_procs_in_node); + assert(num_procs_in_node == sol->sigma_length); + dump_int_array("Matching : ", "",sol->sigma, sol->sigma_length); #endif + + free(aff_mat->sum_row); + free(aff_mat); free(comm_pattern); - for(i = 0; i < tm_topology->nb_levels; i++) - free(tm_topology->node_id[i]); - free(tm_topology->node_id); - free(tm_topology->nb_nodes); - free(tm_topology->arity); - free(tm_topology); - FREE_topology(tm_opt_topology); + tm_free_solution(sol); + tm_free_tree(comm_tree); + tm_free_topology(tm_topology); } + /* Todo : Bcast + group creation */ + /* scatter the ranks */ if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_bcast(matching, num_procs_in_node, - MPI_INT,0,localcomm, - localcomm->c_coll->coll_bcast_module))) + MPI_INT,0,localcomm, + localcomm->c_coll->coll_bcast_module))) ERR_EXIT(err); - object = hwloc_get_obj_by_depth(opal_hwloc_topology, - effective_depth, matching[ompi_process_info.my_local_rank]); - if( NULL == object) goto fallback; - hwloc_bitmap_copy(set, object->cpuset); - hwloc_bitmap_singlify(set); - err = hwloc_set_cpubind(opal_hwloc_topology,set,0); - if( -1 == err) goto fallback; + if ( 0 == rank ) + free(k); - /* Report new binding to ORTE/OPAL */ - /* hwloc_bitmap_list_asprintf(&orte_process_info.cpuset,set); */ - err = hwloc_bitmap_snprintf(set_as_string, 64, set); - -#ifdef __DEBUG__ - fprintf(stdout,"Bitmap str size : %i\n", err); -#endif - - OBJ_CONSTRUCT(&kv, opal_value_t); - kv.key = strdup(OPAL_PMIX_CPUSET); - kv.type = OPAL_STRING; - kv.data.string = strdup(set_as_string); - - (void)opal_pmix.store_local((opal_process_name_t*)OMPI_PROC_MY_NAME, &kv); - OBJ_DESTRUCT(&kv); - - locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, - ompi_process_info.cpuset,set_as_string); - OBJ_CONSTRUCT(&kv, opal_value_t); - kv.key = strdup(OPAL_PMIX_LOCALITY); - kv.type = OPAL_UINT16; - kv.data.uint16 = locality; - (void)opal_pmix.store_local((opal_process_name_t*)OMPI_PROC_MY_NAME, &kv); - OBJ_DESTRUCT(&kv); - - if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old, - comm_old->c_local_group, - newcomm))) { + /* this needs to be optimized but will do for now */ + if (OMPI_SUCCESS != (err = ompi_comm_split(localcomm, 0, newrank, newcomm, false))) ERR_EXIT(err); - } else { - /* Attach the dist_graph to the newly created communicator */ - (*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH; - (*newcomm)->c_topo = topo_module; - (*newcomm)->c_topo->reorder = reorder; - } + /* end of TODO */ + + /* Attach the dist_graph to the newly created communicator */ + (*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH; + (*newcomm)->c_topo = topo_module; + 
(*newcomm)->c_topo->reorder = reorder; + free(matching); free(grank_to_lrank); free(lrank_to_grank); diff --git a/ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.c b/ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.c index 00ee56a161..25a6708b2c 100644 --- a/ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.c +++ b/ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.c @@ -2,13 +2,12 @@ #include #include "IntConstantInitializedVector.h" - int intCIV_isInitialized(int_CIVector * v, int i) { if(v->top == 0) return 0; if(v->from[i] >= 0) - if(v->from[i] < v->top && v->to[v->from[i]] == i) + if(v->from[i] < v->top && v->to[v->from[i]] == i) return 1; return 0; } @@ -45,7 +44,7 @@ int intCIV_set(int_CIVector * v, int i, int val) v->top++; } v->vec[i] = val; - return 0; + return 0; } int intCIV_get(int_CIVector * v, int i) diff --git a/ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.h b/ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.h index 1b237b1b0e..25e5a1d759 100644 --- a/ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.h +++ b/ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.h @@ -12,5 +12,4 @@ void intCIV_exit(int_CIVector * v); int intCIV_set(int_CIVector * v, int i, int val); int intCIV_get(int_CIVector * v, int i); - #endif /*INTEGER_CONSTANT_INITIALIZED_VECTOR*/ diff --git a/ompi/mca/topo/treematch/treematch/PriorityQueue.c b/ompi/mca/topo/treematch/treematch/PriorityQueue.c new file mode 100644 index 0000000000..004224e300 --- /dev/null +++ b/ompi/mca/topo/treematch/treematch/PriorityQueue.c @@ -0,0 +1,174 @@ +#include +#include "PriorityQueue.h" + +/* + This comparison function is used to sort elements in key descending order. +*/ +int compfunc(const FiboNode * const, const FiboNode * const); + + + +int compFunc(const FiboNode * const node1, const FiboNode * const node2) +{ + return + ( ( ((QueueElement*)(node1))->key > ((QueueElement*)(node2))->key ) ? 
-1 : 1); +} + +int PQ_init(PriorityQueue * const q, int size) +{ + int i; + q->size = size; + q->elements = malloc(sizeof(QueueElement *) * size); + for(i=0; i < size; i++) + q->elements[i]=NULL; + return fiboTreeInit((FiboTree *)q, compFunc); +} + +void PQ_exit(PriorityQueue * const q) +{ + + int i; + for(i = 0; i < q->size; i++) + { + if(q->elements[i] != NULL) + free(q->elements[i]); + } + if(q->elements != NULL) + free(q->elements); + fiboTreeExit((FiboTree *)q); +} +void PQ_free(PriorityQueue * const q) +{ + int i; + for(i = 0; i < q->size; i++) + { + if(q->elements[i] != NULL) + free(q->elements[i]); + } + fiboTreeFree((FiboTree *)q); +} + +int PQ_isEmpty(PriorityQueue * const q) +{ + FiboTree * tree = (FiboTree *)q; +/* if the tree root is linked to itself then the tree is empty */ + if(&(tree->rootdat) == (tree->rootdat.linkdat.nextptr)) + return 1; + return 0; +} + +void PQ_insertElement(PriorityQueue * const q, QueueElement * const e) +{ + if(e->value >= 0 && e->value < q->size) + { + fiboTreeAdd((FiboTree *)q, (FiboNode *)(e)); + q->elements[e->value] = e; + e->isInQueue = 1; + } +} +void PQ_deleteElement(PriorityQueue * const q, QueueElement * const e) +{ + fiboTreeDel((FiboTree *)q, (FiboNode *)(e)); + q->elements[e->value] = NULL; + e->isInQueue = 0; +} + +void PQ_insert(PriorityQueue * const q, int val, double key) +{ + if( val >= 0 && val < q->size) + { + QueueElement * e = malloc(sizeof(QueueElement)); + e->value = val; + e->key = key; + PQ_insertElement(q, e); + } +} + +void PQ_delete(PriorityQueue * const q, int val) +{ + QueueElement * e = q->elements[val]; + PQ_deleteElement(q, e); + free(e); +} + +QueueElement * PQ_findMaxElement(PriorityQueue * const q) +{ + QueueElement * e = (QueueElement *)(fiboTreeMin((FiboTree *)q)); + return e; +} +QueueElement * PQ_deleteMaxElement(PriorityQueue * const q) +{ + QueueElement * e = (QueueElement *)(fiboTreeMin((FiboTree *)q)); + if(e != NULL) + { + PQ_deleteElement(q, e); + } + return e; +} + +double PQ_findMaxKey(PriorityQueue * const q) +{ + QueueElement * e = PQ_findMaxElement(q); + if(e!=NULL) + return e->key; + return 0; +} + +int PQ_deleteMax(PriorityQueue * const q) +{ + QueueElement * e = PQ_deleteMaxElement(q); + int res = -1; + if(e != NULL) + res = e->value; + free(e); + return res; +} + +void PQ_increaseElementKey(PriorityQueue * const q, QueueElement * const e, double i) +{ + if(e->isInQueue) + { + PQ_deleteElement(q, e); + e->key += i; + PQ_insertElement(q, e); + } +} +void PQ_decreaseElementKey(PriorityQueue * const q, QueueElement * const e, double i) +{ + if(e->isInQueue) + { + PQ_deleteElement(q, e); + e->key -= i; + PQ_insertElement(q, e); + } +} +void PQ_adjustElementKey(PriorityQueue * const q, QueueElement * const e, double i) +{ + if(e->isInQueue) + { + PQ_deleteElement(q, e); + e->key = i; + PQ_insertElement(q, e); + } +} + +void PQ_increaseKey(PriorityQueue * const q, int val, double i) +{ + QueueElement * e = q->elements[val]; + if(e != NULL) + PQ_increaseElementKey(q, e, i); +} + +void PQ_decreaseKey(PriorityQueue * const q, int val, double i) +{ + QueueElement * e = q->elements[val]; + if(e != NULL) + PQ_decreaseElementKey(q, e, i); +} + +void PQ_adjustKey(PriorityQueue * const q, int val, double i) +{ + QueueElement * e = q->elements[val]; + if(e != NULL) + PQ_adjustElementKey(q, e, i); +} diff --git a/ompi/mca/topo/treematch/treematch/PriorityQueue.h b/ompi/mca/topo/treematch/treematch/PriorityQueue.h new file mode 100644 index 0000000000..c9ef1d2291 --- /dev/null +++ 
b/ompi/mca/topo/treematch/treematch/PriorityQueue.h @@ -0,0 +1,108 @@ +#ifndef PRIORITY_QUEUE +#define PRIORITY_QUEUE + +#include "fibo.h" + +/* + This is the struct for our elements in a PriorityQueue. + The node is at first place so we only have to use a cast to switch between QueueElement's pointer and Fibonode's pointer. +*/ +typedef struct QueueElement_ +{ + FiboNode node; /*the node used to insert the element in a FiboTree*/ + double key; /*the key of the element, elements are sorted in a descending order according to their key*/ + int value; + int isInQueue; +} QueueElement; + +typedef struct PriorityQueue_ +{ + FiboTree tree; + QueueElement ** elements; /*a vector of element with their value as key so we can easily retreive an element from its value */ + int size; /*the size allocated to the elements vector*/ +} PriorityQueue; + + +/* + PQ_init initiates a PriorityQueue with a size given in argument and sets compFunc as comparison function. Note that you have to allocate memory to the PriorityQueue pointer before calling this function. + Returns : + 0 if success + !0 if failed + + PQ_free simply empties the PriorityQueue but does not free the memory used by its elements. + PQ_exit destroys the PriorityQueue without freeing elements. The PriorityQueue is no longer usable without using PQ_init again. +Note that the PriorityQueue pointer is not deallocated. +*/ +int PQ_init(PriorityQueue * const, int size); +void PQ_free(PriorityQueue * const); +void PQ_exit(PriorityQueue * const); + +/* + PQ_isEmpty returns 1 if the PriorityQueue is empty, 0 otherwise. +*/ +int PQ_isEmpty(PriorityQueue * const); + +/* + PQ_insertElement inserts the given QueueElement in the given PriorityQueue +*/ +void PQ_insertElement(PriorityQueue * const, QueueElement * const); +/* + PQ_deleteElement delete the element given in argument from the PriorityQueue. +*/ +void PQ_deleteElement(PriorityQueue * const, QueueElement * const); + +/* + PQ_insert inserts an element in the PriorityQueue with the value and key given in argument. +*/ +void PQ_insert(PriorityQueue * const, int val, double key); +/* + PQ_delete removes the first element found with the value given in argument and frees it. +*/ +void PQ_delete(PriorityQueue * const, int val); + + +/* + PQ_findMaxElement returns the QueueElement with the greatest key in the given PriorityQueue +*/ +QueueElement * PQ_findMaxElement(PriorityQueue * const); +/* + PQ_deleteMaxElement returns the QueueElement with the geatest key in the given PriorityQueue and removes it from the queue. +*/ +QueueElement * PQ_deleteMaxElement(PriorityQueue * const); + +/* + PQ_findMax returns the key of the element with the geatest key in the given PriorityQueue +*/ +double PQ_findMaxKey(PriorityQueue * const); +/* + PQ_deleteMax returns the value of the element with the greatest key in the given PriorityQueue and removes it from the queue. +*/ +int PQ_deleteMax(PriorityQueue * const); + +/* + PQ_increaseElementKey adds the value of i to the key of the given QueueElement +*/ +void PQ_increaseElementKey(PriorityQueue * const, QueueElement * const, double i); +/* + PQ_decreaseElementKey substracts the value of i from the key of the given QueueElement +*/ +void PQ_decreaseElementKey(PriorityQueue * const, QueueElement * const, double i); +/* + PQ_adjustElementKey sets to i the key of the given QueueElement. 
+*/ +void PQ_adjustElementKey(PriorityQueue * const, QueueElement * const, double i); + +/* + PQ_increaseKey adds i to the key of the first element found with a value equal to val in the PriorityQueue. +*/ +void PQ_increaseKey(PriorityQueue * const, int val, double i); +/* + PQ_decreaseKey substracts i from the key of the first element found with a value equal to val in the PriorityQueue. +*/ +void PQ_decreaseKey(PriorityQueue * const, int val, double i); +/* + PQ_adjustKey sets to i the key of the first element found with a value equal to val in the PriorityQueue. +*/ +void PQ_adjustKey(PriorityQueue * const, int val, double i); + +#endif /*PRIORITY_QUEUE*/ diff --git a/ompi/mca/topo/treematch/treematch/fibo.c b/ompi/mca/topo/treematch/treematch/fibo.c new file mode 100644 index 0000000000..97070e7273 --- /dev/null +++ b/ompi/mca/topo/treematch/treematch/fibo.c @@ -0,0 +1,372 @@ +/* Copyright 2010 IPB, INRIA & CNRS +** +** This file originally comes from the Scotch software package for +** static mapping, graph partitioning and sparse matrix ordering. +** +** This software is governed by the CeCILL-B license under French law +** and abiding by the rules of distribution of free software. You can +** use, modify and/or redistribute the software under the terms of the +** CeCILL-B license as circulated by CEA, CNRS and INRIA at the following +** URL: "http://www.cecill.info". +** +** As a counterpart to the access to the source code and rights to copy, +** modify and redistribute granted by the license, users are provided +** only with a limited warranty and the software's author, the holder of +** the economic rights, and the successive licensors have only limited +** liability. +** +** In this respect, the user's attention is drawn to the risks associated +** with loading, using, modifying and/or developing or reproducing the +** software by the user in light of its specific status of free software, +** that may mean that it is complicated to manipulate, and that also +** therefore means that it is reserved for developers and experienced +** professionals having in-depth computer knowledge. Users are therefore +** encouraged to load and test the software's suitability as regards +** their requirements in conditions enabling the security of their +** systems and/or data to be ensured and, more generally, to use and +** operate it in the same conditions as regards security. +** +** The fact that you are presently reading this means that you have had +** knowledge of the CeCILL-B license and that you accept its terms. +*/ +/************************************************************/ +/** **/ +/** NAME : fibo.c **/ +/** **/ +/** AUTHOR : Francois PELLEGRINI **/ +/** **/ +/** FUNCTION : This module handles Fibonacci trees. **/ +/** **/ +/** DATES : # Version 1.0 : from : 01 may 2010 **/ +/** to 12 may 2010 **/ +/** **/ +/************************************************************/ + +/* +** The defines and includes. +*/ + +#define FIBO + +#include +#include +#include +#include "fibo.h" + +/* Helper macros which can be redefined at compile time. */ + +#ifndef INT +#define INT int /* "long long" can be used on 64-bit systems */ +#endif /* INT */ + +#ifndef errorPrint +#define errorPrint(s) fprintf (stderr, s) +#endif /* errorPrint */ + +#ifndef memAlloc +#define memAlloc malloc +#define memSet memset +#define memFree free +#endif /* memAlloc */ + +/*********************************************/ +/* */ +/* These routines deal with Fibonacci trees. 
*/ +/* */ +/*********************************************/ + +/* This routine initializes a Fibonacci +** tree structure. +** It returns: +** - 0 : in case of success. +** - !0 : on error. +*/ + +int +fiboTreeInit ( +FiboTree * const treeptr, +int (* cmpfptr) (const FiboNode * const, const FiboNode * const)) +{ + if ((treeptr->degrtab = (FiboNode **) memAlloc ((sizeof (INT) << 3) * sizeof (FiboNode *))) == NULL) /* As many cells as there are bits in an INT */ + return (1); + + memSet (treeptr->degrtab, 0, (sizeof (INT) << 3) * sizeof (FiboNode *)); /* Make degree array ready for consolidation: all cells set to NULL */ + + treeptr->rootdat.linkdat.prevptr = /* Link root node to itself */ + treeptr->rootdat.linkdat.nextptr = &treeptr->rootdat; + treeptr->cmpfptr = cmpfptr; + + return (0); +} + +/* This routine flushes the contents of +** the given Fibonacci tree. +** It returns: +** - VOID : in all cases. +*/ + +void +fiboTreeExit ( +FiboTree * const treeptr) +{ + if (treeptr->degrtab != NULL) + memFree (treeptr->degrtab); +} + +/* This routine flushes the contents of +** the given Fibonacci tree. It does not +** free any of its contents, but instead +** makes the tree structure look empty again. +** It returns: +** - VOID : in all cases. +*/ + +void +fiboTreeFree ( +FiboTree * const treeptr) +{ + treeptr->rootdat.linkdat.prevptr = /* Link root node to itself */ + treeptr->rootdat.linkdat.nextptr = &treeptr->rootdat; +} + +/* This routine perform the consolidation +** of roots per degree. It returns the best +** element found because this element is not +** recorded in the data structure itself. +** It returns: +** - !NULL : pointer to best element found. +** - NULL : Fibonacci tree is empty. +*/ + +FiboNode * +fiboTreeConsolidate ( +FiboTree * const treeptr) +{ + FiboNode ** restrict degrtab; + int degrmax; + int degrval; + FiboNode * rootptr; + FiboNode * nextptr; + FiboNode * bestptr; + + degrtab = treeptr->degrtab; + + for (rootptr = treeptr->rootdat.linkdat.nextptr, nextptr = rootptr->linkdat.nextptr, degrmax = 0; /* For all roots in root list */ + rootptr != &treeptr->rootdat; ) { + degrval = rootptr->deflval >> 1; /* Get degree, getting rid of flag part */ +#ifdef FIBO_DEBUG + if (degrval >= (sizeof (INT) << 3)) + errorPrint ("fiboTreeConsolidate: invalid node degree"); +#endif /* FIBO_DEBUG */ + if (degrtab[degrval] == NULL) { /* If no tree with same degree already found */ + if (degrval > degrmax) /* Record highest degree found */ + degrmax = degrval; + + degrtab[degrval] = rootptr; /* Record tree as first tree with this degree */ + rootptr = nextptr; /* Process next root in list during next iteration */ + nextptr = rootptr->linkdat.nextptr; + } + else { + FiboNode * oldrptr; /* Root which will no longer be a root */ + FiboNode * chldptr; + + oldrptr = degrtab[degrval]; /* Assume old root is worse */ + if (treeptr->cmpfptr (oldrptr, rootptr) <= 0) { /* If old root is still better */ + oldrptr = rootptr; /* This root will be be linked to it */ + rootptr = degrtab[degrval]; /* We will go on processing this root */ + } + + degrtab[degrval] = NULL; /* Remaining root changes degree so leaves this cell */ + fiboTreeUnlink (oldrptr); /* Old root is no longer a root */ + oldrptr->deflval &= ~1; /* Whatever old root flag was, it is reset to 0 */ + oldrptr->pareptr = rootptr; /* Remaining root is now father of old root */ + + chldptr = rootptr->chldptr; /* Get first child of remaining root */ + if (chldptr != NULL) { /* If remaining root had already some children, link old root with them */ + 
rootptr->deflval += 2; /* Increase degree by 1, that is, by 2 with left shift in deflval */ + fiboTreeLinkAfter (chldptr, oldrptr); + } + else { /* Old root becomes first child of remaining root */ + rootptr->deflval = 2; /* Real degree set to 1, and flag set to 0 */ + rootptr->chldptr = oldrptr; + oldrptr->linkdat.prevptr = /* Chain old root to oneself as only child */ + oldrptr->linkdat.nextptr = oldrptr; + } + } /* Process again remaining root as its degree has changed */ + } + + bestptr = NULL; + for (degrval = 0; degrval <= degrmax; degrval ++) { + if (degrtab[degrval] != NULL) { /* If some tree is found */ + bestptr = degrtab[degrval]; /* Record it as potential best */ + degrtab[degrval] = NULL; /* Clean-up used part of array */ + degrval ++; /* Go on at next cell in next loop */ + break; + } + } + for ( ; degrval <= degrmax; degrval ++) { /* For remaining roots once a potential best root has been found */ + if (degrtab[degrval] != NULL) { + if (treeptr->cmpfptr (degrtab[degrval], bestptr) < 0) /* If new root is better */ + bestptr = degrtab[degrval]; /* Record new root as best root */ + degrtab[degrval] = NULL; /* Clean-up used part of array */ + } + } + + return (bestptr); +} + +/* This routine returns the node of minimum +** key in the given tree. The node is searched +** for each time this routine is called, so this +** information should be recorded if needed. +** This is the non-macro version, for testing +** and setting up breakpoints. +** It returns: +** - !NULL : pointer to best element found. +** - NULL : Fibonacci tree is empty. +*/ + +#ifndef fiboTreeMin + +FiboNode * +fiboTreeMin ( +FiboTree * const treeptr) +{ + FiboNode * bestptr; + + bestptr = fiboTreeMinMacro (treeptr); + +#ifdef FIBO_DEBUG + fiboTreeCheck (treeptr); +#endif /* FIBO_DEBUG */ + + return (bestptr); +} + +#endif /* fiboTreeMin */ + +/* This routine adds the given node to the +** given tree. This is the non-macro version, +** for testing and setting up breakpoints. +** It returns: +** - void : in all cases. +*/ + +#ifndef fiboTreeAdd + +void +fiboTreeAdd ( +FiboTree * const treeptr, +FiboNode * const nodeptr) +{ + fiboTreeAddMacro (treeptr, nodeptr); + +#ifdef FIBO_DEBUG + fiboTreeCheck (treeptr); +#endif /* FIBO_DEBUG */ +} + +#endif /* fiboTreeAdd */ + +/* This routine deletes the given node from +** the given tree, whatever ths node is (root +** or non root). This is the non-macro version, +** for testing and setting up breakpoints. +** It returns: +** - void : in all cases. +*/ + +#ifndef fiboTreeDel + +void +fiboTreeDel ( +FiboTree * const treeptr, +FiboNode * const nodeptr) +{ + fiboTreeDelMacro (treeptr, nodeptr); + +#ifdef FIBO_DEBUG + nodeptr->pareptr = + nodeptr->chldptr = + nodeptr->linkdat.prevptr = + nodeptr->linkdat.nextptr = NULL; + + fiboTreeCheck (treeptr); +#endif /* FIBO_DEBUG */ +} + +#endif /* fiboTreeDel */ + +/* This routine checks the consistency of the +** given linked list. +** It returns: +** - !NULL : pointer to the vertex. +** - NULL : if no such vertex available. 
+*/ + +#ifdef FIBO_DEBUG + +static +int +fiboTreeCheck2 ( +const FiboNode * const nodeptr) +{ + FiboNode * chldptr; + int degrval; + + degrval = 0; + chldptr = nodeptr->chldptr; + if (chldptr != NULL) { + do { + if (chldptr->linkdat.nextptr->linkdat.prevptr != chldptr) { + errorPrint ("fiboTreeCheck: bad child linked list"); + return (1); + } + + if (chldptr->pareptr != nodeptr) { + errorPrint ("fiboTreeCheck: bad child parent"); + return (1); + } + + if (fiboTreeCheck2 (chldptr) != 0) + return (1); + + degrval ++; + chldptr = chldptr->linkdat.nextptr; + } while (chldptr != nodeptr->chldptr); + } + + if (degrval != (nodeptr->deflval >> 1)) { /* Real node degree is obtained by discarding lowest bit */ + errorPrint ("fiboTreeCheck2: invalid child information"); + return (1); + } + + return (0); +} + +int +fiboTreeCheck ( +const FiboTree * const treeptr) +{ + FiboNode * nodeptr; + + for (nodeptr = treeptr->rootdat.linkdat.nextptr; + nodeptr != &treeptr->rootdat; nodeptr = nodeptr->linkdat.nextptr) { + if (nodeptr->linkdat.nextptr->linkdat.prevptr != nodeptr) { + errorPrint ("fiboTreeCheck: bad root linked list"); + return (1); + } + + if (nodeptr->pareptr != NULL) { + errorPrint ("fiboTreeCheck: bad root parent"); + return (1); + } + + if (fiboTreeCheck2 (nodeptr) != 0) + return (1); + } + + return (0); +} + +#endif /* FIBO_DEBUG */ diff --git a/ompi/mca/topo/treematch/treematch/fibo.h b/ompi/mca/topo/treematch/treematch/fibo.h new file mode 100644 index 0000000000..32e0a7c082 --- /dev/null +++ b/ompi/mca/topo/treematch/treematch/fibo.h @@ -0,0 +1,205 @@ +/* Copyright 2010 IPB, INRIA & CNRS +** +** This file originally comes from the Scotch software package for +** static mapping, graph partitioning and sparse matrix ordering. +** +** This software is governed by the CeCILL-B license under French law +** and abiding by the rules of distribution of free software. You can +** use, modify and/or redistribute the software under the terms of the +** CeCILL-B license as circulated by CEA, CNRS and INRIA at the following +** URL: "http://www.cecill.info". +** +** As a counterpart to the access to the source code and rights to copy, +** modify and redistribute granted by the license, users are provided +** only with a limited warranty and the software's author, the holder of +** the economic rights, and the successive licensors have only limited +** liability. +** +** In this respect, the user's attention is drawn to the risks associated +** with loading, using, modifying and/or developing or reproducing the +** software by the user in light of its specific status of free software, +** that may mean that it is complicated to manipulate, and that also +** therefore means that it is reserved for developers and experienced +** professionals having in-depth computer knowledge. Users are therefore +** encouraged to load and test the software's suitability as regards +** their requirements in conditions enabling the security of their +** systems and/or data to be ensured and, more generally, to use and +** operate it in the same conditions as regards security. +** +** The fact that you are presently reading this means that you have had +** knowledge of the CeCILL-B license and that you accept its terms. +*/ +/************************************************************/ +/** **/ +/** NAME : fibo.h **/ +/** **/ +/** AUTHOR : Francois PELLEGRINI **/ +/** **/ +/** FUNCTION : This module contains the definitions of **/ +/** the generic Fibonacci trees. 
**/ +/** **/ +/** DATES : # Version 1.0 : from : 01 may 2010 **/ +/** to 12 may 2010 **/ +/** **/ +/** NOTES : # Since this module has originally been **/ +/** designed as a gain keeping data **/ +/** structure for local optimization **/ +/** algorithms, the computation of the **/ +/** best node is only done when actually **/ +/** searching for it. **/ +/** This is most useful when many **/ +/** insertions and deletions can take **/ +/** place in the mean time. This is why **/ +/** this data structure does not keep **/ +/** track of the best node, unlike most **/ +/** implementations do. **/ +/** **/ +/************************************************************/ + +/* +** The type and structure definitions. +*/ + +/* The doubly linked list structure. */ + +typedef struct FiboLink_ { + struct FiboNode_ * prevptr; /*+ Pointer to previous sibling element +*/ + struct FiboNode_ * nextptr; /*+ Pointer to next sibling element +*/ +} FiboLink; + +/* The tree node data structure. The deflval + variable merges degree and flag variables. + The degree of a node is smaller than + "bitsizeof (INT)", so it can hold on an + "int". The flag value is stored in the + lowest bit of the value. */ + + +typedef struct FiboNode_ { + struct FiboNode_ * pareptr; /*+ Pointer to parent element, if any +*/ + struct FiboNode_ * chldptr; /*+ Pointer to first child element, if any +*/ + FiboLink linkdat; /*+ Pointers to sibling elements +*/ + int deflval; /*+ Lowest bit: flag value; other bits: degree value +*/ +} FiboNode; + +/* The tree data structure. The fake dummy node aims + at handling root node insertion without any test. + This is important as many insertions have to be + performed. */ + +typedef struct FiboTree_ { + FiboNode rootdat; /*+ Dummy node for fast root insertion +*/ + FiboNode ** restrict degrtab; /*+ Consolidation array of size "bitsizeof (INT)" +*/ + int (* cmpfptr) (const FiboNode * const, const FiboNode * const); /*+ Comparison routine +*/ +} FiboTree; + +/* +** The marco definitions. +*/ + +/* This is the core of the module. All of + the algorithms have been de-recursived + and written as macros. 
*/ + +#define fiboTreeLinkAfter(o,n) do { \ + FiboNode * nextptr; \ + nextptr = (o)->linkdat.nextptr; \ + (n)->linkdat.nextptr = nextptr; \ + (n)->linkdat.prevptr = (o); \ + nextptr->linkdat.prevptr = (n); \ + (o)->linkdat.nextptr = (n); \ + } while (0) + +#define fiboTreeUnlink(n) do { \ + (n)->linkdat.prevptr->linkdat.nextptr = (n)->linkdat.nextptr; \ + (n)->linkdat.nextptr->linkdat.prevptr = (n)->linkdat.prevptr; \ + } while (0) + +#define fiboTreeAddMacro(t,n) do { \ + (n)->pareptr = NULL; \ + (n)->chldptr = NULL; \ + (n)->deflval = 0; \ + fiboTreeLinkAfter (&((t)->rootdat), (n)); \ + } while (0) + +#define fiboTreeMinMacro(t) (fiboTreeConsolidate (t)) + +#define fiboTreeCutChildren(t,n) do { \ + FiboNode * chldptr; \ + chldptr = (n)->chldptr; \ + if (chldptr != NULL) { \ + FiboNode * cendptr; \ + cendptr = chldptr; \ + do { \ + FiboNode * nextptr; \ + nextptr = chldptr->linkdat.nextptr; \ + chldptr->pareptr = NULL; \ + fiboTreeLinkAfter (&((t)->rootdat), chldptr); \ + chldptr = nextptr; \ + } while (chldptr != cendptr); \ + } \ + } while (0) + +#define fiboTreeDelMacro(t,n) do { \ + FiboNode * pareptr; \ + FiboNode * rghtptr; \ + pareptr = (n)->pareptr; \ + fiboTreeUnlink (n); \ + fiboTreeCutChildren ((t), (n)); \ + if (pareptr == NULL) \ + break; \ + rghtptr = (n)->linkdat.nextptr; \ + while (1) { \ + FiboNode * gdpaptr; \ + int deflval; \ + deflval = pareptr->deflval - 2; \ + pareptr->deflval = deflval | 1; \ + gdpaptr = pareptr->pareptr; \ + pareptr->chldptr = (deflval <= 1) ? NULL : rghtptr; \ + if (((deflval & 1) == 0) || (gdpaptr == NULL)) \ + break; \ + rghtptr = pareptr->linkdat.nextptr; \ + fiboTreeUnlink (pareptr); \ + pareptr->pareptr = NULL; \ + fiboTreeLinkAfter (&((t)->rootdat), pareptr); \ + pareptr = gdpaptr; \ + } \ + } while (0) + +/* +** The function prototypes. +*/ + +/* This set of definitions allows the user + to specify whether he prefers to use + the fibonacci routines as macros or as + regular functions, for instance for + debugging. 
*/ + +#define fiboTreeAdd fiboTreeAddMacro +/* #define fiboTreeDel fiboTreeDelMacro */ +/* #define fiboTreeMin fiboTreeMinMacro */ + +#ifndef FIBO +#define static +#endif + +int fiboTreeInit (FiboTree * const, int (*) (const FiboNode * const, const FiboNode * const)); +void fiboTreeExit (FiboTree * const); +void fiboTreeFree (FiboTree * const); +FiboNode * fiboTreeConsolidate (FiboTree * const); +#ifndef fiboTreeAdd +void fiboTreeAdd (FiboTree * const, FiboNode * const); +#endif /* fiboTreeAdd */ +#ifndef fiboTreeDel +void fiboTreeDel (FiboTree * const, FiboNode * const); +#endif /* fiboTreeDel */ +#ifndef fiboTreeMin +FiboNode * fiboTreeMin (FiboTree * const); +#endif /* fiboTreeMin */ +#ifdef FIBO_DEBUG +int fiboTreeCheck (const FiboTree * const); +static int fiboTreeCheck2 (const FiboNode * const); +#endif /* FIBO_DEBUG */ + +#undef static diff --git a/ompi/mca/topo/treematch/treematch/k-partitioning.c b/ompi/mca/topo/treematch/treematch/k-partitioning.c new file mode 100644 index 0000000000..f035ffa24a --- /dev/null +++ b/ompi/mca/topo/treematch/treematch/k-partitioning.c @@ -0,0 +1,339 @@ +#include +#include +#include "k-partitioning.h" +#include "tm_mt.h" +#include "tm_verbose.h" + +void memory_allocation(PriorityQueue ** Q, PriorityQueue ** Qinst, double *** D, int n, int k); +void initialization(int * const part, double ** const matrice, PriorityQueue * const Qpart, PriorityQueue * const Q, PriorityQueue * const Qinst, double ** const D, int n, int k, int * const deficit, int * const surplus); +void algo(int * const part, double ** const matrice, PriorityQueue * const Qpart, PriorityQueue * const Q, PriorityQueue * const Qinst, double ** const D, int n, int * const deficit, int * const surplus); +double nextGain(PriorityQueue * const Qpart, PriorityQueue * const Q, int * const deficit, int * const surplus); +void balancing(int n, int deficit, int surplus, double ** const D, int * const part); +void destruction(PriorityQueue * Qpart, PriorityQueue * Q, PriorityQueue * Qinst, double ** D, int n, int k); + +void allocate_vertex2(int u, int *res, double **comm, int n, int *size, int max_size); +double eval_cost2(int *,int,double **); +int *kpartition_greedy2(int k, double **comm, int n, int nb_try_max, int *constraints, int nb_constraints); +int* build_p_vector(double **comm, int n, int k, int greedy_trials, int * constraints, int nb_constraints); + +int* kPartitioning(double ** comm, int n, int k, int * constraints, int nb_constraints, int greedy_trials) +{ + /* ##### declarations & allocations ##### */ + + PriorityQueue Qpart, *Q = NULL, *Qinst = NULL; + double **D = NULL; + int deficit, surplus, *part = NULL; + int real_n = n-nb_constraints; + + part = build_p_vector(comm, n, k, greedy_trials, constraints, nb_constraints); + + memory_allocation(&Q, &Qinst, &D, real_n, k); + + /* ##### Initialization ##### */ + + initialization(part, comm, &Qpart, Q, Qinst, D, real_n, k, &deficit, &surplus); + + /* ##### Main loop ##### */ + while((nextGain(&Qpart, Q, &deficit, &surplus))>0) + { + algo(part, comm, &Qpart, Q, Qinst, D, real_n, &deficit, &surplus); + } + + /* ##### Balancing the partition ##### */ + balancing(real_n, deficit, surplus, D, part); /*if partition isn't balanced we have to make one last move*/ + + /* ##### Memory deallocation ##### */ + destruction(&Qpart, Q, Qinst, D, real_n, k); + + return part; +} + +void memory_allocation(PriorityQueue ** Q, PriorityQueue ** Qinst, double *** D, int n, int k) +{ + int i; + *Q = calloc(k, sizeof(PriorityQueue)); /*one Q for each 
partition*/ + *Qinst = calloc(n, sizeof(PriorityQueue)); /*one Qinst for each vertex*/ + *D = malloc(sizeof(double *) * n); /*D's size is n * k*/ + for(i=0; i < n; ++i) + (*D)[i] = calloc(k, sizeof(double)); +} + +void initialization(int * const part, double ** const matrice, PriorityQueue * const Qpart, PriorityQueue * const Q, PriorityQueue * const Qinst, double ** const D, int n, int k, int * const deficit, int * const surplus) +{ + int i,j; + + /* ##### PriorityQueue initializations ##### */ + /* We initialize Qpart with a size of k because it contains the subsets's indexes. */ + PQ_init(Qpart, k); + + /* We initialize each Q[i] with a size of n because each vertex is in one of these queue at any time. */ + /* However we could set a size of (n/k)+1 as this is the maximum size of a subset when the partition is not balanced. */ + for(i=0; i= CRITICAL) + fprintf(stderr,"Error Max element in priority queue negative!\n"); + exit(-1); + } + *surplus = j; /*this subset becomes surplus*/ + + for(v=0; v < n; ++v) /*we scan though all edges (u,v) */ + { + j = part[u]; /*we set j to the starting subset */ + D[v][j]= D[v][j] - matrice[u][v]; /*we compute the new D[v, i] (here j has the value of the starting subset of u, that's why we say i) */ + PQ_adjustKey(&Qinst[v], j, D[v][j]); /*we update this gain in Qinst[v]*/ + j = *surplus; /*we put back the arrival subset in j*/ + D[v][j] = D[v][j] + matrice[u][v]; /*matrice[u][v]; we compute the new D[v, j]*/ + PQ_adjustKey(&Qinst[v], j, D[v][j]);/*we update this gain in Qinst[v]*/ + d = PQ_findMaxKey(&Qinst[v]) - D[v][part[v]]; /*we compute v's new highest possible gain*/ + PQ_adjustKey(&Q[part[v]], v, d); /*we update it in Q[p[v]]*/ + d = PQ_findMaxKey(&Q[part[v]]); /*we get the highest possible gain in v's subset*/ + PQ_adjustKey(Qpart, part[v], d); /*we update it in Qpart*/ + } + part[u] = *surplus; /*we move u from i to j (here surplus has the value of j the arrival subset)*/ + + d = PQ_findMaxKey(&Qinst[u]) - D[u][part[u]]; /*we compute the new u's highest possible gain*/ + if(!PQ_isEmpty(&Qinst[u])) /*if at least one more move of u is possible*/ + PQ_insert(&Q[part[u]], u, d); /*we insert u in the Q queue of its new subset*/ + PQ_adjustKey(Qpart, part[u], d); /*we update the new highest possible gain in u's subset*/ +} + +double nextGain(PriorityQueue * const Qpart, PriorityQueue * const Q, int * const deficit, int * const surplus) +{ + double res; + if(*deficit == *surplus) /*if the current partition is balanced*/ + res = PQ_findMaxKey(Qpart); /*we get the highest possible gain*/ + else /*the current partition is not balanced*/ + res = PQ_findMaxKey(&Q[*surplus]); /*we get the highest possible gain from surplus*/ + return res; +} + +void balancing(int n, int deficit, int surplus, double ** const D, int * const part) +{ + if(surplus != deficit) /*if the current partition is not balanced*/ + { + int i; + PriorityQueue moves; /*we use a queue to store the possible moves from surplus to deficit*/ + PQ_init(&moves, n); + for(i=0; i= max_size) + continue; + /* find a vertex not already partitionned*/ + do{ + /* call the mersenne twister PRNG of tm_mt.c*/ + j = genrand_int32() % n; + } while ( res[j] != -1 ); + /* allocate and update size of partition*/ + res[j] = i; + /* printf("random: %d -> %d\n",j,i); */ + size[i]++; + } + + /* allocate each unallocated vertices in the partition that maximize the communication*/ + for( i = 0 ; i < n ; ++i ) + if( res[i] == -1) + allocate_vertex2(i, res, comm, n-nb_constraints, size, max_size); + + cost = 
eval_cost2(res,n-nb_constraints,comm); + /*print_1D_tab(res,n); + printf("cost=%.2f\n",cost);*/ + if((cost best_cost)){ + best_cost = cost; + best_part = res[i]; + } + } + } + + /* printf("size[%d]: %d\n",best_part, size[best_part]);*/ + /* printf("putting(%.2f): %d -> %d\n",best_cost, u, best_part); */ + + res[u] = best_part; + size[best_part]++; +} + +double eval_cost2(int *partition, int n, double **comm) +{ + double cost = 0; + int i,j; + + for( i = 0 ; i < n ; ++i ) + for( j = i+1 ; j < n ; ++j ) + if(partition[i] != partition[j]) + cost += comm[i][j]; + + return cost; +} + +int* build_p_vector(double **comm, int n, int k, int greedy_trials, int * constraints, int nb_constraints) +{ + int * part = NULL; + if(greedy_trials>0) /*if greedy_trials > 0 then we use kpartition_greedy with greedy_trials trials*/ + { + part = kpartition_greedy2(k, comm, n, greedy_trials, constraints, nb_constraints); + } + else + { + int * size = calloc(k, sizeof(int)); + int i,j; + int nodes_per_part = n/k; + int nb_real_nodes = n-nb_constraints; + part = malloc(sizeof(int) * n); + for(i=0; i 0 : use of kpartition_greedy with greedy_trials number of trials + */ + +int* kPartitioning(double ** comm, int n, int k, int * const constraints, int nb_constraints, int greedy_trials); + +#endif /*K_PARTITIONING*/ diff --git a/ompi/mca/topo/treematch/treematch/tgt_map.c b/ompi/mca/topo/treematch/treematch/tgt_map.c deleted file mode 100644 index ea0a35542a..0000000000 --- a/ompi/mca/topo/treematch/treematch/tgt_map.c +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include -//#include "tm_hwloc.h" -#include "tm_tree.h" -#include "tm_mapping.h" -#include "tm_timings.h" - - - -int main(int argc, char**argv){; - tree_t *comm_tree=NULL; - double **comm,**arch; - tm_topology_t *topology; - int nb_processes,nb_cores; - int *sol,*k; - if(argc<3){ - fprintf(stderr,"Usage: %s \n",argv[0]); - return -1; - } - - topology=tgt_to_tm(argv[1],&arch); - optimize_topology(&topology); - nb_processes=build_comm(argv[2],&comm); - sol=(int*)MALLOC(sizeof(int)*nb_processes); - - nb_cores=nb_processing_units(topology); - k=(int*)MALLOC(sizeof(int)*nb_cores); - // TreeMatchMapping(nb_processes,nb_cores,comm,sol); - - if(nb_processes>nb_cores){ - fprintf(stderr,"Error: to many processes (%d) for this topology (%d nodes)\n",nb_processes,nb_cores); - exit(-1); - } - TIC; - comm_tree=build_tree_from_topology(topology,comm,nb_processes,NULL,NULL); - map_topology_simple(topology,comm_tree,sol,k); - double duration=TOC; - printf("mapping duration: %f\n",duration); - printf("TreeMatch: "); - print_sol_inv(nb_processes,sol,comm,arch); - //print_1D_tab(k,nb_cores); -// display_other_heuristics(topology,nb_processes,comm,arch); - - //display_tab(arch,nb_cores); - - FREE_topology(topology); - //FREE_tree(comm_tree); - FREE(sol); - FREE(comm); - FREE(arch); - - - - return 0; -} diff --git a/ompi/mca/topo/treematch/treematch/tgt_to_mat.c b/ompi/mca/topo/treematch/treematch/tgt_to_mat.c deleted file mode 100644 index 1e65a21a94..0000000000 --- a/ompi/mca/topo/treematch/treematch/tgt_to_mat.c +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -#include -#include "tm_hwloc.h" -#include "tm_tree.h" -#include "tm_mapping.h" -#include "tm_timings.h" - - - -int main(int argc, char**argv){; - tm_topology_t *topology; - int nb_cores; - double **arch; - if(argc<2){ - fprintf(stderr,"Usage: %s \n",argv[0]); - return -1; - } - - topology=tgt_to_tm(argv[1],&arch); - nb_cores=nb_nodes(topology); - - display_tab(arch,nb_cores); - - FREE_topology(topology); - 
FREE(arch); - - - - return 0; -} diff --git a/ompi/mca/topo/treematch/treematch/tm_bucket.c b/ompi/mca/topo/treematch/treematch/tm_bucket.c index 28e7664574..59d93e8e00 100644 --- a/ompi/mca/topo/treematch/treematch/tm_bucket.c +++ b/ompi/mca/topo/treematch/treematch/tm_bucket.c @@ -31,7 +31,7 @@ static int ilog2(int val) static int verbose_level = ERROR; -bucket_list_t global_bl = {0}; +bucket_list_t global_bl; int tab_cmp(const void*,const void*); int old_bucket_id(int,int,bucket_list_t); @@ -47,12 +47,12 @@ void fill_buckets(bucket_list_t); int is_power_of_2(int); void partial_sort(bucket_list_t *,double **,int); void next_bucket_elem(bucket_list_t,int *,int *); -int add_edge_3(tree_t *,tree_t *,int,int,int *); -void FREE_bucket(bucket_t *); -void FREE_tab_bucket(bucket_t **,int); -void FREE_bucket_list(bucket_list_t); -void partial_update_val (int nb_args, void **args); - +int add_edge_3(tm_tree_t *,tm_tree_t *,int,int,int *); +void free_bucket(bucket_t *); +void free_tab_bucket(bucket_t **,int); +void free_bucket_list(bucket_list_t); +void partial_update_val (int nb_args, void **args, int thread_id); +double bucket_grouping(tm_affinity_mat_t *,tm_tree_t *, tm_tree_t *, int ,int); int tab_cmp(const void* x1,const void* x2) { int *e1 = NULL,*e2 = NULL,i1,i2,j1,j2; @@ -146,7 +146,7 @@ void check_bucket(bucket_t *b,double **tab,double inf, double sup) j = b->bucket[k].j; if((tab[i][j] < inf) || (tab[i][j] > sup)){ if(verbose_level >= CRITICAL) - printf("[%d] (%d,%d):%f not in [%f,%f]\n",k,i,j,tab[i][j],inf,sup); + fprintf(stderr,"[%d] (%d,%d):%f not in [%f,%f]\n",k,i,j,tab[i][j],inf,sup); exit(-1); } } @@ -197,15 +197,20 @@ void add_to_bucket(int id,int i,int j,bucket_list_t bucket_list) n = bucket_list->nb_buckets; size = N*N/n; /* display_bucket(bucket);*/ - bucket->bucket = (coord*)realloc(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len)); + if(verbose_level >= DEBUG){ + printf("Extending bucket %d (%p) from size %d to size %d!\n", + id,bucket->bucket, bucket->nb_elem, bucket->nb_elem+size); + } + + bucket->bucket = (coord*)REALLOC(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len)); bucket->bucket_len += size; - if(verbose_level >= DEBUG){ - printf("MALLOC/realloc: %d\n",id); - printf("(%d,%d)\n",i,j); - display_bucket(bucket); - printf("\n"); - } + /* if(verbose_level >= DEBUG){ */ + /* printf("MALLOC/realloc: %d\n",id); */ + /* printf("(%d,%d)\n",i,j); */ + /* display_bucket(bucket); */ + /* printf("\n"); */ + /* } */ } @@ -289,7 +294,13 @@ void partial_sort(bucket_list_t *bl,double **tab,int N) bucket_list_t bucket_list; int nb_buckets, nb_bits; - /* after these operations, nb_bucket is a power of 2 interger close to log2(N)*/ + if( N <= 0){ + if(verbose_level >= ERROR ) + fprintf(stderr,"Error: tryng to group a matrix of size %d<=0!\n",N); + return; + } + + /* after these operations, nb_buckets is a power of 2 interger close to log2(N)*/ nb_buckets = (int)floor(CmiLog2(N)); @@ -404,7 +415,7 @@ void next_bucket_elem(bucket_list_t bucket_list,int *i,int *j) } -int add_edge_3(tree_t *tab_node, tree_t *parent,int i,int j,int *nb_groups) +int add_edge_3(tm_tree_t *tab_node, tm_tree_t *parent,int i,int j,int *nb_groups) { /* printf("%d <-> %d ?\n",tab_node[i].id,tab_node[j].id); */ if((!tab_node[i].parent) && (!tab_node[j].parent)){ @@ -453,7 +464,7 @@ int add_edge_3(tree_t *tab_node, tree_t *parent,int i,int j,int *nb_groups) return 0; } -int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_groups) +int try_add_edge(tm_tree_t *tab_node, 
tm_tree_t *parent,int arity,int i,int j,int *nb_groups) { assert( i != j ); @@ -481,40 +492,40 @@ int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_ } } -void FREE_bucket(bucket_t *bucket) +void free_bucket(bucket_t *bucket) { FREE(bucket->bucket); FREE(bucket); } -void FREE_tab_bucket(bucket_t **bucket_tab,int N) +void free_tab_bucket(bucket_t **bucket_tab,int N) { int i; for( i = 0 ; i < N ; i++ ) - FREE_bucket(bucket_tab[i]); + free_bucket(bucket_tab[i]); FREE(bucket_tab); } -void FREE_bucket_list(bucket_list_t bucket_list) +void free_bucket_list(bucket_list_t bucket_list) { - /* Do not FREE the tab field it is used elsewhere */ - FREE_tab_bucket(bucket_list->bucket_tab,bucket_list->nb_buckets); + /* Do not free the tab field it is used elsewhere */ + free_tab_bucket(bucket_list->bucket_tab,bucket_list->nb_buckets); FREE(bucket_list->pivot); FREE(bucket_list->pivot_tree); FREE(bucket_list); } -void partial_update_val (int nb_args, void **args){ +void partial_update_val (int nb_args, void **args, int thread_id){ int inf = *(int*)args[0]; int sup = *(int*)args[1]; - affinity_mat_t *aff_mat = (affinity_mat_t*)args[2]; - tree_t *new_tab_node = (tree_t*)args[3]; + tm_affinity_mat_t *aff_mat = (tm_affinity_mat_t*)args[2]; + tm_tree_t *new_tab_node = (tm_tree_t*)args[3]; double *res=(double*)args[4]; int l; - if(nb_args != 6){ + if(nb_args != 5){ if(verbose_level >= ERROR) - fprintf(stderr,"Wrong number of args in %s: %d\n",__func__, nb_args); + fprintf(stderr,"(Thread: %d) Wrong number of args in %s: %d\n",thread_id, __FUNCTION__, nb_args); exit(-1); } @@ -524,7 +535,7 @@ void partial_update_val (int nb_args, void **args){ } } -void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node, +double bucket_grouping(tm_affinity_mat_t *aff_mat,tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity,int M) { bucket_list_t bucket_list; @@ -536,10 +547,12 @@ void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_n int N = aff_mat->order; double **mat = aff_mat->mat; - verbose_level = get_verbose_level(); + verbose_level = tm_get_verbose_level(); if(verbose_level >= INFO ) printf("starting sort of N=%d elements\n",N); + + TIC; partial_sort(&bucket_list,mat,N); duration = TOC; @@ -662,8 +675,8 @@ void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_n printf("Bucket: %d, indice:%d\n",bucket_list->cur_bucket,bucket_list->bucket_indice); printf("val=%f\n",val); } - FREE_bucket_list(bucket_list); + free_bucket_list(bucket_list); - /* exit(-1); */ - /* display_grouping(new_tab_node,M,arity,val); */ + return val; } + diff --git a/ompi/mca/topo/treematch/treematch/tm_bucket.h b/ompi/mca/topo/treematch/treematch/tm_bucket.h index 17e7060398..433d481646 100644 --- a/ompi/mca/topo/treematch/treematch/tm_bucket.h +++ b/ompi/mca/topo/treematch/treematch/tm_bucket.h @@ -28,7 +28,8 @@ typedef struct{ typedef _bucket_list_t *bucket_list_t; -void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node, - int arity,int M); -int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_groups); +double bucket_grouping(tm_affinity_mat_t *aff_mat,tm_tree_t *tab_node, tm_tree_t *new_tab_node, + int arity,int M); +int try_add_edge(tm_tree_t *tab_node, tm_tree_t *parent,int arity,int i,int j,int *nb_groups); #endif + diff --git a/ompi/mca/topo/treematch/treematch/tm_hwloc.c b/ompi/mca/topo/treematch/treematch/tm_hwloc.c deleted file mode 100644 index 00e279e0cd..0000000000 --- 
a/ompi/mca/topo/treematch/treematch/tm_hwloc.c +++ /dev/null @@ -1,286 +0,0 @@ -#include "opal/mca/hwloc/hwloc-internal.h" -#include "tm_tree.h" -#include "tm_mapping.h" -#include -#include "tm_verbose.h" - - -double ** tm_topology_to_arch(tm_topology_t *topology,double *cost); -tm_topology_t * tgt_to_tm(char *filename,double **pcost); -int topo_nb_proc(hwloc_topology_t topology,int N); -double ** topology_to_arch(hwloc_topology_t topology); -int symetric(hwloc_topology_t topology); -tm_topology_t* hwloc_to_tm(char *filename,double **pcost); -tm_topology_t* get_local_topo_with_hwloc(void); - - - - -/* transform a tgt scotch file into a topology file*/ -tm_topology_t * tgt_to_tm(char *filename, double **pcost) -{ - tm_topology_t *topology = NULL; - FILE *pf = NULL; - char line[1024]; - char *s = NULL; - double *cost = NULL; - int i; - - - - pf = fopen(filename,"r"); - if(!pf){ - if(get_verbose_level() >= CRITICAL) - fprintf(stderr,"Cannot open %s\n",filename); - exit(-1); - } - - if(get_verbose_level() >= INFO) - printf("Reading TGT file: %s\n",filename); - - - fgets(line,1024,pf); - - s = strstr(line,"tleaf"); - if(!s){ - if(get_verbose_level() >= CRITICAL) - fprintf(stderr,"Syntax error! %s is not a tleaf file\n",filename); - exit(-1); - } - - s += 5; - while(isspace(*s)) - s++; - - topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t)); - topology->nb_levels = atoi(strtok(s," "))+1; - topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels); - cost = (double*)CALLOC(topology->nb_levels,sizeof(double)); - - for( i = 0 ; i < topology->nb_levels-1 ; i++ ){ - topology->arity[i] = atoi(strtok(NULL," ")); - cost[i] = atoi(strtok(NULL," ")); - } - - topology->arity[topology->nb_levels-1] = 0; - /* cost[topology->nb_levels-1]=0; */ - - /*aggregate costs*/ - for( i = topology->nb_levels-2 ; i >= 0 ; i-- ) - cost[i] += cost[i+1]; - - build_synthetic_proc_id(topology); - - *pcost = cost; - fclose(pf); - /* - topology->arity[0]=nb_proc; - topology->nb_levels=decompose((int)ceil((1.0*nb_obj)/nb_proc),1,topology->arity); - printf("levels=%d\n",topology->nb_levels); - */ - if(get_verbose_level() >= INFO) - printf("Topology built from %s!\n",filename); - - return topology; -} - -int topo_nb_proc(hwloc_topology_t topology,int N) -{ - hwloc_obj_t *objs = NULL; - int nb_proc; - - objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*N); - objs[0] = hwloc_get_next_obj_by_type(topology,HWLOC_OBJ_PU,NULL); - nb_proc = 1 + hwloc_get_closest_objs(topology,objs[0],objs+1,N-1); - FREE(objs); - return nb_proc; -} - - -double ** topology_to_arch(hwloc_topology_t topology) -{ - int nb_proc,i,j; - hwloc_obj_t obj_proc1,obj_proc2,obj_res; - double **arch = NULL; - - nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); - arch = (double**)MALLOC(sizeof(double*)*nb_proc); - for( i = 0 ; i < nb_proc ; i++ ){ - obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i); - arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc); - for( j = 0 ; j < nb_proc ; j++ ){ - obj_proc2 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,j); - obj_res = hwloc_get_common_ancestor_obj(topology,obj_proc1,obj_proc2); - /* printf("arch[%d][%d] <- %ld\n",obj_proc1->os_index,obj_proc2->os_index,*((long int*)(obj_res->userdatab))); */ - arch[obj_proc1->os_index][obj_proc2->os_index]=speed(obj_res->depth+1); - } - } - return arch; -} - -int symetric(hwloc_topology_t topology) -{ - int depth,i,topodepth = hwloc_topology_get_depth(topology); - unsigned int arity; - hwloc_obj_t obj; - for ( depth = 0; depth < topodepth-1 ; depth++ 
) { - int N = hwloc_get_nbobjs_by_depth(topology, depth); - obj = hwloc_get_next_obj_by_depth (topology,depth,NULL); - arity = obj->arity; - - /* printf("Depth=%d, N=%d, Arity:%d\n",depth,N,arity); */ - for (i = 1; i < N; i++ ){ - obj = hwloc_get_next_obj_by_depth (topology,depth,obj); - if( obj->arity != arity){ - /* printf("[%d]: obj->arity=%d, arity=%d\n",i,obj->arity,arity); */ - return 0; - } - } - } - return 1; -} - -tm_topology_t* hwloc_to_tm(char *filename,double **pcost) -{ - hwloc_topology_t topology; - tm_topology_t *res = NULL; - hwloc_obj_t *objs = NULL; - unsigned topodepth,depth; - int nb_nodes,i; - double *cost; - int err; - - /* Build the topology */ - hwloc_topology_init(&topology); - err = hwloc_topology_set_xml(topology,filename); - if(err == -1){ - if(get_verbose_level() >= CRITICAL) - fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename); - exit(-1); - } - -#if HWLOC_API_VERSION < 0x20000 - hwloc_topology_ignore_all_keep_structure(topology); -#else -#warning FIXME hwloc v2 -#endif - hwloc_topology_load(topology); - - - /* Test if symetric */ - if(!symetric(topology)){ - if(get_verbose_level() >= CRITICAL) - fprintf(stderr,"%s not symetric!\n",filename); - exit(-1); - } - - /* work on depth */ - topodepth = hwloc_topology_get_depth(topology); - - res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t)); - res->nb_levels = topodepth; - res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels); - res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels); - res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels); - - if(get_verbose_level() >= INFO) - printf("topodepth = %d\n",topodepth); - - /* Build TreeMatch topology */ - for( depth = 0 ; depth < topodepth ; depth++ ){ - nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth); - res->nb_nodes[depth] = nb_nodes; - res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); - - objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes); - objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL); - hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1); - res->arity[depth] = objs[0]->arity; - - if(get_verbose_level() >= INFO) - printf("%d(%d):",res->arity[depth],nb_nodes); - - /* Build process id tab */ - for (i = 0; i < nb_nodes; i++){ - res->node_id[depth][i] = objs[i]->os_index; - /* if(depth==topodepth-1) */ - } - FREE(objs); - } - - cost = (double*)CALLOC(res->nb_levels,sizeof(double)); - for(i=0; inb_levels; i++){ - cost[i] = speed(i); - } - - *pcost = cost; - - - /* Destroy topology object. 
*/ - hwloc_topology_destroy(topology); - if(get_verbose_level() >= INFO) - printf("\n"); - return res; -} - -tm_topology_t* get_local_topo_with_hwloc(void) -{ - hwloc_topology_t topology; - tm_topology_t *res = NULL; - hwloc_obj_t *objs = NULL; - unsigned topodepth,depth; - int nb_nodes,i; - - /* Build the topology */ - hwloc_topology_init(&topology); -#if HWLOC_API_VERSION < 0x20000 - hwloc_topology_ignore_all_keep_structure(topology); -#else -#warning FIXME hwloc v2 -#endif - hwloc_topology_load(topology); - - /* Test if symetric */ - if(!symetric(topology)){ - if(get_verbose_level() >= CRITICAL) - fprintf(stderr,"Local toplogy not symetric!\n"); - exit(-1); - } - - /* work on depth */ - topodepth = hwloc_topology_get_depth(topology); - - res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t)); - res->nb_levels = topodepth; - res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels); - res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels); - res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels); - - /* Build TreeMatch topology */ - for( depth = 0 ; depth < topodepth ; depth++ ){ - nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth); - res->nb_nodes[depth] = nb_nodes; - res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); - - objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes); - objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL); - hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1); - res->arity[depth] = objs[0]->arity; - - /* printf("%d:",res->arity[depth]); */ - - /* Build process id tab */ - for (i = 0; i < nb_nodes; i++){ - res->node_id[depth][i] = objs[i]->os_index; - /* if(depth==topodepth-1) */ - } - FREE(objs); - } - - /* Destroy HWLOC topology object. */ - hwloc_topology_destroy(topology); - - /* printf("\n"); */ - return res; -} - diff --git a/ompi/mca/topo/treematch/treematch/tm_hwloc.h b/ompi/mca/topo/treematch/treematch/tm_hwloc.h deleted file mode 100644 index 7ba09d3e51..0000000000 --- a/ompi/mca/topo/treematch/treematch/tm_hwloc.h +++ /dev/null @@ -1,7 +0,0 @@ -#include "opal/mca/hwloc/hwloc-internal.h" -#include "tm_tree.h" - -void hwloc_topology_tag(hwloc_topology_t topology); -tm_topology_t* hwloc_to_tm(char *filename,double **pcost); -tm_topology_t * tgt_to_tm(char *filename,double **pcost); -tm_topology_t* get_local_topo_with_hwloc(void); diff --git a/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c b/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c index 985c7c07fa..282e7f7e04 100644 --- a/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c +++ b/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c @@ -1,13 +1,12 @@ #include "tm_mapping.h" #include "tm_mt.h" #include "tm_kpartitioning.h" +#include "k-partitioning.h" #include #include +#include "config.h" #define USE_KL_KPART 0 -#if USE_KL_KPART -#include "k-partitioning.h" -#endif /* USE_KL_KPART */ #define KL_KPART_GREEDY_TRIALS 0 static int verbose_level = ERROR; @@ -15,25 +14,23 @@ static int verbose_level = ERROR; #define MAX_TRIALS 10 #define USE_KL_STRATEGY 1 -#if !defined(MIN) + #define MIN(a,b) ((a)<(b)?(a):(b)) -#endif int fill_tab(int **,int *,int,int,int,int); -void complete_com_mat(double ***,int,int); void complete_obj_weight(double **,int,int); void allocate_vertex(int,int *,com_mat_t *,int,int *,int); double eval_cost(int *, com_mat_t *); int *kpartition_greedy(int, com_mat_t *,int,int *,int); -constraint_t *split_constraints (int *,int,int,tm_topology_t *,int); +constraint_t *split_constraints (int *,int,int,tm_topology_t *,int, int); com_mat_t 
**split_com_mat(com_mat_t *,int,int,int *); int **split_vertices(int *,int,int,int *); -void FREE_tab_com_mat(com_mat_t **,int); -void FREE_tab_local_vertices(int **,int); -void FREE_const_tab(constraint_t *,int); -void kpartition_build_level_topology(tree_t *,com_mat_t *,int,int,tm_topology_t *, +void free_tab_com_mat(com_mat_t **,int); +void free_tab_local_vertices(int **,int); +void free_const_tab(constraint_t *,int); +void kpartition_build_level_topology(tm_tree_t *,com_mat_t *,int,int,tm_topology_t *, int *,int *,int,double *,double *); @@ -51,10 +48,14 @@ void allocate_vertex(int u, int *res, com_mat_t *com_mat, int n, int *size, int best_part = res[i]; break; } + }else{ for( i = 0 ; i < n ; i++){ if (( res[i] != -1 ) && ( size[res[i]] < max_size )){ cost = (((i)n)) ?com_mat->comm[u][i]:0; + /* if((n<=16) && (u==8)){ */ + /* printf("u=%d, i=%d: %f\n",u, i, cost); */ + /* } */ if (( cost > best_cost)){ best_cost = cost; best_part = res[i]; @@ -62,8 +63,10 @@ void allocate_vertex(int u, int *res, com_mat_t *com_mat, int n, int *size, int } } } - /* printf("size[%d]: %d\n",best_part, size[best_part]);*/ - /* printf("putting(%.2f): %d -> %d\n",best_cost, u, best_part); */ + /* if(n<=16){ */ + /* printf("size[%d]: %d\n",best_part, size[best_part]); */ + /* printf("putting(%.2f): %d -> %d\n",best_cost, u, best_part); */ + /* } */ res[u] = best_part; size[best_part]++; @@ -84,25 +87,45 @@ double eval_cost(int *partition, com_mat_t *com_mat) int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints) { - int *res = NULL, *best_res=NULL, *size = NULL; + int *partition = NULL, *best_partition=NULL, *size = NULL; int i,j,nb_trials; int max_size, max_val; double cost, best_cost = -1; int start, end; int dumb_id, nb_dumb; + int vl = tm_get_verbose_level(); + if(nb_constraints > n){ + if(vl >= ERROR){ + fprintf(stderr,"Error more constraints (%d) than the problem size (%d)!\n",nb_constraints, n); + } + return NULL; + } + + max_size = n/k; + + if(vl >= DEBUG){ + printf("max_size = %d (n=%d,k=%d)\ncom_mat->n-1=%d\n",max_size,n,k,com_mat->n-1); + printf("nb_constraints = %d\n",nb_constraints); + + if(n<=16){ + printf("Constraints: ");print_1D_tab(constraints,nb_constraints); + } + } + /* if(com_mat->n){ */ + /* printf ("val [n-1][0]= %f\n",com_mat->comm[com_mat->n-1][0]); */ + /* } */ for( nb_trials = 0 ; nb_trials < MAX_TRIALS ; nb_trials++ ){ - res = (int *)MALLOC(sizeof(int)*n); + partition = (int *)MALLOC(sizeof(int)*n); for ( i = 0 ; i < n ; i ++ ) - res[i] = -1; + partition[i] = -1; size = (int *)CALLOC(k,sizeof(int)); - max_size = n/k; - /*printf("Constraints: ");print_1D_tab(constraints,nb_constraints);*/ + /* put "dumb" vertices in the correct partition if there are any*/ if (nb_constraints){ @@ -121,12 +144,13 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int number of leaves of the subtree (n/k) and the number of constraints */ nb_dumb = n/k - (end-start); - /*printf("max_val: %d, nb_dumb=%d, start=%d, end=%d, size=%d\n",max_val, nb_dumb, start, end, n/k);*/ - + /* if(n<=16){ */ + /* printf("max_val: %d, nb_dumb=%d, start=%d, end=%d, size=%d\n",max_val, nb_dumb, start, end, n/k); */ + /* } */ /* dumb vertices are the one with highest indices: put them in the ith partitions*/ for( j = 0; j < nb_dumb; j ++ ){ - res[dumb_id] = i; + partition[dumb_id] = i; dumb_id--; } /* increase the size of the ith partition accordingly*/ @@ -134,7 +158,10 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int 
start=end; } } - /*printf("After dumb vertices mapping: ");print_1D_tab(res,n);*/ + /* if(n<=16){ */ + /* printf("After dumb vertices mapping: ");print_1D_tab(partition,n); */ + /* } */ + /* choose k initial "true" vertices at random and put them in a different partition */ for ( i = 0 ; i < k ; i ++ ){ @@ -145,35 +172,39 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int do{ /* call the mersenne twister PRNG of tm_mt.c*/ j = genrand_int32() % n; - } while ( res[j] != -1 ); + } while ( partition[j] != -1 ); /* allocate and update size of partition*/ - res[j] = i; - /* printf("random: %d -> %d\n",j,i); */ + partition[j] = i; + /* if(n<=16){ */ + /* printf("random: %d -> %d\n",j,i); */ + /* } */ size[i]++; } /* allocate each unaloacted vertices in the partition that maximize the communication*/ for( i = 0 ; i < n ; i ++) - if( res[i] == -1) - allocate_vertex(i, res, com_mat, n, size, max_size); + if( partition[i] == -1) + allocate_vertex(i, partition, com_mat, n, size, max_size); - cost = eval_cost(res,com_mat); - /*print_1D_tab(res,n); - printf("cost=%.2f\n",cost);*/ + cost = eval_cost(partition,com_mat); + /* if(n<=16){ */ + /* print_1D_tab(partition,n); */ + /* printf("cost=%.2f\n",cost); */ + /* } */ if((cost=DEBUG){ + printf("Step %d\n",i); + printf("\tConstraint: "); print_1D_tab(constraints, nb_constraints); + printf("\tSub constraint: "); print_1D_tab(const_tab[i].constraints, end-start); + } + + if(end-start > N/k){ + if(vl >= ERROR){ + fprintf(stderr, "Error in spliting constraint at step %d. N=%d k= %d, length = %d\n", i, N, k, end-start); + } + FREE(const_tab); + return NULL; + } const_tab[i].id = i; start = end; } @@ -224,6 +279,7 @@ constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm } +/* split the com_mat of order n in k partiton according to parmutition table*/ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition) { com_mat_t **res = NULL, *sub_com_mat; @@ -237,6 +293,8 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition) if(verbose_level >= DEBUG){ printf("Partition: "); print_1D_tab(partition,n); display_tab(com_mat->comm,com_mat->n); + printf("m=%d,n=%d,k=%d\n",m,n,k); + printf("perm=%p\n",perm); } perm = (int*)MALLOC(sizeof(int)*m); @@ -244,10 +302,22 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition) /* build perm such that submat[i][j] correspond to com_mat[perm[i]][perm[j]] according to the partition*/ s = 0; - for( j = 0; j < com_mat->n; j ++) /* check only non zero element of of com_mat*/ + /* The partition is of size n. n can be larger than the communication matrix order + as only the input problem are in the communication matrix while n is of the size + of all the element (including the added one where it is possible to map computation) : + we can have more compute units than processes*/ + for( j = 0; j < com_mat->n; j ++) if ( partition[j] == cur_part ) perm[s++] = j; + if(s>m){ + if(verbose_level >= CRITICAL){ + fprintf(stderr,"Partition: "); print_1D_tab(partition,n); + display_tab(com_mat->comm,com_mat->n); + fprintf(stderr,"too many elements of the partition for the permuation (s=%d>%d=m). 
n=%d, k=%d, cur_part= %d\n",s,m,n,k, cur_part); + } + exit(-1); + } /* s is now the size of the non zero sub matrix for this partition*/ /* built a sub-matrix for partition cur_part*/ sub_mat = (double **) MALLOC(sizeof(double *) * s); @@ -264,7 +334,7 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition) } } - sub_com_mat = (com_mat_t *)malloc(sizeof(com_mat_t)); + sub_com_mat = (com_mat_t *)MALLOC(sizeof(com_mat_t)); sub_com_mat -> n = s; sub_com_mat -> comm = sub_mat; @@ -275,7 +345,7 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition) res[cur_part] = sub_com_mat; } - FREE(perm); + FREE(perm); return res; } @@ -311,7 +381,7 @@ int **split_vertices( int *vertices, int n, int k, int *partition) return res; } -void FREE_tab_com_mat(com_mat_t **mat,int k) +void free_tab_com_mat(com_mat_t **mat,int k) { int i,j; if( !mat ) @@ -321,11 +391,13 @@ void FREE_tab_com_mat(com_mat_t **mat,int k) for ( j = 0 ; j < mat[i]->n ; j ++) FREE( mat[i]->comm[j] ); FREE( mat[i]->comm ); + FREE(mat[i]); + } FREE(mat); } -void FREE_tab_local_vertices(int **mat, int k) +void free_tab_local_vertices(int **mat, int k) { int i; /* m=n/k; */ if( !mat ) @@ -338,7 +410,7 @@ void FREE_tab_local_vertices(int **mat, int k) } -void FREE_const_tab(constraint_t *const_tab, int k) +void free_const_tab(constraint_t *const_tab, int k) { int i; @@ -353,19 +425,33 @@ void FREE_const_tab(constraint_t *const_tab, int k) FREE(const_tab); } -void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N, int depth, + +void check_com_mat(com_mat_t *com_mat){ + int i,j; + + for( i = 0 ; i < com_mat->n ; i++ ) + for( j = 0 ; j < com_mat->n ; j++ ) + if(com_mat->comm[i][j]<0){ + printf("com_mat->comm[%d][%d]= %f\n",i,j,com_mat->comm[i][j]); + exit(-1); + } + + +} + +void kpartition_build_level_topology(tm_tree_t *cur_node, com_mat_t *com_mat, int N, int depth, tm_topology_t *topology, int *local_vertices, int *constraints, int nb_constraints, double *obj_weight, double *comm_speed) { com_mat_t **tab_com_mat = NULL; /* table of comunication matrix. 
We will have k of such comunication matrix, one for each subtree */ int k = topology->arity[depth]; - tree_t **tab_child = NULL; + tm_tree_t **tab_child = NULL; int *partition = NULL; int **tab_local_vertices = NULL; constraint_t *const_tab = NULL; int i; - verbose_level = get_verbose_level(); + verbose_level = tm_get_verbose_level(); /* if we are at the bottom of the tree set cur_node and return*/ @@ -377,8 +463,14 @@ void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N } + if(verbose_level >= DEBUG){ + printf("Partitionning Matrix of size %d (problem size= %d) in %d partitions\n", com_mat->n, N, k); + } + + /* check_com_mat(com_mat); */ + /* partition the com_matrix in k partitions*/ - partition = kpartition(topology->arity[depth], com_mat, N, constraints, nb_constraints); + partition = kpartition(k, com_mat, N, constraints, nb_constraints); /* split the communication matrix in k parts according to the partition just found above */ tab_com_mat = split_com_mat( com_mat, N, k, partition); @@ -387,12 +479,12 @@ void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N tab_local_vertices = split_vertices( local_vertices, N, k, partition); /* construct a tab of constraints of size k: one for each partitions*/ - const_tab = split_constraints (constraints, nb_constraints, k, topology, depth); + const_tab = split_constraints (constraints, nb_constraints, k, topology, depth, N); /* create the table of k nodes of the resulting sub-tree */ - tab_child = (tree_t **) CALLOC (k,sizeof(tree_t*)); + tab_child = (tm_tree_t **) CALLOC (k,sizeof(tm_tree_t*)); for( i = 0 ; i < k ; i++){ - tab_child[i] = (tree_t *) MALLOC(sizeof(tree_t)); + tab_child[i] = (tm_tree_t *) MALLOC(sizeof(tm_tree_t)); } /* for each child, proceeed recursively*/ @@ -408,28 +500,30 @@ void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N /* link the node with its child */ set_node( cur_node, tab_child, k, NULL, cur_node->id, 0, NULL, depth); - /* FREE local data*/ + /* free local data*/ FREE(partition); - FREE_tab_com_mat(tab_com_mat,k); - FREE_tab_local_vertices(tab_local_vertices,k); - FREE_const_tab(const_tab,k); + free_tab_com_mat(tab_com_mat,k); + free_tab_local_vertices(tab_local_vertices,k); + free_const_tab(const_tab,k); } -tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **comm,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed) +tm_tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **comm,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed) { int depth,i, K; - tree_t *root = NULL; + tm_tree_t *root = NULL; int *local_vertices = NULL; int nb_cores; com_mat_t com_mat; - verbose_level = get_verbose_level(); + verbose_level = tm_get_verbose_level(); + + + nb_cores=nb_processing_units(topology)*topology->oversub_fact; + if(verbose_level>=INFO) - printf("Number of constraints: %d, N=%d\n", nb_constraints, N); - - nb_cores=nb_processing_units(topology); + printf("Number of constraints: %d, N=%d, nb_cores = %d, K=%d\n", nb_constraints, N, nb_cores, nb_cores-N); if((constraints == NULL) && (nb_constraints != 0)){ if(verbose_level>=ERROR) @@ -449,7 +543,6 @@ tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **com if((K=nb_cores - N)>0){ /* add K element to the object weight*/ complete_obj_weight(&obj_weight,N,K); - /* display_tab(tab,N+K);*/ } else if( K < 0){ if(verbose_level>=ERROR) fprintf(stderr,"Not enough 
cores!\n"); @@ -463,7 +556,7 @@ tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **com local_vertices is the array of vertices that can be used the min(N,nb_contraints) 1st element are number from 0 to N the last ones have value -1 - the value of this array will be used to number the leaves of the tree_t tree + the value of this array will be used to number the leaves of the tm_tree_t tree that start at "root" min(N,nb_contraints) is used to takle the case where thre is less processes than constraints @@ -479,18 +572,20 @@ tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **com /* we assume all objects have the same arity*/ /* assign the root of the tree*/ - root = (tree_t*) MALLOC (sizeof(tree_t)); - root->id = 0; + root = (tm_tree_t*) MALLOC (sizeof(tm_tree_t)); + root -> id = 0; + /*build the tree downward from the root*/ kpartition_build_level_topology(root, &com_mat, N+K, depth, topology, local_vertices, - constraints, nb_constraints, obj_weight, com_speed); + constraints, nb_constraints, obj_weight, com_speed); /*print_1D_tab(local_vertices,K+N);*/ if(verbose_level>=INFO) printf("Build (bottom-up) tree done!\n"); + FREE(local_vertices); diff --git a/ompi/mca/topo/treematch/treematch/tm_kpartitioning.h b/ompi/mca/topo/treematch/treematch/tm_kpartitioning.h index 58cf6af6ff..09c2227c06 100644 --- a/ompi/mca/topo/treematch/treematch/tm_kpartitioning.h +++ b/ompi/mca/topo/treematch/treematch/tm_kpartitioning.h @@ -1,9 +1,9 @@ typedef struct _com_mat_t{ - double **comm; + double **comm; int n; /*comm is of size n by n the other element are zeroes*/ - + } com_mat_t; int *kpartition(int, com_mat_t*, int, int *, int); -tree_t * kpartition_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed); +tm_tree_t * kpartition_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed); diff --git a/ompi/mca/topo/treematch/treematch/tm_malloc.c b/ompi/mca/topo/treematch/treematch/tm_malloc.c index 4613aa5c36..cc6e711bf3 100644 --- a/ompi/mca/topo/treematch/treematch/tm_malloc.c +++ b/ompi/mca/topo/treematch/treematch/tm_malloc.c @@ -1,35 +1,60 @@ +#include +#include +#include +#include #include "uthash.h" #include #include "tm_verbose.h" #include "tm_malloc.h" -#include "opal/util/alfg.h" +#include "tm_tree.h" +#include "tm_mt.h" + + +#define MIN(a,b) ((a)<(b)?(a):(b)) #define EXTRA_BYTE 100 -typedef signed char byte; +typedef uint8_t byte; /* static int verbose_level = ERROR;*/ typedef struct _hash_t { - void *key; /* we'll use this field as the key */ - size_t size; - UT_hash_handle hh; /* makes this structure hashable */ + void *key; /* we'll use this field as the key */ + size_t size; + char *file; + int line; + UT_hash_handle hh; /* makes this structure hashable */ }hash_t; static hash_t *size_hash = NULL; static char extra_data[EXTRA_BYTE]; -static void save_size(void *ptr, size_t size); +static void save_ptr(void *ptr, size_t size, char *file, int line); static size_t retreive_size(void *someaddr); static void init_extra_data(void); -void save_size(void *ptr, size_t size) { + + +char *my_strdup(char* string){ + int size = 1+strlen(string); + char *res = (char*)malloc(size*sizeof(char)); + + if(res) + memcpy(res, string, size*sizeof(char)); + + return res; + +} + +void save_ptr(void *ptr, size_t size, char *file, int line) { hash_t *elem; elem = (hash_t*) 
malloc(sizeof(hash_t)); - elem -> key = ptr; + elem -> key = ptr; elem -> size = size; - if(get_verbose_level() >= DEBUG) + elem -> line = line; + elem -> file = my_strdup(file); + if(tm_get_verbose_level() >= DEBUG) printf("Storing (%p,%ld)\n",ptr,size); HASH_ADD_PTR( size_hash, key, elem ); } @@ -40,72 +65,76 @@ size_t retreive_size(void *someaddr){ hash_t *elem = NULL; HASH_FIND_PTR(size_hash, &someaddr, elem); if(!elem){ - fprintf(stderr,"cannot find ptr %p to free!\n",someaddr); + if(tm_get_verbose_level() >= CRITICAL) + fprintf(stderr,"Cannot find ptr %p to free!\n",someaddr); + abort(); return 0; } res = elem->size; - if(get_verbose_level()>=DEBUG) + if(tm_get_verbose_level()>=DEBUG) printf("Retreiving (%p,%ld)\n",someaddr, res); + free(elem->file); HASH_DEL( size_hash, elem); return res; } -void my_mem_check(void){ +void tm_mem_check(void){ +#ifdef __DEBUG_TM_MALLOC__ hash_t *s; int nb_errors = 0; for(s=size_hash; s != NULL; s=s->hh.next) { - if(get_verbose_level() >= ERROR) { - printf("pointer %p of size %ld has not been freed!\n", s->key, s->size); - } - nb_errors ++; + if(tm_get_verbose_level()>=ERROR) + printf("pointer %p of size %ld (%s: %d) has not been freed!\n", s->key, s->size, s->file, s->line); + nb_errors ++; } - if(get_verbose_level() >= INFO) + if(tm_get_verbose_level() >= INFO) printf ("Number of errors in managing memory: %d\n",nb_errors); +#endif } void init_extra_data(void){ static int done = 0; - opal_rng_buff_t rng; int i; if(done) return; - opal_srand(&rng,0); + init_genrand(0); for( i = 0 ; i < EXTRA_BYTE; i++) - extra_data[i] = (char) opal_rand(&rng) % 256; + extra_data[i] = (char) genrand_int32() % 256; done = 1; } -void *my_malloc(size_t size, char *file, int line){ +void *tm_malloc(size_t size, char *file, int line){ byte *ptr; init_extra_data(); size+=2*EXTRA_BYTE; ptr = malloc(size); - if(get_verbose_level()>=DEBUG) - printf("my_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE,(void*)ptr,file,line); + if(tm_get_verbose_level()>=DEBUG) + printf("tm_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE,ptr,file,line); - save_size(ptr,size); + save_ptr(ptr, size, file, line); memcpy(ptr, extra_data, EXTRA_BYTE); memcpy(ptr + size - EXTRA_BYTE, extra_data, EXTRA_BYTE); - if(get_verbose_level()>=DEBUG) - printf("my_malloc returning: %p\n",(void*)(ptr+EXTRA_BYTE)); + if(tm_get_verbose_level()>=DEBUG) + printf("tm_malloc returning: %p\n",ptr+EXTRA_BYTE); return (void *)(ptr + EXTRA_BYTE); } -void *my_calloc(size_t count, size_t size, char *file, int line){ + +void *tm_calloc(size_t count, size_t size, char *file, int line){ byte *ptr; size_t full_size; @@ -115,22 +144,72 @@ void *my_calloc(size_t count, size_t size, char *file, int line){ ptr = malloc(full_size); bzero(ptr,full_size); - save_size(ptr, full_size); + save_ptr(ptr, full_size, file, line); - if(get_verbose_level()>=DEBUG) - printf("my_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,(void*)ptr, file, line); + if(tm_get_verbose_level()>=DEBUG) + printf("tm_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,ptr, file, line); memcpy(ptr, extra_data, EXTRA_BYTE); memcpy(ptr + full_size - EXTRA_BYTE, extra_data, EXTRA_BYTE); - if(get_verbose_level()>=DEBUG) - printf("my_calloc returning: %p\n",(void*)(ptr+EXTRA_BYTE)); + if(tm_get_verbose_level()>=DEBUG) + printf("tm_calloc returning: %p\n",ptr+EXTRA_BYTE); return (void *)(ptr+EXTRA_BYTE); } -void my_free(void *ptr){ + +void *tm_realloc(void *old_ptr, size_t size, char *file, int line){ + byte *ptr; + size_t full_size; + + 
init_extra_data(); + + full_size = size + 2 * EXTRA_BYTE; + + ptr = malloc(full_size); + save_ptr(ptr, full_size, file, line); + + if(tm_get_verbose_level()>=DEBUG) + printf("tm_realloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,ptr, file, line); + + + memcpy(ptr, extra_data, EXTRA_BYTE); + memcpy(ptr + full_size - EXTRA_BYTE, extra_data, EXTRA_BYTE); + + if(old_ptr){ + byte *original_ptr = ((byte *)old_ptr) - EXTRA_BYTE; + size_t old_ptr_size = retreive_size(original_ptr); + + memcpy(ptr + EXTRA_BYTE, old_ptr, MIN(old_ptr_size - 2 * EXTRA_BYTE, size)); + + if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){ + fprintf(stderr,"Realloc: cannot find special string ***before*** %p!\n", original_ptr); + fprintf(stderr,"memory is probably corrupted here!\n"); + } + + if((bcmp(original_ptr + old_ptr_size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){ + fprintf(stderr,"Realloc: cannot find special string ***after*** %p!\n", original_ptr); + fprintf(stderr,"memory is probably corrupted here!\n"); + } + + if(tm_get_verbose_level()>=DEBUG) + printf("tm_free freeing: %p\n",original_ptr); + + + free(original_ptr); + } + + + if(tm_get_verbose_level()>=DEBUG) + printf("tm_realloc returning: %p (----- %p)\n",ptr+EXTRA_BYTE, ((byte *)ptr) - EXTRA_BYTE); + + + return (void *)(ptr+EXTRA_BYTE); +} + +void tm_free(void *ptr){ byte *original_ptr = ((byte *)ptr) - EXTRA_BYTE; size_t size; @@ -139,18 +218,18 @@ void my_free(void *ptr){ size = retreive_size(original_ptr); - if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((get_verbose_level()>=ERROR))){ - fprintf(stderr,"cannot find special string ***before*** %p!\n",ptr); + if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){ + fprintf(stderr,"Free: cannot find special string ***before*** %p!\n", original_ptr); fprintf(stderr,"memory is probably corrupted here!\n"); } - if((bcmp(original_ptr + size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((get_verbose_level()>=ERROR))){ - fprintf(stderr,"cannot find special string ***after*** %p!\n",ptr); + if((bcmp(original_ptr + size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){ + fprintf(stderr,"Free: cannot find special string ***after*** %p!\n", original_ptr); fprintf(stderr,"memory is probably corrupted here!\n"); } - if(get_verbose_level()>=DEBUG) - printf("my_free freeing: %p\n",(void*)original_ptr); + if(tm_get_verbose_level()>=DEBUG) + printf("tm_free freeing: %p\n",original_ptr); free(original_ptr); diff --git a/ompi/mca/topo/treematch/treematch/tm_malloc.h b/ompi/mca/topo/treematch/treematch/tm_malloc.h index c4038d90be..f74cd3db6a 100644 --- a/ompi/mca/topo/treematch/treematch/tm_malloc.h +++ b/ompi/mca/topo/treematch/treematch/tm_malloc.h @@ -1,5 +1,29 @@ +#ifndef _TM_MALLOC_H_ +#define _TM_MALLOC_H_ + #include -void *my_malloc(size_t size, char *, int); -void *my_calloc(size_t count, size_t size, char *, int); -void my_free(void *ptr); -void my_mem_check(void); +void *tm_malloc(size_t size, char *, int); +void *tm_calloc(size_t count, size_t size, char *, int); +void *tm_realloc(void *ptr, size_t size, char *, int); +void tm_free(void *ptr); +void tm_mem_check(void); + +/* for debugging malloc */ +/* #define __DEBUG_TM_MALLOC__ */ +#undef __DEBUG_TM_MALLOC__ +#ifdef __DEBUG_TM_MALLOC__ +#define MALLOC(x) tm_malloc(x,__FILE__,__LINE__) +#define CALLOC(x,y) tm_calloc(x,y,__FILE__,__LINE__) +#define REALLOC(x,y) tm_realloc(x,y,__FILE__,__LINE__) +#define FREE tm_free +#define 
MEM_CHECK tm_mem_check +#else +#define MALLOC malloc +#define CALLOC calloc +#define FREE free +#define REALLOC realloc +#define MEM_CHECK tm_mem_check +#endif + + +#endif diff --git a/ompi/mca/topo/treematch/treematch/tm_mapping.c b/ompi/mca/topo/treematch/treematch/tm_mapping.c index 1debcb606c..3c50abe6c4 100644 --- a/ompi/mca/topo/treematch/treematch/tm_mapping.c +++ b/ompi/mca/topo/treematch/treematch/tm_mapping.c @@ -10,6 +10,7 @@ #include "tm_mt.h" #include "tm_mapping.h" #include "tm_timings.h" +#include "tm_thread_pool.h" #include "tm_tree.h" #ifdef _WIN32 @@ -25,11 +26,6 @@ #define LINE_SIZE (1000000) -typedef struct { - int val; - long key; -} hash_t; - typedef struct { double val; @@ -37,126 +33,48 @@ typedef struct { int key2; } hash2_t; -int distance(tm_topology_t *topology,int i, int j); -int nb_lines(char *); -void init_comm(char *,int,double **);void map_Packed(tm_topology_t *,int,int *); -void map_RR(int ,int *,int *); -int hash_asc(const void*,const void*); -int *generate_random_sol(tm_topology_t *,int,int,int); -double eval_sol(int *,int,double **,double **); -double eval_sol_inv(int *,int,double **,double **); -void exchange(int *,int,int); -double gain_exchange(int *,int,int,double,int,double **,double **); -void select_max(int *,int *,double **,int,int *); -void compute_gain(int *,int,double **,double **,double **); -void map_MPIPP(tm_topology_t *,int,int,int *,double **,double **); -void depth_first(tree_t *,int *,int *); -int nb_leaves(tree_t *); -void map_topology(tm_topology_t *,tree_t *,int,int,int *,int,int *); -int int_cmp(const void*,const void*); -int decompose(int,int,int *); -tree_t *build_synthetic_topology_old(int *,int,int,int); -void update_comm_speed(double **,int,int); -void topology_numbering(tm_topology_t *,int **,int *); -void topology_arity(tm_topology_t *,int **,int *); -void optimize_arity(int **,int *,int); -int get_indice(int *,int,int); -int fill_tab(int **,int *,int,int,int,int); -void update_canonical(int *,int,int,int); -int constraint_dsc(const void*,const void*); -void display_contsraint_tab(constraint_t *,int); -void update_perm(int *,int,constraint_t *,int,int); -void recursive_canonicalization(int,tm_topology_t *,int *,int *,int *,int,int); -void FREE_topology(tm_topology_t *); + +tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order); +int compute_nb_leaves_from_level(int depth,tm_topology_t *topology); +void depth_first(tm_tree_t *comm_tree, int *proc_list,int *i); +int fill_tab(int **new_tab,int *tab, int n, int start, int max_val, int shift); +void init_mat(char *filename,int N, double **mat, double *sum_row); +void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level, + int *sigma, int nb_processes, int **k, int nb_compute_units); +int nb_leaves(tm_tree_t *comm_tree); +int nb_lines(char *filename); +int nb_processing_units(tm_topology_t *topology); +void print_1D_tab(int *tab,int N); +tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree); +void tm_finalize(); +void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat); +tm_affinity_mat_t *tm_load_aff_mat(char *filename); +void update_comm_speed(double **comm_speed,int old_size,int new_size); +tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order); -int distance(tm_topology_t *topology,int i, int j) +/* compute the number of leaves of any subtree starting froma node of depth depth*/ +int compute_nb_leaves_from_level(int depth,tm_topology_t *topology) { - int level = topology->nb_levels; - int arity; - int 
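The debug allocator added above brackets every allocation with EXTRA_BYTE bytes of a reference pattern and records (pointer, size, file, line) so that tm_free() can detect buffer under- and overflows and tm_mem_check() can report leaks. The following minimal sketch shows the same guard-byte idea in isolation; it stores the user size inline instead of in a uthash table and uses a fixed byte pattern, so names and layout here are illustrative only:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define GUARD 16   /* plays the role of EXTRA_BYTE */

/* layout: [size_t size][GUARD bytes][user data][GUARD bytes] */
static void *guarded_malloc(size_t size)
{
    unsigned char *p = malloc(sizeof(size_t) + size + 2 * GUARD);
    if (!p) return NULL;
    memcpy(p, &size, sizeof(size_t));
    memset(p + sizeof(size_t), 0xAB, GUARD);                  /* front guard */
    memset(p + sizeof(size_t) + GUARD + size, 0xAB, GUARD);   /* back guard */
    return p + sizeof(size_t) + GUARD;
}

static void guarded_free(void *ptr)
{
    unsigned char *base = (unsigned char *)ptr - GUARD - sizeof(size_t);
    unsigned char ref[GUARD];
    size_t size;

    memcpy(&size, base, sizeof(size_t));
    memset(ref, 0xAB, GUARD);
    if (memcmp(base + sizeof(size_t), ref, GUARD))
        fprintf(stderr, "underflow detected before %p\n", ptr);
    if (memcmp(base + sizeof(size_t) + GUARD + size, ref, GUARD))
        fprintf(stderr, "overflow detected after %p\n", ptr);
    free(base);
}

int main(void)
{
    char *s = guarded_malloc(8);
    memset(s, 'x', 9);    /* one byte too many: corrupts the back guard */
    guarded_free(s);      /* reports "overflow detected after ..." */
    return 0;
}

In the patch the bookkeeping differs in two ways: the size, file and line are kept in a uthash table keyed by the raw pointer, and the guard pattern is filled from the Mersenne Twister PRNG. The MALLOC/CALLOC/REALLOC/FREE macros route through tm_malloc and friends only when __DEBUG_TM_MALLOC__ is defined; otherwise they fall back to the plain libc calls.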
f_i = i,f_j = j; + int res = 1; - do{ - level--; - arity = topology->arity[level]; - if( arity == 0 ) - arity = 1; - f_i = f_i/arity; - f_j = f_j/arity; - } while(f_i!=f_j); + while(depth < topology->nb_levels-1) + res *= topology->arity[depth++]; - /* printf("(%d,%d):%d\n",i,j,level);*/ - /* exit(-1); */ - return level; + return res; +} + +void tm_finalize(){ + terminate_thread_pool(); + tm_mem_check(); } int nb_processing_units(tm_topology_t *topology) { - return topology->nb_nodes[topology->nb_levels-1]; + return topology->nb_proc_units; } -void FREE_topology(tm_topology_t *topology) -{ - int i; - for( i = 0 ; i < topology->nb_levels ; i++ ) - FREE(topology->node_id[i]); - FREE(topology->node_id); - FREE(topology->nb_nodes); - FREE(topology->arity); - FREE(topology); -} - -double print_sol(int N,int *Value,double **comm, double *cost, tm_topology_t *topology) -{ - double a,c,sol; - int i,j; - - sol = 0; - for ( i = 0 ; i < N ; i++ ) - for ( j = i+1 ; j < N ; j++){ - c = comm[i][j]; - a = cost[distance(topology,Value[i],Value[j])]; - /* printf("T_%d_%d %f/%f=%f\n",i,j,c,a,c/a); */ - sol += c/a; - } - - for (i = 0; i < N; i++) { - printf("%d", Value[i]); - if(i= CRITICAL) - fprintf(stderr,"Cannot open %s\n",filename); - exit(-1); - } + if(!(pf = fopen(filename,"r"))){ + if(tm_get_verbose_level() >= CRITICAL) + fprintf(stderr,"Cannot open %s\n",filename); + exit(-1); + } - while(fgets(line,LINE_SIZE,pf)) - N++; + while(fgets(line,LINE_SIZE,pf)) + N++; - if(get_verbose_level() >= DEBUG) - printf("Number of lines of file %s = %d\n",filename,N); + if(tm_get_verbose_level() >= DEBUG) + printf("Number of lines of file %s = %d\n",filename,N); - fclose(pf); - return N; + fclose(pf); + return N; } -void init_comm(char *filename,int N,double **comm) +void init_mat(char *filename,int N, double **mat, double *sum_row) { FILE *pf = NULL; char *ptr= NULL; char line[LINE_SIZE]; int i,j; - unsigned int vl = get_verbose_level(); - + unsigned int vl = tm_get_verbose_level(); if(!(pf=fopen(filename,"r"))){ @@ -208,381 +125,122 @@ void init_comm(char *filename,int N,double **comm) j = -1; i = 0; + + while(fgets(line,LINE_SIZE,pf)){ char *l = line; j = 0; - comm[i][N] = 0; - /* printf("%s|",line); */ + sum_row[i] = 0; while((ptr=strtok(l," \t"))){ l = NULL; if((ptr[0]!='\n')&&(!isspace(ptr[0]))&&(*ptr)){ - comm[i][j] = atof(ptr); - comm[i][N] += comm [i][j]; - /* printf ("comm[%d][%d]=%f|%s|\n",i,j,comm[i][j],ptr); */ - j++; + mat[i][j] = atof(ptr); + sum_row[i] += mat [i][j]; + if(mat[i][j]<0){ + if(vl >= WARNING) + fprintf(stderr,"Warning: negative value in com matrix! mat[%d][%d]=%f\n",i,j,mat[i][j]); + } + j++; } } if( j != N){ if(vl >= CRITICAL) - fprintf(stderr,"Error at %d %d (%d!=%d)for %s\n",i,j,j,N,filename); + fprintf(stderr,"Error at %d %d (%d!=%d). Too many columns for %s\n",i,j,j,N,filename); exit(-1); } i++; } + + if( i != N ){ if(vl >= CRITICAL) - fprintf(stderr,"Error at %d %d for %s\n",i,j,filename); + fprintf(stderr,"Error at %d %d. 
Too many rows for %s\n",i,j,filename); exit(-1); } - /* - printf("%s:\n",filename); - for(i=0;i= CRITICAL) - fprintf(stderr,"Cannot open %s\n",filename); - exit(-1); - } + aff_mat = (tm_affinity_mat_t *) MALLOC(sizeof(tm_affinity_mat_t)); + aff_mat -> mat = mat; + aff_mat -> sum_row = sum_row; + aff_mat -> order = order; - /* compute the size od the array to store the constraints*/ - n = 0; - fgets(line, LINE_SIZE, pf); - l = line; - while((ptr=strtok(l," \t"))){ - l = NULL; - if((ptr[0] != '\n') && ( !isspace(ptr[0])) && (*ptr) && (ptr)) - n++; - } - - tab = (int*)MALLOC((n+1)*sizeof(int)); - - rewind(pf); - fgets(line, LINE_SIZE, pf); - l = line; - i = 0; - while((ptr=strtok(l," \t"))){ - l = NULL; - if((ptr[0] != '\n') && ( !isspace(ptr[0])) && (*ptr) && (ptr)){ - if(i <= n) - tab[i] = atoi(ptr); - else{ - if(vl >= CRITICAL) - fprintf(stderr, "More than %d entries in %s\n", n, filename); - exit(-1); - } - i++; - } - } - - if( i != n ){ - if(vl >= CRITICAL) - fprintf(stderr, "Read %d entries while expecting %d ones\n", i, n); - exit(-1); - } - - *ptab = tab; - fclose(pf); - return n; + return aff_mat; } -int build_comm(char *filename,double ***pcomm) + +tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order){ + double *sum_row = NULL; + int i,j; + sum_row = (double*)MALLOC(order*sizeof(double)); + + for( i = 0 ; i < order ; i++){ + sum_row[i] = 0; + for(j = 0 ; j < order ; j++) + sum_row[i] += mat [i][j]; + } + + return new_affinity_mat(mat, sum_row, order); +} + + + + + +void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat){ + int i; + int n = aff_mat->order; + + for(i = 0 ; i < n ; i++) + FREE(aff_mat->mat[i]); + + FREE(aff_mat->mat); + FREE(aff_mat->sum_row); + FREE(aff_mat); +} + + +tm_affinity_mat_t *tm_load_aff_mat(char *filename) { - double **comm = NULL; - int i,N; + double **mat = NULL; + double *sum_row = NULL; + int i, order; - if(get_verbose_level() >= INFO) - printf("Reading communication matrix file: %s\n",filename); + if(tm_get_verbose_level() >= INFO) + printf("Reading matrix file: %s\n",filename); - N = nb_lines(filename); - comm = (double**)MALLOC(N*sizeof(double*)); - for( i = 0 ; i < N ; i++) + order = nb_lines(filename); + + sum_row = (double*)MALLOC(order*sizeof(double)); + mat = (double**)MALLOC(order*sizeof(double*)); + for( i = 0 ; i < order ; i++) /* the last column stores the sum of the line*/ - comm[i] = (double*)MALLOC((N+1)*sizeof(double)); - init_comm(filename,N,comm); - *pcomm = comm; - - if(get_verbose_level() >= INFO) - printf("Communication matrix built from %s!\n",filename); - - return N; -} - -void map_Packed(tm_topology_t *topology,int N,int *Value) -{ - int i,j = 0,depth; - - depth = topology->nb_levels-1; - - for( i = 0 ; i < nb_processing_units(topology) ; i++){ - /* printf ("%d -> %d\n",objs[i]->os_index,i); */ - if(topology->node_id[depth][i] != -1){ - Value[j++]=topology->node_id[depth][i]; - if(j == N) - break; - } - } -} - -void map_RR(int N,int *Value, int *constraints) -{ - int i; - - for( i = 0 ; i < N ; i++ ){ - /*printf ("%d -> %d\n",i,i);*/ - if(constraints) - Value[i]=constraints[i]; - else - Value[i]=i; - } -} - -int hash_asc(const void* x1,const void* x2) -{ - hash_t *e1 = NULL,*e2 = NULL; - - e1 = ((hash_t*)x1); - e2 = ((hash_t*)x2); - - return (e1->key < e2->key) ? 
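Both tm_load_aff_mat() and tm_build_affinity_mat() end up in new_affinity_mat(), which bundles the square matrix, its per-row sums and its order into a tm_affinity_mat_t. A hypothetical standalone usage sketch follows; the assumption that treematch.h exposes these symbols is the editor's, not stated in this hunk:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "treematch.h"   /* assumed to declare tm_affinity_mat_t, tm_build_affinity_mat, tm_free_affinity_mat */

int main(void)
{
    int order = 3, i;
    double init[3][3] = {{0, 2, 1}, {2, 0, 4}, {1, 4, 0}};
    double **mat = malloc(order * sizeof(double *));
    tm_affinity_mat_t *aff;

    for (i = 0; i < order; i++) {
        mat[i] = malloc(order * sizeof(double));
        memcpy(mat[i], init[i], order * sizeof(double));
    }

    aff = tm_build_affinity_mat(mat, order);   /* keeps mat, computes sum_row */
    for (i = 0; i < order; i++)
        printf("sum_row[%d] = %.1f\n", i, aff->sum_row[i]);   /* 3.0 6.0 5.0 */

    /* frees the rows, mat and sum_row; FREE is plain free unless __DEBUG_TM_MALLOC__ is set */
    tm_free_affinity_mat(aff);
    return 0;
}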
-1 : 1; -} + mat[i] = (double*)MALLOC((order)*sizeof(double)); + init_mat(filename,order, mat, sum_row); -int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed) -{ - hash_t *hash_tab = NULL; - int *sol = NULL; - int *nodes_id= NULL; - int i; + if(tm_get_verbose_level() >= INFO) + printf("Affinity matrix built from %s!\n",filename); - nodes_id = topology->node_id[level]; - - hash_tab = (hash_t*)MALLOC(sizeof(hash_t)*N); - sol = (int*)MALLOC(sizeof(int)*N); - - init_genrand(seed); - - for( i = 0 ; i < N ; i++ ){ - hash_tab[i].val = nodes_id[i]; - hash_tab[i].key = genrand_int32(); - } - - qsort(hash_tab,N,sizeof(hash_t),hash_asc); - for( i = 0 ; i < N ; i++ ) - sol[i] = hash_tab[i].val; - - FREE(hash_tab); - return sol; -} + return new_affinity_mat(mat, sum_row, order); -double eval_sol(int *sol,int N,double **comm, double **arch) -{ - double a,c,res; - int i,j; - - res = 0; - for ( i = 0 ; i < N ; i++ ) - for ( j = i+1 ; j < N ; j++ ){ - c = comm[i][j]; - a = arch[sol[i]][sol[j]]; - res += c/a; - } - - return res; -} - -double eval_sol_inv(int *sol,int N,double **comm, double **arch) -{ - double a,c,res; - int i,j; - - res = 0; - for ( i = 0 ; i < N ; i++ ) - for ( j = i+1 ; j < N ; j++ ){ - c = comm[i][j]; - a = arch[sol[i]][sol[j]]; - res += c*a; - } - - return res; -} - -void exchange(int *sol,int i,int j) -{ - int tmp; - tmp = sol[i]; - sol[i] = sol[j]; - sol[j] = tmp; -} - -double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch) -{ - double eval2; - if( l == m ) - return 0; - exchange(sol,l,m); - eval2 = eval_sol(sol,N,comm,arch); - exchange(sol,l,m); - - return eval1-eval2; -} - -void select_max(int *l,int *m,double **gain,int N,int *state) -{ - double max; - int i,j; - - max = -DBL_MAX; - - for( i = 0 ; i < N ; i++ ) - if(!state[i]) - for( j = 0 ; j < N ; j++ ) - if( (i != j) && (!state[j]) ){ - if(gain[i][j] > max){ - *l = i; - *m = j; - max=gain[i][j]; - } - } -} - -void compute_gain(int *sol,int N,double **gain,double **comm, double **arch) -{ - double eval1; - int i,j; - - eval1 = eval_sol(sol,N,comm,arch); - for( i = 0 ; i < N ; i++ ) - for( j = 0 ; j <= i ; j++) - gain[i][j] = gain[j][i] = gain_exchange(sol,i,j,eval1,N,comm,arch); } -/* Randomized Algorithm of -Hu Chen, Wenguang Chen, Jian Huang ,Bob Robert,and H.Kuhn. Mpipp: an automatic profile-guided -parallel process placement toolset for smp clusters and multiclusters. In -Gregory K. Egan and Yoichi Muraoka, editors, ICS, pages 353-360. ACM, 2006. 
- */ -void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *Value,double **comm, double **arch) -{ - int *sol = NULL; - int *state = NULL; - double **gain = NULL; - int **history = NULL; - double *temp = NULL; - int i,j,t,l=0,m=0,seed=0; - double max,sum,best_eval,eval; - gain = (double**)MALLOC(sizeof(double*)*N); - history = (int**)MALLOC(sizeof(int*)*N); - for( i = 0 ; i < N ; i++){ - gain[i] = (double*)MALLOC(sizeof(double)*N); - history[i] = (int*)MALLOC(sizeof(int)*3); - } - - state = (int*)MALLOC(sizeof(int)*N); - temp = (double*)MALLOC(sizeof(double)*N); - - sol = generate_random_sol(topology,N,topology->nb_levels-1,seed++); - for( i = 0 ; i < N ; i++) - Value[i] = sol[i]; - - best_eval = DBL_MAX; - while(seed <= nb_seed){ - do{ - for( i = 0 ; i < N ; i++ ){ - state[i] = 0; - /* printf("%d ",sol[i]); */ - } - /* printf("\n"); */ - compute_gain(sol,N,gain,comm,arch); - /* - display_tab(gain,N); - exit(-1); - */ - for( i = 0 ; i < N/2 ; i++ ){ - select_max(&l,&m,gain,N,state); - /* printf("%d: %d <=> %d : %f\n",i,l,m,gain[l][m]); */ - state[l] = 1; - state[m] = 1; - exchange(sol,l,m); - history[i][1] = l; - history[i][2] = m; - temp[i] = gain[l][m]; - compute_gain(sol,N,gain,comm,arch); - } - - t = -1; - max = 0; - sum = 0; - for(i = 0 ; i < N/2 ; i++ ){ - sum += temp[i]; - if( sum > max ){ - max = sum; - t = i; - } - } - /*for(j=0;j<=t;j++) - printf("exchanging: %d with %d for gain: %f\n",history[j][1],history[j][2],temp[j]); */ - for( j = t+1 ; j < N/2 ; j++ ){ - exchange(sol,history[j][1],history[j][2]); - /* printf("Undoing: %d with %d for gain: %f\n",history[j][1],history[j][2],temp[j]); */ - } - /* printf("max=%f\n",max); */ - - /*for(i=0;i 0 ); - - FREE(sol); - sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++); - } - FREE(sol); - FREE(temp); - FREE(state); - for( i = 0 ; i < N ; i++){ - FREE(gain[i]); - FREE(history[i]); - } - FREE(gain); - FREE(history); -} - -/* void map_tree(tree_t* t1,tree_t *t2) */ +/* void map_tree(tm_tree_t* t1,tm_tree_t *t2) */ /* { */ /* double x1,x2; if((!t1->left)&&(!t1->right)){ printf ("%d -> %d\n",t1->id,t2->id); - Value[t2->id]=t1->id; + sigma[t2->id]=t1->id; return; } x1=t2->right->val/t1->right->val+t2->left->val/t1->left->val; @@ -596,7 +254,7 @@ void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *Value,double **com }*/ /* } */ -void depth_first(tree_t *comm_tree, int *proc_list,int *i) +void depth_first(tm_tree_t *comm_tree, int *proc_list,int *i) { int j; if(!comm_tree->child){ @@ -608,7 +266,7 @@ void depth_first(tree_t *comm_tree, int *proc_list,int *i) depth_first(comm_tree->child[j],proc_list,i); } -int nb_leaves(tree_t *comm_tree) +int nb_leaves(tm_tree_t *comm_tree) { int j,n=0; @@ -621,249 +279,143 @@ int nb_leaves(tree_t *comm_tree) return n; } +/* find the first '-1 in the array of size n and put the value there*/ +void set_val(int *tab, int val, int n){ + int i = 0; + while (i < n ){ + if(tab[i] ==- 1){ + tab[i] = val; + return; + } + i++; + } + + if(tm_get_verbose_level() >= CRITICAL){ + fprintf(stderr,"Error while assigning value %d to k\n",val); + } + + exit(-1); + +} /*Map topology to cores: sigma_i is such that process i is mapped on core sigma_i k_i is such that core i exectutes process k_i size of sigma is the number of process "nb_processes" - size of k is the number of cores/nodes "topology->nb_nodes[level]" + size of k is the number of cores/nodes "nb_compute_units" We must have numbe of process<=number of cores k_i =-1 if no process is mapped on core i */ -void map_topology(tm_topology_t 
*topology,tree_t *comm_tree,int nb_compute_units, - int level,int *sigma, int nb_processes, int *k) +void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level, + int *sigma, int nb_processes, int **k, int nb_compute_units) { - int *nodes_id = NULL; - int *proc_list = NULL; - int i,N,M,block_size; - unsigned int vl = get_verbose_level(); + int *nodes_id = NULL; + int *proc_list = NULL; + int i,j,N,M,block_size; - M = nb_leaves(comm_tree); - nodes_id = topology->node_id[level]; - N = topology->nb_nodes[level]; + unsigned int vl = tm_get_verbose_level(); + M = nb_leaves(comm_tree); + nodes_id = topology->node_id[level]; + N = topology->nb_nodes[level]; - if(vl >= INFO){ - printf("nb_leaves=%d\n",M); - printf("level=%d, nodes_id=%p, N=%d\n",level,(void *)nodes_id,N); - printf("N=%d,nb_compute_units=%d\n",N,nb_compute_units); - } - - /* The number of node at level "level" in the tree should be equal to the number of processors*/ - assert(N==nb_compute_units); - - proc_list = (int*)MALLOC(sizeof(int)*M); - i = 0; - depth_first(comm_tree,proc_list,&i); - - if(vl >= DEBUG) - for(i=0;i= INFO) - printf("M=%d, N=%d, BS=%d\n",M,N,block_size); - for( i = 0 ; i < nb_processing_units(topology) ; i++ ) - k[i] = -1; - - for( i = 0 ; i < M ; i++ ) - if(proc_list[i] != -1){ - if(vl >= DEBUG) - printf ("%d->%d\n",proc_list[i],nodes_id[i/block_size]); - - if( proc_list[i] < nb_processes ){ - sigma[proc_list[i]] = nodes_id[i/block_size]; - k[nodes_id[i/block_size]] = proc_list[i]; - } - } - }else{ - if(vl >= INFO) - printf("M=%d, N=%d, BS=%d\n",M,N,block_size); - for( i = 0 ; i < M ; i++ ) - if(proc_list[i] != -1){ - if(vl >= DEBUG) - printf ("%d->%d\n",proc_list[i],nodes_id[i/block_size]); - if( proc_list[i] < nb_processes ) - sigma[proc_list[i]] = nodes_id[i/block_size]; - } - } - - if((vl >= DEBUG) && (k)){ - printf("k: "); - for( i = 0 ; i < nb_processing_units(topology) ; i++ ) - printf("%d ",k[i]); - printf("\n"); - } - - - FREE(proc_list); -} - -void map_topology_simple(tm_topology_t *topology,tree_t *comm_tree, int *sigma, int nb_processes, int *k) -{ - map_topology(topology,comm_tree,topology->nb_nodes[topology->nb_levels-1], - topology->nb_levels-1,sigma,nb_processes,k); -} - -int int_cmp(const void* x1,const void* x2) -{ - int *e1 = NULL,*e2= NULL; - - e1 = ((int *)x1); - e2 = ((int *)x2); - - return ((*e1) > (*e2)) ? 
-1 : 1; -} - - -int decompose(int n,int optimize,int *tab) -{ - int primes[6] = {2,3,5,7,11,0}; - int i = 0,j = 1,flag = 2; - unsigned int vl = get_verbose_level(); - - while( primes[i] && (n!=1) ){ - /* printf("[%d] before=%d\n",primes[i],n); */ - if( flag && optimize && (n%primes[i]!= 0) ){ - n += primes[i] - n%primes[i]; - flag--; - i = 0; - continue; - } - /* printf("after=%d\n",n); */ - if( n%primes[i] == 0 ){ - tab[j++] = primes[i]; - n /= primes[i]; - }else{ - i++; - flag = 1; - } - } - if( n != 1 ) - tab[j++] = n; - - qsort(tab+1,j-1,sizeof(int),int_cmp); - - if(vl >= DEBUG){ - for( i = 0 ; i < j ; i++ ) - printf("%d:",tab[i]); - printf("\n"); + if(vl >= INFO){ + printf("nb_leaves=%d\n",M); + printf("level=%d, nodes_id=%p, N=%d\n",level,(void *)nodes_id,N); + printf("N=%d,nb_compute_units=%d\n",N,nb_compute_units); } - tab[j] = 0; + /* The number of node at level "level" in the tree should be equal to the number of processors*/ + assert(N==nb_compute_units*topology->oversub_fact); - return (j+1); -} + proc_list = (int*)MALLOC(sizeof(int)*M); + i = 0; + depth_first(comm_tree,proc_list,&i); + block_size = M/N; -tree_t *build_synthetic_topology_old(int *synt_tab,int id,int depth,int nb_levels) -{ - tree_t *res = NULL,**child = NULL; - int arity = synt_tab[0]; - int val,i; + if(k){/*if we need the k vector*/ + if(vl >= INFO) + printf("M=%d, N=%d, BS=%d\n",M,N,block_size); + for( i = 0 ; i < nb_processing_units(topology) ; i++ ) + for(j = 0 ; j < topology->oversub_fact ; j++){ + k[i][j] = -1; + } - res = (tree_t*)MALLOC(sizeof(tree_t)); - val = 0; - if(depth >= nb_levels) - child = NULL; - else{ - child = (tree_t**)MALLOC(sizeof(tree_t*)*arity); - for( i = 0 ; i < arity ; i++ ){ - child[i] = build_synthetic_topology_old(synt_tab+1,i,depth+1,nb_levels); - child[i]->parent = res; - val += child[i]->val; + for( i = 0 ; i < M ; i++ ) + if(proc_list[i] != -1){ + if(vl >= DEBUG) + printf ("%d->%d\n",proc_list[i],nodes_id[i/block_size]); + + if( proc_list[i] < nb_processes ){ + sigma[proc_list[i]] = nodes_id[i/block_size]; + set_val(k[nodes_id[i/block_size]], proc_list[i], topology->oversub_fact); + } + } + }else{ + if(vl >= INFO) + printf("M=%d, N=%d, BS=%d\n",M,N,block_size); + for( i = 0 ; i < M ; i++ ) + if(proc_list[i] != -1){ + if(vl >= DEBUG) + printf ("%d->%d\n",proc_list[i],nodes_id[i/block_size]); + if( proc_list[i] < nb_processes ) + sigma[proc_list[i]] = nodes_id[i/block_size]; + } + } + + if((vl >= DEBUG) && (k)){ + printf("k: "); + for( i = 0 ; i < nb_processing_units(topology) ; i++ ){ + printf("Procesing unit %d: ",i); + for (j = 0 ; joversub_fact; j++){ + if( k[i][j] == -1) + break; + printf("%d ",k[i][j]); + } + printf("\n"); } } - set_node(res,child,arity,NULL,id,val+speed(depth),child[0],depth); - return res; + + FREE(proc_list); } -void display_topology(tm_topology_t *topology) +tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree) { - int i,j; + size_t i; + tm_solution_t *solution; + int *sigma, **k; + size_t sigma_length = comm_tree->nb_processes; + size_t k_length = nb_processing_units(topology); - for( i = 0 ; i < topology->nb_levels ; i++ ){ - printf("%d: ",i); - for( j = 0 ; j < topology->nb_nodes[i] ; j++) - printf("%d ",topology->node_id[i][j]); - printf("\n"); + solution = (tm_solution_t *)MALLOC(sizeof(tm_solution_t)); + sigma = (int*) MALLOC(sizeof(int) * sigma_length); + k = (int**) MALLOC(sizeof(int*) * k_length); + for (i=0 ; i < k_length ; i++){ + k[i] = (int*) MALLOC(sizeof(int) * topology->oversub_fact); } -} -/* - Build a 
synthetic balanced topology + map_topology(topology, comm_tree, topology->nb_levels-1, sigma, sigma_length ,k, k_length); - arity : array of arity of the first nb_level (of size nb_levels-1) - core_numbering: numbering of the core by the system. Array of size nb_core_per_node + solution->sigma = sigma; + solution->sigma_length = sigma_length; + solution->k = k; + solution->k_length = k_length; + solution->oversub_fact = topology->oversub_fact; - nb_core_per_nodes: number of cores of a given node - - The numbering of the cores is done in round robin fashion after a width traversal of the topology - */ - -tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_nodes) -{ - tm_topology_t *topology = NULL; - int i,j,n = 1; - - topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t)); - topology->arity = (int*)MALLOC(sizeof(int)*nb_levels); - memcpy(topology->arity,arity,sizeof(int)*nb_levels); - topology->nb_levels = nb_levels; - - topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels); - topology->nb_nodes = (int*)MALLOC(sizeof(int)*topology->nb_levels); - - for( i = 0 ; i < topology->nb_levels ; i++ ){ - topology->nb_nodes[i] = n; - topology->node_id[i] = (int*)MALLOC(sizeof(int)*n); - if( i < topology->nb_levels-1) - for( j = 0 ; j < n ; j++ ) - topology->node_id[i][j] = j; - else - for( j = 0 ; j < n ; j++ ) - topology->node_id[i][j] = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes); - - n *= topology->arity[i]; - } - return topology; + return solution; } -void build_synthetic_proc_id(tm_topology_t *topology) -{ - int i; - size_t j,n = 1; - - topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels); - topology->nb_nodes = (int*)MALLOC(sizeof(int)*topology->nb_levels); - - for( i = 0 ; i < topology->nb_levels ; i++ ){ - /* printf("n= %lld, arity := %d\n",n, topology->arity[i]); */ - topology->nb_nodes[i] = n; - topology->node_id[i] = (int*)MALLOC(sizeof(long int)*n); - if ( !topology->node_id[i] ){ - if(get_verbose_level() >= CRITICAL) - fprintf(stderr,"Cannot allocate level %d (of size %ld) of the topology\n", i, (unsigned long int)n); - exit(-1); - } - for( j = 0 ; j < n ; j++ ) - topology->node_id[i][j] = j; - n *= topology->arity[i]; - } -} void update_comm_speed(double **comm_speed,int old_size,int new_size) { double *old_tab = NULL,*new_tab= NULL; int i; - unsigned int vl = get_verbose_level(); + unsigned int vl = tm_get_verbose_level(); if(vl >= DEBUG) printf("comm speed [%p]: ",(void *)*comm_speed); @@ -886,260 +438,9 @@ void update_comm_speed(double **comm_speed,int old_size,int new_size) } -/* d: size of comm_speed */ -void TreeMatchMapping(int nb_obj, int nb_proc, double **comm_mat, double *obj_weight, double * comm_speed, int d, int *sol) -{ - tree_t *comm_tree = NULL; - tm_topology_t *topology= NULL; - double duration; - int i; - unsigned int vl = get_verbose_level(); - - TIC; - - for( i = 0 ; i < nb_obj ; i++ ){ - sol[i] = i; - /* printf("%f ",obj_weight[i]); */ - } - /* - printf("\n"); - return; - */ - - topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t)); - topology->arity = (int*)MALLOC(sizeof(int)*MAX_LEVELS); - topology->arity[0] = nb_proc; - topology->nb_levels = decompose((int)ceil((1.0*nb_obj)/nb_proc),1,topology->arity); - if(vl >= INFO) - printf("Topology nb levels=%d\n",topology->nb_levels); - build_synthetic_proc_id(topology); - - if(topology->nb_levels > d) - update_comm_speed(&comm_speed,d,topology->nb_levels); - - /* - exit(-1); - 
topology_to_arch(topology); - - display_tab(arch,hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PROC)); - display_tab(arch,96); - exit(-1); - int nb_core=topo_nb_proc(topology,1000); - - display_tab(comm_mat,N); - */ - - TIC; - comm_tree = build_tree_from_topology(topology,comm_mat,nb_obj,obj_weight,comm_speed); - if(vl >= INFO) - printf("Tree building time=%f\n",TOC); - TIC; - map_topology(topology,comm_tree,nb_proc,1,sol,nb_obj,NULL); - if(vl >= INFO) - printf("Topology mapping time=%f\n",TOC); - - if(topology->nb_levels > d) - FREE(comm_speed); - - FREE_topology(topology); - FREE_tree(comm_tree); - - duration=TOC; - if(vl >= INFO) - printf("-------------- Mapping done in %.4fs!\n",duration); -} - -void display_other_heuristics(tm_topology_t *topology,int N,double **comm,int TGT_flag, int *constraints, double *cost) -{ - int *sol = NULL; - - sol = (int*)MALLOC(sizeof(int)*N); - - map_Packed(topology,N,sol); - printf("Packed: "); - if (TGT_flag == 1) - print_sol_inv(N,sol,comm,cost, topology); - else - print_sol(N,sol,comm,cost, topology); - - map_RR(N,sol,constraints); - printf("RR: "); - if (TGT_flag == 1) - print_sol_inv(N,sol,comm, cost, topology); - else - print_sol(N,sol,comm, cost, topology); - -/* double duration; */ -/* CLOCK_T time1,time0; */ -/* CLOCK(time0); */ -/* map_MPIPP(topology,1,N,sol,comm,arch); */ -/* CLOCK(time1); */ -/* duration=CLOCK_DIFF(time1,time0); */ -/* printf("MPIPP-1-D:%f\n",duration); */ -/* printf("MPIPP-1: "); */ -/* if (TGT_flag == 1) */ -/* print_sol_inv(N,sol,comm,arch); */ -/* else */ -/* print_sol(N,sol,comm,arch); */ - -/* CLOCK(time0); */ -/* map_MPIPP(topology,5,N,sol,comm,arch); */ -/* CLOCK(time1); */ -/* duration=CLOCK_DIFF(time1,time0); */ -/* printf("MPIPP-5-D:%f\n",duration); */ -/* printf("MPIPP-5: "); */ -/* if (TGT_flag == 1) */ -/* print_sol_inv(N,sol,comm,arch); */ -/* else */ -/* print_sol(N,sol,comm,arch); */ - - FREE(sol); -} - -void topology_numbering(tm_topology_t *topology,int **numbering,int *nb_nodes) -{ - int nb_levels; - unsigned int vl = get_verbose_level(); - - nb_levels = topology->nb_levels; - *nb_nodes = topology->nb_nodes[nb_levels-1]; - if(vl >= INFO) - printf("nb_nodes=%d\n",*nb_nodes); - *numbering = (int*)MALLOC(sizeof(int)*(*nb_nodes)); - memcpy(*numbering,topology->node_id[nb_levels-1],sizeof(int)*(*nb_nodes)); -} - -void topology_arity(tm_topology_t *topology,int **arity,int *nb_levels) -{ - *nb_levels = topology->nb_levels; - *arity = (int*)MALLOC(sizeof(int)*(*nb_levels)); - memcpy(*arity,topology->arity,sizeof(int)*(*nb_levels)); -} - -void optimize_arity(int **arity, int *nb_levels,int n) -{ - int a,i; - int *new_arity = NULL; - - if( n < 0 ) - return; - /* printf("n=%d\tnb_levels=%d\n",n,*nb_levels); */ - /* for(i=0;i<*nb_levels;i++) */ - /* printf("%d:",(*arity)[i]); */ - /* printf("\n"); */ - /* if(n==(*nb_levels)-3) */ - /* exit(-1); */ - a = (*arity)[n]; - if( (a%3 == 0) && (a > 3) ){ - /* - check if the a rity of level n devides 3 - If this is the case: - Add a level - */ - (*nb_levels)++; - /* Build a new arity array */ - new_arity = (int*)MALLOC(sizeof(int)*(*nb_levels)); - /* Copy the begining if the old array */ - for( i = 0 ; i < n ; i++) - new_arity[i] = (*arity)[i]; - /* set the nth level to arity 3 */ - new_arity[n] = 3; - /* printf("a=%d\n",a); */ - /* Set the (n+1) level to arity a/3 */ - new_arity[n+1] = a/3; - /* Copy the end of the array */ - for( i = n+2 ; i < *nb_levels ; i++) - new_arity[i] = (*arity)[i-1]; - FREE(*arity); - /* if a/3 =3 then go to the next level */ - if(new_arity[n+1] == 
3) - optimize_arity(&new_arity,nb_levels,n); - else /* continue to this level (remember we just add a new level */ - optimize_arity(&new_arity,nb_levels,n+1); - *arity=new_arity; - }else if( (a%2==0) && (a>2) ){/* same as above but for arity == 2 instead of 3 */ - (*nb_levels)++; - new_arity = (int*)MALLOC(sizeof(int)*(*nb_levels)); - for( i = 0 ; i < n ; i++ ) - new_arity[i] = (*arity)[i]; - new_arity[n] = 2; - /* printf("a=%d\n",a); */ - new_arity[n+1] = a/2; - for( i = n+2 ; i < *nb_levels ; i++ ) - new_arity[i] = (*arity)[i-1]; - FREE(*arity); - if(new_arity[n+1] == 2) - optimize_arity(&new_arity,nb_levels,n); - else - optimize_arity(&new_arity,nb_levels,n+1); - *arity = new_arity; - }else /* if nothing works go to next level. */ - optimize_arity(arity,nb_levels,n-1); -} -tm_topology_t *optimize_topology(tm_topology_t *topology){ - int *arity = NULL,nb_levels; - int *numbering = NULL,nb_nodes; - tm_topology_t *new_topo; - - topology_arity(topology,&arity,&nb_levels); - /* printf("nb_levels=%d\n",nb_levels); */ - /* for(i=0;inb_levels-1) - res *= topology->arity[depth++]; - - return res; -} - - - -/* return the indice of the greatest element of tab slower than val - tab needs to be sorted in increasing order*/ -int get_indice(int *tab, int n, int val) -{ - int i = 0, j = n-1, k; - - if( tab[n-1] < val ) - return n-1; - - while( i != j){ - k = (i+j)/2; - if( (tab[k]length > e2->length) ? -1 : 1; -} - - -/* display function*/ -void display_contsraint_tab(constraint_t *const_tab, int n) -{ - int i; - for( i = 0; i < n; i++ ) { - printf("tab %d:",i); - print_1D_tab(const_tab[i].constraints, const_tab[i].length); - } -} - - -/* - We shift perm in new_perm and then copy back - perm is decomposed in m part of size 'size' - - in part k of new_perm we copy part constratint[k].id -*/ - -void update_perm(int *perm, int n, constraint_t *const_tab, int m, int size) -{ - int k; - int *new_perm = NULL; - - if( n <= 1 ) - return; - - new_perm = (int*)MALLOC(sizeof(int)*n); - - for ( k = 0 ; k < m ; k++ ) - memcpy(new_perm+k*size,perm+const_tab[k].id*size,size*sizeof(int)); - - memcpy(perm,new_perm,n*sizeof(int)); - /*printf("perm:");print_1D_tab(perm,n);*/ - - FREE(new_perm); -} - - - -/* we are at a given subtree of depth depth of the topology - the mapping constraints are in the table constraints of size n - The value of constraints are between 0 and the number of leaves-1 of the current subtree - - Canonical is the output of the function and is a just a renumbering of constraints in the canonical way - perm is a way to go from canonical[i] to the corresponding constraints[k]: perm[canonical[i]]=constraints[k] -*/ - -void recursive_canonicalization(int depth, tm_topology_t *topology, int *constraints, int *canonical, int *perm, int n, int m) -{ - constraint_t *const_tab = NULL; - int nb_leaves,nb_subtrees; - int k, prec, start, end; - - /* if there is no constraints stop and return*/ - if( !constraints ){ - assert( n == 0 ); - return; - } - - /* if we are at teh bottom of the tree set canonical to the 0 value: it will be shifted by update_canonical - and return*/ - if ( depth == topology->nb_levels ){ - assert( n==1 ); - canonical[0] = 0; - return; - } - - /* compute in how many subtrees we need to devide the curret one*/ - nb_subtrees = topology->arity[depth]; - /* construct a tab of constraints of this size*/ - const_tab = (constraint_t *) MALLOC( nb_subtrees * sizeof(constraint_t) ); - - /*printf("tab (%d):",nb_subtrees,n);print_1D_tab(constraints,n);*/ - /* nb_leaves is the number of leaves of 
the current subtree - this will help to detremine where to split constraints and how to shift values - */ - nb_leaves = compute_nb_leaves_from_level( depth + 1, topology ); - - /* split the constraints into nb_subtrees sub-constraints - each sub-contraints k contains constraints of value in [k*nb_leaves,(k+1)*nb_leaves[ - */ - start = 0; - for(k = 0; k < nb_subtrees; k++){ - /*returns the indice in contsraints that contains the smallest value not copied - end is used to compute the number of copied elements (end-size) and is used as the next staring indices*/ - end=fill_tab(&(const_tab[k].constraints), constraints, n,start, (k+1) * nb_leaves, k * nb_leaves); - const_tab[k].length = end-start; - const_tab[k].id = k; - start = end; - } - - /* sort constraint tab such that subtrees with the largest number of - constraints are put on the left and managed first, this how we canonize subtrees*/ - qsort(const_tab, nb_subtrees, sizeof(constraint_t), constraint_dsc); - /*display_contsraint_tab(const_tab,nb_subtrees);*/ - - /* update perm such taht we can backtrack the changes between constraints and caononical - To go from canonical[i] to the corresponding constraints[k] perm is such that perm[canonical[i]]=constraints[k]*/ - update_perm(perm, m, const_tab, nb_subtrees, nb_leaves); - - /* recursively call each subtree*/ - prec = 0; - for(k = 0; k < nb_subtrees; k++){ - /* the tricky part is here : we send only a subtab of canonical that will be updated recursively - This will greatly simplify the merging*/ - recursive_canonicalization(depth+1, topology, const_tab[k].constraints, canonical+prec, perm+k*nb_leaves, - const_tab[k].length, nb_leaves); - prec += const_tab[k].length; - } - - /* merging consist only in shifting the right part of canonical*/ - start = const_tab[0].length; - for( k = 1; k < nb_subtrees ; k++){ - update_canonical(canonical, start, start+const_tab[k].length, k * nb_leaves); - start += const_tab[k].length; - } - - /* FREE local subconstraints*/ - for( k = 0; k < nb_subtrees; k++ ) - if(const_tab[k].length) - FREE(const_tab[k].constraints); - - FREE(const_tab); -} - -/* - shuffle the constraints such that for each node there are more constraints on the left subtree than on the right subtree - - This is required to avoid handling permutations. On a 2:2:2:2 tree, if the - contraints are (0,1,3), it is equivalent to (0,1,2) The canonical form is the - second one. This help to handle the case (0,6,7,9,11,13,14,15) which are - symetric constaints and for which the canonical form is (0,1,2,4,6,8,9,12)) - - - - We store in *perm the way to go from the canonical form to the original constraints. 
- perm is a way to go from canonical[i] to the corresponding constraints[k]: perm[canonical[i]]=constraints[k] - */ -void canonize_constraints(tm_topology_t *topology, int *constraints, int **canonical, int n, int **perm, int *m) -{ - int *p = NULL, *c = NULL; - int i; - unsigned int vl = get_verbose_level(); - - *m = compute_nb_leaves_from_level(0,topology); - - p = (int*) MALLOC(sizeof(int)*(*m)); - for( i = 0 ; i < *m ; i++ ) - p[i] = i; - - c = (int*) MALLOC(sizeof(int)*n); - - if(vl>=DEBUG){ - printf("constraints:"); - print_1D_tab(constraints, n); - } - - recursive_canonicalization(0, topology, constraints, c, p, n, *m); - - if(vl>=DEBUG){ - printf("canonical:"); - print_1D_tab(c, n); - printf("perm:"); - print_1D_tab(p, *m); - } - - *perm = p; - *canonical = c; -} diff --git a/ompi/mca/topo/treematch/treematch/tm_mapping.h b/ompi/mca/topo/treematch/treematch/tm_mapping.h index 0068184b56..391073b6ae 100644 --- a/ompi/mca/topo/treematch/treematch/tm_mapping.h +++ b/ompi/mca/topo/treematch/treematch/tm_mapping.h @@ -1,43 +1,34 @@ +#ifndef __TM_MAPPING_H__ +#define __TM_MAPPING_H__ #include "tm_tree.h" -#include "tm_hwloc.h" +#include "tm_topology.h" #include "tm_timings.h" #include "tm_verbose.h" -int build_comm(char *filename,double ***pcomm); -void TreeMatchMapping(int nb_obj, int nb_proc,double **comm_mat, double * obj_weigth, double *com_speed, int d, int *sol); - -/*Map topology to cores: - sigma_i is such that process i is mapped on core sigma_i - k_i is such that core i exectutes process k_i - - size of sigma is the number of process (nb_objs) - size of k is the number of cores/nodes (nb_proc) - - We must have numbe of process<=number of cores - - k_i =-1 if no process is mapped on core i -*/ -void map_topology_simple(tm_topology_t *topology,tree_t *comm_tree, int *sigma, int nb_processes, int *k); - -int nb_processing_units(tm_topology_t *topology); -void free_topology(tm_topology_t *topology); -void display_other_heuristics(tm_topology_t *topology,int N,double **comm,int TGT_flag, int *constraints, double *cost); -void print_1D_tab(int *tab,int N); +tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order); void build_synthetic_proc_id(tm_topology_t *topology); -void display_topology(tm_topology_t *topology); -tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_node); -tm_topology_t *optimize_topology(tm_topology_t *topology); -double print_sol_inv(int N,int *Value,double **comm, double *cost, tm_topology_t *topology); -double print_sol(int N,int *Value,double **comm, double *cost, tm_topology_t *topology); -int build_binding_constraints(char *filename, int **ptab); -void canonize_constraints(tm_topology_t *topology, int *constraints, int **canonical, int n, int **perm, int *m); +tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_nodes); int compute_nb_leaves_from_level(int depth,tm_topology_t *topology); -void FREE_topology(tm_topology_t *); - +void depth_first(tm_tree_t *comm_tree, int *proc_list,int *i); +int fill_tab(int **new_tab,int *tab, int n, int start, int max_val, int shift); +void init_mat(char *filename,int N, double **mat, double *sum_row); +void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level, + int *sigma, int nb_processes, int **k, int nb_compute_units); +int nb_leaves(tm_tree_t *comm_tree); +int nb_lines(char *filename); +int nb_processing_units(tm_topology_t *topology); +void print_1D_tab(int *tab,int N); 
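+/* Convenience wrapper around map_topology() for the last topology level.
+   The returned tm_solution_t holds sigma (process -> processing unit) and
+   k (processing unit -> processes placed on it, unused slots left at -1,
+   at most oversub_fact entries per unit).  Illustrative call sequence,
+   assuming the topology and the communication tree have already been built:
+
+     tm_solution_t *sol = tm_compute_mapping(topology, comm_tree);
+     ... use sol->sigma and sol->k ...
+     tm_free_solution(sol);      (declared in tm_solution.h)
+*/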
+tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree); +void tm_finalize(); +void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat); +tm_affinity_mat_t *tm_load_aff_mat(char *filename); +void update_comm_speed(double **comm_speed,int old_size,int new_size); /* use to split a constaint into subconstraint according the tree*/ -typedef struct _constraint{ +typedef struct{ int *constraints; /* the subconstraints*/ int length; /*length of *constraints*/ int id; /* id of the corresponding subtree*/ }constraint_t; + +#endif diff --git a/ompi/mca/topo/treematch/treematch/tm_mt.h b/ompi/mca/topo/treematch/treematch/tm_mt.h index 260067d514..58f50d8f50 100644 --- a/ompi/mca/topo/treematch/treematch/tm_mt.h +++ b/ompi/mca/topo/treematch/treematch/tm_mt.h @@ -2,8 +2,7 @@ void init_genrand(unsigned long s); void init_by_array(unsigned long init_key[], int key_length); /* generates a random number on the interval [0,0x7fffffff] */ -unsigned long genrand_int32(void); - +unsigned long genrand_int32(void); long genrand_int31(void); double genrand_real1(void); double genrand_real2(void); diff --git a/ompi/mca/topo/treematch/treematch/tm_solution.c b/ompi/mca/topo/treematch/treematch/tm_solution.c new file mode 100644 index 0000000000..ee741d73be --- /dev/null +++ b/ompi/mca/topo/treematch/treematch/tm_solution.c @@ -0,0 +1,525 @@ +#include +#include +#include "tm_solution.h" +#include "tm_mt.h" +#include "tm_mapping.h" + +typedef struct { + int val; + long key; +} hash_t; + + + +void tm_free_solution(tm_solution_t *sol); +int distance(tm_topology_t *topology,int i, int j); +double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma); + double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric); +double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol, + tm_metric_t metric); +void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric); +int in_tab(int *tab, int n, int val); +void map_Packed(tm_topology_t *topology, int N, int *sigma); +void map_RR(tm_topology_t * topology, int N, int *sigma); +int hash_asc(const void* x1,const void* x2); +int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed); +double eval_sol(int *sol,int N,double **comm, double **arch); +void exchange(int *sol,int i,int j); +double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch); +void select_max(int *l,int *m,double **gain,int N,int *state); +void compute_gain(int *sol,int N,double **gain,double **comm, double **arch); +void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch); + + +void tm_free_solution(tm_solution_t *sol){ + int i,n; + + n = sol->k_length; + + if(sol->k) + for(i=0 ; ik[i]); + + FREE(sol->k); + FREE(sol->sigma); + FREE(sol); +} + +/* + Compute the distance in the tree + between node i and j : the farther away node i and j, the + larger the returned value. + + The algorithm looks at the largest level, starting from the top, + for which node i and j are still in the same subtree. 
This is done + by iteratively dividing their numbering by the arity of the levels +*/ +int distance(tm_topology_t *topology,int i, int j) +{ + int level = 0; + int arity; + int f_i, f_j ; + int vl = tm_get_verbose_level(); + int depth = topology->nb_levels-1; + + f_i = topology->node_rank[depth][i]; + f_j = topology->node_rank[depth][j]; + + if(vl >= DEBUG) + printf("i=%d, j=%d Level = %d f=(%d,%d)\n",i ,j, level, f_i, f_j); + + + do{ + level++; + arity = topology->arity[level]; + if( arity == 0 ) + arity = 1; + f_i = f_i/arity; + f_j = f_j/arity; + } while((f_i!=f_j) && (level < depth)); + + if(vl >= DEBUG) + printf("distance(%d,%d):%d\n",topology->node_rank[depth][i], topology->node_rank[depth][j], level); + /* exit(-1); */ + return level; +} + +double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma) +{ + double a,c,sol; + int i,j; + double *cost = topology->cost; + double **mat = aff_mat->mat; + int N = aff_mat->order; + int depth = topology->nb_levels - 1; + + + sol = 0; + for ( i = 0 ; i < N ; i++ ) + for ( j = i+1 ; j < N ; j++){ + c = mat[i][j]; + /* + Compute cost in funvtion of the inverse of the distance + This is due to the fact that the cost matrix is numbered + from top to bottom : cost[0] is the cost of the longest distance. + */ + a = cost[depth-distance(topology,sigma[i],sigma[j])]; + if(tm_get_verbose_level() >= DEBUG) + printf("T_%d_%d %f*%f=%f\n",i,j,c,a,c*a); + sol += c*a; + } + + for (i = 0; i < N; i++) { + printf("%d", sigma[i]); + if(icost; + double **mat = aff_mat->mat; + int N = aff_mat->order; + int vl = tm_get_verbose_level(); + int depth = topology->nb_levels - 1; + + sol = 0; + for ( i = 0 ; i < N ; i++ ) + for ( j = i+1 ; j < N ; j++){ + c = mat[i][j]; + /* + Compute cost in funvtion of the inverse of the distance + This is due to the fact that the cost matrix is numbered + from top to bottom : cost[0] is the cost of the longest distance. 
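+        For example, distance() == depth (processes that only meet at the
+        topology root) is charged cost[0], the most expensive link, while
+        distance() == 1 is charged cost[depth-1], one of the cheapest.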
+ */ + a = cost[depth-distance(topology,sigma[i],sigma[j])]; + if(vl >= DEBUG) + printf("T_%d_%d %f*%f=%f\n",i,j,c,a,c*a); + if(c*a > sol) + sol = c*a; + } + + for (i = 0; i < N; i++) { + printf("%d", sigma[i]); + if(imat; + int N = aff_mat->order; + + sol = 0; + for ( i = 0 ; i < N ; i++ ) + for ( j = i+1 ; j < N ; j++){ + c = mat[i][j]; + nb_hops = 2*distance(topology,sigma[i],sigma[j]); + if(tm_get_verbose_level() >= DEBUG) + printf("T_%d_%d %f*%d=%f\n",i,j,c,nb_hops,c*nb_hops); + sol += c*nb_hops; + } + + for (i = 0; i < N; i++) { + printf("%d", sigma[i]); + if(i= ERROR){ + fprintf(stderr,"Error printing solution: metric %d not implemented\n",metric); + return -1; + } + } + return -1; +} + +double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol, + tm_metric_t metric){ + + int i,j; + int **k = sol->k; + + + if(tm_get_verbose_level() >= DEBUG){ + printf("k: \n"); + for( i = 0 ; i < nb_processing_units(topology) ; i++ ){ + if(k[i][0] != -1){ + printf("\tProcessing unit %d: ",i); + for (j = 0 ; joversub_fact; j++){ + if( k[i][j] == -1) + break; + printf("%d ",k[i][j]); + } + printf("\n"); + } + } + } + + + return display_sol(topology, aff_mat, sol->sigma, metric); +} + +void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric) +{ + int *sigma = NULL; + int N = aff_mat->order; + + sigma = (int*)MALLOC(sizeof(int)*N); + + map_Packed(topology, N, sigma); + printf("Packed: "); + display_sol(topology, aff_mat, sigma, metric); + + map_RR(topology, N, sigma); + printf("RR: "); + display_sol(topology, aff_mat, sigma, metric); + +/* double duration; */ +/* CLOCK_T time1,time0; */ +/* CLOCK(time0); */ +/* map_MPIPP(topology,1,N,sigma,comm,arch); */ +/* CLOCK(time1); */ +/* duration=CLOCK_DIFF(time1,time0); */ +/* printf("MPIPP-1-D:%f\n",duration); */ +/* printf("MPIPP-1: "); */ +/* if (TGT_flag == 1) */ +/* print_sigma_inv(N,sigma,comm,arch); */ +/* else */ +/* print_sigma(N,sigma,comm,arch); */ + +/* CLOCK(time0); */ +/* map_MPIPP(topology,5,N,sigma,comm,arch); */ +/* CLOCK(time1); */ +/* duration=CLOCK_DIFF(time1,time0); */ +/* printf("MPIPP-5-D:%f\n",duration); */ +/* printf("MPIPP-5: "); */ +/* if (TGT_flag == 1) */ +/* print_sigma_inv(N,sigma,comm,arch); */ +/* else */ +/* print_sigma(N,sigma,comm,arch); */ + + FREE(sigma); +} + + +int in_tab(int *tab, int n, int val){ + int i; + for( i = 0; i < n ; i++) + if(tab[i] == val) + return 1; + + return 0; +} + +void map_Packed(tm_topology_t *topology, int N, int *sigma) +{ + size_t i; + int j = 0,depth; + int vl = tm_get_verbose_level(); + + depth = topology->nb_levels-1; + + for( i = 0 ; i < topology->nb_nodes[depth] ; i++){ + /* printf ("%d -> %d\n",objs[i]->os_index,i); */ + if((!topology->constraints) || (in_tab(topology->constraints, topology->nb_constraints, topology->node_id[depth][i]))){ + if(vl >= DEBUG) + printf ("%lu: %d -> %d\n", i, j, topology->node_id[depth][i]); + sigma[j++]=topology->node_id[depth][i]; + if(j == N) + break; + } + } +} + +void map_RR(tm_topology_t *topology, int N,int *sigma) +{ + int i; + int vl = tm_get_verbose_level(); + + for( i = 0 ; i < N ; i++ ){ + if(topology->constraints) + sigma[i]=topology->constraints[i%topology->nb_constraints]; + else + sigma[i]=i%topology->nb_proc_units; + if(vl >= DEBUG) + printf ("%d -> %d (%d)\n",i,sigma[i],topology->nb_proc_units); + } +} + +int hash_asc(const void* x1,const void* x2) +{ + hash_t *e1 = NULL,*e2 = NULL; + + e1 = ((hash_t*)x1); + e2 = ((hash_t*)x2); + + return (e1->key < 
e2->key) ? -1 : 1; +} + + +int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed) +{ + hash_t *hash_tab = NULL; + int *sol = NULL; + int *nodes_id= NULL; + int i; + + nodes_id = topology->node_id[level]; + + hash_tab = (hash_t*)MALLOC(sizeof(hash_t)*N); + sol = (int*)MALLOC(sizeof(int)*N); + + init_genrand(seed); + + for( i = 0 ; i < N ; i++ ){ + hash_tab[i].val = nodes_id[i]; + hash_tab[i].key = genrand_int32(); + } + + qsort(hash_tab,N,sizeof(hash_t),hash_asc); + for( i = 0 ; i < N ; i++ ) + sol[i] = hash_tab[i].val; + + FREE(hash_tab); + return sol; +} + + +double eval_sol(int *sol,int N,double **comm, double **arch) +{ + double a,c,res; + int i,j; + + res = 0; + for ( i = 0 ; i < N ; i++ ) + for ( j = i+1 ; j < N ; j++ ){ + c = comm[i][j]; + a = arch[sol[i]][sol[j]]; + res += c/a; + } + + return res; +} + +void exchange(int *sol,int i,int j) +{ + int tmp; + tmp = sol[i]; + sol[i] = sol[j]; + sol[j] = tmp; +} + +double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch) +{ + double eval2; + if( l == m ) + return 0; + exchange(sol,l,m); + eval2 = eval_sol(sol,N,comm,arch); + exchange(sol,l,m); + + return eval1-eval2; +} + +void select_max(int *l,int *m,double **gain,int N,int *state) +{ + double max; + int i,j; + + max = -DBL_MAX; + + for( i = 0 ; i < N ; i++ ) + if(!state[i]) + for( j = 0 ; j < N ; j++ ) + if( (i != j) && (!state[j]) ){ + if(gain[i][j] > max){ + *l = i; + *m = j; + max=gain[i][j]; + } + } +} + + +void compute_gain(int *sol,int N,double **gain,double **comm, double **arch) +{ + double eval1; + int i,j; + + eval1 = eval_sol(sol,N,comm,arch); + for( i = 0 ; i < N ; i++ ) + for( j = 0 ; j <= i ; j++) + gain[i][j] = gain[j][i] = gain_exchange(sol,i,j,eval1,N,comm,arch); +} + + +/* Randomized Algorithm of +Hu Chen, Wenguang Chen, Jian Huang ,Bob Robert,and H.Kuhn. Mpipp: an automatic profile-guided +parallel process placement toolset for smp clusters and multiclusters. In +Gregory K. Egan and Yoichi Muraoka, editors, ICS, pages 353-360. ACM, 2006. 
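+
+In outline, the implementation below starts from a random placement,
+computes the matrix of pairwise exchange gains, greedily applies N/2
+exchanges while recording their gains, keeps only the prefix of exchanges
+whose cumulated gain is maximal, and repeats this pass as long as it
+improves the solution; the procedure is run for several random starting
+placements (nb_seed seeds) and the best placement found is kept in sigma.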
+ */ + +void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch) +{ + int *sol = NULL; + int *state = NULL; + double **gain = NULL; + int **history = NULL; + double *temp = NULL; + int i,j,t,l=0,m=0,seed=0; + double max,sum,best_eval,eval; + + gain = (double**)MALLOC(sizeof(double*)*N); + history = (int**)MALLOC(sizeof(int*)*N); + for( i = 0 ; i < N ; i++){ + gain[i] = (double*)MALLOC(sizeof(double)*N); + history[i] = (int*)MALLOC(sizeof(int)*3); + } + + state = (int*)MALLOC(sizeof(int)*N); + temp = (double*)MALLOC(sizeof(double)*N); + + sol = generate_random_sol(topology,N,topology->nb_levels-1,seed++); + for( i = 0 ; i < N ; i++) + sigma[i] = sol[i]; + + best_eval = DBL_MAX; + while(seed <= nb_seed){ + do{ + for( i = 0 ; i < N ; i++ ){ + state[i] = 0; + /* printf("%d ",sol[i]); */ + } + /* printf("\n"); */ + compute_gain(sol,N,gain,comm,arch); + /* + display_tab(gain,N); + exit(-1); + */ + for( i = 0 ; i < N/2 ; i++ ){ + select_max(&l,&m,gain,N,state); + /* printf("%d: %d <=> %d : %f\n",i,l,m,gain[l][m]); */ + state[l] = 1; + state[m] = 1; + exchange(sol,l,m); + history[i][1] = l; + history[i][2] = m; + temp[i] = gain[l][m]; + compute_gain(sol,N,gain,comm,arch); + } + + t = -1; + max = 0; + sum = 0; + for(i = 0 ; i < N/2 ; i++ ){ + sum += temp[i]; + if( sum > max ){ + max = sum; + t = i; + } + } + /*for(j=0;j<=t;j++) + printf("exchanging: %d with %d for gain: %f\n",history[j][1],history[j][2],temp[j]); */ + for( j = t+1 ; j < N/2 ; j++ ){ + exchange(sol,history[j][1],history[j][2]); + /* printf("Undoing: %d with %d for gain: %f\n",history[j][1],history[j][2],temp[j]); */ + } + /* printf("max=%f\n",max); */ + + /*for(i=0;i 0 ); + FREE(sol); + sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++); + } + + + FREE(sol); + FREE(temp); + FREE(state); + for( i = 0 ; i < N ; i++){ + FREE(gain[i]); + FREE(history[i]); + } + FREE(gain); + FREE(history); +} diff --git a/ompi/mca/topo/treematch/treematch/tm_solution.h b/ompi/mca/topo/treematch/treematch/tm_solution.h new file mode 100644 index 0000000000..5ed62b7022 --- /dev/null +++ b/ompi/mca/topo/treematch/treematch/tm_solution.h @@ -0,0 +1,26 @@ +#ifndef TM_SOLUION_H +#define TM_SOLUION_H + +#include "treematch.h" + +void tm_free_solution(tm_solution_t *sol); +int distance(tm_topology_t *topology,int i, int j); +double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma); + double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric); +double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol, + tm_metric_t metric); +void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric); +int in_tab(int *tab, int n, int val); +void map_Packed(tm_topology_t *topology, int N, int *sigma); +void map_RR(tm_topology_t *topology, int N, int *sigma); +int hash_asc(const void* x1,const void* x2); +int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed); +double eval_sol(int *sol,int N,double **comm, double **arch); +void exchange(int *sol,int i,int j); +double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch); +void select_max(int *l,int *m,double **gain,int N,int *state); +void compute_gain(int *sol,int N,double **gain,double **comm, double **arch); +void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch); + + +#endif diff --git 
a/ompi/mca/topo/treematch/treematch/tm_thread_pool.c b/ompi/mca/topo/treematch/treematch/tm_thread_pool.c index ce649ce097..ef9ccbf68d 100644 --- a/ompi/mca/topo/treematch/treematch/tm_thread_pool.c +++ b/ompi/mca/topo/treematch/treematch/tm_thread_pool.c @@ -1,13 +1,18 @@ #include #include "tm_thread_pool.h" #include "tm_verbose.h" -#include "opal/mca/hwloc/hwloc-internal.h" +#include #include "tm_verbose.h" #include "tm_tree.h" #include +#include +typedef enum _mapping_policy {COMPACT, SCATTER} mapping_policy_t; + +static mapping_policy_t mapping_policy = COMPACT; static int verbose_level = ERROR; static thread_pool_t *pool = NULL; +static unsigned int max_nb_threads = INT_MAX; static thread_pool_t *get_thread_pool(void); static void execute_work(work_t *work); @@ -16,39 +21,21 @@ static void *thread_loop(void *arg); static void add_work(pthread_mutex_t *list_lock, pthread_cond_t *cond_var, work_t *working_list, work_t *work); static thread_pool_t *create_threads(void); -static void f1 (int nb_args, void **args); -static void f2 (int nb_args, void **args); +static void f1 (int nb_args, void **args, int thread_id); +static void f2 (int nb_args, void **args, int thread_id); static void destroy_work(work_t *work); +#define MIN(a, b) ((a)<(b)?(a):(b)) +#define MAX(a, b) ((a)>(b)?(a):(b)) -void f1 (int nb_args, void **args){ - int a, b; - a = *(int*)args[0]; - b = *(int*)args[1]; - printf("nb_args=%d, a=%d, b=%d\n",nb_args,a,b); + + +void tm_set_max_nb_threads(unsigned int val){ + max_nb_threads = val; } - -void f2 (int nb_args, void **args){ - int n, *tab; - int *res; - int i,j; - n = *(int*)args[0]; - tab = (int*)args[1]; - res=(int*)args[2]; - - for(j=0;j<1000000;j++){ - *res=0; - for (i=0;itask(work->nb_args, work->args); + work->task(work->nb_args, work->args, work->thread_id); } int bind_myself_to_core(hwloc_topology_t topology, int id){ @@ -57,10 +44,29 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){ char *str; int binding_res; int depth = hwloc_topology_get_depth(topology); + int nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1); + int my_core; + int nb_threads = get_nb_threads(); /* printf("depth=%d\n",depth); */ + switch (mapping_policy){ + case SCATTER: + my_core = id*(nb_cores/nb_threads); + break; + default: + if(verbose_level>=WARNING){ + printf("Wrong scheduling policy. Using COMPACT\n"); + } + case COMPACT: + my_core = id%nb_cores; + } + + if(verbose_level>=INFO){ + printf("Mapping thread %d on core %d\n",id,my_core); + } + /* Get my core. */ - obj = hwloc_get_obj_by_depth(topology, depth-1, id); + obj = hwloc_get_obj_by_depth(topology, depth-1, my_core); if (obj) { /* Get a copy of its cpuset that we may modify. 
*/ cpuset = hwloc_bitmap_dup(obj->cpuset); @@ -71,7 +77,7 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){ /*hwloc_bitmap_asprintf(&str, cpuset); - printf("Binding thread %d to cpuset %s\n", id,str); + printf("Binding thread %d to cpuset %s\n", my_core,str); FREE(str); */ @@ -81,8 +87,8 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){ int error = errno; hwloc_bitmap_asprintf(&str, obj->cpuset); if(verbose_level>=WARNING) - fprintf(stderr,"%d Couldn't bind to cpuset %s: %s\n", id, str, strerror(error)); - FREE(str); + printf("Thread %d couldn't bind to cpuset %s: %s.\n This thread is not bound to any core...\n", my_core, str, strerror(error)); + free(str); /* str is allocated by hlwoc, free it normally*/ return 0; } /* FREE our cpuset copy */ @@ -90,7 +96,7 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){ return 1; }else{ if(verbose_level>=WARNING) - fprintf(stderr,"No valid object for core id %d!\n",id); + printf("No valid object for core id %d!\n",my_core); return 0; } } @@ -161,6 +167,7 @@ void wait_work_completion(work_t *work){ int submit_work(work_t *work, int thread_id){ if( (thread_id>=0) && (thread_id< pool->nb_threads)){ + work->thread_id = thread_id; add_work(&pool->list_lock[thread_id], &pool->cond_var[thread_id], &pool->working_list[thread_id], work); return 1; } @@ -171,11 +178,11 @@ thread_pool_t *create_threads(){ hwloc_topology_t topology; int i; local_thread_t *local; - int nb_cores; + int nb_threads; + unsigned int nb_cores; int depth; - verbose_level = get_verbose_level(); - + verbose_level = tm_get_verbose_level(); /*Get number of cores: set 1 thread per core*/ /* Allocate and initialize topology object. */ @@ -187,7 +194,7 @@ thread_pool_t *create_threads(){ depth = hwloc_topology_get_depth(topology); if (depth == -1 ) { if(verbose_level>=CRITICAL) - fprintf(stderr,"Error: topology with unknown depth\n"); + fprintf(stderr,"Error: HWLOC unable to find the depth of the topology of this node!\n"); exit(-1); } @@ -195,19 +202,23 @@ thread_pool_t *create_threads(){ /* at depth 'depth' it is necessary a PU/core where we can execute things*/ nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1); + nb_threads = MIN(nb_cores, max_nb_threads); + + if(verbose_level>=INFO) + printf("nb_threads = %d\n",nb_threads); pool = (thread_pool_t*) MALLOC(sizeof(thread_pool_t)); pool -> topology = topology; - pool -> nb_threads = nb_cores; - pool -> thread_list = (pthread_t*)MALLOC(sizeof(pthread_t)*nb_cores); - pool -> working_list = (work_t*)CALLOC(nb_cores,sizeof(work_t)); - pool -> cond_var = (pthread_cond_t*)MALLOC(sizeof(pthread_cond_t)*nb_cores); - pool -> list_lock = (pthread_mutex_t*)MALLOC(sizeof(pthread_mutex_t)*nb_cores); + pool -> nb_threads = nb_threads; + pool -> thread_list = (pthread_t*)MALLOC(sizeof(pthread_t)*nb_threads); + pool -> working_list = (work_t*)CALLOC(nb_threads,sizeof(work_t)); + pool -> cond_var = (pthread_cond_t*)MALLOC(sizeof(pthread_cond_t)*nb_threads); + pool -> list_lock = (pthread_mutex_t*)MALLOC(sizeof(pthread_mutex_t)*nb_threads); - local=(local_thread_t*)MALLOC(sizeof(local_thread_t)*nb_cores); + local=(local_thread_t*)MALLOC(sizeof(local_thread_t)*nb_threads); pool->local = local; - for (i=0;iworking_list[i]; @@ -245,11 +256,12 @@ void terminate_thread_pool(){ for (id=0;idnb_threads;id++){ pthread_join(pool->thread_list[id],(void **) &ret); + FREE(ret); pthread_cond_destroy(pool->cond_var +id); pthread_mutex_destroy(pool->list_lock +id); if (pool->working_list[id].next != NULL) if(verbose_level >= WARNING) 
- fprintf(stderr,"Working list of thread %d not empty!\n",id); + printf("Working list of thread %d not empty!\n",id); } hwloc_topology_destroy(pool->topology); @@ -272,7 +284,7 @@ int get_nb_threads(){ } -work_t *create_work(int nb_args, void **args, void (*task) (int, void **)){ +work_t *create_work(int nb_args, void **args, void (*task) (int, void **, int)){ work_t *work; work = MALLOC(sizeof(work_t)); work -> nb_args = nb_args; @@ -293,6 +305,34 @@ void destroy_work(work_t *work){ FREE(work); } +/* CODE example 2 functions and test driver*/ + +void f1 (int nb_args, void **args, int thread_id){ + int a, b; + a = *(int*)args[0]; + b = *(int*)args[1]; + printf("id: %d, nb_args=%d, a=%d, b=%d\n",thread_id, nb_args,a,b); +} + + +void f2 (int nb_args, void **args, int thread_id){ + int n, *tab; + int *res; + int i,j; + n = *(int*)args[0]; + tab = (int*)args[1]; + res=(int*)args[2]; + + for(j=0;j<1000000;j++){ + *res=0; + for (i=0;i -#include "opal/mca/hwloc/hwloc-internal.h" +#include typedef struct _work_t{ int nb_args; - void (*task)(int nb_args, void **args); + void (*task)(int nb_args, void **args, int thread_id); void **args; struct _work_t *next; pthread_cond_t work_done; pthread_mutex_t mutex; int done; + int thread_id; }work_t; typedef struct { @@ -38,8 +39,10 @@ int get_nb_threads(void); int submit_work(work_t *work, int thread_id); void wait_work_completion(work_t *work); void terminate_thread_pool(void); -work_t *create_work(int nb_args, void **args, void (int, void **)); +work_t *create_work(int nb_args, void **args, void (int, void **, int)); int test_main(void); + + #endif /* THREAD_POOL_H */ diff --git a/ompi/mca/topo/treematch/treematch/tm_timings.c b/ompi/mca/topo/treematch/treematch/tm_timings.c index 8f00865eba..b20747370e 100644 --- a/ompi/mca/topo/treematch/treematch/tm_timings.c +++ b/ompi/mca/topo/treematch/treematch/tm_timings.c @@ -12,6 +12,7 @@ void get_time(void) CLOCK(time_tab[clock_num]); } + double time_diff(void) { CLOCK_T t2,t1; @@ -22,7 +23,7 @@ double time_diff(void) } if(clock_num < 0){ - return -1.0; + return -2.0; } CLOCK(t2); diff --git a/ompi/mca/topo/treematch/treematch/tm_timings.h b/ompi/mca/topo/treematch/treematch/tm_timings.h index 250ee5c145..377a1cd46e 100644 --- a/ompi/mca/topo/treematch/treematch/tm_timings.h +++ b/ompi/mca/topo/treematch/treematch/tm_timings.h @@ -1,4 +1,3 @@ - #ifndef TIMINGS_H #define TIMINGS_H #include diff --git a/ompi/mca/topo/treematch/treematch/tm_topology.c b/ompi/mca/topo/treematch/treematch/tm_topology.c new file mode 100644 index 0000000000..341194801a --- /dev/null +++ b/ompi/mca/topo/treematch/treematch/tm_topology.c @@ -0,0 +1,842 @@ +#include +#include +#include "tm_tree.h" +#include "tm_mapping.h" +#include +#include "tm_verbose.h" +#include "tm_solution.h" + + +tm_topology_t* get_local_topo_with_hwloc(void); +tm_topology_t* hwloc_to_tm(char *filename); +int int_cmp_inc(const void* x1,const void* x2); +void optimize_arity(int **arity, double **cost, int *nb_levels,int n); +int symetric(hwloc_topology_t topology); +tm_topology_t * tgt_to_tm(char *filename); +void tm_display_arity(tm_topology_t *topology); +void tm_display_topology(tm_topology_t *topology); +void tm_free_topology(tm_topology_t *topology); +tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type); +void tm_optimize_topology(tm_topology_t **topology); +int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology); +int topo_nb_proc(hwloc_topology_t topology,int N); +void 
topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels); +void topology_constraints_cpy(tm_topology_t *topology,int **constraints,int *nb_constraints); +void topology_cost_cpy(tm_topology_t *topology,double **cost); +void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_nodes); +double ** topology_to_arch(hwloc_topology_t topology); +void build_synthetic_proc_id(tm_topology_t *topology); +tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes); + + +#define LINE_SIZE (1000000) + + +/* transform a tgt scotch file into a topology file*/ +tm_topology_t * tgt_to_tm(char *filename) +{ + tm_topology_t *topology = NULL; + FILE *pf = NULL; + char line[1024]; + char *s = NULL; + double *cost = NULL; + int i; + + + + pf = fopen(filename,"r"); + if(!pf){ + if(tm_get_verbose_level() >= CRITICAL) + fprintf(stderr,"Cannot open %s\n",filename); + exit(-1); + } + + if(tm_get_verbose_level() >= INFO) + printf("Reading TGT file: %s\n",filename); + + + fgets(line,1024,pf); + fclose(pf); + + s = strstr(line,"tleaf"); + if(!s){ + if(tm_get_verbose_level() >= CRITICAL) + fprintf(stderr,"Syntax error! %s is not a tleaf file\n",filename); + exit(-1); + } + + s += 5; + while(isspace(*s)) + s++; + + topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t)); + topology->nb_constraints = 0; + topology->oversub_fact = 1; + topology->constraints = NULL; + topology->nb_levels = atoi(strtok(s," "))+1; + topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels); + + cost = (double*)CALLOC(topology->nb_levels,sizeof(double)); + + for( i = 0 ; i < topology->nb_levels-1 ; i++ ){ + topology->arity[i] = atoi(strtok(NULL," ")); + cost[i] = atoi(strtok(NULL," ")); + } + + topology->arity[topology->nb_levels-1] = 0; + /* cost[topology->nb_levels-1]=0; */ + + /*aggregate costs*/ + for( i = topology->nb_levels-2 ; i >= 0 ; i-- ) + cost[i] += cost[i+1]; + + build_synthetic_proc_id(topology); + + if(tm_get_verbose_level() >= INFO) + printf("Topology built from %s!\n",filename); + + topology->cost=cost; + + + return topology; +} + +int topo_nb_proc(hwloc_topology_t topology,int N) +{ + hwloc_obj_t *objs = NULL; + int nb_proc; + + objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*N); + objs[0] = hwloc_get_next_obj_by_type(topology,HWLOC_OBJ_PU,NULL); + nb_proc = 1 + hwloc_get_closest_objs(topology,objs[0],objs+1,N-1); + FREE(objs); + return nb_proc; +} + + + +double link_cost(int depth) +{ + /* + Bertha values + double tab[5]={21,9,4.5,2.5,0.001}; + double tab[5]={1,1,1,1,1}; + double tab[6]={100000,10000,1000,500,100,10}; + */ + double tab[11] = {1024,512,256,128,64,32,16,8,4,2,1}; + + return tab[depth]; + /* + return 10*log(depth+2); + return (depth+1); + return (long int)pow(100,depth); + */ +} + + +double ** topology_to_arch(hwloc_topology_t topology) +{ + int nb_proc,i,j; + hwloc_obj_t obj_proc1,obj_proc2,obj_res; + double **arch = NULL; + + nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); + arch = (double**)MALLOC(sizeof(double*)*nb_proc); + for( i = 0 ; i < nb_proc ; i++ ){ + obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i); + arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc); + for( j = 0 ; j < nb_proc ; j++ ){ + obj_proc2 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,j); + obj_res = hwloc_get_common_ancestor_obj(topology,obj_proc1,obj_proc2); + /* printf("arch[%d][%d] <- %ld\n",obj_proc1->os_index,obj_proc2->os_index,*((long int*)(obj_res->userdatab))); */ + 
arch[obj_proc1->os_index][obj_proc2->os_index]=link_cost(obj_res->depth+1); + } + } + return arch; +} + +int symetric(hwloc_topology_t topology) +{ + int depth,i,topodepth = hwloc_topology_get_depth(topology); + unsigned int arity; + hwloc_obj_t obj; + for ( depth = 0; depth < topodepth-1 ; depth++ ) { + int N = hwloc_get_nbobjs_by_depth(topology, depth); + obj = hwloc_get_next_obj_by_depth (topology,depth,NULL); + arity = obj->arity; + + /* printf("Depth=%d, N=%d, Arity:%d\n",depth,N,arity); */ + for (i = 1; i < N; i++ ){ + obj = hwloc_get_next_obj_by_depth (topology,depth,obj); + if( obj->arity != arity){ + /* printf("[%d]: obj->arity=%d, arity=%d\n",i,obj->arity,arity); */ + return 0; + } + } + } + return 1; +} + +tm_topology_t* hwloc_to_tm(char *filename) +{ + hwloc_topology_t topology; + tm_topology_t *res = NULL; + hwloc_obj_t *objs = NULL; + unsigned topodepth,depth; + unsigned int nb_nodes; + double *cost; + int err, l; + unsigned int i; + int vl = tm_get_verbose_level(); + + /* Build the topology */ + hwloc_topology_init(&topology); + err = hwloc_topology_set_xml(topology,filename); + if(err == -1){ + if(vl >= CRITICAL) + fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename); + exit(-1); + } + + hwloc_topology_ignore_all_keep_structure(topology); + hwloc_topology_load(topology); + + + /* Test if symetric */ + if(!symetric(topology)){ + if(tm_get_verbose_level() >= CRITICAL) + fprintf(stderr,"%s not symetric!\n",filename); + exit(-1); + } + + /* work on depth */ + topodepth = hwloc_topology_get_depth(topology); + + res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t)); + res->oversub_fact = 1; + res->nb_constraints = 0; + res->constraints = NULL; + res->nb_levels = topodepth; + res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels); + res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels); + res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels); + res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels); + + if(vl >= INFO) + printf("topodepth = %d\n",topodepth); + + /* Build TreeMatch topology */ + for( depth = 0 ; depth < topodepth ; depth++ ){ + nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth); + res->nb_nodes[depth] = nb_nodes; + res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); + res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); + + objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes); + objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL); + hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1); + res->arity[depth] = objs[0]->arity; + + if (depth == topodepth -1){ + res->nb_constraints = nb_nodes; + res->nb_proc_units = nb_nodes; + } + + if(vl >= DEBUG) + printf("\n--%d(%d) **%d**:--\n",res->arity[depth],nb_nodes,res->arity[0]); + + /* Build process id tab */ + for (i = 0; i < nb_nodes; i++){ + if(objs[i]->os_index > nb_nodes){ + if(vl >= CRITICAL){ + fprintf(stderr, "Index of object %d of level %d is %d and larger than number of nodes : %d\n", + i, depth, objs[i]->os_index, nb_nodes); + } + exit(-1); + } + + res->node_id[depth][i] = objs[i]->os_index; + res->node_rank[depth][objs[i]->os_index] = i; + /* if(depth==topodepth-1) */ + } + FREE(objs); + + + } + + cost = (double*)CALLOC(res->nb_levels,sizeof(double)); + for(l=0; lnb_levels; l++){ + cost[l] = link_cost(l); + } + res->cost = cost; + + + /* Destroy topology object. 
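+     The tm_topology_t built above keeps its own copies of the node ids,
+     ranks and per-level costs, so the hwloc object is no longer needed.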
*/ + hwloc_topology_destroy(topology); + if(tm_get_verbose_level() >= INFO) + printf("\n"); + + + + return res; +} + +tm_topology_t* get_local_topo_with_hwloc(void) +{ + hwloc_topology_t topology; + tm_topology_t *res = NULL; + hwloc_obj_t *objs = NULL; + unsigned topodepth,depth; + int nb_nodes,i; + + /* Build the topology */ + hwloc_topology_init(&topology); + hwloc_topology_ignore_all_keep_structure(topology); + hwloc_topology_load(topology); + + /* Test if symetric */ + if(!symetric(topology)){ + if(tm_get_verbose_level() >= CRITICAL) + fprintf(stderr,"Local toplogy not symetric!\n"); + exit(-1); + } + + /* work on depth */ + topodepth = hwloc_topology_get_depth(topology); + + res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t)); + res->nb_constraints = 0; + res->constraints = NULL; + res->nb_levels = topodepth; + res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels); + res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels); + res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels); + res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels); + + /* Build TreeMatch topology */ + for( depth = 0 ; depth < topodepth ; depth++ ){ + nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth); + res->nb_nodes[depth] = nb_nodes; + res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); + res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); + + objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes); + objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL); + hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1); + res->arity[depth] = objs[0]->arity; + + if (depth == topodepth -1){ + res->nb_constraints = nb_nodes; + res->nb_proc_units = nb_nodes; + } + /* printf("%d:",res->arity[depth]); */ + + /* Build process id tab */ + for (i = 0; i < nb_nodes; i++){ + res->node_id[depth][i] = objs[i]->os_index; + res->node_rank[depth][objs[i]->os_index] = i; + /* if(depth==topodepth-1) */ + } + FREE(objs); + } + + + + /* Destroy HWLOC topology object. */ + hwloc_topology_destroy(topology); + + /* printf("\n"); */ + return res; +} + + +void tm_free_topology(tm_topology_t *topology) +{ + int i; + for( i = 0 ; i < topology->nb_levels ; i++ ){ + FREE(topology->node_id[i]); + FREE(topology->node_rank[i]); + } + + FREE(topology->constraints); + FREE(topology->node_id); + FREE(topology->node_rank); + FREE(topology->nb_nodes); + FREE(topology->arity); + FREE(topology->cost); + FREE(topology); +} + +tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type){ + switch(arch_file_type){ + case TM_FILE_TYPE_TGT: + return tgt_to_tm(arch_filename); + case TM_FILE_TYPE_XML: + return hwloc_to_tm(arch_filename); + default: + if(tm_get_verbose_level() >= ERROR){ + fprintf(stderr,"Error loading topology. 
Filetype %d unknown\n", arch_file_type); + } + exit(-1); + } +} + + +void tm_display_topology(tm_topology_t *topology) +{ + int i; + unsigned int j; + unsigned long id; + for( i = 0 ; i < topology->nb_levels ; i++ ){ + printf("%d: ",i); + for( j = 0 ; j < topology->nb_nodes[i] ; j++) + printf("%d ",topology->node_id[i][j]); + printf("\n"); + } + + printf("Last level: "); + for(id = 0; id < topology->nb_nodes[topology->nb_levels-1]/topology->oversub_fact; id++) + printf("%d ",topology->node_rank[topology->nb_levels-1][id]); + printf("\n"); + + + if(topology->constraints){ + printf("Constraints: "); + for(i = 0; i < topology->nb_constraints; i++) + printf("%d ",topology->constraints[i]); + printf("\n"); + } + + printf("\tnb_levels=%d\n\tnb_constraints=%d\n\toversub_fact=%d\n\tnb proc units=%d\n\n", + topology->nb_levels, topology->nb_constraints, topology->oversub_fact, topology->nb_proc_units); + +} + + +void tm_display_arity(tm_topology_t *topology){ + int depth; + for(depth=0; depth < topology->nb_levels; depth++) + printf("%d(%lf): ",topology->arity[depth], topology->cost[depth]); + + printf("\n"); +} + +int int_cmp_inc(const void* x1,const void* x2) +{ + return *((int *)x1) < *((int *)x2) ? -1 : 1; +} + + +int topo_check_constraints(tm_topology_t *topology){ + int n = topology->nb_constraints; + int i; + int depth = topology->nb_levels-1; + for (i=0;inode_id[depth], topology->nb_nodes[depth], topology->constraints[i])){ + if(tm_get_verbose_level() >= CRITICAL){ + fprintf(stderr,"Error! Incompatible constraint with the topology: rank %d in the constraints is not a valid id of any nodes of the topology.\n",topology->constraints[i]); + } + return 0; + } + } + return 1; +} + + + + +/* cpy flag tells if we need to copy the array. + Set to 1 when called from the application level and 0 when called from inside the library*/ +int tm_topology_set_binding_constraints_cpy(int *constraints, int nb_constraints, tm_topology_t *topology, int cpy_flag){ + + topology -> nb_constraints = nb_constraints; + if(cpy_flag){ + topology -> constraints = (int*)MALLOC(nb_constraints*sizeof(int)); + memcpy(topology -> constraints, constraints, nb_constraints*sizeof(int)); + }else{ + topology -> constraints = constraints; + } + + return topo_check_constraints(topology); +} + +int tm_topology_set_binding_constraints(int *constraints, int nb_constraints, tm_topology_t *topology){ + return tm_topology_set_binding_constraints_cpy(constraints, nb_constraints, topology, 1); +} + +int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology) +{ + int *tab = NULL; + FILE *pf = NULL; + char line[LINE_SIZE],*l = NULL; + char *ptr = NULL; + int i,n; + unsigned int vl = tm_get_verbose_level(); + + + if (!(pf = fopen(constraints_filename,"r"))) { + if(vl >= CRITICAL) + fprintf(stderr,"Cannot open %s\n",constraints_filename); + exit(-1); + } + + /* compute the size of the array to store the constraints*/ + n = 0; + fgets(line, LINE_SIZE, pf); + l = line; + while((ptr=strtok(l," \t"))){ + l = NULL; + if((ptr[0] != '\n') && ( !isspace(ptr[0])) && (*ptr) && (ptr)) + n++; + } + + tab = (int*)MALLOC(n*sizeof(int)); + + rewind(pf); + fgets(line, LINE_SIZE, pf); + fclose(pf); + l = line; + i = 0; + while((ptr=strtok(l," \t"))){ + l = NULL; + if((ptr[0] != '\n') && ( !isspace(ptr[0])) && (*ptr) && (ptr)){ + if(i < n) + tab[i] = atoi(ptr); + else{ + if(vl >= CRITICAL) + fprintf(stderr, "More than %d entries in %s\n", n, constraints_filename); + exit(-1); + } + i++; + } + } + + if( i != n ){ + if(vl >= 
CRITICAL) + fprintf(stderr, "Read %d entries while expecting %d ones\n", i, n); + exit(-1); + } + + qsort(tab,n,sizeof(int),int_cmp_inc); + + return tm_topology_set_binding_constraints_cpy(tab, n, topology, 0); +} + + +void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_nodes) +{ + int nb_levels; + unsigned int vl = tm_get_verbose_level(); + + nb_levels = topology->nb_levels; + *nb_nodes = topology->nb_nodes[nb_levels-1]; + if(vl >= INFO) + printf("nb_nodes=%d\n",*nb_nodes); + *numbering = (int*)MALLOC(sizeof(int)*(*nb_nodes)); + memcpy(*numbering,topology->node_id[nb_levels-1],sizeof(int)*(*nb_nodes)); +} + +void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels) +{ + *nb_levels = topology->nb_levels; + *arity = (int*)MALLOC(sizeof(int)*(*nb_levels)); + memcpy(*arity,topology->arity,sizeof(int)*(*nb_levels)); +} + +void topology_constraints_cpy(tm_topology_t *topology,int **constraints,int *nb_constraints) +{ + *nb_constraints = topology->nb_constraints; + if(topology->constraints){ + *constraints = (int*)MALLOC(sizeof(int)*(*nb_constraints)); + memcpy(*constraints,topology->constraints,sizeof(int)*(*nb_constraints)); + }else{ + *constraints = NULL; + } +} + +void topology_cost_cpy(tm_topology_t *topology,double **cost) +{ + *cost = (double*)MALLOC(sizeof(double)*(topology->nb_levels)); + memcpy(*cost,topology->cost,sizeof(double)*(topology->nb_levels)); +} + +void optimize_arity(int **arity, double **cost, int *nb_levels,int n) +{ + int a,i; + int *new_arity = NULL; + double *new_cost = NULL; + + if( n < 0 ) + return; + /* printf("n=%d\tnb_levels=%d\n",n,*nb_levels); */ + /* for(i=0;i<*nb_levels;i++) */ + /* printf("%d:",(*arity)[i]); */ + /* printf("\n"); */ + /* if(n==(*nb_levels)-3) */ + /* exit(-1); */ + a = (*arity)[n]; + if( (a%3 == 0) && (a > 3) ){ + /* + check if the arity of level n devides 3 + If this is the case: + Add a level + */ + (*nb_levels)++; + /* Build a new arity and cost arrays */ + new_arity = (int*)MALLOC(sizeof(int)*(*nb_levels)); + new_cost = (double*)MALLOC(sizeof(double)*(*nb_levels)); + /* Copy the begining if the old arrays */ + for( i = 0 ; i < n ; i++){ + new_arity[i] = (*arity)[i]; + new_cost[i] = (*cost)[i]; + } + /* set the nth level to arity 3 */ + new_arity[n] = 3; + /* copy the cost to this level*/ + new_cost[n] = (*cost)[n];; + /* printf("a=%d\n",a); */ + /* Set the (n+1) level to arity a/3 */ + new_arity[n+1] = a/3; + /*Dupliacte the cost as it is the same level originally*/ + new_cost[n+1] = (*cost)[n]; + /* Copy the end of the arrays */ + for( i = n+2 ; i < *nb_levels ; i++){ + new_arity[i] = (*arity)[i-1]; + new_cost[i] = (*cost)[i-1]; + } + FREE(*arity); + FREE(*cost); + /* if a/3 =3 then go to the next level */ + if(new_arity[n+1] == 3) + optimize_arity(&new_arity,&new_cost,nb_levels,n); + else /* continue to this level (remember we just add a new level */ + optimize_arity(&new_arity,&new_cost,nb_levels,n+1); + *arity=new_arity; + *cost=new_cost; + }else if( (a%2==0) && (a>2) ){/* same as above but for arity == 2 instead of 3 */ + (*nb_levels)++; + new_arity = (int*)MALLOC(sizeof(int)*(*nb_levels)); + new_cost = (double*)MALLOC(sizeof(double)*(*nb_levels)); + for( i = 0 ; i < n ; i++ ){ + new_arity[i] = (*arity)[i]; + new_cost[i] = (*cost)[i]; + } + new_arity[n] = 2; + new_cost[n] = (*cost)[n];; + /* printf("a=%d\n",a); */ + new_arity[n+1] = a/2; + new_cost[n+1] = (*cost)[n]; + for( i = n+2 ; i < *nb_levels ; i++ ){ + new_arity[i] = (*arity)[i-1]; + new_cost[i] = (*cost)[i-1]; + } + FREE(*arity); + 
FREE(*cost); + if(new_arity[n+1] == 2) + optimize_arity(&new_arity, &new_cost, nb_levels, n); + else + optimize_arity(&new_arity, &new_cost, nb_levels, n+1); + *arity = new_arity; + *cost= new_cost; + }else /* if nothing works go to next level. */ + optimize_arity(arity, cost, nb_levels,n-1); +} + + + + +void tm_optimize_topology(tm_topology_t **topology){ + int *arity = NULL,nb_levels; + int *numbering = NULL,nb_nodes; + tm_topology_t *new_topo; + double *cost; + unsigned int vl = tm_get_verbose_level(); + int *constraints = NULL, nb_constraints; + int i; + + if(vl >= DEBUG) + tm_display_arity(*topology); + + topology_arity_cpy(*topology,&arity,&nb_levels); + topology_numbering_cpy(*topology,&numbering,&nb_nodes); + topology_constraints_cpy(*topology,&constraints,&nb_constraints); + topology_cost_cpy(*topology,&cost); + + + optimize_arity(&arity,&cost,&nb_levels,nb_levels-2); + new_topo = tm_build_synthetic_topology(arity, NULL, nb_levels,numbering,nb_nodes); + new_topo->cost = cost; + new_topo->constraints = constraints; + new_topo->nb_constraints = nb_constraints; + new_topo->nb_proc_units = (*topology)->nb_proc_units; + new_topo->oversub_fact = (*topology)->oversub_fact; + + + + if(vl >= DEBUG){ + if(constraints){ + printf("Constraints: "); + for(i=0;inb_constraints = 0; + topology->oversub_fact = 1; + topology->constraints = NULL; + topology->nb_levels = nb_levels; + topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels); + topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels); + topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels); + topology->nb_nodes = (size_t *)MALLOC(sizeof(size_t)*topology->nb_levels); + if(cost) + topology->cost = (double*)CALLOC(topology->nb_levels,sizeof(double)); + else + topology->cost = NULL; + + memcpy(topology->arity, arity, sizeof(int)*nb_levels); + if(cost) + memcpy(topology->cost, cost, sizeof(double)*nb_levels); + + n = 1; + for( i = 0 ; i < topology->nb_levels ; i++ ){ + topology->nb_nodes[i] = n; + topology->node_id[i] = (int*)MALLOC(sizeof(int)*n); + topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n); + if( i < topology->nb_levels-1){ + for( j = 0 ; j < n ; j++ ){ + topology->node_id[i][j] = j; + topology->node_rank[i][j]=j; + } + }else{ + for( j = 0 ; j < n ; j++ ){ + int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes); + topology->node_id[i][j] = id; + topology->node_rank[i][id] = j; + } + } + + + if (i == topology->nb_levels-1){ + topology->nb_constraints = n; + topology->nb_proc_units = n; + } + + n *= topology->arity[i]; + } + if(cost){ + /*aggregate costs*/ + for( i = topology->nb_levels-2 ; i >= 0 ; i-- ) + topology->cost[i] += topology->cost[i+1]; + } + + return topology; +} + + +void build_synthetic_proc_id(tm_topology_t *topology) +{ + int i; + size_t j,n = 1; + + topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels); + topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels); + topology->nb_nodes = (size_t*) MALLOC(sizeof(size_t)*topology->nb_levels); + + for( i = 0 ; i < topology->nb_levels ; i++ ){ + /* printf("n= %lld, arity := %d\n",n, topology->arity[i]); */ + topology->nb_nodes[i] = n; + topology->node_id[i] = (int*)MALLOC(sizeof(long int)*n); + topology->node_rank[i] = (int*)MALLOC(sizeof(long int)*n); + if ( !topology->node_id[i] ){ + if(tm_get_verbose_level() >= CRITICAL) + fprintf(stderr,"Cannot allocate level %d (of size %ld) of the topology\n", i, (unsigned long int)n); + exit(-1); + } + + if (i == 
topology->nb_levels-1){ + topology->nb_constraints = n; + topology->nb_proc_units = n; + } + + + + for( j = 0 ; j < n ; j++ ){ + topology->node_id[i][j] = j; + topology->node_rank[i][j] = j; + } + n *= topology->arity[i]; + } + +} + + + +void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact){ +{ + int i,j,n; + + if(oversub_fact <=1) + return; + + topology -> nb_levels ++; + topology -> arity = (int*) REALLOC(topology->arity, sizeof(int)*topology->nb_levels); + topology -> cost = (double*) REALLOC(topology->cost, sizeof(double)*topology->nb_levels); + topology -> node_id = (int**) REALLOC(topology->node_id, sizeof(int*)*topology->nb_levels); + topology -> node_rank = (int**) REALLOC(topology->node_rank, sizeof(int*)*topology->nb_levels); + topology -> nb_nodes = (size_t *)REALLOC(topology->nb_nodes, sizeof(size_t)*topology->nb_levels); + topology -> oversub_fact = oversub_fact; + + i = topology->nb_levels - 1; + n = topology->nb_nodes[i-1] * oversub_fact; + topology->arity[i-1] = oversub_fact; + topology->cost[i-1] = 0; + topology->node_id[i] = (int*)MALLOC(sizeof(int)*n); + topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n); + topology->nb_nodes[i] = n; + + for( j = 0 ; j < n ; j++ ){ + int id = topology->node_id[i-1][j/oversub_fact]; + topology->node_id[i][j] = id; + topology->node_rank[i][id] = j; + } + } + +} diff --git a/ompi/mca/topo/treematch/treematch/tm_topology.h b/ompi/mca/topo/treematch/treematch/tm_topology.h new file mode 100644 index 0000000000..1cd0c5b417 --- /dev/null +++ b/ompi/mca/topo/treematch/treematch/tm_topology.h @@ -0,0 +1,22 @@ +#include +#include "tm_tree.h" + +tm_topology_t* get_local_topo_with_hwloc(void); +tm_topology_t* hwloc_to_tm(char *filename); +int int_cmp_inc(const void* x1,const void* x2); +void optimize_arity(int **arity, double **cost, int *nb_levels,int n); +int symetric(hwloc_topology_t topology); +tm_topology_t * tgt_to_tm(char *filename); +void tm_display_arity(tm_topology_t *topology); +void tm_display_topology(tm_topology_t *topology); +void tm_free_topology(tm_topology_t *topology); +tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type); +void tm_optimize_topology(tm_topology_t **topology); +int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology); +int topo_nb_proc(hwloc_topology_t topology,int N); +void topology_arity(tm_topology_t *topology,int **arity,int *nb_levels); +void topology_constraints(tm_topology_t *topology,int **constraints,int *nb_constraints); +void topology_cost(tm_topology_t *topology,double **cost); +void topology_numbering(tm_topology_t *topology,int **numbering,int *nb_nodes); +double ** topology_to_arch(hwloc_topology_t topology); + diff --git a/ompi/mca/topo/treematch/treematch/tm_tree.c b/ompi/mca/topo/treematch/treematch/tm_tree.c index 1644b26779..e3c5e29385 100644 --- a/ompi/mca/topo/treematch/treematch/tm_tree.c +++ b/ompi/mca/topo/treematch/treematch/tm_tree.c @@ -3,21 +3,19 @@ #include #include #include +#include + #include "tm_tree.h" +#include "tm_mapping.h" #include "tm_timings.h" #include "tm_bucket.h" #include "tm_kpartitioning.h" -#include "tm_mapping.h" #include "tm_verbose.h" #include "tm_thread_pool.h" -#if !defined(MIN) -#define MIN(a,b) ((a)<(b)?(a):(b)) -#endif -#if !defined(MAX) -#define MAX(a,b) ((a)>(b)?(a):(b)) -#endif +#define MIN(a, b) ((a)<(b)?(a):(b)) +#define MAX(a, b) ((a)>(b)?(a):(b)) #ifndef __CHARMC__ #define __CHARMC__ 0 @@ -26,151 +24,153 @@ #if __CHARMC__ #include "converse.h" #else -static 
int ilog2(int val) -{ - int i = 0; - for( ; val != 0; val >>= 1, i++ ); - return i; -} -#define CmiLog2(VAL) ilog2((int)(VAL)) +#define CmiLog2(VAL) log2((double)(VAL)) #endif static int verbose_level = ERROR; +static int exhaustive_search_flag = 0; - -void FREE_list_child(tree_t *); -void FREE_tab_child(tree_t *); -unsigned long int choose (long,long); -void display_node(tree_t *); -void clone_tree(tree_t *,tree_t *); -double *aggregate_obj_weight(tree_t *,double *,int); -affinity_mat_t *aggregate_com_mat(tree_t *,affinity_mat_t *,int); -double eval_grouping(affinity_mat_t *,tree_t **,int); -group_list_t *new_group_list(tree_t **,double,group_list_t *); -void add_to_list(group_list_t *,tree_t **,int,double); -void list_all_possible_groups(affinity_mat_t *,tree_t *,int,int,int,tree_t **,group_list_t *); -int independent_groups(group_list_t **,int,group_list_t *,int); -void display_selection (group_list_t**,int,int,double); -void display_grouping (tree_t *,int,int,double); -int recurs_select_independent_groups(group_list_t **,int,int,int,int, - int,double,double *,group_list_t **,group_list_t **); -int test_independent_groups(group_list_t **,int,int,int,int,int,double,double *, - group_list_t **,group_list_t **); +void free_list_child(tm_tree_t *);void free_tab_child(tm_tree_t *); +double choose (long, long);void display_node(tm_tree_t *); +void clone_tree(tm_tree_t *, tm_tree_t *); +double *aggregate_obj_weight(tm_tree_t *, double *, int); +tm_affinity_mat_t *aggregate_com_mat(tm_tree_t *, tm_affinity_mat_t *, int); +double eval_grouping(tm_affinity_mat_t *, tm_tree_t **, int); +group_list_t *new_group_list(tm_tree_t **, double, group_list_t *); +void add_to_list(group_list_t *, tm_tree_t **, int, double); +void list_all_possible_groups(tm_affinity_mat_t *, tm_tree_t *, int, int, int, tm_tree_t **, group_list_t *); +int independent_groups(group_list_t **, int, group_list_t *, int); +void display_selection (group_list_t**, int, int, double); +void display_grouping (tm_tree_t *, int, int, double); +int recurs_select_independent_groups(group_list_t **, int, int, int, int, + int, double, double *, group_list_t **, group_list_t **); +int test_independent_groups(group_list_t **, int, int, int, int, int, double, double *, + group_list_t **, group_list_t **); void delete_group_list(group_list_t *); -int group_list_id(const void*,const void*); -int group_list_asc(const void*,const void*); -int group_list_dsc(const void*,const void*); -int weighted_degree_asc(const void*,const void*); -int weighted_degree_dsc(const void*,const void*); -int select_independent_groups(group_list_t **,int,int,int,double *,group_list_t **,int,double); -int select_independent_groups_by_largest_index(group_list_t **,int,int,int,double *, - group_list_t **,int,double); -void list_to_tab(group_list_t *,group_list_t **,int); -void display_tab_group(group_list_t **,int,int); -int independent_tab(tree_t **,tree_t **,int); -void compute_weighted_degree(group_list_t **,int,int); -void group(affinity_mat_t *,tree_t *,tree_t *,int,int,int,double *,tree_t **); -void fast_group(affinity_mat_t *,tree_t *,tree_t *,int,int,int,double *,tree_t **, int *, int); -int adjacency_asc(const void*,const void*); -int adjacency_dsc(const void*,const void*); - void super_fast_grouping(affinity_mat_t *,tree_t *,tree_t *,int, int); -affinity_mat_t *build_cost_matrix(affinity_mat_t *,double *,double); -void group_nodes(affinity_mat_t *,tree_t *,tree_t *,int ,int,double*,double); -void fast_grouping(affinity_mat_t *,tree_t *,tree_t *,int,int,long 
int); -void complete_aff_mat(affinity_mat_t **,int,int); -void complete_obj_weight(double **,int,int); -void create_dumb_tree(tree_t *,int,tm_topology_t *); -void complete_tab_node(tree_t **,int,int,int,tm_topology_t *); -void set_deb_tab_child(tree_t *,tree_t *,int); -tree_t *build_level_topology(tree_t *,affinity_mat_t *,int,int,tm_topology_t *,double *,double *); -int check_constraints(tm_topology_t *,int **); -tree_t *bottom_up_build_tree_from_topology(tm_topology_t *,double **, int ,double *,double *); -void FREE_non_constraint_tree(tree_t *); -void FREE_constraint_tree(tree_t *); -void FREE_tab_double(double**,int); -void FREE_tab_int(int**,int ); -void partial_aggregate_com_mat (int, void **); -affinity_mat_t *new_affinity_mat(double **, double *, int); -void partial_aggregate_aff_mat (int, void **); -affinity_mat_t *aggregate_aff_mat(tree_t *, affinity_mat_t *, int); -affinity_mat_t * build_affinity_mat(double **, int); +int group_list_id(const void*, const void*); +int group_list_asc(const void*, const void*); +int group_list_dsc(const void*, const void*); +int weighted_degree_asc(const void*, const void*); +int weighted_degree_dsc(const void*, const void*); +int select_independent_groups(group_list_t **, int, int, int, double *, group_list_t **, int, double); +int select_independent_groups_by_largest_index(group_list_t **, int, int, int, double *, + group_list_t **, int, double); +void list_to_tab(group_list_t *, group_list_t **, int); +void display_tab_group(group_list_t **, int, int); +int independent_tab(tm_tree_t **, tm_tree_t **, int); +void compute_weighted_degree(group_list_t **, int, int); +void group(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, int, double *, tm_tree_t **); +void fast_group(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, int, double *, tm_tree_t **, int *, int); +int adjacency_asc(const void*, const void*); +int adjacency_dsc(const void*, const void*); + void super_fast_grouping(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int); +tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *, double *, double); +void group_nodes(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int , int, double*, double); +double fast_grouping(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, double); +void complete_aff_mat(tm_affinity_mat_t **, int, int); +void complete_obj_weight(double **, int, int); +void create_dumb_tree(tm_tree_t *, int, tm_topology_t *); +void complete_tab_node(tm_tree_t **, int, int, int, tm_topology_t *); +void set_deb_tab_child(tm_tree_t *, tm_tree_t *, int); +tm_tree_t *build_level_topology(tm_tree_t *, tm_affinity_mat_t *, int, int, tm_topology_t *, double *, double *); +int check_constraints(tm_topology_t *, int **); +tm_tree_t *bottom_up_build_tree_from_topology(tm_topology_t *, tm_affinity_mat_t *, double *, double *); +void free_non_constraint_tree(tm_tree_t *); +void free_constraint_tree(tm_tree_t *); +void free_tab_double(double**, int); +void free_tab_int(int**, int ); +void partial_aggregate_aff_mat (int, void **, int); +void free_affinity_mat(tm_affinity_mat_t *aff_mat); +int int_cmp_inc(const void* x1, const void* x2); -affinity_mat_t *new_affinity_mat(double **mat, double *sum_row, int order){ - affinity_mat_t *res = (affinity_mat_t *) MALLOC (sizeof(affinity_mat_t)); - res -> mat = mat; - res -> sum_row = sum_row; - res -> order = order; - return res; + + +void tm_set_exhaustive_search_flag(int new_val){ + exhaustive_search_flag = new_val; } -void FREE_list_child(tree_t *tree) -{ - int i; +int 
tm_get_exhaustive_search_flag(){ + return exhaustive_search_flag; +} - - if(NULL == tree) return; + +void free_affinity_mat(tm_affinity_mat_t *aff_mat){ + free_tab_double(aff_mat->mat, aff_mat->order); + FREE(aff_mat->sum_row); + FREE(aff_mat); +} + + + +void free_list_child(tm_tree_t *tree) +{ + int i; + + if(tree){ for(i=0;i<tree->arity;i++) - FREE_list_child(tree->child[i]); + free_list_child(tree->child[i]); FREE(tree->child); if(tree->dumb) - FREE(tree); + FREE(tree); + } } - -void FREE_tab_child(tree_t *tree) +void free_tab_child(tm_tree_t *tree) { if(tree){ - FREE_tab_child(tree->tab_child); + free_tab_child(tree->tab_child); FREE(tree->tab_child); } } -void FREE_non_constraint_tree(tree_t *tree) +void free_non_constraint_tree(tm_tree_t *tree) { - int free_tree = tree->dumb; - FREE_tab_child(tree); - FREE_list_child(tree); - if(free_tree) + int d = tree->dumb; + + free_list_child(tree); + free_tab_child(tree); + if(!d) FREE(tree); } -void FREE_constraint_tree(tree_t *tree) +void free_constraint_tree(tm_tree_t *tree) { int i; if(tree){ for(i=0;i<tree->arity;i++) - FREE_constraint_tree(tree->child[i]); + free_constraint_tree(tree->child[i]); FREE(tree->child); FREE(tree); } } -void FREE_tree(tree_t *tree) +void tm_free_tree(tm_tree_t *tree) { if(tree->constraint) - FREE_constraint_tree(tree); + free_constraint_tree(tree); else - FREE_non_constraint_tree(tree); + free_non_constraint_tree(tree); } -unsigned long int choose (long n,long k) +double choose (long n, long k) { /* compute C_n_k */ double res = 1; int i; - for( i = 0 ; i < k ; i++ ) - res *= (double)(n-i)/(double)(k-i); - - return (unsigned long int)res; + for( i = 0 ; i < k ; i++ ){ + res *= ((double)(n-i)/(double)(k-i)); + } + return res; } -void set_node(tree_t *node,tree_t ** child, int arity,tree_t *parent, - int id,double val,tree_t *tab_child,int depth) +void set_node(tm_tree_t *node, tm_tree_t ** child, int arity, tm_tree_t *parent, + int id, double val, tm_tree_t *tab_child, int depth) { static int uniq = 0; node->child = child; @@ -184,14 +184,14 @@ void set_node(tree_t *node,tree_t ** child, int arity,tree_t *parent, node->dumb = 0; } -void display_node(tree_t *node) +void display_node(tm_tree_t *node) { if (verbose_level >= DEBUG) printf("child : %p\narity : %d\nparent : %p\nid : %d\nval : %f\nuniq : %d\n\n", - (void *)(node->child),node->arity,(void *)(node->parent),node->id,node->val,node->uniq); + (void *)(node->child), node->arity, (void *)(node->parent), node->id, node->val, node->uniq); } -void clone_tree(tree_t *new,tree_t *old) +void clone_tree(tm_tree_t *new, tm_tree_t *old) { int i; new->child = old->child; @@ -208,9 +208,9 @@ void clone_tree(tree_t *new,tree_t *old) } -double *aggregate_obj_weight(tree_t *new_tab_node, double *tab, int M) +double *aggregate_obj_weight(tm_tree_t *new_tab_node, double *tab, int M) { - int i,i1,id1; + int i, i1, id1; double *res = NULL; if(!tab) @@ -230,26 +230,26 @@ -void partial_aggregate_aff_mat (int nb_args, void **args){ +void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){ int inf = *(int*)args[0]; int sup = *(int*)args[1]; double **old_mat = (double**)args[2]; - tree_t *tab_node = (tree_t*)args[3]; + tm_tree_t *tab_node = (tm_tree_t*)args[3]; int M = *(int*)args[4]; double **mat = (double**)args[5]; double *sum_row = (double*)args[6]; - int i,j,i1,j1; + int i, j, i1, j1; int id1, id2; - if(nb_args != 6){ + if(nb_args != 7){ if(verbose_level >= ERROR) - fprintf(stderr,"Wrong number of args in 
%s: %d\n",__func__, nb_args); + fprintf(stderr, "Thread %d: Wrong number of args in %s: %d\n", thread_id, __FUNCTION__, nb_args); exit(-1); } if(verbose_level >= INFO) - printf("Aggregate in parallel (%d-%d)\n",inf,sup-1); + printf("Aggregate in parallel (%d-%d)\n", inf, sup-1); for( i = inf ; i < sup ; i++ ) for( j = 0 ; j < M ; j++ ){ @@ -259,7 +259,7 @@ void partial_aggregate_aff_mat (int nb_args, void **args){ for( j1 = 0 ; j1 < tab_node[j].arity ; j1++ ){ id2 = tab_node[j].child[j1]->id; mat[i][j] += old_mat[id1][id2]; - /* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n",i,j,id1,id2,old_mat[id1][id2]);*/ + /* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n", i, j, id1, id2, old_mat[id1][id2]);*/ } sum_row[i] += mat[i][j]; } @@ -268,17 +268,17 @@ void partial_aggregate_aff_mat (int nb_args, void **args){ } -affinity_mat_t *aggregate_aff_mat(tree_t *tab_node, affinity_mat_t *aff_mat, int M) +tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int M) { - int i,j,i1,j1,id1,id2; + int i, j, i1, j1, id1, id2; double **new_mat = NULL, **old_mat = aff_mat->mat; double *sum_row = NULL; new_mat = (double**)MALLOC(M*sizeof(double*)); for( i = 0 ; i < M ; i++ ) - new_mat[i] = (double*)CALLOC((M),sizeof(double)); + new_mat[i] = (double*)CALLOC((M), sizeof(double)); - sum_row = (double*)CALLOC(M,sizeof(double)); + sum_row = (double*)CALLOC(M, sizeof(double)); if(M>512){ /* perform this part in parallel*/ int id; @@ -287,7 +287,7 @@ affinity_mat_t *aggregate_aff_mat(tree_t *tab_node, affinity_mat_t *aff_mat, int int *inf; int *sup; - nb_threads = MIN(M/512,get_nb_threads()); + nb_threads = MIN(M/512, get_nb_threads()); works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads); inf = (int*)MALLOC(sizeof(int)*nb_threads); sup = (int*)MALLOC(sizeof(int)*nb_threads); @@ -304,9 +304,9 @@ affinity_mat_t *aggregate_aff_mat(tree_t *tab_node, affinity_mat_t *aff_mat, int args[5]=(void*)new_mat; args[6]=(void*)sum_row; - works[id]= create_work(7,args,partial_aggregate_aff_mat); + works[id]= create_work(7, args, partial_aggregate_aff_mat); if(verbose_level >= DEBUG) - printf("Executing %p\n",(void *)works[id]); + printf("Executing %p\n", (void *)works[id]); submit_work( works[id], id); } @@ -330,60 +330,66 @@ affinity_mat_t *aggregate_aff_mat(tree_t *tab_node, affinity_mat_t *aff_mat, int for( j1 = 0 ; j1 < tab_node[j].arity ; j1++ ){ id2 = tab_node[j].child[j1]->id; new_mat[i][j] += old_mat[id1][id2]; - /* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n",i,j,id1,id2,old_mat[id1][id2]);*/ + /* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n", i, j, id1, id2, old_mat[id1][id2]);*/ } sum_row[i] += new_mat[i][j]; } } } } - return new_affinity_mat(new_mat,sum_row,M); + return new_affinity_mat(new_mat, sum_row, M); } -void FREE_tab_double(double**tab,int N) +void free_tab_double(double**tab, int mat_order) { int i; - for( i = 0 ; i < N ; i++ ) + for( i = 0 ; i < mat_order ; i++ ) FREE(tab[i]); FREE(tab); } -void FREE_tab_int(int**tab,int N) +void free_tab_int(int**tab, int mat_order) { int i; - for( i = 0 ; i < N ; i++ ) + for( i = 0 ; i < mat_order ; i++ ) FREE(tab[i]); FREE(tab); } -void display_tab(double **tab,int N) +void display_tab(double **tab, int mat_order) { - int i,j; - double line,total = 0; + int i, j; + double line, total = 0; + int vl = tm_get_verbose_level(); - - for( i = 0 ; i < N ; i++ ){ + for( i = 0 ; i < mat_order ; i++ ){ line = 0; - for( j = 0 ; j < N ; j++ ){ - printf("%g ",tab[i][j]); + for( j = 0 ; j < mat_order ; j++ ){ + if(vl >= WARNING) + printf("%g ", tab[i][j]); + else + 
fprintf(stderr, "%g ", tab[i][j]); line += tab[i][j]; } total += line; - /* printf(": %g",line);*/ - printf("\n"); + /* printf(": %g", line);*/ + if(vl >= WARNING) + printf("\n"); + else + fprintf(stderr, "\n"); } - /* printf("Total: %.2f\n",total);*/ + /* printf("Total: %.2f\n", total);*/ } -double eval_grouping(affinity_mat_t *aff_mat,tree_t **cur_group,int arity) +double eval_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t **cur_group, int arity) { double res = 0; - int i,j,id,id1,id2; + int i, j, id, id1, id2; double **mat = aff_mat->mat; double * sum_row = aff_mat -> sum_row; - /*display_tab(tab,N);*/ + /*display_tab(tab, mat_order);*/ for( i = 0 ; i < arity ; i++ ){ id = cur_group[i]->id; @@ -394,16 +400,16 @@ double eval_grouping(affinity_mat_t *aff_mat,tree_t **cur_group,int arity) id1 = cur_group[i]->id; for( j = 0 ; j < arity ; j++ ){ id2 = cur_group[j]->id; - /*printf("res-=tab[%d][%d]=%f\n",id1,id2,tab[id1][id2]);*/ + /*printf("res-=tab[%d][%d]=%f\n", id1, id2, tab[id1][id2]);*/ res -= mat[id1][id2]; } } - /*printf(" = %f\n",res);*/ + /*printf(" = %f\n", res);*/ return res; } -group_list_t *new_group_list(tree_t **tab,double val,group_list_t *next) +group_list_t *new_group_list(tm_tree_t **tab, double val, group_list_t *next) { group_list_t *res = NULL; @@ -416,74 +422,74 @@ group_list_t *new_group_list(tree_t **tab,double val,group_list_t *next) } -void add_to_list(group_list_t *list,tree_t **cur_group, int arity, double val) +void add_to_list(group_list_t *list, tm_tree_t **cur_group, int arity, double val) { group_list_t *elem = NULL; - tree_t **tab = NULL; + tm_tree_t **tab = NULL; int i; - tab=(tree_t **)MALLOC(sizeof(tree_t *)*arity); + tab=(tm_tree_t **)MALLOC(sizeof(tm_tree_t *)*arity); for( i = 0 ; i < arity ; i++ ){ tab[i] = cur_group[i]; - if(verbose_level>=INFO) - printf("cur_group[%d]=%d ",i,cur_group[i]->id); + if(verbose_level>=DEBUG) + printf("cur_group[%d]=%d ", i, cur_group[i]->id); } - if(verbose_level>=INFO) - printf(": %f\n",val); + if(verbose_level>=DEBUG) + printf(": %f\n", val); /*printf("\n");*/ - elem = new_group_list(tab,val,list->next); + elem = new_group_list(tab, val, list->next); list->next = elem; list->val++; } -void list_all_possible_groups(affinity_mat_t *aff_mat,tree_t *tab_node,int id,int arity, int depth, - tree_t **cur_group, group_list_t *list) +void list_all_possible_groups(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, int id, int arity, int depth, + tm_tree_t **cur_group, group_list_t *list) { double val; int i; - int N = aff_mat->order; + int mat_order = aff_mat->order; if(depth == arity){ - val = eval_grouping(aff_mat,cur_group,arity); - add_to_list(list,cur_group,arity,val); + val = eval_grouping(aff_mat, cur_group, arity); + add_to_list(list, cur_group, arity, val); return; - }else if( (N+depth) >= (arity+id) ){ + }else if( (mat_order+depth) >= (arity+id) ){ /*}else if(1){*/ - for( i = id ; i < N ; i++ ){ + for( i = id ; i < mat_order ; i++ ){ if(tab_node[i].parent) continue; cur_group[depth] = &tab_node[i]; - if(verbose_level>=INFO) - printf("%d<-%d\n",depth,i); - list_all_possible_groups(aff_mat,tab_node,i+1,arity,depth+1,cur_group,list); + if(verbose_level>=DEBUG) + printf("%d<-%d\n", depth, i); + list_all_possible_groups(aff_mat, tab_node, i+1, arity, depth+1, cur_group, list); } } } -void update_val(affinity_mat_t *aff_mat,tree_t *parent) +void update_val(tm_affinity_mat_t *aff_mat, tm_tree_t *parent) { /* int i; */ - parent->val = eval_grouping(aff_mat,parent->child,parent->arity); + parent->val = eval_grouping(aff_mat, 
parent->child, parent->arity); /*printf("connecting: ");*/ /*for( i = 0 ; i < parent->arity ; i++ ){ */ - /*printf("%d ",parent->child[i]->id);*/ + /*printf("%d ", parent->child[i]->id);*/ /* if(parent->child[i]->parent!=parent){ parent->child[i]->parent=parent; }else{ - fprintf(stderr,"redundant operation!\n"); + fprintf(stderr, "redundant operation!\n"); exit(-1); }*/ /* } */ - /*printf(": %f\n",parent->val);*/ + /*printf(": %f\n", parent->val);*/ } -int independent_groups(group_list_t **selection,int d,group_list_t *elem,int arity) +int independent_groups(group_list_t **selection, int d, group_list_t *elem, int arity) { - int i,j,k; + int i, j, k; if(d == 0) return 1; @@ -496,25 +502,30 @@ int independent_groups(group_list_t **selection,int d,group_list_t *elem,int ari return 1; } -void display_selection (group_list_t** selection,int M,int arity,double val) -{ - int i,j; - if(verbose_leveltab[j]->id); - printf("-- "); + printf("%d ", selection[i]->tab[j]->id); + printf("(%d)-- ", selection[i]->id); + local_val+=selection[i]->val; } - printf(":%f\n",val); + printf(":%f -- %f\n", val, local_val); } -void display_grouping (tree_t *father,int M,int arity,double val) + +void display_grouping (tm_tree_t *father, int M, int arity, double val) { - int i,j; + int i, j; if(verbose_level < INFO) return; @@ -522,14 +533,14 @@ void display_grouping (tree_t *father,int M,int arity,double val) printf("Grouping : "); for( i = 0 ; i < M ; i++ ){ for( j = 0 ; j < arity ; j++ ) - printf("%d ",father[i].child[j]->id); + printf("%d ", father[i].child[j]->id); printf("-- "); } - printf(":%f\n",val); + printf(":%f\n", val); } -int recurs_select_independent_groups(group_list_t **tab,int i,int n,int arity,int d,int M,double val,double *best_val,group_list_t **selection,group_list_t **best_selection) +int recurs_select_independent_groups(group_list_t **tab, int i, int n, int arity, int d, int M, double val, double *best_val, group_list_t **selection, group_list_t **best_selection) { group_list_t *elem = NULL; /* @@ -538,8 +549,8 @@ int recurs_select_independent_groups(group_list_t **tab,int i,int n,int arity,in */ if( d == M ){ - if(verbose_level>=INFO) - display_selection(selection,M,arity,val); + if(verbose_level >= DEBUG) + display_selection(selection, M, arity, val); if( val < *best_val ){ *best_val = val; for( i = 0 ; i < M ; i++ ) @@ -551,12 +562,12 @@ int recurs_select_independent_groups(group_list_t **tab,int i,int n,int arity,in while( i < n ){ elem = tab[i]; - if(independent_groups(selection,d,elem,arity)){ - if(verbose_level>=INFO) - printf("%d: %d\n",d,i); + if(independent_groups(selection, d, elem, arity)){ + if(verbose_level >= DEBUG) + printf("%d: %d\n", d, i); selection[d] = elem; val += elem->val; - return recurs_select_independent_groups(tab,i+1,n,arity,d+1,M,val,best_val,selection,best_selection); + return recurs_select_independent_groups(tab, i+1, n, arity, d+1, M, val, best_val, selection, best_selection); } i++; } @@ -564,22 +575,23 @@ int recurs_select_independent_groups(group_list_t **tab,int i,int n,int arity,in } -int test_independent_groups(group_list_t **tab,int i,int n,int arity,int d,int M,double val,double *best_val,group_list_t **selection,group_list_t **best_selection) + +int test_independent_groups(group_list_t **tab, int i, int n, int arity, int d, int M, double val, double *best_val, group_list_t **selection, group_list_t **best_selection) { group_list_t *elem = NULL; if( d == M ){ - /*display_selection(selection,M,arity,val);*/ + /*display_selection(selection, M, arity, 
val);*/ return 1; } while( i < n ){ elem = tab[i]; - if(independent_groups(selection,d,elem,arity)){ - /*printf("%d: %d\n",d,i);*/ + if(independent_groups(selection, d, elem, arity)){ + /*printf("%d: %d\n", d, i);*/ selection[d] = elem; val += elem->val; - return recurs_select_independent_groups(tab,i+1,n,arity,d+1,M,val,best_val,selection,best_selection); + return recurs_select_independent_groups(tab, i+1, n, arity, d+1, M, val, best_val, selection, best_selection); } i++; } @@ -588,6 +600,7 @@ int test_independent_groups(group_list_t **tab,int i,int n,int arity,int d,int M void delete_group_list(group_list_t *list) { + if(list){ delete_group_list(list->next); FREE(list->tab); @@ -595,9 +608,9 @@ void delete_group_list(group_list_t *list) } } -int group_list_id(const void* x1,const void* x2) +int group_list_id(const void* x1, const void* x2) { - group_list_t *e1 = NULL,*e2= NULL; + group_list_t *e1 = NULL, *e2= NULL; e1 = *((group_list_t**)x1); e2 = *((group_list_t**)x2); @@ -605,9 +618,9 @@ int group_list_id(const void* x1,const void* x2) return (e1->tab[0]->id < e2->tab[0]->id) ? - 1 : 1; } -int group_list_asc(const void* x1,const void* x2) +int group_list_asc(const void* x1, const void* x2) { - group_list_t *e1 = NULL,*e2 = NULL; + group_list_t *e1 = NULL, *e2 = NULL; e1 = *((group_list_t**)x1); e2 = *((group_list_t**)x2); @@ -615,9 +628,9 @@ int group_list_asc(const void* x1,const void* x2) return (e1->val < e2->val) ? - 1 : 1; } -int group_list_dsc(const void* x1,const void* x2) +int group_list_dsc(const void* x1, const void* x2) { - group_list_t *e1 = NULL,*e2 = NULL; + group_list_t *e1 = NULL, *e2 = NULL; e1 = *((group_list_t**)x1); e2 = *((group_list_t**)x2); @@ -625,9 +638,9 @@ int group_list_dsc(const void* x1,const void* x2) return (e1->val > e2->val) ? -1 : 1; } -int weighted_degree_asc(const void* x1,const void* x2) +int weighted_degree_asc(const void* x1, const void* x2) { - group_list_t *e1= NULL,*e2 = NULL; + group_list_t *e1= NULL, *e2 = NULL; e1 = *((group_list_t**)x1); e2 = *((group_list_t**)x2); @@ -635,9 +648,9 @@ int weighted_degree_asc(const void* x1,const void* x2) return (e1->wg > e2->wg) ? 1 : -1; } -int weighted_degree_dsc(const void* x1,const void* x2) +int weighted_degree_dsc(const void* x1, const void* x2) { - group_list_t *e1 = NULL,*e2 = NULL; + group_list_t *e1 = NULL, *e2 = NULL; e1 = *((group_list_t**)x1); e2 = *((group_list_t**)x2); @@ -645,20 +658,20 @@ int weighted_degree_dsc(const void* x1,const void* x2) return (e1->wg > e2->wg) ? 
- 1 : 1; } -int select_independent_groups(group_list_t **tab_group,int n,int arity,int M,double *best_val, - group_list_t **best_selection,int bound,double max_duration) +int select_independent_groups(group_list_t **tab_group, int n, int arity, int M, double *best_val, + group_list_t **best_selection, int bound, double max_duration) { - int i,j; + int i, j; group_list_t **selection = NULL; - double val,duration; - CLOCK_T time1,time0; + double val, duration; + CLOCK_T time1, time0; - if(verbose_level>=INFO){ + if(verbose_level>=DEBUG){ for(i=0;itab[j]->id); + printf("%d ", tab_group[i]->tab[j]->id); } - printf(" : %f\n",tab_group[i]->val); + printf(" : %f\n", tab_group[i]->val); } } @@ -666,14 +679,14 @@ int select_independent_groups(group_list_t **tab_group,int n,int arity,int M,do selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*M); CLOCK(time0); - for( i = 0 ; i < MIN(bound,n) ; i++ ){ - /* if(!(i%100)) {printf("%d/%d ",i, MIN(bound,n)); fflush(stdout);} */ + for( i = 0 ; i < MIN(bound, n) ; i++ ){ + /* if(!(i%100)) {printf("%d/%d ", i, MIN(bound, n)); fflush(stdout);} */ selection[0] = tab_group[i]; val = tab_group[i]->val; - recurs_select_independent_groups(tab_group,i+1,n,arity,1,M,val,best_val,selection,best_selection); + recurs_select_independent_groups(tab_group, i+1, n, arity, 1, M, val, best_val, selection, best_selection); if((!(i%5)) && (max_duration>0)){ CLOCK(time1); - duration = CLOCK_DIFF(time1,time0); + duration = CLOCK_DIFF(time1, time0); if(duration>max_duration){ FREE(selection); return 1; @@ -684,27 +697,688 @@ int select_independent_groups(group_list_t **tab_group,int n,int arity,int M,do if(verbose_level>=INFO) - display_selection(best_selection,M,arity,*best_val); + display_selection(best_selection, M, arity, *best_val); return 0; } -int select_independent_groups_by_largest_index(group_list_t **tab_group,int n,int arity,int M,double *best_val,group_list_t **best_selection,int bound,double max_duration) -{ - int i,dec,nb_groups=0; - group_list_t **selection = NULL; - double val,duration; - CLOCK_T time1,time0; - selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*M); +int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int arity){ + int i, j, ii, jj; + int8_t **indep_mat = (int8_t **)MALLOC(sizeof(int8_t*) *n); + + for( i=0 ; ij in indep_mat[i][j] */ + for(j=0 ; jtab[ii]->id == elem2->tab[jj]->id){ + indep_mat[i][j] = 0; + goto done; + } + } + } + indep_mat[i][j] = 1; + done: ; + } + } + + + return indep_mat; +} + +int independent_groups_mat(group_list_t **selection, int selection_size, group_list_t *elem, int8_t **indep_mat) +{ + int i; + int id_elem = elem->id; + int id_select; + + + if(selection_size == 0) + return 1; + + for(i=0; i id; + /* I know that id_elem > id_select, always */ + if(indep_mat[id_elem][id_select] == 0 ) + return 0; + } + return 1; +} + + static long int x=0; + static long int y=0; + + +int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, int nb_groups, int arity, int depth, int solution_size, + double val, double *best_val, group_list_t **selection, group_list_t **best_selection, + int8_t **indep_mat, pthread_mutex_t *lock, int thread_id, int *tab_i, int start_depth){ + + + group_list_t *elem = NULL; + int nb_groups_to_find =0; + int nb_available_groups = 0; + + stack: + nb_groups_to_find = solution_size - depth; + nb_available_groups = nb_groups - i; + if( depth == solution_size ){ + if(verbose_level >= DEBUG) + display_selection(selection, solution_size, arity, val); + if( val < *best_val 
){ + pthread_mutex_lock(lock); + if(verbose_level >= INFO) + printf("\n---------%d: best_val= %f\n", thread_id, val); + *best_val = val; + for( i = 0 ; i < solution_size ; i++ ) + best_selection[i] = selection[i]; + pthread_mutex_unlock(lock); + } + if(depth>2) + goto unstack; + else + return 0; + } + + if(nb_groups_to_find > nb_available_groups){ /*if there not enough groups available*/ + if(depth>start_depth) + goto unstack; + else + return 0; + } + + + + while( i < nb_groups ){ + elem = tab_group[i]; + y++; + if(val+elem->val < *best_val){ + if(val+elem->bound[nb_groups_to_find]>*best_val){ + x++; + /* printf("\ni=%d, val=%.0f, elem->val = %.0f, elem->bound[%d] = %.0f, best_val = %.0f\n", */ + /* i,val,elem->val,nb_groups_to_find,elem->bound[nb_groups_to_find],*best_val); */ + /* exit(-1); */ + + /* printf("x=%ld y=%ld\n",x,y); */ + if(depth>start_depth) + goto unstack; + else + return 0; + } + + if(independent_groups_mat(selection, depth, elem, indep_mat)){ + if(verbose_level >= DEBUG) + printf("%d: %d\n", depth, i); + selection[depth] = elem; + val += selection[depth]->val; + tab_i[depth]=i; + depth ++; + i++; + goto stack; + unstack: + depth --; + val -= selection[depth]->val; + i=tab_i[depth]; + } + } + i++; + nb_available_groups = nb_groups - i; + nb_groups_to_find = solution_size - depth; + if(nb_groups_to_find > nb_available_groups){ /*if there not enough groups available*/ + if(depth>start_depth) + goto unstack; + else + return 0; + } + } + + if(depth>start_depth) + goto unstack; + + return 0; +} + + +group_list_t * group_dup(group_list_t *group, int nb_groups){ + group_list_t *elem = NULL; + /* tm_tree_t **tab = NULL; */ + double *bound; + size_t bound_size = nb_groups-group->id+2; + + /* tab = (tm_tree_t **)MALLOC(sizeof(tm_tree_t *)*arity); */ + /* memcpy(tab, group->tab, sizeof(tm_tree_t *)*arity); */ + + bound = (double*) MALLOC(bound_size*sizeof(double)); + memcpy(bound, group->bound, bound_size*sizeof(double)); + + elem = (group_list_t*) MALLOC(sizeof(group_list_t)); + + elem-> tab = group->tab; + elem-> val = group->val; + elem-> sum_neighbour = group->sum_neighbour; + elem-> wg = group ->wg; + elem-> id = group->id; + elem-> bound = bound; + elem-> next = NULL; + return elem; + +} + +group_list_t ** tab_group_dup(group_list_t **tab_group, int nb_groups){ + group_list_t **res; + int i; + + res = (group_list_t**)MALLOC(sizeof(group_list_t*)*nb_groups); + + for(i=0 ; inext = res[i]; + } + + return res; +} + +int8_t **indep_mat_dup(int8_t** mat, int n){ + int i; + int8_t ** res = (int8_t**)MALLOC(sizeof(int8_t*)*n); + int row_len; + /* use indep_mat[i][j] with inb_work; + int cur_work = 0; + + TIC; + + if(nb_args!=9){ + if(verbose_level>=ERROR){ + fprintf(stderr, "Id: %d: bad number of argument for function %s: %d instead of 9\n", thread_id, __FUNCTION__, nb_args); + return; + } + } + + pthread_mutex_lock(lock); + TIC; + pthread_mutex_unlock(lock); + + tab_i = (int*) MALLOC(sizeof(int)*solution_size); + selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size); + + + + while(work->tab_group){ + pthread_mutex_lock(lock); + if(!work->done){ + work->done = 1; + pthread_mutex_unlock(lock); + }else{ + pthread_mutex_unlock(lock); + work=work->next; + cur_work++; + continue; + } + + /* for(i=0;inb_groups;i++){ */ + /* printf("%d ",work->tab_group[i]); */ + /* } */ + if(verbose_level>=INFO){ + fprintf(stdout, "\r%d: %.2f%% of search space explored...", thread_id,(100.0*cur_work)/total_work); + fflush(stdout); + } + for(i=0;inb_groups;i++){ + id1 = work->tab_group[i]; 
+ for(j=i+1;jnb_groups;j++){ + id2 = work->tab_group[j]; + if(!indep_mat[id2][id1]){ + goto next_work; + } + } + } + + + val = 0; + for(i=0;inb_groups;i++){ + id = work->tab_group[i]; + selection[i] = tab_group[id]; + val += tab_group[id]->val; + } + thread_derecurs_exhaustive_search(tab_group, id+1, n, arity, work->nb_groups, solution_size, val, best_val, selection, best_selection, indep_mat, lock, thread_id, tab_i, work->nb_groups); + next_work: + work=work->next; + cur_work++; + } + + + + + + /* for( i=0 ; itab); *\/ */ + /* FREE(tab_group[i]->bound); */ + /* FREE(tab_group[i]); */ + /* } */ + /* FREE(tab_group); */ + FREE(selection); + FREE(tab_i); + /* for( i=0 ; i=INFO){ + printf("Thread %d done in %.3f!\n" , thread_id, duration); + } +} + + + +int dbl_cmp_dec(const void* x1,const void* x2) +{ + return *((double *)x1) > *((double *)x2) ? -1 : 1; +} +int dbl_cmp_inc(const void* x1,const void* x2) +{ + return *((double *)x1) < *((double *)x2) ? -1 : 1; +} + + + +double *build_bound_array(double *tab, int n){ + int i; + double *bound; + + if (n==0) + return NULL; + + bound = (double *)MALLOC(sizeof(double)*(n+2)); + qsort(tab, n, sizeof(double), dbl_cmp_inc); + + + + if(verbose_level>=DEBUG){ + printf("T(%d): ",n); + for(i = 0; itab_group = tab_group; + cur->nb_groups = size; + cur->done = 0; + cur->next = res; + return res; +} + +work_unit_t *generate_work_units(work_unit_t *cur, int i, int id, int *tab_group,int size, int id_max){ + + tab_group[i] = id; + if(i==size-1){ + return create_work_unit(cur,tab_group,size); + } + + if(id == id_max-1){ + return cur; + } + + id++; + for(;id < id_max;id++){ + cur = generate_work_units(cur,i+1,id,tab_group, size, id_max); + } + + return cur; +} + + +work_unit_t *create_tab_work(int n){ + int work_size = 4; + int i; + work_unit_t *cur,*res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t)); + int *tab_group = MALLOC(work_size*sizeof(int)); + cur = res; + cur = generate_work_units(cur,0,0,tab_group,3,n); + cur = generate_work_units(cur,0,1,tab_group,2,n); + cur = generate_work_units(cur,0,2,tab_group,2,n); + + for(i=3;itab_group; cur = cur-> next) + res->nb_work++; + + printf("nb_work= %d\n",res->nb_work); + + FREE(tab_group); + + return res; +} + + +int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int arity, int solution_size, double *best_val, + group_list_t **best_selection){ + + pthread_mutex_t lock; + int nb_threads; + work_t **works; + int i, j; + int id; + /* matrix of indepedency between groups (i.e; 2 groups are independent if they + are composed of different ids) */ + int8_t **indep_mat; + double *val_array; + double duration; + work_unit_t *work_list; + TIC; + + pthread_mutex_init(&lock, NULL); + nb_threads = get_nb_threads(); + nb_threads = 4; + works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads); + + work_list = create_tab_work(nb_groups); + + if(verbose_level>=DEBUG){ + for(i=0;itab[j]->id); + } + printf(" : %.0f\nb_groups", tab_group[i]->val); + } + } + + fflush(stderr); + + val_array = (double *)MALLOC(nb_groups*sizeof(double)); + + for( i=nb_groups-1 ; i>=0 ; i--){ + val_array[nb_groups-i-1] = tab_group[i]->val; + /* this is allocated here and therefore released here*/ + tab_group[i]->bound = build_bound_array(val_array,nb_groups-i); + + if(verbose_level>=DEBUG){ + printf("-->(%d--%d) %.0f: ", i, nb_groups-i-1, tab_group[i]->val); + for(j=1 ; jbound[j]); + } + printf("\n"); + } + } + + FREE(val_array); + + indep_mat = init_independent_group_mat(nb_groups, tab_group, arity); + + for(id=0;id= DEBUG) + 
printf("Executing %p\n", (void *)works[id]); + + submit_work( works[id], id); + } + + for(id=0;idargs); + } + + exit(-1); + + if(verbose_level>=INFO) + fprintf(stdout, "\nx=%ld, y=%ld\n",x,y); + + + for( i=0 ; ibound); + } + + FREE(indep_mat); + /* FREE(search_space); */ + FREE(works); + + if(verbose_level>=INFO) + display_selection(best_selection, solution_size, arity, *best_val); + + duration = TOC; + printf("Thread exhaustive search = %g\n",duration); + exit(-1); + return 0; +} + + +int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat) +{ + group_list_t *elem = NULL; + + + + if( d == solution_size ){ + if(verbose_level >= DEBUG) + display_selection(selection, solution_size, arity, val); + if( val < *best_val ){ + *best_val = val; + for( i = 0 ; i < solution_size ; i++ ) + best_selection[i] = selection[i]; + return 1; + } + return 0; + } + + if(solution_size-d>n-i){ /*if there not enough groups available*/ + return 0; + } + + while( i < n ){ + elem = tab[i]; + if(val+elem->val<*best_val){ + if(independent_groups_mat(selection, d, elem, indep_mat)){ + if(verbose_level >= DEBUG) + printf("%d: %d\n", d, i); + selection[d] = elem; + val += elem->val; + old_recurs_exhaustive_search(tab, i+1, n, arity, d+1, solution_size, val, best_val, selection, best_selection, indep_mat); + val -= elem->val; + } + } + i++; + } + + return 0; +} + + + +int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat, int* tab_i) +{ + group_list_t *elem = NULL; + + check: + if( d == solution_size ){ + if(verbose_level >= DEBUG) + display_selection(selection, solution_size, arity, val); + if( val < *best_val ){ + *best_val = val; + for( i = 0 ; i < solution_size ; i++ ) + best_selection[i] = selection[i]; + goto uncheck; + } + goto uncheck; + } + + if(solution_size-d>n-i){ /*if there not enough groups available*/ + if(d>1) + goto uncheck; + else + return 0; + } + + while( i < n ){ + elem = tab[i]; + if(val+elem->val<*best_val){ + if(independent_groups_mat(selection, d, elem, indep_mat)){ + if(verbose_level >= DEBUG) + printf("%d: %d\n", d, i); + selection[d] = elem; + val += selection[d]->val; + tab_i[d]=i; + d++; + i++; + goto check; + uncheck: + d--; + val -= selection[d]->val; + i=tab_i[d]; + } + } + i++; + } + + if(d>1) + goto uncheck; + + return 0; +} + + + +int exhaustive_search(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val, + group_list_t **best_selection) +{ + int i, j; + group_list_t **selection = NULL; + double val; +/* matrix of indepedency between groups (i.e; 2 groups are independent if they + are composed of different ids): lazy data structure filled only once we have + already computed if two groups are independent. 
otherwise it is initialized at + -1*/ + int8_t **indep_mat; + int *tab_i = (int*) MALLOC(sizeof(int)*solution_size); + double duration; + TIC; + + if(verbose_level>=DEBUG){ + for(i=0;itab[j]->id); + } + printf(" : %f\n", tab_group[i]->val); + } + } + + + + indep_mat = init_independent_group_mat(n, tab_group, arity); + + selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size); + for( i = 0 ; i < n ; i++ ){ + if(verbose_level>=INFO){ + fprintf(stdout, "\r%.2f%% of search space explored...", (100.0*i)/n); + fflush(stdout); + } + selection[0] = tab_group[i]; + val = tab_group[i]->val; + /* recurs_exhaustive_search(tab_group, i+1, n, arity, 1, solution_size, val, best_val, selection, best_selection, indep_mat, tab_i); */ + old_recurs_exhaustive_search(tab_group, i+1, n, arity, 1, solution_size, val, best_val, selection, best_selection, indep_mat); + } + + if(verbose_level>=INFO) + fprintf(stdout, "\n"); + + FREE(selection); + + for( i=0 ; i=INFO) + display_selection(best_selection, solution_size, arity, *best_val); + duration = TOC; + printf("Seq exhaustive search = %g\n",duration); + exit(-1); + + return 0; +} + + + +int select_independent_groups_by_largest_index(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val, group_list_t **best_selection, int bound, double max_duration) +{ + int i, dec, nb_groups=0; + group_list_t **selection = NULL; + double val, duration; + CLOCK_T time1, time0; + + selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size); CLOCK(time0); - dec = MAX(n/10000,2); + dec = MAX(n/10000, 2); for( i = n-1 ; i >= 0 ; i -= dec*dec){ selection[0] = tab_group[i]; val = tab_group[i]->val; - nb_groups += test_independent_groups(tab_group,i+1,n,arity,1,M,val,best_val,selection,best_selection); + nb_groups += test_independent_groups(tab_group, i+1, n, arity, 1, solution_size, val, best_val, selection, best_selection); if(verbose_level>=DEBUG) - printf("%d:%d\n",i,nb_groups); + printf("%d:%d\n", i, nb_groups); if(nb_groups >= bound){ FREE(selection); @@ -712,7 +1386,7 @@ int select_independent_groups_by_largest_index(group_list_t **tab_group,int n,i } if((!(i%5)) && (max_duration>0)){ CLOCK(time1); - duration=CLOCK_DIFF(time1,time0); + duration=CLOCK_DIFF(time1, time0); if(duration>max_duration){ FREE(selection); return 1; @@ -721,65 +1395,68 @@ int select_independent_groups_by_largest_index(group_list_t **tab_group,int n,i } FREE(selection); + + if(verbose_level>=INFO) + display_selection(best_selection, solution_size, arity, *best_val); + return 0; } -void list_to_tab(group_list_t *list,group_list_t **tab,int n) +void list_to_tab(group_list_t *list, group_list_t **tab, int n) { int i; for( i = 0 ; i < n ; i++ ){ if(!list){ if(verbose_level>=CRITICAL) - fprintf(stderr,"Error not enough elements. Only %d on %d\n",i,n); + fprintf(stderr, "Error not enough elements. 
Only %d on %d\n", i, n); exit(-1); } tab[n-i-1] = list; + tab[n-i-1]->id = n-i-1; list = list->next; } if(list){ - if(verbose_level>=DEBUG) - fprintf(stderr,"Error too many elements\n"); + if(verbose_level>=CRITICAL) + fprintf(stderr, "Error too many elements\n"); exit(-1); } } -void display_tab_group(group_list_t **tab, int n,int arity) +void display_tab_group(group_list_t **tab, int n, int arity) { - int i,j; + int i, j; if(verbose_leveltab[j]->id); - printf(": %.2f %.2f\n",tab[i]->val,tab[i]->wg); + printf("%d ", tab[i]->tab[j]->id); + printf(": %.2f %.2f\n", tab[i]->val, tab[i]->wg); } } -int independent_tab(tree_t **tab1,tree_t **tab2,int n) +int independent_tab(tm_tree_t **tab1, tm_tree_t **tab2, int arity) { - int i = 0,j = 0; - - while( (iid == tab2[j]->id) - return 0; - else if(tab1[i]->id > tab2[j]->id) - j++; - else - i++; + int ii, jj; + for( ii = 0 ; ii < arity ; ii++ ){ + for( jj = 0 ; jj < arity ; jj++ ){ + if(tab1[ii]->id == tab2[jj]->id){ + return 0; + } + } } return 1; } -void compute_weighted_degree(group_list_t **tab, int n,int arity) +void compute_weighted_degree(group_list_t **tab, int n, int arity) { - int i,j; + int i, j; for( i = 0 ; i < n ; i++) tab[i]->sum_neighbour = 0; for( i = 0 ; i < n ; i++ ){ - /*printf("%d/%d=%f%%\n",i,n,(100.0*i)/n);*/ + /*printf("%d/%d=%f%%\n", i, n, (100.0*i)/n);*/ for( j = i+1 ; j < n ; j++ ) - /*if(!independent_groups(&tab[i],1,tab[j],arity)){*/ - if(!independent_tab(tab[i]->tab,tab[j]->tab,arity)){ + /*if(!independent_groups(&tab[i], 1, tab[j], arity)){*/ + if(!independent_tab(tab[i]->tab, tab[j]->tab, arity)){ tab[i]->sum_neighbour += tab[j]->val; tab[j]->sum_neighbour += tab[i]->val; } @@ -787,56 +1464,7 @@ void compute_weighted_degree(group_list_t **tab, int n,int arity) tab[i]->wg = tab[i]->sum_neighbour/tab[i]->val; if(tab[i]->sum_neighbour == 0) tab[i]->wg = 0; - /*printf("%d:%f/%f=%f\n",i,tab[i]->sum_neighbour,tab[i]->val,tab[i]->wg);*/ - } -} - -/* - Very slow: explore all possibilities - aff_mat : the affiity matrix at the considered level (used to evaluate a grouping) - tab_node: array of the node to group - parent: node to which attached the computed group - id: current considered node of tab_node - arity: number of children of parent (i.e.) 
size of the group to compute - best_val: current value of th grouping - cur_group: current grouping - */ -void group(affinity_mat_t *aff_mat,tree_t *tab_node,tree_t *parent,int id,int arity, int n,double *best_val,tree_t **cur_group) -{ - - int N = aff_mat->order; - double val; - int i; - - /*if we have found enough noide in the group*/ - if( n == arity){ - /* evaluate this group*/ - val = eval_grouping(aff_mat,cur_group,arity); - /* If we improve compared to previous grouping: uodate the children of parent accordingly */ - if( val < *best_val ){ - *best_val = val; - for( i = 0 ; i < arity ; i++ ) - parent->child[i] = cur_group[i]; - parent->arity = arity; - } - return; - } - - /* - If we need more node in the group - Continue to explore avilable nodes - */ - for( i = id+1 ; i < N ; i++ ){ - /* If this node is allready in a group: skip it*/ - if(tab_node[i].parent) - continue; - /*Otherwise, add it to the group at place n*/ - cur_group[n] = &tab_node[i]; - /* - printf("%d<-%d\n",n,i); - recursively add the next element to this group - */ - group(aff_mat,tab_node,parent,i,arity,n+1,best_val,cur_group); + /*printf("%d:%f/%f=%f\n", i, tab[i]->sum_neighbour, tab[i]->val, tab[i]->wg);*/ } } @@ -848,22 +1476,24 @@ void group(affinity_mat_t *aff_mat,tree_t *tab_node,tree_t *parent,int id,int a arity: number of children of parent (i.e.) size of the group to compute best_val: current value of th grouping cur_group: current grouping - N: size of tab and tab_node. i.e. number of nodes at the considered level + mat_order: size of tab and tab_node. i.e. number of nodes at the considered level */ -void fast_group(affinity_mat_t *aff_mat,tree_t *tab_node,tree_t *parent,int id,int arity, int n, - double *best_val,tree_t **cur_group, int *nb_groups,int max_groups) +void fast_group(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *parent, int id, int arity, int n, + double *best_val, tm_tree_t **cur_group, int *nb_groups, int max_groups) { double val; int i; - int N = aff_mat->order; + int mat_order = aff_mat->order; - /*printf("Max groups=%d\n",max_groups);*/ + /* printf("Max groups=%d, nb_groups= %d, n= %d, arity = %d\n", max_groups, *nb_groups, n, arity); */ /*if we have found enough node in the group*/ if( n == arity ){ (*nb_groups)++; /*evaluate this group*/ - val = eval_grouping(aff_mat,cur_group,arity); + val = eval_grouping(aff_mat, cur_group, arity); + if(verbose_level>=DEBUG) + printf("Grouping %d: %f\n", *nb_groups, val); /* If we improve compared to previous grouping: uodate the children of parent accordingly*/ if( val < *best_val ){ *best_val = val; @@ -879,59 +1509,100 @@ void fast_group(affinity_mat_t *aff_mat,tree_t *tab_node,tree_t *parent,int id, If we need more node in the group Continue to explore avilable nodes */ - for( i = id+1 ; i < N ; i++ ){ + for( i = id+1 ; i < mat_order ; i++ ){ /* If this node is allready in a group: skip it*/ if(tab_node[i].parent) continue; /*Otherwise, add it to the group at place n */ cur_group[n] = &tab_node[i]; /* - printf("%d<-%d %d/%d\n",n,i,*nb_groups,max_groups); + printf("%d<-%d %d/%d\n", n, i, *nb_groups, max_groups); exit(-1); recursively add the next element to this group */ - fast_group(aff_mat,tab_node,parent,i,arity,n+1,best_val,cur_group,nb_groups,max_groups); + fast_group(aff_mat, tab_node, parent, i, arity, n+1, best_val, cur_group, nb_groups, max_groups); if(*nb_groups > max_groups) return; } } -void fast_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node, int arity, int M,long int k) -{ - tree_t **cur_group = 
NULL; - int l,i,nb_groups; - double best_val,val=0; - cur_group = (tree_t**)MALLOC(sizeof(tree_t*)*arity); - for( l = 0 ; l < M ; l++ ){ + + +double fast_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size, double nb_groups) +{ + tm_tree_t **cur_group = NULL; + int l, i, nb_done; + double best_val, val=0; + + cur_group = (tm_tree_t**)MALLOC(sizeof(tm_tree_t*)*arity); + for( l = 0 ; l < solution_size ; l++ ){ best_val = DBL_MAX; - nb_groups = 0; - /*printf("k%d/%d, k=%ld\n",l,M,k);*/ + nb_done = 0; + /*printf("nb_groups%d/%d, nb_groups=%ld\n", l, M, nb_groups);*/ /* select the best greedy grouping among the 10 first one*/ - /*fast_group(tab,tab_node,&new_tab_node[l],-1,arity,0,&best_val,cur_group,N,&nb_groups,MAX(2,(int)(50-log2(k))-M/10));*/ - fast_group(aff_mat,tab_node,&new_tab_node[l],-1,arity,0,&best_val,cur_group,&nb_groups,MAX(1,(int)(50-CmiLog2(k))-M/10)); + /*fast_group(tab, tab_node, &new_tab_node[l], -1, arity, 0, &best_val, cur_group, mat_order, &nb_done, MAX(2, (int)(50-log2(nb_groups))-M/10));*/ + fast_group(aff_mat, tab_node, &new_tab_node[l], -1, arity, 0, &best_val, cur_group, &nb_done, MAX(10, (int)(50-CmiLog2(nb_groups))-solution_size/10)); val += best_val; for( i = 0 ; i < new_tab_node[l].arity ; i++ ) new_tab_node[l].child[i]->parent=&new_tab_node[l]; - update_val(aff_mat,&new_tab_node[l]); + update_val(aff_mat, &new_tab_node[l]); + if(new_tab_node[l].val != best_val){ + if(verbose_level>=CRITICAL) + printf("Error: best_val = %f, new_tab_node[%d].val = %f\n", best_val, l, new_tab_node[l].val); + exit(-1); + } } FREE(cur_group); - if(verbose_level>=INFO) - printf("val=%f\n",val); - /*exit(-1);*/ + return val; +} - if(verbose_level>=INFO) - display_grouping(new_tab_node,M,arity,val); +double k_partition_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size) { + int *partition = NULL; + int n = aff_mat->order; + com_mat_t com_mat; + int i,j,k; + double val = 0; + + com_mat.comm = aff_mat->mat; + com_mat.n = n; + + if(verbose_level>=DEBUG) + printf("K-Partitionning: n=%d, solution_size=%d, arity=%d\n",n, solution_size,arity); + + partition = kpartition(solution_size, &com_mat, n, NULL, 0); + + /* new_tab_node[i]->child[j] = &tab_node[k] where 0<=i< solution size, 0<=jparent = &new_tab_node[i]; + } + + for( i = 0 ; i < solution_size ; i++ ){ + new_tab_node[i].arity = arity; + update_val(aff_mat, &new_tab_node[i]); + val += new_tab_node[i].val; + } + + FREE(j_tab); + FREE(partition); + + return val; } - -int adjacency_asc(const void* x1,const void* x2) +int adjacency_asc(const void* x1, const void* x2) { - adjacency_t *e1 = NULL,*e2 = NULL; + adjacency_t *e1 = NULL, *e2 = NULL; e1 = ((adjacency_t*)x1); e2 = ((adjacency_t*)x2); @@ -939,9 +1610,9 @@ int adjacency_asc(const void* x1,const void* x2) return (e1->val < e2->val) ? - 1 : 1; } -int adjacency_dsc(const void* x1,const void* x2) +int adjacency_dsc(const void* x1, const void* x2) { - adjacency_t *e1 = NULL,*e2 = NULL; + adjacency_t *e1 = NULL, *e2 = NULL; e1 = ((adjacency_t*)x1); e2 = ((adjacency_t*)x2); @@ -950,99 +1621,100 @@ int adjacency_dsc(const void* x1,const void* x2) return (e1->val > e2->val) ? 
-1 : 1; } -void super_fast_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node, int arity, int M) +void super_fast_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size) { - double val = 0,duration; - adjacency_t *graph; - int i,j,e,l,nb_groups; - int N = aff_mat->order; - double **mat = aff_mat->mat; + double val = 0, duration; + adjacency_t *graph; + int i, j, e, l, nb_groups; + int mat_order = aff_mat->order; + double **mat = aff_mat->mat; - assert( 2 == arity); + assert( 2 == arity); - TIC; - graph = (adjacency_t*)MALLOC(sizeof(adjacency_t)*((N*N-N)/2)); - e = 0; - for( i = 0 ; i < N ; i++ ) - for( j = i+1 ; j < N ; j++){ - graph[e].i = i; - graph[e].j = j; - graph[e].val = mat[i][j]; - e++; - } - - duration = TOC; - if(verbose_level>=DEBUG) - printf("linearization=%fs\n",duration); - - - assert( e == (N*N-N)/2); - TIC; - qsort(graph,e,sizeof(adjacency_t),adjacency_dsc); - duration = TOC; - if(verbose_level>=DEBUG) - printf("sorting=%fs\n",duration); - - TIC; - - TIC; - l = 0; - nb_groups = 0; - for( i = 0 ; (i < e) && (l < M) ; i++ ) - if(try_add_edge(tab_node,&new_tab_node[l],arity,graph[i].i,graph[i].j,&nb_groups)) - l++; - - for( l = 0 ; l < M ; l++ ){ - update_val(aff_mat,&new_tab_node[l]); - val += new_tab_node[l].val; + TIC; + graph = (adjacency_t*)MALLOC(sizeof(adjacency_t)*((mat_order*mat_order-mat_order)/2)); + e = 0; + for( i = 0 ; i < mat_order ; i++ ) + for( j = i+1 ; j < mat_order ; j++){ + graph[e].i = i; + graph[e].j = j; + graph[e].val = mat[i][j]; + e++; } - duration = TOC; - if(verbose_level>=DEBUG) - printf("Grouping=%fs\n",duration); + duration = TOC; + if(verbose_level>=DEBUG) + printf("linearization=%fs\n", duration); - if(verbose_level>=DEBUG) - printf("val=%f\n",val); + assert( e == (mat_order*mat_order-mat_order)/2); + TIC; + qsort(graph, e, sizeof(adjacency_t), adjacency_dsc); + duration = TOC; + if(verbose_level>=DEBUG) + printf("sorting=%fs\n", duration); + + TIC; + +TIC; + l = 0; + nb_groups = 0; + for( i = 0 ; (i < e) && (l < solution_size) ; i++ ) + if(try_add_edge(tab_node, &new_tab_node[l], arity, graph[i].i, graph[i].j, &nb_groups)) + l++; + + for( l = 0 ; l < solution_size ; l++ ){ + update_val(aff_mat, &new_tab_node[l]); + val += new_tab_node[l].val; + } + + duration = TOC; + if(verbose_level>=DEBUG) + printf("Grouping=%fs\n", duration); - display_grouping(new_tab_node,M,arity,val); - FREE(graph); + if(verbose_level>=DEBUG) + printf("val=%f\n", val); + + + display_grouping(new_tab_node, solution_size, arity, val); + + FREE(graph); } -affinity_mat_t *build_cost_matrix(affinity_mat_t *aff_mat, double* obj_weight, double comm_speed) +tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *aff_mat, double* obj_weight, double comm_speed) { double **mat = NULL, *sum_row; double **old_mat; double avg; - int i,j,N; + int i, j, mat_order; if(!obj_weight) return aff_mat; - N = aff_mat->order; + mat_order = aff_mat->order; old_mat = aff_mat -> mat; - mat = (double**)MALLOC(N*sizeof(double*)); - for( i = 0 ; i < N ; i++ ) - mat[i] = (double*)MALLOC(N*sizeof(double)); + mat = (double**)MALLOC(mat_order*sizeof(double*)); + for( i = 0 ; i < mat_order ; i++ ) + mat[i] = (double*)MALLOC(mat_order*sizeof(double)); - sum_row = (double*)CALLOC(N,sizeof(double)); + sum_row = (double*)CALLOC(mat_order, sizeof(double)); avg = 0; - for( i = 0 ; i < N ; i++ ) + for( i = 0 ; i < mat_order ; i++ ) avg += obj_weight[i]; - avg /= N; + avg /= mat_order; if(verbose_level>=DEBUG) - 
printf("avg=%f\n",avg); + printf("avg=%f\n", avg); - for( i = 0 ; i < N ; i++ ) - for( j = 0 ; j < N ; j++){ + for( i = 0 ; i < mat_order ; i++ ) + for( j = 0 ; j < mat_order ; j++){ if( i == j ) mat[i][j] = 0; else{ @@ -1050,7 +1722,7 @@ affinity_mat_t *build_cost_matrix(affinity_mat_t *aff_mat, double* obj_weight, d sum_row[i] += mat[i][j]; } } - return new_affinity_mat(mat,sum_row,N); + return new_affinity_mat(mat, sum_row, mat_order); } @@ -1060,200 +1732,229 @@ affinity_mat_t *build_cost_matrix(affinity_mat_t *aff_mat, double* obj_weight, d tab_node: array of the node to group new_tab_node: array of nodes at the next level (the parents of the node in tab_node once the grouping will be done). arity: number of children of parent (i.e.) size of the group to compute - M: size of new_tab_node (i.e) the number of parents + solution_size: size of new_tab_node (i.e) the number of parents */ -void group_nodes(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node, int arity, int M, double* obj_weigth, double comm_speed) -{ +void group_nodes(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, + int arity, int solution_size, double* obj_weigth, double comm_speed){ + + /* + mat_order: size of tab and tab_node. i.e. number of nodes at the considered level + Hence we have: M*arity=mat_order + */ + int mat_order = aff_mat -> order; + tm_tree_t **cur_group = NULL; + int j, l; + unsigned long int list_size; + unsigned long int i; + group_list_t list, **best_selection = NULL, **tab_group = NULL; + double best_val, last_best; + int timeout; + tm_affinity_mat_t *cost_mat = NULL; /*cost matrix taking into account the communiocation cost but also the weight of the object*/ + double duration; + double val; + double nbg; + TIC; + + + + /* might return aff_mat (if obj_weight==NULL): do not free this tab in this case*/ + cost_mat = build_cost_matrix(aff_mat, obj_weigth, comm_speed); + + nbg = choose(mat_order, arity); + + if(verbose_level>=INFO) + printf("Number of possible groups:%.0lf\n", nbg); + + /* Todo: check if the depth is a criteria for speeding up the computation*/ + /* if(nb_groups>30000||depth>5){*/ + if( nbg > 30000 ){ - /* - N: size of tab and tab_node. i.e. 
number of nodes at the considered level - Hence we have: M*arity=N - */ - int N = aff_mat -> order; - tree_t **cur_group = NULL; - int j,l; - unsigned int n; - unsigned long int k; - group_list_t list,**best_selection = NULL,**tab_group = NULL; - double best_val,last_best; - int timeout; - affinity_mat_t *cost_mat = NULL; /*cost matrix taking into account the communiocation cost but also the weight of the object*/ double duration; TIC; - - /* might return aff_mat (if obj_weight==NULL): do not FREE this tab in this case*/ - cost_mat = build_cost_matrix(aff_mat,obj_weigth,comm_speed); - - k = choose(N,arity); - if(verbose_level>=INFO) - printf("Number of groups:%ld\n",k); - - /* Todo: check if the depth is a criteria for speeding up the computation*/ - /* if(k>30000||depth>5){*/ - if( k > 30000 ) { - - double duration; - - TIC; - if( arity <= 2 ) { - /*super_fast_grouping(tab,tab_node,new_tab_node,arity,N,M,k);*/ - if(verbose_level >= INFO ) - printf("Bucket Grouping...\n"); - bucket_grouping(cost_mat,tab_node,new_tab_node,arity,M); - } else { - if(verbose_level >= INFO) - printf("Fast Grouping...\n"); - fast_grouping(cost_mat,tab_node,new_tab_node,arity,M,k); - } - - duration = TOC; - if(verbose_level>=INFO) - printf("Fast grouping duration=%f\n",duration); - - if(verbose_level>=DEBUG) - display_grouping(new_tab_node,M,arity,-1); - - } else { - if(verbose_level>=INFO) - printf("Grouping nodes...\n"); - list.next = NULL; - list.val = 0; /*number of elements in the list*/ - cur_group = (tree_t**)MALLOC(sizeof(tree_t*)*arity); - best_selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*M); - - list_all_possible_groups(cost_mat,tab_node,0,arity,0,cur_group,&list); - n = (int)list.val; - assert( n == k ); - tab_group = (group_list_t**)MALLOC(sizeof(group_list_t*)*n); - list_to_tab(list.next,tab_group,n); - if(verbose_level>=INFO) - printf("List to tab done\n"); - - best_val = DBL_MAX; - - /* perform the pack mapping fist*/ - /* timeout = select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,1,0.1); */ - timeout = select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,1,100); - if((verbose_level>=INFO) && timeout) - printf("Packed mapping timeout!\n"); - /* give this mapping an exra credit (in general MPI application are made such that - neighbour process communicates more than distant ones) */ - best_val /= 1.001; - /* best_val *= 1.001; */ - if(verbose_level>=INFO) - printf("Packing computed\n"); - - /* perform a mapping trying to use group that cost less first*/ - qsort(tab_group,n,sizeof(group_list_t*),group_list_asc); - last_best = best_val; - timeout = select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,10,0.1); - /* timeout = select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,n,0); */ - if(verbose_level>=INFO){ - if(timeout) { - printf("Cost less first timeout!\n"); - } else if(last_best>best_val) { - printf("Cost less first Impoved solution\n"); - } - printf("----\n"); - } - /* perform a mapping trying to minimize the use of groups that cost a lot */ - qsort(tab_group,n,sizeof(group_list_t*),group_list_dsc); - last_best=best_val; - timeout=select_independent_groups_by_largest_index(tab_group,n,arity,M,&best_val,best_selection,10,0.1); - if(verbose_level>=DEBUG) { - if(timeout) - printf("Cost most last timeout!\n"); - else if(last_best>best_val) - printf("Cost most last impoved solution\n"); - } - if( n < 10000 ){ - /* perform a mapping in the weighted degree order */ - - - if(verbose_level>=INFO) - 
printf("----WG----\n"); - - compute_weighted_degree(tab_group,n,arity); - - if(verbose_level>=INFO) - printf("Weigted degree computed\n"); - - qsort(tab_group,n,sizeof(group_list_t*),weighted_degree_dsc); - /* display_tab_group(tab_group,n,arity);*/ - last_best = best_val; - timeout = select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,10,0.1); - /* timeout = select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,n,0); */ - - if(verbose_level>=DEBUG){ - if(timeout) - printf("WG timeout!\n"); - else if(last_best>best_val) - printf("WG impoved solution\n"); - } - } - - qsort(best_selection,M,sizeof(group_list_t*),group_list_id); - - for( l = 0 ; l < M ; l++ ){ - for( j = 0 ; j < arity ; j++ ){ - new_tab_node[l].child[j] = best_selection[l]->tab[j]; - new_tab_node[l].child[j]->parent = &new_tab_node[l]; - } - new_tab_node[l].arity = arity; - - /* printf("arity=%d\n",new_tab_node[l].arity); */ - update_val(cost_mat,&new_tab_node[l]); - } - - delete_group_list((&list)->next); - FREE(best_selection); - FREE(tab_group); - FREE(cur_group); - } - - if(cost_mat != aff_mat){ - FREE_tab_double(cost_mat->mat,N); - FREE(cost_mat->sum_row); - FREE(cost_mat); + if( arity <= 2 ){ + /*super_fast_grouping(tab, tab_node, new_tab_node, arity, mat_order, solution_size, k);*/ + if(verbose_level >= INFO ) + printf("Bucket Grouping...\n"); + val = bucket_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size); + }else if( arity <= 5){ + if(verbose_level >= INFO) + printf("Fast Grouping...\n"); + val = fast_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size, nbg); + } else{ + if(verbose_level >= INFO) + printf("K-partition Grouping...\n"); + val = k_partition_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size); } duration = TOC; + if(verbose_level >= INFO) + printf("Fast grouping duration=%f\n", duration); + if(verbose_level >= INFO) + display_grouping(new_tab_node, solution_size, arity, val); + + }else{ + unsigned long int nb_groups = (unsigned long int) nbg; + if(verbose_level >= INFO) + printf("Grouping nodes...\n"); + list.next = NULL; + list.val = 0; /*number of elements in the list*/ + cur_group = (tm_tree_t**)MALLOC(sizeof(tm_tree_t*)*arity); + best_selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size); + + list_all_possible_groups(cost_mat, tab_node, 0, arity, 0, cur_group, &list); + list_size = (int)list.val; + assert( list_size == nb_groups); + tab_group = (group_list_t**)MALLOC(sizeof(group_list_t*)*nb_groups); + list_to_tab(list.next, tab_group, nb_groups); if(verbose_level>=INFO) - display_grouping(new_tab_node,M,arity,-1); + printf("List to tab done\n"); + best_val = DBL_MAX; + /* perform the pack mapping fist*/ + /* timeout = select_independent_groups(tab_group, n, arity, M, &best_val, best_selection, 1, 0.1); */ + timeout = select_independent_groups(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 1, 100); if(verbose_level>=INFO) - printf("Grouping done in %.4fs!\n",duration); + if(timeout) + printf("Packed mapping timeout!\n"); + /* give this mapping an exra credit (in general MPI application are made such that + neighbour process communicates more than distant ones) */ + best_val /= 1.001; + /* best_val *= 1.001; */ + if(verbose_level>=INFO) + printf("Packing computed\n"); + + + + /* perform a mapping trying to use group that cost less first*/ + qsort(tab_group, nb_groups, sizeof(group_list_t*), group_list_asc); + last_best = best_val; + timeout = select_independent_groups(tab_group, 
nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1); + /* timeout = select_independent_groups(tab_group, n, arity, solution_size, &best_val, best_selection, n, 0); */ + if(verbose_level>=INFO){ + if(timeout){ + printf("Cost less first timeout!\n"); + } + if(last_best>best_val){ + printf("Cost less first Impoved solution\n"); + } + } + /* perform a mapping trying to minimize the use of groups that cost a lot */ + qsort(tab_group, nb_groups, sizeof(group_list_t*), group_list_dsc); + last_best=best_val; + timeout=select_independent_groups_by_largest_index(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1); + if(verbose_level>=INFO){ + if(timeout) + printf("Cost most last timeout!\n"); + if(last_best>best_val) + printf("Cost most last impoved solution\n"); + } + if( nb_groups < 1000000 ){ + /* perform a mapping in the weighted degree order */ + + + if(verbose_level>=INFO) + printf("----WG----\n"); + + + compute_weighted_degree(tab_group, nb_groups, arity); + + if(verbose_level>=INFO) + printf("Weigted degree computed\n"); + + qsort(tab_group, nb_groups, sizeof(group_list_t*), weighted_degree_dsc); + + for( i=0 ; iid = i; + + /* display_tab_group(tab_group, n, arity);*/ + last_best = best_val; + timeout = select_independent_groups(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1); + /* timeout = select_independent_groups(tab_group, n, arity, solution_size, &best_val, best_selection, n, 0); */ + + if(verbose_level>=INFO){ + if(timeout) + printf("WG timeout!\n"); + if(last_best>best_val) + printf("WG impoved solution\n"); + } + } + + if(tm_get_exhaustive_search_flag()){ + if(verbose_level>=INFO) + printf("Running exhaustive search on %ld groups, please wait...\n",nb_groups); + + last_best = best_val; + thread_exhaustive_search(tab_group, nb_groups, arity, solution_size, &best_val, best_selection); + /* exhaustive_search(tab_group, nb_groups, arity, solution_size, &best_val, best_selection); */ + if(verbose_level>=INFO){ + if(last_best>best_val){ + printf("Exhaustive search improved solution by: %.3f\n",(last_best-best_val)/last_best); + } else { + printf("Exhaustive search did not improved solution\n"); + } + } + } + + /* Reorder solution and apply it to new_tab_node: returned array */ + qsort(best_selection, solution_size, sizeof(group_list_t*), group_list_id); + + for( l = 0 ; l < solution_size ; l++ ){ + for( j = 0 ; j < arity ; j++ ){ + new_tab_node[l].child[j] = best_selection[l]->tab[j]; + new_tab_node[l].child[j]->parent = &new_tab_node[l]; + } + new_tab_node[l].arity = arity; + + /* printf("arity=%d\n", new_tab_node[l].arity); */ + update_val(cost_mat, &new_tab_node[l]); + } + + delete_group_list((&list)->next); + FREE(best_selection); + FREE(tab_group); + FREE(cur_group); + } + + if(cost_mat != aff_mat){ + free_affinity_mat(cost_mat); + } + + duration = TOC; + + + if(verbose_level>=INFO) + printf("Grouping done in %.4fs!\n", duration); } -void complete_aff_mat(affinity_mat_t **aff_mat ,int N, int K) +void complete_aff_mat(tm_affinity_mat_t **aff_mat , int mat_order, int K) { - double **old_mat = NULL,**new_mat = NULL; double *sum_row; - int M,i; + double **old_mat = NULL, **new_mat = NULL; double *sum_row; + int M, i; old_mat = (*aff_mat) -> mat; - M = N+K; + M = mat_order+K; new_mat = (double**)MALLOC(M*sizeof(double*)); for( i = 0 ; i < M ; i++ ) - new_mat[i] = (double*)CALLOC((M),sizeof(double)); + new_mat[i] = (double*)CALLOC((M), sizeof(double)); - sum_row = (double*) CALLOC(M,sizeof(double)); + sum_row = 
(double*) CALLOC(M, sizeof(double)); - for( i = 0 ; i < N ; i++ ){ - memcpy(new_mat[i],old_mat[i],N*sizeof(double)); + for( i = 0 ; i < mat_order ; i++ ){ + memcpy(new_mat[i], old_mat[i], mat_order*sizeof(double)); sum_row[i] = (*aff_mat)->sum_row[i]; } - *aff_mat = new_affinity_mat(new_mat,sum_row,M); + *aff_mat = new_affinity_mat(new_mat, sum_row, M); } -void complete_obj_weight(double **tab,int N, int K) +void complete_obj_weight(double **tab, int mat_order, int K) { - double *old_tab = NULL,*new_tab = NULL,avg; - int M,i; + double *old_tab = NULL, *new_tab = NULL, avg; + int M, i; old_tab = *tab; @@ -1261,63 +1962,62 @@ void complete_obj_weight(double **tab,int N, int K) return; avg = 0; - for( i = 0 ; i < N ; i++ ) + for( i = 0 ; i < mat_order ; i++ ) avg += old_tab[i]; - avg /= N; + avg /= mat_order; - M = N+K; + M = mat_order+K; new_tab = (double*)MALLOC(M*sizeof(double)); *tab = new_tab; for( i = 0 ; i < M ; i++ ) - if(i < N) + if(i < mat_order) new_tab[i] = old_tab[i]; else new_tab[i] = avg; } -void create_dumb_tree(tree_t *node,int depth,tm_topology_t *topology) +void create_dumb_tree(tm_tree_t *node, int depth, tm_topology_t *topology) { - tree_t **list_child = NULL; - int arity,i; + tm_tree_t **list_child = NULL; + int arity, i; if( depth == topology->nb_levels-1) { - set_node(node,NULL,0,NULL,-1,0,NULL,depth); + set_node(node, NULL, 0, NULL, -1, 0, NULL, depth); return; } arity = topology->arity[depth]; assert(arity>0); - list_child = (tree_t**)CALLOC(arity,sizeof(tree_t*)); + list_child = (tm_tree_t**)CALLOC(arity, sizeof(tm_tree_t*)); for( i = 0 ; i < arity ; i++ ){ - list_child[i] = (tree_t*)MALLOC(sizeof(tree_t)); - create_dumb_tree(list_child[i],depth+1,topology); + list_child[i] = (tm_tree_t*)MALLOC(sizeof(tm_tree_t)); + create_dumb_tree(list_child[i], depth+1, topology); list_child[i]->parent = node; list_child[i]->dumb = 1; } - set_node(node,list_child,arity,NULL,-1,0,list_child[0], depth); + set_node(node, list_child, arity, NULL, -1, 0, list_child[0], depth); } - -void complete_tab_node(tree_t **tab,int N, int K,int depth,tm_topology_t *topology) +void complete_tab_node(tm_tree_t **tab, int mat_order, int K, int depth, tm_topology_t *topology) { - tree_t *old_tab = NULL,*new_tab = NULL; - int M,i; + tm_tree_t *old_tab = NULL, *new_tab = NULL; + int M, i; if( K == 0 ) return; old_tab = *tab; - M = N+K; - new_tab = (tree_t*)MALLOC(M*sizeof(tree_t)); + M = mat_order+K; + new_tab = (tm_tree_t*)MALLOC(M*sizeof(tm_tree_t)); *tab = new_tab; for( i = 0 ; i < M ; i++ ) - if(i < N) - clone_tree(&new_tab[i],&old_tab[i]); + if(i < mat_order) + clone_tree(&new_tab[i], &old_tab[i]); else{ - create_dumb_tree(&new_tab[i],depth,topology); + create_dumb_tree(&new_tab[i], depth, topology); new_tab[i].id = i; } @@ -1325,11 +2025,11 @@ void complete_tab_node(tree_t **tab,int N, int K,int depth,tm_topology_t *topolo FREE(old_tab); } -void set_deb_tab_child(tree_t *tree, tree_t *child,int depth) +void set_deb_tab_child(tm_tree_t *tree, tm_tree_t *child, int depth) { - /* printf("depth=%d\t%p\t%p\n",depth,child,tree);*/ + /* printf("depth=%d\t%p\t%p\n", depth, child, tree);*/ if( depth > 0 ) - set_deb_tab_child(tree->tab_child,child,depth-1); + set_deb_tab_child(tree->tab_child, child, depth-1); else tree->tab_child=child; } @@ -1346,63 +2046,63 @@ depth: current depth of the algorithm toplogy: description of the hardware topology. 
constraints: set of constraints: core ids where to bind the processes */ -tree_t *build_level_topology(tree_t *tab_node, affinity_mat_t *aff_mat,int arity,int depth,tm_topology_t *topology, +tm_tree_t *build_level_topology(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int arity, int depth, tm_topology_t *topology, double *obj_weight, double *comm_speed) { - /* N: number of nodes. Order of com_mat, size of obj_weight */ - int N=aff_mat->order ; - int i,K=0,M; /*M = N/Arity: number the groups*/ - tree_t *new_tab_node = NULL; /*array of node for this level (of size M): there will be linked to the nodes of tab_nodes*/ - affinity_mat_t * new_aff_mat= NULL; /*New communication matrix (after grouyping nodes together)*/ - tree_t *res = NULL; /*resulting tree*/ + /* mat_order: number of nodes. Order of com_mat, size of obj_weight */ + int mat_order=aff_mat->order ; + int i, K=0, M; /*M = mat_order/Arity: number the groups*/ + tm_tree_t *new_tab_node = NULL; /*array of node for this level (of size M): there will be linked to the nodes of tab_nodes*/ + tm_affinity_mat_t * new_aff_mat= NULL; /*New communication matrix (after grouyping nodes together)*/ + tm_tree_t *res = NULL; /*resulting tree*/ int completed = 0; double speed; /* communication speed at this level*/ double *new_obj_weight = NULL; double duration; if( 0 == depth ){ - if((1 == N) && (0 == depth)) + if((1 == mat_order) && (0 == depth)) return &tab_node[0]; else { if(verbose_level >= CRITICAL) - fprintf(stderr,"Error: matrix size: %d and depth:%d (should be 1 and -1 respectively)\n",N,depth); + fprintf(stderr, "Error: matrix size: %d and depth:%d (should be 1 and -1 respectively)\n", mat_order, depth); exit(-1); } } /* If the number of nodes does not divide the arity: we add K nodes */ - if( N%arity != 0 ){ + if( mat_order%arity != 0 ){ TIC; - K = arity*((N/arity)+1)-N; - /*printf("****N=%d arity=%d K=%d\n",N,arity,K); */ - /*display_tab(tab,N);*/ + K = arity*((mat_order/arity)+1)-mat_order; + /*printf("****mat_order=%d arity=%d K=%d\n", mat_order, arity, K); */ + /*display_tab(tab, mat_order);*/ /* add K rows and columns to comm_matrix*/ - complete_aff_mat(&aff_mat,N,K); + complete_aff_mat(&aff_mat, mat_order, K); /* add K element to the object weight*/ - complete_obj_weight(&obj_weight,N,K); - /*display_tab(tab,N+K);*/ + complete_obj_weight(&obj_weight, mat_order, K); + /*display_tab(tab, mat_order+K);*/ /* add a dumb tree to the K new "virtual nodes"*/ - complete_tab_node(&tab_node,N,K,depth,topology); + complete_tab_node(&tab_node, mat_order, K, depth, topology); completed = 1; /*flag this addition*/ - N += K; /*increase the number of nodes accordingly*/ + mat_order += K; /*increase the number of nodes accordingly*/ duration = TOC; if(verbose_level >= INFO) - fprintf(stderr,"Completing matrix duration= %fs\n ", duration); - } /*display_tab(tab,N);*/ + printf("Completing matrix duration= %fs\n ", duration); + } /*display_tab(tab, mat_order);*/ - M = N/arity; + M = mat_order/arity; if(verbose_level >= INFO) - printf("Depth=%d\tnb_nodes=%d\tnb_groups=%d\tsize of groups(arity)=%d\n",depth,N,M,arity); + printf("Depth=%d\tnb_nodes=%d\tnb_groups=%d\tsize of groups(arity)=%d\n", depth, mat_order, M, arity); TIC; /*create the new nodes*/ - new_tab_node = (tree_t*)MALLOC(sizeof(tree_t)*M); + new_tab_node = (tm_tree_t*)MALLOC(sizeof(tm_tree_t)*M); /*intitialize each node*/ for( i = 0 ; i < M ; i++ ){ - tree_t **list_child = NULL; - list_child = (tree_t**)CALLOC(arity,sizeof(tree_t*)); - 
set_node(&new_tab_node[i],list_child,arity,NULL,i,0,tab_node,depth); + tm_tree_t **list_child = NULL; + list_child = (tm_tree_t**)CALLOC(arity, sizeof(tm_tree_t*)); + set_node(&new_tab_node[i], list_child, arity, NULL, i, 0, tab_node, depth); } duration = TOC; if(verbose_level >= INFO) @@ -1417,7 +2117,7 @@ tree_t *build_level_topology(tree_t *tab_node, affinity_mat_t *aff_mat,int arity TIC; /*based on that grouping aggregate the communication matrix*/ - new_aff_mat = aggregate_aff_mat(new_tab_node,aff_mat,M); + new_aff_mat = aggregate_aff_mat(new_tab_node, aff_mat, M); duration = TOC; if(verbose_level >= INFO) printf("Aggregate_com_mat= %fs\n", duration); @@ -1425,18 +2125,18 @@ tree_t *build_level_topology(tree_t *tab_node, affinity_mat_t *aff_mat,int arity /*based on that grouping aggregate the object weight matrix*/ - new_obj_weight = aggregate_obj_weight(new_tab_node,obj_weight,M); + new_obj_weight = aggregate_obj_weight(new_tab_node, obj_weight, M); duration = TOC; if(verbose_level >= INFO) printf("Aggregate obj_weight= %fs\n ", duration); /* set ID of virtual nodes to -1*/ - for( i = N-K ; i < N ; i++ ) + for( i = mat_order-K ; i < mat_order ; i++ ) tab_node[i].id = -1; /* - for(i=0;imat,aff_mat->order); - FREE(aff_mat->sum_row); - FREE(aff_mat); + free_affinity_mat(aff_mat); FREE(obj_weight); } - FREE_tab_double(new_aff_mat->mat,new_aff_mat->order); - FREE(new_aff_mat->sum_row); - FREE(new_aff_mat); + free_affinity_mat(new_aff_mat); + FREE(new_obj_weight); return res; } -double speed(int depth) -{ - /* - Bertha values - double tab[5]={21,9,4.5,2.5,0.001}; - double tab[5]={1,1,1,1,1}; - double tab[6]={100000,10000,1000,500,100,10}; - */ - double tab[11] = {1024,512,256,128,64,32,16,8,4,2,1}; - - return 1.0/tab[depth]; - /* - return 10*log(depth+2); - return (depth+1); - return (long int)pow(100,depth); - */ -} -/* check the leaf numbering of the topology - this number must be between 0 and n-1 (the number of leaves) - teh number must all be different - However if a given leaf number is -1, it means that this - leaf cannot bee used for the mapping +tm_tree_t *bottom_up_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, + double *obj_weight, double *comm_speed){ + int depth, i; + tm_tree_t *res = NULL, *tab_node = NULL; + int mat_order = aff_mat->order; - The function returns the number of constraints (leaves that can be used) - and their numbers (in increasing order) in the array pointed by contraints - -*/ - -int check_constraints(tm_topology_t *topology, int **constraints) -{ - int j,i,n = nb_processing_units(topology); - int *tab_constraints = NULL, nb_constraints = 0; - int *tab_node = NULL; - int *count = NULL; - - /* tab_node: array of core numbers. - tab_node[i]=-1 if this core is forbiden - numbering is such that - 0<=tab_node[i]node_id[topology->nb_levels-1]; - - /* "count" counts the number of cores of a given number. - count[i]: number of cores of number i. 
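
/*
 * A minimal standalone sketch of the padding step used by build_level_topology
 * above: when the number of nodes at a level is not a multiple of the arity,
 * K "dumb" virtual nodes are added so that mat_order + K splits evenly into
 * M = (mat_order + K) / arity groups. Only the formula
 * K = arity*((mat_order/arity)+1) - mat_order comes from the code above; the
 * helper name pad_to_arity is hypothetical.
 * Worked example: mat_order = 10, arity = 4  =>  K = 4*(10/4 + 1) - 10 = 2,
 * padded order = 12, M = 3 groups.
 */
static int pad_to_arity(int mat_order, int arity)
{
    int K = 0;
    if (mat_order % arity != 0)
        K = arity * ((mat_order / arity) + 1) - mat_order; /* virtual nodes to add */
    return K;                                              /* padded order is mat_order + K */
}
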
- 0<=count[i]<=1 - */ - count = (int *)CALLOC(n,sizeof(int)); - for( i = 0 ; i < n ; i++ ) - if (tab_node[i] != -1){ - if( (tab_node[i] >= 0) && (tab_node[i] < n)){ - /* In the remaining, we assume that the core numbering is logical from 0 to n - so if tab_node[i]!=-1 this mean sthat we have to use core number i*/ - count[i]++; - nb_constraints++; - }else{ - if(verbose_level >= ERROR) - fprintf(stderr, "*** Error: Core numbering not between 0 and %d: tab_node[%d]=%d\n", n , i, tab_node[i]); - *constraints = NULL; - FREE(count); - return 0; - } - } - - if(nb_constraints == 0){ - FREE(count); - *constraints = NULL; - return 0; - } - - tab_constraints = (int*) MALLOC(sizeof(int)*nb_constraints); - - /* we can now use the "counting sort" to sort the constraint tab in increasing order in linear time*/ - j = 0; - for( i = 0 ; i < n ; i++ ) - if(count[i]) - tab_constraints[j++] = i; - - /* if the constraint_tab is not full, this means that some count[i]>1*/ - if( j != nb_constraints ){ - if(verbose_level >= ERROR) - fprintf(stderr,"*** Error: Duplicate numbering: j=%d, nb_constraints= %d\n",j, nb_constraints); - FREE(tab_constraints); - FREE(count); - *constraints = NULL; - return 0; - } - - /* FREE local variables, assign result, return result*/ - FREE(count); - *constraints = tab_constraints; - return nb_constraints; -} - -affinity_mat_t * build_affinity_mat(double **mat, int order){ - int i,j; - double *sum_row = (double*) CALLOC (order, sizeof(double)); - - for (i=0 ; inb_levels; - for( i = 0 ; i < N ; i++ ) - set_node(&tab_node[i],NULL,0,NULL,i,0,NULL,depth); + for( i = 0 ; i < mat_order ; i++ ) + set_node(&tab_node[i], NULL, 0, NULL, i, 0, NULL, depth); - aff_mat = build_affinity_mat(com_mat,N); if(verbose_level >= INFO) - printf("nb_levels=%d\n",depth); + printf("nb_levels=%d\n", depth); /* assume all objects have the same arity*/ res = build_level_topology(tab_node, aff_mat , topology->arity[depth-2], depth-1, topology, obj_weight, comm_speed); if(verbose_level >= INFO) @@ -1596,8 +2186,6 @@ tree_t *bottom_up_build_tree_from_topology(tm_topology_t *topology,double **com_ /* tell the system it is not a constraint tree, this is usefull for freeing pointers*/ res->constraint = 0; - FREE(aff_mat -> sum_row); - FREE(aff_mat); return res; } @@ -1605,32 +2193,93 @@ tree_t *bottom_up_build_tree_from_topology(tm_topology_t *topology,double **com_ -tree_t * build_tree_from_topology(tm_topology_t *topology, double **com_mat, int N, double *obj_weight, double *com_speed) +/* + The function returns the number of constraints (leaves that can be used) + and their numbers (in increasing order) in the array pointed by contraints + + Also take into account the oversubscribing factor to expand the constraints tab + to fit with oversuscibing of the nodes. + +*/ + +int check_constraints(tm_topology_t *topology, int **constraints) +{ + + int sorted = 1; + int last = -1; + int i, shift; + int nb_constraints = topology->nb_constraints*topology->oversub_fact; + if(nb_constraints && topology->constraints){ + *constraints = (int*)MALLOC(sizeof(int)*(nb_constraints)); + /* renumber constarints logically as it is the way the k-partitionner use it*/ + for(i = 0 ; i < nb_constraints ; i++){ + /* in case of oversubscrining node ids at topology->nb_levels-1 are as follows (for the logocal numbering case): + 0, 0, .., 0, 1, 1, ..., 1, 2, 2, 2, ..., 2, ... where the number of identical consecutive number is topology->oversub_fact. + However, topology->node_rank refers only to the last rank of the id. 
Hence, + topology->node_rank[topology->nb_levels-1][i] == i*topology->oversub_fact + In order to have all the ranks of a given id we need to shift them as follows: + */ + shift = 1 + i%topology->oversub_fact - topology->oversub_fact; + (*constraints)[i] = topology->node_rank[topology->nb_levels-1][topology->constraints[i/topology->oversub_fact]] +shift; + if((*constraints)[i] < last) + sorted = 0; + last = (*constraints)[i]; + } + + if(!sorted){ + qsort(*constraints, nb_constraints , sizeof(int), int_cmp_inc); + } + + }else{ + *constraints = NULL; + } + + return nb_constraints; +} + + + + + +tm_tree_t * tm_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, double *obj_weight, double *com_speed) { int *constraints = NULL, nb_constraints; - tree_t * result; + tm_tree_t * result; + int npu, nb_processes, oversub_fact, nb_slots; - verbose_level = get_verbose_level(); + verbose_level = tm_get_verbose_level(); + oversub_fact = topology->oversub_fact; + /* Here constraints expended to take into account the oversuscribing factor */ nb_constraints = check_constraints (topology, &constraints); + nb_processes = aff_mat->order; + npu = nb_processing_units(topology); + nb_slots = npu * oversub_fact; - if(verbose_level>=INFO) - printf("nb_constraints = %d, N= %d; nb_processing units = %d\n",nb_constraints, N, nb_processing_units(topology)); + if(verbose_level >= INFO){ + printf("Com matrix size : %d\n", nb_processes); + printf("nb_constraints : %d\n", nb_constraints); + if(constraints) + print_1D_tab(constraints, nb_constraints); + printf("nb_processing units : %d\n", npu); + printf("Oversubscrbing factor: %d\n", oversub_fact); + printf("Nb of slots : %d\n", nb_slots); + } - if(N>nb_constraints){ + if(nb_processes > nb_constraints){ if(verbose_level >= CRITICAL){ - printf("Error : More processes (%d) than number of constraints (%d)!\n",N ,nb_constraints); + fprintf(stderr, "Error : Not enough slots/constraints (%d) for the communication matrix order (%d)!\n", + nb_constraints, nb_processes); } exit(-1); } - if(verbose_level >= INFO){ - printf("Com matrix size: %d\n",N); - printf("nb_constraints: %d\n",nb_constraints); - } - - if(nb_constraints == nb_processing_units(topology)) + if(nb_constraints == nb_slots) { + if(verbose_level >= INFO){ + printf("No need to use %d constraints for %d slots!\n", nb_constraints, nb_slots); + } + nb_constraints = 0; FREE(constraints); } @@ -1639,7 +2288,9 @@ tree_t * build_tree_from_topology(tm_topology_t *topology, double **com_mat, int if(verbose_level >= INFO){ printf("Partitionning with constraints\n"); } - result = kpartition_build_tree_from_topology(topology, com_mat, N, constraints, nb_constraints, obj_weight, com_speed); + result = kpartition_build_tree_from_topology(topology, aff_mat->mat, nb_processes, constraints, nb_constraints, + obj_weight, com_speed); + result->nb_processes = aff_mat->order; FREE(constraints); return result; } @@ -1647,6 +2298,9 @@ tree_t * build_tree_from_topology(tm_topology_t *topology, double **com_mat, int if(verbose_level >= INFO){ printf("Partitionning without constraints\n"); } - return bottom_up_build_tree_from_topology(topology, com_mat, N, obj_weight, com_speed); + + result = bottom_up_build_tree_from_topology(topology, aff_mat, obj_weight, com_speed); + result->nb_processes = aff_mat->order; + return result; } } diff --git a/ompi/mca/topo/treematch/treematch/tm_tree.h b/ompi/mca/topo/treematch/treematch/tm_tree.h index 342a61bd4f..6168f50161 100644 --- a/ompi/mca/topo/treematch/treematch/tm_tree.h +++ 
b/ompi/mca/topo/treematch/treematch/tm_tree.h @@ -1,69 +1,22 @@ -#ifndef __TREE_H__ -#define __TREE_H__ +#ifndef __TM_TREE_H__ +#define __TM_TREE_H__ #include +#include "treematch.h" - -typedef struct _node_info_t{ - int submit_date; - int job_id; - int finish_date; -} job_info_t; - -typedef struct _tree_t{ - int constraint; /* tells if the tree has been constructed with constraints on the nodes or not. usefull for freeing it. needs to be set on the root only*/ - struct _tree_t **child; - struct _tree_t *parent; - struct _tree_t *tab_child; /*the pointer to be freed*/ - double val; - int arity; - int depth; - int id; - int uniq; - int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately*/ - job_info_t *job_info; -}tree_t; - -/* Maximum number of levels in the tree*/ -#define MAX_LEVELS 100 - -typedef struct { - int *arity; /* arity of the nodes of each level*/ - int nb_levels; /*number of levels of the tree. Levels are numbered from top to bottom starting at 0*/ - int *nb_nodes; /*nb of nodes of each level*/ - int *nb_free_nodes; /*nb of available nodes of each level*/ - int **node_id; /*ID of the nodes of the tree for each level*/ - int **free_nodes; /*ID of the nodes of the tree for each level*/ -}tm_topology_t; - - -typedef struct { - double ** mat; - double * sum_row; - int order; -} affinity_mat_t; - - - -tree_t * build_tree(double **tab,int N); -tree_t * build_tree_from_topology(tm_topology_t *topology,double **tab,int N, double *obj_weight, double *comm_speed); -void map_tree(tree_t *,tree_t*); +void update_val(tm_affinity_mat_t *aff_mat,tm_tree_t *parent); void display_tab(double **tab,int N); -double speed(int depth); -void set_node(tree_t *node,tree_t ** child, int arity,tree_t *parent,int id,double val,tree_t *deb_tab_child, int depth); -void free_constraint_tree(tree_t *tree); -void free_tree(tree_t *tree); -void free_tab_double(double**tab,int N); -void free_tab_int(int**tab,int N); -void update_val(affinity_mat_t *aff_mat,tree_t *parent); -void FREE_tree(tree_t *tree); -void FREE_tab_double(double**,int); +void set_node(tm_tree_t *node,tm_tree_t ** child, int arity,tm_tree_t *parent, + int id,double val,tm_tree_t *tab_child,int depth); + typedef struct _group_list_t{ struct _group_list_t *next; - tree_t **tab; + tm_tree_t **tab; double val; double sum_neighbour; double wg; + int id; + double *bound; }group_list_t; @@ -74,21 +27,13 @@ typedef struct{ }adjacency_t; - -/* for debugging malloc */ -/* #define __DEBUG_MY_MALLOC__ */ -#undef __DEBUG_MY_MALLOC__ -#ifdef __DEBUG_MY_MALLOC__ -#include "tm_malloc.h" -#define MALLOC(x) my_malloc(x,__FILE__,__LINE__) -#define CALLOC(x,y) my_calloc(x,y,__FILE__,__LINE__) -#define FREE my_free -#define MEM_CHECK my_mem_check -#else -#define MALLOC malloc -#define CALLOC calloc -#define FREE free -#define MEM_CHECK my_mem_check -#endif +typedef struct _work_unit_t{ + int nb_groups; + int *tab_group; + int done; + int nb_work; + struct _work_unit_t *next; +}work_unit_t; #endif + diff --git a/ompi/mca/topo/treematch/treematch/tm_verbose.c b/ompi/mca/topo/treematch/treematch/tm_verbose.c index 9ff8319121..e360d7122b 100644 --- a/ompi/mca/topo/treematch/treematch/tm_verbose.c +++ b/ompi/mca/topo/treematch/treematch/tm_verbose.c @@ -1,11 +1,34 @@ #include "tm_verbose.h" +#include static unsigned int verbose_level = ERROR; +static FILE *output = NULL; -void set_verbose_level(unsigned int level){ +void tm_set_verbose_level(unsigned int level){ verbose_level = level; } - -unsigned int get_verbose_level(){ +unsigned int 
tm_get_verbose_level(){ return verbose_level; } + +int tm_open_verbose_file(char *filename){ + output = fopen(filename,"w"); + if(output == NULL) + return 0; + else + return 1; +} + +int tm_close_verbose_file(void){ + if(output != NULL) + return fclose(output); + + return 0; +} + +FILE *tm_get_verbose_output(){ + if(!output) + return stdout; + else + return output; +} diff --git a/ompi/mca/topo/treematch/treematch/tm_verbose.h b/ompi/mca/topo/treematch/treematch/tm_verbose.h index eafb0942f4..e16cbbc6c0 100644 --- a/ompi/mca/topo/treematch/treematch/tm_verbose.h +++ b/ompi/mca/topo/treematch/treematch/tm_verbose.h @@ -1,11 +1,22 @@ +#include + #define NONE 0 +/* output in stderr*/ #define CRITICAL 1 #define ERROR 2 +/* output in stdout*/ #define WARNING 3 -#define INFO 4 -#define DEBUG 5 - -void set_verbose_level(unsigned int level); -unsigned int get_verbose_level(void); +#define TIMING 4 +#define INFO 5 +#define DEBUG 6 +/* return 0 on errror and 1 on success */ +int tm_open_verbose_file(char *filename); +int tm_close_verbose_file(void); +void tm_set_verbose_level(unsigned int level); +unsigned int tm_get_verbose_level(void); +FILE * tm_get_verbose_output(void); + +#define tm_verbose_printf(level, ...) level <= tm_get_verbose_level()?fprintf(tm_get_verbose_output(),__VA_ARGS__):0 + diff --git a/ompi/mca/topo/treematch/treematch/treematch.h b/ompi/mca/topo/treematch/treematch/treematch.h new file mode 100644 index 0000000000..920dffcbae --- /dev/null +++ b/ompi/mca/topo/treematch/treematch/treematch.h @@ -0,0 +1,188 @@ +#ifndef __TREEMATCH_H__ +#define __TREEMATCH_H__ + +/* size_t definition */ +#include +#include "tm_verbose.h" + +/********* TreeMatch Public Enum **********/ + +/*type of topology files that can be read*/ +typedef enum{ + TM_FILE_TYPE_UNDEF, + TM_FILE_TYPE_XML, + TM_FILE_TYPE_TGT +} tm_file_type_t; + +/* different metrics to evaluate the solution */ +typedef enum{ + TM_METRIC_SUM_COM = 1, + TM_METRIC_MAX_COM = 2, + TM_METRIC_HOP_BYTE = 3 +} tm_metric_t; + + +/********* TreeMatch Public Structures **********/ + +typedef struct _job_info_t{ + int submit_date; + int job_id; + int finish_date; +} tm_job_info_t; + +typedef struct _tree_t{ + int constraint; /* tells if the tree has been constructed with constraints on the nodes or not. + Usefull for freeing it. needs to be set on the root only*/ + struct _tree_t **child; + struct _tree_t *parent; + struct _tree_t *tab_child; /*the pointer to be freed*/ + double val; + int arity; + int depth; + int id; + int uniq; + int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately*/ + tm_job_info_t *job_info; + int nb_processes; /* number of grouped processes (i.e. the order of the affinity matrix). Set at the root only*/ +}tm_tree_t; /* FT : changer le nom : tm_grouap_hierachy_t ?*/ + +/* Maximum number of levels in the tree*/ +#define TM_MAX_LEVELS 100 + +typedef struct { + int *arity; /* arity of the nodes of each level*/ + int nb_levels; /*number of levels of the tree. 
Levels are numbered from top to bottom starting at 0*/ + size_t *nb_nodes; /*nb of nodes of each level*/ + int **node_id; /*ID of the nodes of the tree for each level*/ + int **node_rank ; /*rank of the nodes of the tree for each level given its ID: this is the inverse tab of node_id*/ + size_t *nb_free_nodes; /*nb of available nodes of each level*/ + int **free_nodes; /*tab of node that are free: useful to simulate batch scheduler*/ + double *cost; /*cost of the communication depending on the distance: + cost[i] is the cost for communicating at distance nb_levels-i*/ + int *constraints; /* array of constraints: id of the nodes where it is possible to map processes */ + int nb_constraints; /* Size of the above array */ + int oversub_fact; /* maximum number of processes to be mapped on a given node */ + int nb_proc_units; /* the real number of units used for computation */ +}tm_topology_t; + + +typedef struct { + double ** mat; + double * sum_row; + int order; +} tm_affinity_mat_t; + +/* + sigma_i is such that process i is mapped on core sigma_i + k_i is such that core i exectutes process k_i_j (0<=j<<=oversubscribing factor - 1) + + size of sigma is the number of processes (nb_objs) + size of k is the number of cores/nodes (nb_compute_units) + size of k[i] is the number of process we can execute per nodes (1 if no oversubscribing) + + We must have numbe of process<=number of cores + + k[i] == NULL if no process is mapped on core i +*/ + +typedef struct { + int *sigma; + size_t sigma_length; + int **k; + size_t k_length; + int oversub_fact; +}tm_solution_t; + + +/************ TreeMatch Public API ************/ + +/* load XML or TGT topology */ +tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type); +/* + Alternatively, build a synthetic balanced topology. + + nb_levels : number of levels of the topology +1 (the last level must be of cost 0 and arity 0). + arity : array of arity of the first nb_level (of size nb_levels) + cost : array of costs between the levels (of size nb_levels) + core_numbering: numbering of the core by the system. Array of size nb_core_per_node + + nb_core_per_nodes: number of cores of a given node. Size of the array core_numbering + + both arity and cost are copied inside tm_build_synthetic_topology + + The numbering of the cores is done in round robin fashion after a width traversal of the topology. + for example: + {0,1,2,3} becomes 0,1,2,3,4,5,6,7... + and + {0,2,1,3} becomes 0,2,1,3,4,6,5,7,... + + Example of call to build the 128.tgt file: tleaf 4 16 500 2 100 2 50 2 10 + + double cost[5] = {500,100,50,10,0}; + int arity[5] = {16,2,2,2,0}; + int cn[5]={0,1}; + + topology = tm_build_synthetic_topology(arity,cost,5,cn,2); + + */ +tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes); +/* load affinity matrix */ +tm_affinity_mat_t *tm_load_aff_mat(char *com_filename); +/* + Alternativelly, build the affinity matrix from a array of array of matrix of size order by order + For performance reason mat is not copied. +*/ +tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order); +/* Add constraints to toplogy + Return 1 on success and 0 if the constari,ts id are not compatible withe nodes id */ +int tm_topology_add_binding_constraints(char *bind_filename, tm_topology_t *topology); +/* Alternatively, set the constraints from an array. 
+ Return 1 on success and 0 if the constari,ts id are not compatible withe nodes id + + The array constraints is copied inside tm_topology_set_binding_constraints + +*/ +int tm_topology_set_binding_constraints(int *constraints, int nb_constraints, tm_topology_t *topology); +/* display arity of the topology */ +void tm_display_arity(tm_topology_t *topology); +/* display the full topology */ +void tm_display_topology(tm_topology_t *topology); +/* Optimize the topology by decomposing arities */ +void tm_optimize_topology(tm_topology_t **topology); +/* Manage oversubscribing */ +void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact); +/* core of the treematch: compute the solution tree */ +tm_tree_t *tm_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, double *obj_weight, double *com_speed); +/* compute the mapping according to teh tree an dthe core numbering*/ +tm_solution_t *tm_compute_mapping(tm_topology_t *topology, tm_tree_t *comm_tree); +/* display the solution*/ +double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol, tm_metric_t metric); +/* display RR, packed, MPIPP*/ +void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric); +/* free TM strutures*/ +void tm_free_topology(tm_topology_t *topology); +void tm_free_tree(tm_tree_t *comm_tree); +void tm_free_solution(tm_solution_t *sol); +void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat); +/* manage verbosity of TM*/ +void tm_set_verbose_level(unsigned int level); +unsigned int tm_get_verbose_level(void); +/* finalize treematch :check memory if necessary, and free internal variables (thread pool)*/ +void tm_finalize(); + +/* +Ask for exhaustive search: may be very long + new_val == 0 : no exhuative search + new_val != 0 : exhuative search +*/ +void tm_set_exhaustive_search_flag(int new_val); +int tm_get_exhaustive_search_flag(); + + +/* Setting the maximum number of threads you want to use in parallel parts of TreeMatch */ +void tm_set_max_nb_threads(unsigned int val); + + +#include "tm_malloc.h" + +#endif diff --git a/ompi/mca/topo/treematch/treematch/uthash.h b/ompi/mca/topo/treematch/treematch/uthash.h index 7b98cad5cc..3a3dd9a69a 100644 --- a/ompi/mca/topo/treematch/treematch/uthash.h +++ b/ompi/mca/topo/treematch/treematch/uthash.h @@ -22,7 +22,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
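
/*
 * A minimal usage sketch of the public API declared in treematch.h above,
 * assuming a synthetic topology and a hand-written 4x4 affinity matrix. The
 * concrete values (cost/arity arrays, the matrix, and passing NULL for
 * obj_weight/com_speed to mean "unweighted") are illustrative only and follow
 * the comments in the header; they are not part of the patch itself.
 */
#include <stdio.h>
#include "treematch.h"

int main(void)
{
    /* tleaf-like synthetic machine: 16 nodes x 2 x 2 x 2 cores (see header comment) */
    double cost[5]  = {500, 100, 50, 10, 0};
    int    arity[5] = {16, 2, 2, 2, 0};
    int    cn[2]    = {0, 1};

    /* 4-process affinity matrix; tm_build_affinity_mat keeps a reference, no copy */
    static double r0[4] = {0, 10, 1, 1}, r1[4] = {10, 0, 1, 1},
                  r2[4] = {1, 1, 0, 10}, r3[4] = {1, 1, 10, 0};
    double *mat[4] = {r0, r1, r2, r3};

    tm_topology_t     *topo = tm_build_synthetic_topology(arity, cost, 5, cn, 2);
    tm_affinity_mat_t *aff  = tm_build_affinity_mat(mat, 4);

    tm_set_verbose_level(ERROR);

    /* group processes into a tree, then derive the process->core mapping */
    tm_tree_t     *tree = tm_build_tree_from_topology(topo, aff, NULL, NULL);
    tm_solution_t *sol  = tm_compute_mapping(topo, tree);

    /* sigma[i] is the core executing process i (see tm_solution_t comment above) */
    for (size_t i = 0; i < sol->sigma_length; i++)
        printf("process %zu -> core %d\n", i, sol->sigma[i]);
    tm_display_solution(topo, aff, sol, TM_METRIC_SUM_COM);

    tm_free_solution(sol);
    tm_free_tree(tree);
    tm_free_topology(topo);
    tm_free_affinity_mat(aff);
    tm_finalize();
    return 0;
}
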
*/ #ifndef UTHASH_H -#define UTHASH_H +#define UTHASH_H #include /* memcmp,strlen */ #include /* ptrdiff_t */ @@ -49,7 +49,7 @@ do { char **_da_dst = (char**)(&(dst)); \ *_da_dst = (char*)(src); \ } while(0) -#else +#else #define DECLTYPE_ASSIGN(dst,src) \ do { \ (dst) = DECLTYPE(dst)(src); \ @@ -121,9 +121,9 @@ do { HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) #else -#define HASH_BLOOM_MAKE(tbl) -#define HASH_BLOOM_FREE(tbl) -#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) #define HASH_BLOOM_TEST(tbl,hashv) (1) #endif @@ -148,7 +148,7 @@ do { #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add) - + #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ do { \ unsigned _ha_bkt; \ @@ -300,10 +300,10 @@ do { } \ } while (0) #else -#define HASH_FSCK(hh,head) +#define HASH_FSCK(hh,head) #endif -/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to * the descriptor to which this macro is defined for tuning the hash function. * The app can #include to get the prototype for write(2). */ #ifdef HASH_EMIT_KEYS @@ -313,12 +313,12 @@ do { write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ write(HASH_EMIT_KEYS, keyptr, fieldlen); \ } while (0) -#else -#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) +#else +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) #endif /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ -#ifdef HASH_FUNCTION +#ifdef HASH_FUNCTION #define HASH_FCN HASH_FUNCTION #else #define HASH_FCN HASH_JEN @@ -335,7 +335,7 @@ do { } while (0) -/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ do { \ @@ -356,7 +356,7 @@ do { hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ bkt = hashv & (num_bkts-1); \ } while(0); - + #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ do { \ unsigned _ho_i; \ @@ -485,14 +485,14 @@ do { #ifdef HASH_USING_NO_STRICT_ALIASING /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. - * MurmurHash uses the faster approach only on CPU's where we know it's safe. + * MurmurHash uses the faster approach only on CPU's where we know it's safe. 
* * Note the preprocessor built-in defines can be emitted using: * * gcc -m64 -dM -E - < /dev/null (on gcc) * cc -## a.c (where a.c is a simple test file) (Sun Studio) */ -#if (defined(__i386__) || defined(__x86_64__)) +#if (defined(__i386__) || defined(__x86_64__)) #define MUR_GETBLOCK(p,i) p[i] #else /* non intel */ #define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0) @@ -562,7 +562,7 @@ do { \ #endif /* HASH_USING_NO_STRICT_ALIASING */ /* key comparison function; return 0 if keys equal */ -#define HASH_KEYCMP(a,b,len) memcmp(a,b,len) +#define HASH_KEYCMP(a,b,len) memcmp(a,b,len) /* iterate over items in a known bucket to find desired item */ #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ @@ -603,36 +603,36 @@ do { } \ if (hh_del->hh_next) { \ hh_del->hh_next->hh_prev = hh_del->hh_prev; \ - } + } /* Bucket expansion has the effect of doubling the number of buckets * and redistributing the items into the new buckets. Ideally the * items will distribute more or less evenly into the new buckets * (the extent to which this is true is a measure of the quality of - * the hash function as it applies to the key domain). - * + * the hash function as it applies to the key domain). + * * With the items distributed into more buckets, the chain length * (item count) in each bucket is reduced. Thus by expanding buckets - * the hash keeps a bound on the chain length. This bounded chain + * the hash keeps a bound on the chain length. This bounded chain * length is the essence of how a hash provides constant time lookup. - * + * * The calculation of tbl->ideal_chain_maxlen below deserves some * explanation. First, keep in mind that we're calculating the ideal * maximum chain length based on the *new* (doubled) bucket count. * In fractions this is just n/b (n=number of items,b=new num buckets). - * Since the ideal chain length is an integer, we want to calculate + * Since the ideal chain length is an integer, we want to calculate * ceil(n/b). We don't depend on floating point arithmetic in this * hash, so to calculate ceil(n/b) with integers we could write - * + * * ceil(n/b) = (n/b) + ((n%b)?1:0) - * + * * and in fact a previous version of this hash did just that. * But now we have improved things a bit by recognizing that b is * always a power of two. We keep its base 2 log handy (call it lb), * so now we can write this with a bit shift and logical AND: - * + * * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) - * + * */ #define HASH_EXPAND_BUCKETS(tbl) \ do { \ @@ -684,7 +684,7 @@ do { /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ -/* Note that HASH_SORT assumes the hash handle name to be hh. +/* Note that HASH_SORT assumes the hash handle name to be hh. * HASH_SRT was added to allow the hash handle name to be passed in. */ #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) #define HASH_SRT(hh,head,cmpfcn) \ @@ -766,10 +766,10 @@ do { } \ } while (0) -/* This function selects items from one hash into another hash. - * The end result is that the selected items have dual presence - * in both hashes. There is no copy of the items made; rather - * they are added into the new hash through a secondary hash +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash * hash handle that must be present in the structure. 
*/ #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ do { \ @@ -823,7 +823,7 @@ do { #ifdef NO_DECLTYPE #define HASH_ITER(hh,head,el,tmp) \ for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ - el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) + el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) #else #define HASH_ITER(hh,head,el,tmp) \ for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ @@ -831,7 +831,7 @@ for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); #endif /* obtain a count of items in the hash */ -#define HASH_COUNT(head) HASH_CNT(hh,head) +#define HASH_COUNT(head) HASH_CNT(hh,head) #define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) typedef struct UT_hash_bucket { @@ -840,7 +840,7 @@ typedef struct UT_hash_bucket { /* expand_mult is normally set to 0. In this situation, the max chain length * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If - * the bucket's chain exceeds this length, bucket expansion is triggered). + * the bucket's chain exceeds this length, bucket expansion is triggered). * However, setting expand_mult to a non-zero value delays bucket expansion * (that would be triggered by additions to this particular bucket) * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. @@ -848,7 +848,7 @@ typedef struct UT_hash_bucket { * multiplier is to reduce bucket expansions, since they are expensive, in * situations where we know that a particular bucket tends to be overused. * It is better to let its chain length grow to a longer yet-still-bounded - * value, than to do an O(n) bucket expansion too often. + * value, than to do an O(n) bucket expansion too often. */ unsigned expand_mult; @@ -874,7 +874,7 @@ typedef struct UT_hash_table { * hash distribution; reaching them in a chain traversal takes >ideal steps */ unsigned nonideal_items; - /* ineffective expands occur when a bucket doubling was performed, but + /* ineffective expands occur when a bucket doubling was performed, but * afterward, more than half the items in the hash had nonideal chain * positions. If this happens on two consecutive expansions we inhibit any * further expansion, as it's not helping; this happens when the hash