First step of the integration with the new TreeMatch.
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
родитель
f784ce3459
Коммит
c6f73e8883
@ -199,7 +199,7 @@ void add_to_bucket(int id,int i,int j,bucket_list_t bucket_list)
|
||||
/* display_bucket(bucket);*/
|
||||
if(verbose_level >= DEBUG){
|
||||
printf("Extending bucket %d (%p) from size %d to size %d!\n",
|
||||
id, (void*)bucket->bucket, bucket->nb_elem, bucket->nb_elem+size);
|
||||
id,(void*)bucket->bucket, bucket->nb_elem, bucket->nb_elem+size);
|
||||
}
|
||||
|
||||
bucket->bucket = (coord*)REALLOC(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len));
|
||||
@ -525,7 +525,7 @@ void partial_update_val (int nb_args, void **args, int thread_id){
|
||||
|
||||
if(nb_args != 5){
|
||||
if(verbose_level >= ERROR)
|
||||
fprintf(stderr,"(Thread: %d) Wrong number of args in %s: %d\n",thread_id, __func__, nb_args);
|
||||
fprintf(stderr,"(Thread: %d) Wrong number of args in %s: %d\n",thread_id, __FUNCTION__, nb_args);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
@ -648,6 +648,7 @@ double bucket_grouping(tm_affinity_mat_t *aff_mat,tm_tree_t *tab_node, tm_tree_t
|
||||
wait_work_completion(works[id]);
|
||||
val+=tab_val[id];
|
||||
FREE(works[id]->args);
|
||||
destroy_work(works[id]);
|
||||
}
|
||||
|
||||
|
||||
|
@ -6,6 +6,11 @@
|
||||
#include <stdio.h>
|
||||
#include "config.h"
|
||||
|
||||
#if HAVE_LIBSCOTCH
|
||||
#include <scotch.h>
|
||||
#endif
|
||||
|
||||
|
||||
#define USE_KL_KPART 0
|
||||
#define KL_KPART_GREEDY_TRIALS 0
|
||||
|
||||
@ -33,6 +38,253 @@ void free_const_tab(constraint_t *,int);
|
||||
void kpartition_build_level_topology(tm_tree_t *,com_mat_t *,int,int,tm_topology_t *,
|
||||
int *,int *,int,double *,double *);
|
||||
|
||||
/* Selects the partitioning backend in kpartition() when the library is built
   with HAVE_LIBSCOTCH: zero means Scotch, non-zero means the greedy
   heuristic. */
static int greedy_flag = 0;

/* Set the greedy flag to new_val. */
void tm_set_greedy_flag(int new_val){
  greedy_flag = new_val;
}

/* Return the current value of the greedy flag. */
int tm_get_greedy_flag(void){
  return greedy_flag;
}
|
||||
|
||||
|
||||
#if HAVE_LIBSCOTCH
|
||||
|
||||
/* Build a Scotch graph with n vertices from the communication matrix com_mat.

   The first com_mat->n vertices are the rows of the matrix: every non-zero
   off-diagonal entry mat[i][j] becomes an arc i->j whose load is the entry
   truncated to SCOTCH_Num.  The remaining vertices (up to n) are "dumb"
   vertices without any neighbour.

   The three adjacency arrays are allocated here with malloc() and handed to
   SCOTCH_graphBuild(); the returned graph comes from SCOTCH_graphAlloc().
   The process exits with -1 if an allocation or the graph construction fails.

   NOTE(review): arcs are emitted exactly as found in the matrix (no
   symmetrisation), so this presumably relies on com_mat being symmetric --
   confirm. */
SCOTCH_Graph* com_mat_to_scotch_graph(com_mat_t *com_mat, int n){
  double **mat = com_mat->comm;
  SCOTCH_Num vertnbr = n;               /* number of vertices */
  SCOTCH_Num edgenbr = vertnbr*vertnbr; /* upper bound on the number of arcs */
  /* the buffers are sized from n: never walk more matrix rows/columns than that */
  int nb_real = (com_mat->n < n) ? com_mat->n : n;
  /* avoid malloc(0), which may legitimately return NULL */
  SCOTCH_Num edge_slots = (edgenbr > 0) ? edgenbr : 1;
  /* adjacency index: arcs of vertex i are edgetab[verttab[i] .. verttab[i+1]-1] */
  SCOTCH_Num *verttab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * (vertnbr+1));
  /* id of the neighbors */
  SCOTCH_Num *edgetab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * edge_slots);
  /* number of bytes exchanged */
  SCOTCH_Num *edlotab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * edge_slots);
  SCOTCH_Graph *graphptr = SCOTCH_graphAlloc();
  int edgeNum = 0;
  int i,j;

  if(!verttab || !edgetab || !edlotab || !graphptr){
    if(tm_get_verbose_level() >= CRITICAL)
      fprintf(stderr,"Cannot allocate memory for the Scotch graph\n");
    exit(-1);
  }

  /* Building with the communication matrix */
  for(i = 0; i < nb_real ; i++) {
    verttab[i] = edgeNum;
    for(j = 0; j < nb_real ; j++) {
      if(j == i)
        continue; /* no self loop */
      if(mat[i][j]){
        edgetab[edgeNum] = j;
        edlotab[edgeNum] = (SCOTCH_Num)mat[i][j];
        edgeNum++;
      }
    }
  }

  /* adding the dumb vertices: they have no neighbor*/
  for(i = nb_real ; i < vertnbr ; i++) {
    verttab[i] = edgeNum;
  }

  /* closing entry of the adjacency index */
  verttab[vertnbr] = edgeNum;

  if(tm_get_verbose_level() >=DEBUG){
    printf("Graph converted to Scotch format: edgeNum=%d, edgenbr = %lld, vertnbr = %lld\n",edgeNum, (long long int)edgenbr, (long long int)vertnbr);
  }

  assert(edgeNum <= edgenbr);
  edgenbr = edgeNum;

  SCOTCH_graphInit(graphptr);
  if(SCOTCH_graphBuild(graphptr, 0, vertnbr, verttab, verttab+1, NULL, NULL, edgenbr, edgetab, edlotab) != 0){
    if(tm_get_verbose_level() >= CRITICAL)
      fprintf(stderr,"Cannot build the Scotch graph\n");
    exit(-1);
  }

  return graphptr;
}
|
||||
|
||||
|
||||
|
||||
/* Check that parttab assigns exactly n/k of the n vertices to each of the k
   parts.

   Returns 1 when every part holds n/k vertices, 0 otherwise.  A part id
   outside [0,k) (for instance a vertex still marked -1) or a non-positive k
   is reported as an invalid partition instead of being used as an index. */
int check_partition(SCOTCH_Num *parttab, int k, int n){
  int *count;
  int target;
  int i;

  if(k <= 0)
    return 0; /* n/k below would divide by zero */

  count = (int *)CALLOC(k, sizeof(int));
  target = n/k;

  for(i=0; i<n; i++){
    if(parttab[i] < 0 || parttab[i] >= k){
      if(tm_get_verbose_level()>=INFO)
        fprintf(stdout, "Error in partition: vertex %d is in partition %d while expecting a partition in [0,%d)\n", i, (int)parttab[i], k);
      FREE(count);
      return 0;
    }
    count[parttab[i]]++;
  }

  for(i = 0; i<k ; i++){
    if(count[i] != target){
      if(tm_get_verbose_level()>=INFO)
        fprintf(stdout, "Error in partition: %d vertices in partition %d while expecting %d vertices\n",count[i], i, target);
      FREE(count);
      return 0;
    }
  }

  FREE(count);
  return 1;
}
|
||||
|
||||
|
||||
/* n is the number of element in teh graoh with dumlb_vertices
|
||||
comm_mat->n is the nulber of processes (i.e. the size of teh graph without dumb veritcies*/
|
||||
int *kpartition_scotch(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints){
|
||||
SCOTCH_Num partnbr = (SCOTCH_Num) k;
|
||||
SCOTCH_Graph* graphptr;
|
||||
SCOTCH_Strat strat;
|
||||
SCOTCH_Num straval;
|
||||
SCOTCH_Num *parttab = (SCOTCH_Num *)MALLOC(sizeof(SCOTCH_Num) * n);
|
||||
int *partition = (int *)MALLOC(sizeof(int) * n);
|
||||
int i, j;
|
||||
int *nb_dumb = (int *)MALLOC(sizeof(int) * k); /*number of dumb vertices per partition */
|
||||
int dumb_id, min_nb_dumb = n, sum_dumb = 0, p;
|
||||
/* if(SCOTCH_graphCheck(graphptr) == 1){ */
|
||||
/* fprintf(stderr,"Bad scotch graph! Exiting program...\n"); */
|
||||
/* exit(-1); */
|
||||
/* } */
|
||||
|
||||
/* printf("Correct scotch graph (%d, %d)!\n", SCOTCH_numSizeof(), sizeof(SCOTCH_Num)); */
|
||||
|
||||
for(i=0;i<n;i++)
|
||||
parttab[i] = -1;
|
||||
|
||||
|
||||
/* put "dumb" vertices in the correct partition if there are any*/
|
||||
/*constraints are leaves that can be used */
|
||||
if (nb_constraints){
|
||||
int end, start = 0;
|
||||
for( i = 0 ; i < k ; i ++){
|
||||
int max_val = (i+1)* (n/k);
|
||||
end = start;
|
||||
while( end < nb_constraints){
|
||||
if(constraints[end] >= max_val)
|
||||
break;
|
||||
end++;
|
||||
}
|
||||
/* now end - start is the number of constraints for the ith subtree
|
||||
hence the number of dumb vertices in partition i is the differences between the
|
||||
number of leaves of the subtree (n/k) and the number of constraints
|
||||
*/
|
||||
nb_dumb[i] = n/k - (end-start);
|
||||
sum_dumb += nb_dumb[i];
|
||||
if(nb_dumb[i] < min_nb_dumb){
|
||||
min_nb_dumb = nb_dumb[i];
|
||||
}
|
||||
start=end;
|
||||
}
|
||||
|
||||
/* Imagine we have n=12, k=3, nb_dumb[0] = 3, nb_dumb[1] = 2, nb_dumb[2] = 3, hence min_nb_dumb = 2 and sum_dumb = 8
|
||||
So, we have 8 fix vertices and 12-8 = 4 free vertices
|
||||
We want scotch to allocate the 6 free vertices such that the whole partition is balanced (4 vertex in each) :
|
||||
1 in parttion 0, 2 in partition 1 and 1 in partition 2.
|
||||
To do so we can fill partab as follows:
|
||||
{-1, -1, -1, -1, 0, 0, 0, 1, 1, 2, 2, 2} and call scotch with a n=12 vertices graph with SCOTCH_STRATBALANCE
|
||||
dumb_id = n - sum_dumb;
|
||||
for(i = 0;i<k;i++){
|
||||
for( j = 0; j < nb_dumb[i]; j ++ ){
|
||||
parttab[dumb_id] = i;
|
||||
dumb_id++;
|
||||
}
|
||||
}
|
||||
|
||||
A more efficient solution is to fill partab as follows
|
||||
{-1, -1, -1, -1, 0, 2, 0, 0, 1, 1, 2, 2} and call Scotch with
|
||||
a p = 6 (n-sum_dumb+ sum_{i}(nb_dumb[i]-min_dumb) vertices graph.
|
||||
Scotch will then only use the 8 fist element of partab
|
||||
*/
|
||||
|
||||
dumb_id = n - sum_dumb; /* now dumb_id is the number of free vertices*/
|
||||
for(i = 0 ; i < k ; i++){
|
||||
for( j = 0; j < nb_dumb[i] - min_nb_dumb; j ++ ){
|
||||
parttab[dumb_id] = i;
|
||||
dumb_id++;
|
||||
}
|
||||
}
|
||||
p = dumb_id;
|
||||
for(i = 0 ; i < k ; i++){
|
||||
for( j = 0 ; j < min_nb_dumb ; j ++ ){
|
||||
parttab[dumb_id] = i;
|
||||
dumb_id++;
|
||||
}
|
||||
}
|
||||
}else{
|
||||
p=n; /* if no constraint use n vertices */
|
||||
}
|
||||
|
||||
|
||||
graphptr = com_mat_to_scotch_graph(com_mat, p);
|
||||
|
||||
SCOTCH_stratInit (&strat);
|
||||
straval = SCOTCH_STRATBALANCE;
|
||||
if(k>4)
|
||||
straval = SCOTCH_STRATSPEED;
|
||||
SCOTCH_stratGraphMapBuild (&strat, straval, partnbr, 0);
|
||||
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG){
|
||||
printf("Before Scotch (p=%d, n=%d): \n", p, n);
|
||||
for(i = 0 ; i < n; i++){
|
||||
printf("%d ",(int)parttab[i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if(SCOTCH_graphPartFixed(graphptr, partnbr, &strat, parttab) == 0){
|
||||
if(tm_get_verbose_level()>=DEBUG){
|
||||
printf("After Scotch: \n");
|
||||
for(i = 0 ; i < n; i++){
|
||||
printf("%d ",(int)parttab[i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}else{
|
||||
if(tm_get_verbose_level()>=CRITICAL){
|
||||
fprintf(stderr,"Scotch Partitionning failed\n");
|
||||
}
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if(!check_partition(parttab, partnbr, n)){
|
||||
if(tm_get_verbose_level()>=INFO){
|
||||
printf("falling from Scotch to greedy partionning\n");
|
||||
}
|
||||
FREE(partition);
|
||||
partition = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
|
||||
}else{
|
||||
for(i=0;i<n;i++)
|
||||
partition[i] = parttab [i];
|
||||
}
|
||||
|
||||
SCOTCH_stratExit (&strat);
|
||||
SCOTCH_graphExit(graphptr);
|
||||
SCOTCH_memFree(graphptr);
|
||||
FREE(parttab);
|
||||
FREE(nb_dumb);
|
||||
|
||||
return partition;
|
||||
}
|
||||
|
||||
#endif /* HAVE_LIBSCOTCH */
|
||||
|
||||
|
||||
void allocate_vertex(int u, int *res, com_mat_t *com_mat, int n, int *size, int max_size)
|
||||
@ -128,6 +380,7 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int
|
||||
|
||||
|
||||
/* put "dumb" vertices in the correct partition if there are any*/
|
||||
/*constraints are leaves that can be used */
|
||||
if (nb_constraints){
|
||||
start = 0;
|
||||
dumb_id = n-1;
|
||||
@ -139,7 +392,7 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int
|
||||
break;
|
||||
end++;
|
||||
}
|
||||
/* now end - start is the number of constarints for the ith subtree
|
||||
/* now end - start is the number of constraints for the ith subtree
|
||||
hence the number of dumb vertices is the differences between the
|
||||
number of leaves of the subtree (n/k) and the number of constraints
|
||||
*/
|
||||
@ -223,10 +476,18 @@ int *kpartition(int k, com_mat_t *com_mat, int n, int *constraints, int nb_const
|
||||
|
||||
|
||||
#if HAVE_LIBSCOTCH
|
||||
/*printf("Using Scotch\n");*/
|
||||
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
|
||||
if(!greedy_flag){
|
||||
if(verbose_level >= DEBUG)
|
||||
printf("Using Scotch\n");
|
||||
res = kpartition_scotch(k, com_mat, n, constraints, nb_constraints);
|
||||
}else{
|
||||
if(verbose_level >= DEBUG)
|
||||
printf("Using greedy partitionning\n");
|
||||
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
|
||||
}
|
||||
#else
|
||||
/*printf("Using default\n");*/
|
||||
if(verbose_level >= DEBUG)
|
||||
printf("Using greedy partitionning\n");
|
||||
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
|
||||
#endif
|
||||
return res;
|
||||
@ -242,7 +503,7 @@ constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm
|
||||
const_tab = (constraint_t *)CALLOC(k,sizeof(constraint_t));
|
||||
|
||||
/* nb_leaves is the number of leaves of the current subtree
|
||||
this will help to detremine where to split constraints and how to shift values
|
||||
this will help to determine where to split constraints and how to shift values
|
||||
*/
|
||||
nb_leaves = compute_nb_leaves_from_level( depth + 1, topology );
|
||||
|
||||
@ -251,8 +512,6 @@ constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm
|
||||
*/
|
||||
start = 0;
|
||||
|
||||
|
||||
|
||||
for( i = 0; i < k; i++ ){
|
||||
/* returns the index in constraints that contains the smallest value not copied
|
||||
end is used to compute the number of copied elements (end-size) and is used as the next starting index */
|
||||
@ -294,7 +553,7 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
|
||||
printf("Partition: "); print_1D_tab(partition,n);
|
||||
display_tab(com_mat->comm,com_mat->n);
|
||||
printf("m=%d,n=%d,k=%d\n",m,n,k);
|
||||
printf("perm=%p\n", (void*)perm);
|
||||
printf("perm=%p\n", (void *)perm);
|
||||
}
|
||||
|
||||
perm = (int*)MALLOC(sizeof(int)*m);
|
||||
@ -425,8 +684,8 @@ void free_const_tab(constraint_t *const_tab, int k)
|
||||
FREE(const_tab);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static void check_com_mat(com_mat_t *com_mat){
|
||||
|
||||
void check_com_mat(com_mat_t *com_mat){
|
||||
int i,j;
|
||||
|
||||
for( i = 0 ; i < com_mat->n ; i++ )
|
||||
@ -435,8 +694,29 @@ static void check_com_mat(com_mat_t *com_mat){
|
||||
printf("com_mat->comm[%d][%d]= %f\n",i,j,com_mat->comm[i][j]);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/* Write n tabulation characters to stdout; nothing is written when n <= 0. */
void print_tab(int n){
  for(; n > 0; n--)
    fprintf(stdout,"\t");
}
|
||||
|
||||
/* Print, indented by depth tabs, one line per part (0..k-1) listing the
   local_vertices[] value of every index in [0,n) assigned to that part by
   partition[].  Entries whose local_vertices value is -1 are not printed. */
void display_partition(int *partition, int *local_vertices, int n, int depth, int k){
  int part;
  int idx;

  print_tab(depth);
  fprintf(stdout,"Partitions at depth=%d\n",depth);

  for(part = 0; part < k; part++){
    print_tab(depth);
    fprintf(stdout,"%d :",part);

    for(idx = 0; idx < n; idx++){
      if(partition[idx] != part)
        continue;
      if(local_vertices[idx] == -1)
        continue;
      fprintf(stdout,"%d ",local_vertices[idx]);
    }

    fprintf(stdout,"\n");
  }
}
|
||||
#endif
|
||||
|
||||
void kpartition_build_level_topology(tm_tree_t *cur_node, com_mat_t *com_mat, int N, int depth,
|
||||
tm_topology_t *topology, int *local_vertices,
|
||||
@ -471,6 +751,10 @@ void kpartition_build_level_topology(tm_tree_t *cur_node, com_mat_t *com_mat, in
|
||||
/* partition the com_matrix in k partitions*/
|
||||
partition = kpartition(k, com_mat, N, constraints, nb_constraints);
|
||||
|
||||
if(verbose_level>=INFO)
|
||||
display_partition(partition, local_vertices, N, depth, k);
|
||||
|
||||
/* exit(-1); */
|
||||
/* split the communication matrix in k parts according to the partition just found above */
|
||||
tab_com_mat = split_com_mat( com_mat, N, k, partition);
|
||||
|
||||
@ -558,7 +842,7 @@ tm_tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **
|
||||
the value of this array will be used to number the leaves of the tm_tree_t tree
|
||||
that start at "root"
|
||||
|
||||
min(N,nb_contraints) is used to takle the case where thre is less processes than constraints
|
||||
min(N,nb_constraints) is used to tackle the case where there are fewer processes than constraints
|
||||
|
||||
*/
|
||||
|
||||
|
@ -36,7 +36,7 @@ static void init_extra_data(void);
|
||||
|
||||
|
||||
|
||||
static char *my_strdup(char* string){
|
||||
char *my_strdup(char* string){
|
||||
int size = 1+strlen(string);
|
||||
char *res = (char*)malloc(size*sizeof(char));
|
||||
|
||||
@ -55,7 +55,7 @@ void save_ptr(void *ptr, size_t size, char *file, int line) {
|
||||
elem -> line = line;
|
||||
elem -> file = my_strdup(file);
|
||||
if(tm_get_verbose_level() >= DEBUG)
|
||||
printf("Storing (%p,%ld)\n",ptr,size);
|
||||
printf("Storing (%p,%ld)\n", (void *)ptr,size);
|
||||
HASH_ADD_PTR( size_hash, key, elem );
|
||||
}
|
||||
|
||||
@ -66,14 +66,14 @@ size_t retreive_size(void *someaddr){
|
||||
HASH_FIND_PTR(size_hash, &someaddr, elem);
|
||||
if(!elem){
|
||||
if(tm_get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Cannot find ptr %p to free!\n",someaddr);
|
||||
fprintf(stderr,"Cannot find ptr %p to free!\n", (void *)someaddr);
|
||||
abort();
|
||||
return 0;
|
||||
}
|
||||
|
||||
res = elem->size;
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("Retreiving (%p,%ld)\n",someaddr, res);
|
||||
printf("Retreiving (%p,%ld)\n",(void *)someaddr, res);
|
||||
|
||||
free(elem->file);
|
||||
HASH_DEL( size_hash, elem);
|
||||
@ -86,7 +86,7 @@ void tm_mem_check(void){
|
||||
int nb_errors = 0;
|
||||
for(s=size_hash; s != NULL; s=s->hh.next) {
|
||||
if(tm_get_verbose_level()>=ERROR)
|
||||
printf("pointer %p of size %ld (%s: %d) has not been freed!\n", s->key, s->size, s->file, s->line);
|
||||
printf("pointer %p of size %ld (%s: %d) has not been freed!\n", (void *)s->key + EXTRA_BYTE, s->size, s->file, s->line);
|
||||
nb_errors ++;
|
||||
}
|
||||
|
||||
@ -119,7 +119,7 @@ void *tm_malloc(size_t size, char *file, int line){
|
||||
ptr = malloc(size);
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE,(void*)ptr,file,line);
|
||||
printf("tm_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE, (void *)ptr,file,line);
|
||||
|
||||
save_ptr(ptr, size, file, line);
|
||||
|
||||
@ -128,7 +128,7 @@ void *tm_malloc(size_t size, char *file, int line){
|
||||
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_malloc returning: %p\n",(void*)(ptr+EXTRA_BYTE));
|
||||
printf("tm_malloc returning: %p\n",(void *)(ptr+EXTRA_BYTE));
|
||||
|
||||
return (void *)(ptr + EXTRA_BYTE);
|
||||
}
|
||||
@ -147,14 +147,14 @@ void *tm_calloc(size_t count, size_t size, char *file, int line){
|
||||
save_ptr(ptr, full_size, file, line);
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,(void*)ptr, file, line);
|
||||
printf("tm_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,(void *)ptr, file, line);
|
||||
|
||||
|
||||
memcpy(ptr, extra_data, EXTRA_BYTE);
|
||||
memcpy(ptr + full_size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_calloc returning: %p\n", (void*)(ptr+EXTRA_BYTE));
|
||||
printf("tm_calloc returning: %p\n",(void *)(ptr+EXTRA_BYTE));
|
||||
|
||||
return (void *)(ptr+EXTRA_BYTE);
|
||||
}
|
||||
@ -172,7 +172,7 @@ void *tm_realloc(void *old_ptr, size_t size, char *file, int line){
|
||||
save_ptr(ptr, full_size, file, line);
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_realloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE, (void*)ptr, file, line);
|
||||
printf("tm_realloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE, (void *)ptr, file, line);
|
||||
|
||||
|
||||
memcpy(ptr, extra_data, EXTRA_BYTE);
|
||||
@ -185,17 +185,17 @@ void *tm_realloc(void *old_ptr, size_t size, char *file, int line){
|
||||
memcpy(ptr + EXTRA_BYTE, old_ptr, MIN(old_ptr_size - 2 * EXTRA_BYTE, size));
|
||||
|
||||
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
|
||||
fprintf(stderr,"Realloc: cannot find special string ***before*** %p!\n", (void*)original_ptr);
|
||||
fprintf(stderr,"Realloc: cannot find special string ***before*** %p!\n", (void *)original_ptr);
|
||||
fprintf(stderr,"memory is probably corrupted here!\n");
|
||||
}
|
||||
|
||||
if((bcmp(original_ptr + old_ptr_size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
|
||||
fprintf(stderr,"Realloc: cannot find special string ***after*** %p!\n", (void*)original_ptr);
|
||||
fprintf(stderr,"Realloc: cannot find special string ***after*** %p!\n", (void *)original_ptr);
|
||||
fprintf(stderr,"memory is probably corrupted here!\n");
|
||||
}
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_free freeing: %p\n", (void*)original_ptr);
|
||||
printf("tm_free freeing: %p\n",(void *)original_ptr);
|
||||
|
||||
|
||||
free(original_ptr);
|
||||
@ -203,7 +203,7 @@ void *tm_realloc(void *old_ptr, size_t size, char *file, int line){
|
||||
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_realloc returning: %p (----- %p)\n",(void*)(ptr+EXTRA_BYTE),(void*)(((byte *)ptr) - EXTRA_BYTE));
|
||||
printf("tm_realloc returning: %p (----- %p)\n", (void *)(ptr+EXTRA_BYTE), (void *)(ptr - EXTRA_BYTE));
|
||||
|
||||
|
||||
return (void *)(ptr+EXTRA_BYTE);
|
||||
@ -219,17 +219,17 @@ void tm_free(void *ptr){
|
||||
size = retreive_size(original_ptr);
|
||||
|
||||
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
|
||||
fprintf(stderr,"Free: cannot find special string ***before*** %p!\n", (void*)original_ptr);
|
||||
fprintf(stderr,"Free: cannot find special string ***before*** %p!\n", (void *)original_ptr);
|
||||
fprintf(stderr,"memory is probably corrupted here!\n");
|
||||
}
|
||||
|
||||
if((bcmp(original_ptr + size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
|
||||
fprintf(stderr,"Free: cannot find special string ***after*** %p!\n", (void*)original_ptr);
|
||||
fprintf(stderr,"Free: cannot find special string ***after*** %p!\n", (void *)original_ptr);
|
||||
fprintf(stderr,"memory is probably corrupted here!\n");
|
||||
}
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_free freeing: %p\n", (void*)original_ptr);
|
||||
printf("tm_free freeing: %p\n", (void *)original_ptr);
|
||||
|
||||
|
||||
free(original_ptr);
|
||||
|
@ -1,3 +1,7 @@
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@ -18,6 +22,15 @@
|
||||
#include <winbase.h>
|
||||
#endif
|
||||
|
||||
#if HAVE_LIBSCOTCH
|
||||
#include <scotch.h>
|
||||
#endif
|
||||
|
||||
#include <sys/mman.h>
|
||||
|
||||
|
||||
/* Smaller of a and b.  The whole expansion is parenthesised so the macro can
   be used inside a larger expression; each argument may be evaluated twice. */
#define MIN(a,b) (((a)<(b))?(a):(b))
|
||||
|
||||
#define TEST_ERROR(n) do{ \
|
||||
if( (n) != 0 ){ \
|
||||
fprintf(stderr,"Error %d Line %d\n",n,__LINE__); \
|
||||
@ -34,6 +47,8 @@ typedef struct {
|
||||
} hash2_t;
|
||||
|
||||
|
||||
static tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order);
|
||||
|
||||
/* compute the number of leaves of any subtree starting from a node of depth depth */
|
||||
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology)
|
||||
{
|
||||
@ -45,15 +60,11 @@ int compute_nb_leaves_from_level(int depth,tm_topology_t *topology)
|
||||
return res;
|
||||
}
|
||||
|
||||
void tm_finalize(){
|
||||
void tm_finalize(void){
|
||||
terminate_thread_pool();
|
||||
tm_mem_check();
|
||||
}
|
||||
|
||||
int nb_processing_units(tm_topology_t *topology)
|
||||
{
|
||||
return topology->nb_proc_units;
|
||||
}
|
||||
|
||||
|
||||
void print_1D_tab(int *tab,int N)
|
||||
@ -89,14 +100,15 @@ int nb_lines(char *filename)
|
||||
return N;
|
||||
}
|
||||
|
||||
void init_mat(char *filename,int N, double **mat, double *sum_row)
|
||||
{
|
||||
|
||||
|
||||
long int init_mat(char *filename,int N, double **mat, double *sum_row){
|
||||
FILE *pf = NULL;
|
||||
char *ptr= NULL;
|
||||
char line[LINE_SIZE];
|
||||
int i,j;
|
||||
unsigned int vl = tm_get_verbose_level();
|
||||
|
||||
long int nnz = 0;
|
||||
|
||||
if(!(pf=fopen(filename,"r"))){
|
||||
if(vl >= CRITICAL)
|
||||
@ -107,7 +119,6 @@ void init_mat(char *filename,int N, double **mat, double *sum_row)
|
||||
j = -1;
|
||||
i = 0;
|
||||
|
||||
|
||||
while(fgets(line,LINE_SIZE,pf)){
|
||||
char *l = line;
|
||||
j = 0;
|
||||
@ -116,6 +127,7 @@ void init_mat(char *filename,int N, double **mat, double *sum_row)
|
||||
l = NULL;
|
||||
if((ptr[0]!='\n')&&(!isspace(ptr[0]))&&(*ptr)){
|
||||
mat[i][j] = atof(ptr);
|
||||
if(mat[i][j]) nnz++;
|
||||
sum_row[i] += mat [i][j];
|
||||
if(mat[i][j]<0){
|
||||
if(vl >= WARNING)
|
||||
@ -140,15 +152,124 @@ void init_mat(char *filename,int N, double **mat, double *sum_row)
|
||||
}
|
||||
|
||||
fclose (pf);
|
||||
return nnz;
|
||||
}
|
||||
|
||||
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order){
|
||||
|
||||
/* Return the size in bytes of the file named filename, or 0 when stat()
   fails (the stat buffer is not meaningful in that case). */
size_t get_filesize(char* filename) {
  struct stat st;

  if(stat(filename, &st) != 0)
    return 0;
  return (size_t)st.st_size;
}
|
||||
|
||||
|
||||
char *parse_line(int i, double **mat, double *sum_row, int N, char *data, char *filename, long int *nnz){
|
||||
/* now parse the buffer byte per byte for the current line i until we reach '\n'*/
|
||||
unsigned int vl = tm_get_verbose_level();
|
||||
long val;
|
||||
sum_row[i] = 0;
|
||||
int j = 0;
|
||||
while(*data != '\n'){
|
||||
while(*data ==' ' || *data == '\t')
|
||||
data++;
|
||||
if(*data != '\n'){
|
||||
val = 0;
|
||||
while(*data !=' ' && *data != '\t' && *data != '\n'){
|
||||
val = val*10 + *data-'0';
|
||||
data++;
|
||||
}
|
||||
mat[i][j] = val;
|
||||
/* printf("mat[%d][%d] = %ld\n",i,j, val); */
|
||||
if (val){
|
||||
(*nnz)++;
|
||||
sum_row[i] += val;
|
||||
}
|
||||
j++;
|
||||
}
|
||||
}
|
||||
if( j != N){
|
||||
if(vl >= CRITICAL)
|
||||
fprintf(stderr,"Error at %d %d (%d!=%d). Wrong number of columns line %d for file %s\n",i ,j ,j ,N ,i+1, filename);
|
||||
exit(-1);
|
||||
}
|
||||
data++;
|
||||
return data;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* buffered read with mmap of teh file */
|
||||
long int init_mat_mmap(char *filename,int N, double **mat, double *sum_row){
|
||||
int i;
|
||||
unsigned int vl = tm_get_verbose_level();
|
||||
size_t filesize = get_filesize(filename);
|
||||
int fd = open(filename, O_RDONLY, 0);
|
||||
long int nnz = 0;
|
||||
|
||||
if(fd == -1){
|
||||
if(vl >= CRITICAL)
|
||||
fprintf(stderr,"Cannot open %s\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
char* data = (char*) mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);
|
||||
|
||||
if(data == MAP_FAILED){
|
||||
if(vl >= CRITICAL)
|
||||
fprintf(stderr,"Cannot mmap %s\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while(i<N){
|
||||
data = parse_line(i, mat, sum_row, N, data, filename, &nnz);
|
||||
i++;
|
||||
}
|
||||
|
||||
munmap(data, filesize);
|
||||
/* fprintf(stderr,"DONE!\n"); */
|
||||
close (fd);
|
||||
return nnz;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
long int init_mat_long(char *filename,int N, double **mat, double *sum_row){
|
||||
int i;
|
||||
unsigned int vl = tm_get_verbose_level();
|
||||
char line[LINE_SIZE];
|
||||
FILE *pf;
|
||||
long int nnz = 0;
|
||||
|
||||
if(!(pf=fopen(filename,"r"))){
|
||||
if(vl >= CRITICAL)
|
||||
fprintf(stderr,"Cannot open %s\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while(i<N){
|
||||
fgets(line,LINE_SIZE,pf);
|
||||
parse_line(i, mat, sum_row, N, line, filename, &nnz);
|
||||
i++;
|
||||
}
|
||||
|
||||
|
||||
/* fprintf(stderr,"DONE!\n"); */
|
||||
fclose (pf);
|
||||
return nnz;
|
||||
}
|
||||
|
||||
|
||||
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order, long int nnz){
|
||||
tm_affinity_mat_t * aff_mat;
|
||||
|
||||
aff_mat = (tm_affinity_mat_t *) MALLOC(sizeof(tm_affinity_mat_t));
|
||||
aff_mat -> mat = mat;
|
||||
aff_mat -> sum_row = sum_row;
|
||||
aff_mat -> order = order;
|
||||
aff_mat -> nnz = nnz;
|
||||
|
||||
return aff_mat;
|
||||
}
|
||||
@ -157,15 +278,20 @@ tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order){
|
||||
tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order){
|
||||
double *sum_row = NULL;
|
||||
int i,j;
|
||||
long int nnz = 0;
|
||||
sum_row = (double*)MALLOC(order*sizeof(double));
|
||||
|
||||
for( i = 0 ; i < order ; i++){
|
||||
sum_row[i] = 0;
|
||||
for(j = 0 ; j < order ; j++)
|
||||
sum_row[i] += mat [i][j];
|
||||
for(j = 0 ; j < order ; j++){
|
||||
if(mat[i][j]){
|
||||
nnz++;
|
||||
sum_row[i] += mat [i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new_affinity_mat(mat, sum_row, order);
|
||||
return new_affinity_mat(mat, sum_row, order, nnz);
|
||||
}
|
||||
|
||||
|
||||
@ -190,7 +316,8 @@ tm_affinity_mat_t *tm_load_aff_mat(char *filename)
|
||||
double **mat = NULL;
|
||||
double *sum_row = NULL;
|
||||
int i, order;
|
||||
|
||||
long int nnz;
|
||||
|
||||
if(tm_get_verbose_level() >= INFO)
|
||||
printf("Reading matrix file: %s\n",filename);
|
||||
|
||||
@ -201,13 +328,34 @@ tm_affinity_mat_t *tm_load_aff_mat(char *filename)
|
||||
for( i = 0 ; i < order ; i++)
|
||||
/* the last column stores the sum of the line*/
|
||||
mat[i] = (double*)MALLOC((order)*sizeof(double));
|
||||
init_mat(filename,order, mat, sum_row);
|
||||
/* on my mac parsing large file is better done with fopen than mmap */
|
||||
#ifdef __MACH__
|
||||
if (get_filesize(filename) > 1024*1024*1014) {
|
||||
nnz = init_mat_long(filename,order, mat, sum_row);
|
||||
if(tm_get_verbose_level() >= DEBUG)
|
||||
printf("New parser\n");
|
||||
}else{
|
||||
nnz = init_mat_mmap(filename,order, mat, sum_row);
|
||||
if(tm_get_verbose_level() >= DEBUG)
|
||||
printf("MMap parser\n");
|
||||
}
|
||||
#else
|
||||
nnz = init_mat_mmap(filename,order, mat, sum_row);
|
||||
if(tm_get_verbose_level() >= DEBUG)
|
||||
printf("MMap parser\n");
|
||||
#endif
|
||||
|
||||
/* TIC; */
|
||||
/* init_mat(filename,order, mat, sum_row); */
|
||||
/* double duration_fl = TOC; */
|
||||
/* printf("Old parser = %.3f\n",duration_fl); */
|
||||
|
||||
|
||||
if(tm_get_verbose_level() >= INFO)
|
||||
|
||||
if(tm_get_verbose_level() >= INFO)
|
||||
printf("Affinity matrix built from %s!\n",filename);
|
||||
|
||||
return new_affinity_mat(mat, sum_row, order);
|
||||
return new_affinity_mat(mat, sum_row, order, nnz);
|
||||
|
||||
|
||||
}
|
||||
@ -261,7 +409,7 @@ int nb_leaves(tm_tree_t *comm_tree)
|
||||
}
|
||||
|
||||
/* find the first -1 in the array of size n and put the value there */
|
||||
static void set_val(int *tab, int val, int n){
|
||||
void set_val(int *tab, int val, int n){
|
||||
int i = 0;
|
||||
|
||||
while (i < n ){
|
||||
@ -300,7 +448,7 @@ void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level,
|
||||
|
||||
unsigned int vl = tm_get_verbose_level();
|
||||
M = nb_leaves(comm_tree);
|
||||
nodes_id = topology->node_id[level];
|
||||
nodes_id = topology->node_id;
|
||||
N = topology->nb_nodes[level];
|
||||
|
||||
if(vl >= INFO){
|
||||
|
@ -5,13 +5,13 @@
|
||||
#include "tm_timings.h"
|
||||
#include "tm_verbose.h"
|
||||
|
||||
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order);
|
||||
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order, long int nnz);
|
||||
void build_synthetic_proc_id(tm_topology_t *topology);
|
||||
tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_nodes);
|
||||
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology);
|
||||
void depth_first(tm_tree_t *comm_tree, int *proc_list,int *i);
|
||||
int fill_tab(int **new_tab,int *tab, int n, int start, int max_val, int shift);
|
||||
void init_mat(char *filename,int N, double **mat, double *sum_row);
|
||||
long int init_mat(char *filename,int N, double **mat, double *sum_row);
|
||||
void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level,
|
||||
int *sigma, int nb_processes, int **k, int nb_compute_units);
|
||||
int nb_leaves(tm_tree_t *comm_tree);
|
||||
@ -19,7 +19,9 @@ int nb_lines(char *filename);
|
||||
int nb_processing_units(tm_topology_t *topology);
|
||||
void print_1D_tab(int *tab,int N);
|
||||
tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree);
|
||||
void tm_finalize(void);
|
||||
void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
|
||||
/* load affinity matrix */
|
||||
tm_affinity_mat_t *tm_load_aff_mat(char *filename);
|
||||
void update_comm_speed(double **comm_speed,int old_size,int new_size);
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
#include <float.h>
|
||||
#include "tm_solution.h"
|
||||
#include "tm_mt.h"
|
||||
#include "tm_mapping.h"
|
||||
#include "tm_topology.h"
|
||||
|
||||
typedef struct {
|
||||
int val;
|
||||
@ -10,6 +10,27 @@ typedef struct {
|
||||
} hash_t;
|
||||
|
||||
|
||||
|
||||
void tm_free_solution(tm_solution_t *sol);
|
||||
int distance(tm_topology_t *topology,int i, int j);
|
||||
double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma);
|
||||
double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric);
|
||||
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol,
|
||||
tm_metric_t metric);
|
||||
void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric);
|
||||
int in_tab(int *tab, int n, int val);
|
||||
void map_Packed(tm_topology_t *topology, int N, int *sigma);
|
||||
void map_RR(tm_topology_t * topology, int N, int *sigma);
|
||||
int hash_asc(const void* x1,const void* x2);
|
||||
int *generate_random_sol(tm_topology_t *topology,int N, int seed);
|
||||
double eval_sol(int *sol,int N,double **comm, double **arch);
|
||||
void exchange(int *sol,int i,int j);
|
||||
double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch);
|
||||
void select_max(int *l,int *m,double **gain,int N,int *state);
|
||||
void compute_gain(int *sol,int N,double **gain,double **comm, double **arch);
|
||||
void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch);
|
||||
|
||||
|
||||
void tm_free_solution(tm_solution_t *sol){
|
||||
int i,n;
|
||||
|
||||
@ -41,8 +62,8 @@ int distance(tm_topology_t *topology,int i, int j)
|
||||
int vl = tm_get_verbose_level();
|
||||
int depth = topology->nb_levels-1;
|
||||
|
||||
f_i = topology->node_rank[depth][i];
|
||||
f_j = topology->node_rank[depth][j];
|
||||
f_i = topology->node_rank[i];
|
||||
f_j = topology->node_rank[j];
|
||||
|
||||
if(vl >= DEBUG)
|
||||
printf("i=%d, j=%d Level = %d f=(%d,%d)\n",i ,j, level, f_i, f_j);
|
||||
@ -58,7 +79,7 @@ int distance(tm_topology_t *topology,int i, int j)
|
||||
} while((f_i!=f_j) && (level < depth));
|
||||
|
||||
if(vl >= DEBUG)
|
||||
printf("distance(%d,%d):%d\n",topology->node_rank[depth][i], topology->node_rank[depth][j], level);
|
||||
printf("distance(%d,%d):%d\n",topology->node_rank[i], topology->node_rank[j], level);
|
||||
/* exit(-1); */
|
||||
return level;
|
||||
}
|
||||
@ -85,7 +106,7 @@ double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat,
|
||||
a = cost[depth-distance(topology,sigma[i],sigma[j])];
|
||||
if(tm_get_verbose_level() >= DEBUG)
|
||||
printf("T_%d_%d %f*%f=%f\n",i,j,c,a,c*a);
|
||||
sol += c*a;
|
||||
sol += c*a;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
@ -99,7 +120,7 @@ double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat,
|
||||
}
|
||||
|
||||
|
||||
static double display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
|
||||
double display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
|
||||
{
|
||||
double a,c,sol;
|
||||
int i,j;
|
||||
@ -135,7 +156,7 @@ static double display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *af
|
||||
return sol;
|
||||
}
|
||||
|
||||
static double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
|
||||
double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
|
||||
{
|
||||
double c,sol;
|
||||
int nb_hops;
|
||||
@ -150,7 +171,7 @@ static double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *a
|
||||
nb_hops = 2*distance(topology,sigma[i],sigma[j]);
|
||||
if(tm_get_verbose_level() >= DEBUG)
|
||||
printf("T_%d_%d %f*%d=%f\n",i,j,c,nb_hops,c*nb_hops);
|
||||
sol += c*nb_hops;
|
||||
sol += c*nb_hops;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
@ -257,7 +278,7 @@ int in_tab(int *tab, int n, int val){
|
||||
if(tab[i] == val)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void map_Packed(tm_topology_t *topology, int N, int *sigma)
|
||||
@ -270,10 +291,10 @@ void map_Packed(tm_topology_t *topology, int N, int *sigma)
|
||||
|
||||
for( i = 0 ; i < topology->nb_nodes[depth] ; i++){
|
||||
/* printf ("%d -> %d\n",objs[i]->os_index,i); */
|
||||
if((!topology->constraints) || (in_tab(topology->constraints, topology->nb_constraints, topology->node_id[depth][i]))){
|
||||
if((!topology->constraints) || (in_tab(topology->constraints, topology->nb_constraints, topology->node_id[i]))){
|
||||
if(vl >= DEBUG)
|
||||
printf ("%lu: %d -> %d\n", i, j, topology->node_id[depth][i]);
|
||||
sigma[j++]=topology->node_id[depth][i];
|
||||
printf ("%lu: %d -> %d\n", i, j, topology->node_id[i]);
|
||||
sigma[j++]=topology->node_id[i];
|
||||
if(j == N)
|
||||
break;
|
||||
}
|
||||
@ -306,14 +327,14 @@ int hash_asc(const void* x1,const void* x2)
|
||||
}
|
||||
|
||||
|
||||
int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed)
|
||||
int *generate_random_sol(tm_topology_t *topology,int N, int seed)
|
||||
{
|
||||
hash_t *hash_tab = NULL;
|
||||
int *sol = NULL;
|
||||
int *nodes_id= NULL;
|
||||
int i;
|
||||
|
||||
nodes_id = topology->node_id[level];
|
||||
nodes_id = topology->node_id;
|
||||
|
||||
hash_tab = (hash_t*)MALLOC(sizeof(hash_t)*N);
|
||||
sol = (int*)MALLOC(sizeof(int)*N);
|
||||
@ -428,7 +449,7 @@ void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **com
|
||||
state = (int*)MALLOC(sizeof(int)*N);
|
||||
temp = (double*)MALLOC(sizeof(double)*N);
|
||||
|
||||
sol = generate_random_sol(topology,N,topology->nb_levels-1,seed++);
|
||||
sol = generate_random_sol(topology, N, seed++);
|
||||
for( i = 0 ; i < N ; i++)
|
||||
sigma[i] = sol[i];
|
||||
|
||||
@ -488,7 +509,7 @@ void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **com
|
||||
}
|
||||
}while( max > 0 );
|
||||
FREE(sol);
|
||||
sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++);
|
||||
sol=generate_random_sol(topology, N, seed++);
|
||||
}
|
||||
|
||||
|
||||
|
@ -14,7 +14,7 @@ int in_tab(int *tab, int n, int val);
|
||||
void map_Packed(tm_topology_t *topology, int N, int *sigma);
|
||||
void map_RR(tm_topology_t *topology, int N, int *sigma);
|
||||
int hash_asc(const void* x1,const void* x2);
|
||||
int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed);
|
||||
int *generate_random_sol(tm_topology_t *topology,int N, int seed);
|
||||
double eval_sol(int *sol,int N,double **comm, double **arch);
|
||||
void exchange(int *sol,int i,int j);
|
||||
double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch);
|
||||
|
@ -23,7 +23,6 @@ static thread_pool_t *create_threads(void);
|
||||
|
||||
static void f1 (int nb_args, void **args, int thread_id);
|
||||
static void f2 (int nb_args, void **args, int thread_id);
|
||||
static void destroy_work(work_t *work);
|
||||
|
||||
#define MIN(a, b) ((a)<(b)?(a):(b))
|
||||
#define MAX(a, b) ((a)>(b)?(a):(b))
|
||||
|
@ -41,6 +41,7 @@ void wait_work_completion(work_t *work);
|
||||
void terminate_thread_pool(void);
|
||||
work_t *create_work(int nb_args, void **args, void (int, void **, int));
|
||||
int test_main(void);
|
||||
void destroy_work(work_t *work);
|
||||
|
||||
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include "tm_solution.h"
|
||||
|
||||
|
||||
tm_topology_t* get_local_topo_with_hwloc(void);
|
||||
tm_topology_t* tm_get_local_topo_with_hwloc(void);
|
||||
tm_topology_t* hwloc_to_tm(char *filename);
|
||||
int int_cmp_inc(const void* x1,const void* x2);
|
||||
void optimize_arity(int **arity, double **cost, int *nb_levels,int n);
|
||||
@ -27,11 +27,25 @@ void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_node
|
||||
double ** topology_to_arch(hwloc_topology_t topology);
|
||||
void build_synthetic_proc_id(tm_topology_t *topology);
|
||||
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
|
||||
void tm_set_numbering(tm_numbering_t new_val); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
|
||||
tm_numbering_t tm_get_numbering(); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
|
||||
|
||||
|
||||
#define LINE_SIZE (1000000)
|
||||
|
||||
|
||||
static tm_numbering_t numbering = TM_NUMBERING_LOGICAL;
|
||||
|
||||
/* Set the file-scope `numbering` mode to new_val
   (the prototype comment lists TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL). */
void tm_set_numbering(tm_numbering_t new_val){
|
||||
numbering = new_val;
|
||||
}
|
||||
|
||||
tm_numbering_t tm_get_numbering(){
|
||||
return numbering;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* transform a tgt scotch file into a topology file*/
|
||||
tm_topology_t * tgt_to_tm(char *filename)
|
||||
{
|
||||
@ -101,6 +115,13 @@ tm_topology_t * tgt_to_tm(char *filename)
|
||||
return topology;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Return the nb_proc_units field of the given topology.
   The pointer is dereferenced without a NULL check. */
int nb_processing_units(tm_topology_t *topology)
|
||||
{
|
||||
return topology->nb_proc_units;
|
||||
}
|
||||
|
||||
int topo_nb_proc(hwloc_topology_t topology,int N)
|
||||
{
|
||||
hwloc_obj_t *objs = NULL;
|
||||
@ -115,7 +136,7 @@ int topo_nb_proc(hwloc_topology_t topology,int N)
|
||||
|
||||
|
||||
|
||||
static double link_cost(int depth)
|
||||
double link_cost(int depth)
|
||||
{
|
||||
/*
|
||||
Bertha values
|
||||
@ -184,6 +205,46 @@ int symetric(hwloc_topology_t topology)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Fill topology->node_id and topology->node_rank for the
 * topology->nb_proc_units processing units described by objs, following the
 * file-scope `numbering` mode.
 *
 *   topology : node_id and node_rank are indexed from 0 to nb_proc_units-1
 *              and must already be allocated by the caller.
 *   objs     : hwloc objects of the processing units (nb_proc_units entries).
 *   filename : name printed in the duplicated-index error message.
 *
 * TM_NUMBERING_LOGICAL  : node_id[i] = node_rank[i] = i.
 * TM_NUMBERING_PHYSICAL : node_id[i] = os_index of PU i and
 *                         node_rank[os_index] = i.
 *
 * Calls exit(-1) when a physical index is out of range or duplicated, or
 * when the numbering mode is unknown.
 */
void build_process_tab_id(tm_topology_t *topology,  hwloc_obj_t *objs, char* filename){
  unsigned int i,j;
  unsigned int nb_nodes = topology->nb_proc_units;
  int vl = tm_get_verbose_level();

  /* Build process id tab */
  if(numbering == TM_NUMBERING_LOGICAL){
    for (i = 0; i < nb_nodes; i++){
      topology->node_id[i] = i;
      topology->node_rank[i] = i;
    }
  }else if(numbering == TM_NUMBERING_PHYSICAL){
    for (i = 0; i < nb_nodes; i++){
      /* os_index is used below as an index into node_rank, which is indexed
         from 0 to nb_nodes-1: an index equal to nb_nodes is already out of
         bounds, hence >= and not >. */
      if(objs[i]->os_index >= nb_nodes){
        if(vl >= CRITICAL){
          fprintf(stderr, "Cannot use forced physical numbering!\n\tIndex of PU %u is %u and larger than number of nodes : %u\n",
                  i, objs[i]->os_index, nb_nodes);
        }
        exit(-1);
      }
      /* A physical index may be used only once, otherwise node_rank would
         silently be overwritten. */
      for(j = 0; j < i; j++){
        if((unsigned int)topology->node_id[j] == objs[i]->os_index){
          if(vl >= CRITICAL){
            fprintf(stderr, "Cannot use forced physical numbering!\n\tDuplicated physical number of some PUs in %s.\n\tPU %u and PU %u have the same physical number: (os_index[%u] = %u) == (os_index[%u] = %u)\n", filename, j, i, j, objs[j]->os_index, i, objs[i]->os_index);
          }
          exit(-1);
        }
      }
      topology->node_id[i] = objs[i]->os_index;
      topology->node_rank[objs[i]->os_index] = i;
    }
  }else{
    if(vl >= CRITICAL){
      fprintf(stderr, "Unknown numbering %d\n", (int)numbering);
    }
    exit(-1);
  }
}
|
||||
|
||||
|
||||
tm_topology_t* hwloc_to_tm(char *filename)
|
||||
{
|
||||
hwloc_topology_t topology;
|
||||
@ -193,43 +254,46 @@ tm_topology_t* hwloc_to_tm(char *filename)
|
||||
unsigned int nb_nodes;
|
||||
double *cost;
|
||||
int err, l;
|
||||
unsigned int i;
|
||||
int vl = tm_get_verbose_level();
|
||||
|
||||
/* Build the topology */
|
||||
hwloc_topology_init(&topology);
|
||||
err = hwloc_topology_set_xml(topology,filename);
|
||||
err = hwloc_topology_set_xml(topology, filename);
|
||||
if(err == -1){
|
||||
if(vl >= CRITICAL)
|
||||
fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
#if HWLOC_API_VERSION >= 0x00020000
|
||||
hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
|
||||
#else /* HWLOC_API_VERSION >= 0x00020000 */
|
||||
#if HWLOC_API_VERSION < 0x20000
|
||||
hwloc_topology_ignore_all_keep_structure(topology);
|
||||
#endif /* HWLOC_API_VERSION >= 0x00020000 */
|
||||
hwloc_topology_load(topology);
|
||||
#else
|
||||
hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
|
||||
#endif
|
||||
|
||||
err = hwloc_topology_load(topology);
|
||||
if(err == -1){
|
||||
if(vl >= CRITICAL)
|
||||
fprintf(stderr,"Error: the content of the xml topology file %s is not compatible with the version installed on this machine.\nPlease use compatible versions to generate the file and to use it!\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
|
||||
/* Test if symetric */
|
||||
if(!symetric(topology)){
|
||||
if(tm_get_verbose_level() >= CRITICAL)
|
||||
if(vl >= CRITICAL)
|
||||
fprintf(stderr,"%s not symetric!\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* work on depth */
|
||||
topodepth = hwloc_topology_get_depth(topology);
|
||||
|
||||
|
||||
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
|
||||
res->oversub_fact = 1;
|
||||
res->nb_constraints = 0;
|
||||
res->constraints = NULL;
|
||||
res->nb_levels = topodepth;
|
||||
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
|
||||
res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels);
|
||||
res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
|
||||
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
|
||||
|
||||
@ -240,35 +304,24 @@ tm_topology_t* hwloc_to_tm(char *filename)
|
||||
for( depth = 0 ; depth < topodepth ; depth++ ){
|
||||
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
|
||||
res->nb_nodes[depth] = nb_nodes;
|
||||
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
|
||||
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
|
||||
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
|
||||
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
|
||||
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
|
||||
objs[0] = hwloc_get_next_obj_by_depth(topology, depth, NULL);
|
||||
hwloc_get_closest_objs(topology, objs[0], objs+1, nb_nodes-1);
|
||||
res->arity[depth] = objs[0]->arity;
|
||||
|
||||
if (depth == topodepth -1){
|
||||
res->nb_constraints = nb_nodes;
|
||||
res->nb_proc_units = nb_nodes;
|
||||
}
|
||||
|
||||
|
||||
if(vl >= DEBUG)
|
||||
printf("\n--%d(%d) **%d**:--\n",res->arity[depth],nb_nodes,res->arity[0]);
|
||||
|
||||
/* Build process id tab */
|
||||
for (i = 0; i < nb_nodes; i++){
|
||||
if(objs[i]->os_index > nb_nodes){
|
||||
if(vl >= CRITICAL){
|
||||
fprintf(stderr, "Index of object %d of level %d is %d and larger than number of nodes : %d\n",
|
||||
i, depth, objs[i]->os_index, nb_nodes);
|
||||
}
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
res->node_id[depth][i] = objs[i]->os_index;
|
||||
res->node_rank[depth][objs[i]->os_index] = i;
|
||||
/* if(depth==topodepth-1) */
|
||||
|
||||
if (depth == topodepth -1){
|
||||
res->nb_constraints = nb_nodes;
|
||||
res->nb_proc_units = nb_nodes;
|
||||
res->node_id = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
res->node_rank = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
|
||||
build_process_tab_id(res, objs, filename);
|
||||
|
||||
}
|
||||
FREE(objs);
|
||||
|
||||
@ -292,21 +345,23 @@ tm_topology_t* hwloc_to_tm(char *filename)
|
||||
return res;
|
||||
}
|
||||
|
||||
tm_topology_t* get_local_topo_with_hwloc(void)
|
||||
tm_topology_t* tm_get_local_topology_with_hwloc(void)
|
||||
{
|
||||
hwloc_topology_t topology;
|
||||
tm_topology_t *res = NULL;
|
||||
hwloc_obj_t *objs = NULL;
|
||||
unsigned topodepth,depth;
|
||||
int nb_nodes,i;
|
||||
int nb_nodes;
|
||||
|
||||
/* Build the topology */
|
||||
hwloc_topology_init(&topology);
|
||||
#if HWLOC_API_VERSION >= 0x00020000
|
||||
hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
|
||||
#else /* HWLOC_API_VERSION >= 0x00020000 */
|
||||
|
||||
#if HWLOC_API_VERSION < 0x20000
|
||||
hwloc_topology_ignore_all_keep_structure(topology);
|
||||
#endif /* HWLOC_API_VERSION >= 0x00020000 */
|
||||
#else
|
||||
hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
|
||||
#endif
|
||||
|
||||
hwloc_topology_load(topology);
|
||||
|
||||
/* Test if symetric */
|
||||
@ -323,17 +378,15 @@ tm_topology_t* get_local_topo_with_hwloc(void)
|
||||
res->nb_constraints = 0;
|
||||
res->constraints = NULL;
|
||||
res->nb_levels = topodepth;
|
||||
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
|
||||
res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels);
|
||||
res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
|
||||
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
|
||||
res->oversub_fact = 1; //defaut
|
||||
res->cost = NULL;
|
||||
|
||||
/* Build TreeMatch topology */
|
||||
for( depth = 0 ; depth < topodepth ; depth++ ){
|
||||
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
|
||||
res->nb_nodes[depth] = nb_nodes;
|
||||
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
|
||||
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
|
||||
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
|
||||
@ -342,15 +395,14 @@ tm_topology_t* get_local_topo_with_hwloc(void)
|
||||
|
||||
if (depth == topodepth -1){
|
||||
res->nb_constraints = nb_nodes;
|
||||
res->nb_proc_units = nb_nodes;
|
||||
}
|
||||
res->nb_proc_units = nb_nodes;
|
||||
res->node_id = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
res->node_rank = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
/* printf("%d:",res->arity[depth]); */
|
||||
|
||||
/* Build process id tab */
|
||||
for (i = 0; i < nb_nodes; i++){
|
||||
res->node_id[depth][i] = objs[i]->os_index;
|
||||
res->node_rank[depth][objs[i]->os_index] = i;
|
||||
/* if(depth==topodepth-1) */
|
||||
/* Build process id tab */
|
||||
|
||||
build_process_tab_id(res, objs, "Local node topology");
|
||||
}
|
||||
FREE(objs);
|
||||
}
|
||||
@ -367,15 +419,9 @@ tm_topology_t* get_local_topo_with_hwloc(void)
|
||||
|
||||
void tm_free_topology(tm_topology_t *topology)
|
||||
{
|
||||
int i;
|
||||
for( i = 0 ; i < topology->nb_levels ; i++ ){
|
||||
FREE(topology->node_id[i]);
|
||||
FREE(topology->node_rank[i]);
|
||||
}
|
||||
|
||||
FREE(topology->constraints);
|
||||
FREE(topology->node_id);
|
||||
FREE(topology->node_rank);
|
||||
FREE(topology->constraints);
|
||||
FREE(topology->nb_nodes);
|
||||
FREE(topology->arity);
|
||||
FREE(topology->cost);
|
||||
@ -400,18 +446,15 @@ tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_ty
|
||||
void tm_display_topology(tm_topology_t *topology)
|
||||
{
|
||||
int i;
|
||||
unsigned int j;
|
||||
unsigned long id;
|
||||
for( i = 0 ; i < topology->nb_levels ; i++ ){
|
||||
printf("%d: ",i);
|
||||
for( j = 0 ; j < topology->nb_nodes[i] ; j++)
|
||||
printf("%d ",topology->node_id[i][j]);
|
||||
printf("Level %d with arity %d ", i, topology->arity[i]);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("Last level: ");
|
||||
for(id = 0; id < topology->nb_nodes[topology->nb_levels-1]/topology->oversub_fact; id++)
|
||||
printf("%d ",topology->node_rank[topology->nb_levels-1][id]);
|
||||
printf("%d ",topology->node_rank[id]);
|
||||
printf("\n");
|
||||
|
||||
|
||||
@ -430,9 +473,13 @@ void tm_display_topology(tm_topology_t *topology)
|
||||
|
||||
/*
 * Print on one line, for every level of the topology, its arity followed by
 * its cost in parentheses when a cost array is attached, each entry being
 * terminated by ": ".
 *
 * topology->cost may be NULL, so it is only dereferenced when it is set;
 * in that case only the arity is printed.
 */
void tm_display_arity(tm_topology_t *topology){
  int depth;

  for(depth=0; depth < topology->nb_levels; depth++){
    if(topology->cost)
      printf("%d(%lf): ",topology->arity[depth], topology->cost[depth]);
    else
      printf("%d: ",topology->arity[depth]);
  }
  printf("\n");
}
|
||||
|
||||
@ -442,12 +489,12 @@ int int_cmp_inc(const void* x1,const void* x2)
|
||||
}
|
||||
|
||||
|
||||
static int topo_check_constraints(tm_topology_t *topology){
|
||||
int topo_check_constraints(tm_topology_t *topology){
|
||||
int n = topology->nb_constraints;
|
||||
int i;
|
||||
int depth = topology->nb_levels-1;
|
||||
for (i=0;i<n;i++){
|
||||
if(!in_tab(topology->node_id[depth], topology->nb_nodes[depth], topology->constraints[i])){
|
||||
if(!in_tab(topology->node_id, topology->nb_nodes[depth], topology->constraints[i])){
|
||||
if(tm_get_verbose_level() >= CRITICAL){
|
||||
fprintf(stderr,"Error! Incompatible constraint with the topology: rank %d in the constraints is not a valid id of any nodes of the topology.\n",topology->constraints[i]);
|
||||
}
|
||||
@ -462,7 +509,7 @@ static int topo_check_constraints(tm_topology_t *topology){
|
||||
|
||||
/* cpy flag tells if we need to copy the array.
|
||||
Set to 1 when called from the application level and 0 when called from inside the library*/
|
||||
static int tm_topology_set_binding_constraints_cpy(int *constraints, int nb_constraints, tm_topology_t *topology, int cpy_flag){
|
||||
int tm_topology_set_binding_constraints_cpy(int *constraints, int nb_constraints, tm_topology_t *topology, int cpy_flag){
|
||||
|
||||
topology -> nb_constraints = nb_constraints;
|
||||
if(cpy_flag){
|
||||
@ -548,7 +595,7 @@ void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_node
|
||||
if(vl >= INFO)
|
||||
printf("nb_nodes=%d\n",*nb_nodes);
|
||||
*numbering = (int*)MALLOC(sizeof(int)*(*nb_nodes));
|
||||
memcpy(*numbering,topology->node_id[nb_levels-1],sizeof(int)*(*nb_nodes));
|
||||
memcpy(*numbering,topology->node_id,sizeof(int)*(*nb_nodes));
|
||||
}
|
||||
|
||||
void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels)
|
||||
@ -701,7 +748,7 @@ void tm_optimize_topology(tm_topology_t **topology){
|
||||
FREE(arity);
|
||||
FREE(numbering);
|
||||
tm_free_topology(*topology);
|
||||
|
||||
|
||||
*topology = new_topo;
|
||||
/* exit(-1); */
|
||||
|
||||
@ -738,8 +785,6 @@ tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_lev
|
||||
topology->constraints = NULL;
|
||||
topology->nb_levels = nb_levels;
|
||||
topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
|
||||
topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
|
||||
topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
|
||||
topology->nb_nodes = (size_t *)MALLOC(sizeof(size_t)*topology->nb_levels);
|
||||
if(cost)
|
||||
topology->cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
|
||||
@ -753,27 +798,17 @@ tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_lev
|
||||
n = 1;
|
||||
for( i = 0 ; i < topology->nb_levels ; i++ ){
|
||||
topology->nb_nodes[i] = n;
|
||||
topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
|
||||
topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
|
||||
if( i < topology->nb_levels-1){
|
||||
for( j = 0 ; j < n ; j++ ){
|
||||
topology->node_id[i][j] = j;
|
||||
topology->node_rank[i][j]=j;
|
||||
}
|
||||
}else{
|
||||
if (i == topology->nb_levels-1){
|
||||
topology->node_id = (int*)MALLOC(sizeof(int)*n);
|
||||
topology->node_rank = (int*)MALLOC(sizeof(int)*n);
|
||||
topology->nb_constraints = n;
|
||||
topology->nb_proc_units = n;
|
||||
for( j = 0 ; j < n ; j++ ){
|
||||
int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
|
||||
topology->node_id[i][j] = id;
|
||||
topology->node_rank[i][id] = j;
|
||||
topology->node_id[j] = id;
|
||||
topology->node_rank[id] = j;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (i == topology->nb_levels-1){
|
||||
topology->nb_constraints = n;
|
||||
topology->nb_proc_units = n;
|
||||
}
|
||||
|
||||
n *= topology->arity[i];
|
||||
}
|
||||
if(cost){
|
||||
@ -791,32 +826,30 @@ void build_synthetic_proc_id(tm_topology_t *topology)
|
||||
int i;
|
||||
size_t j,n = 1;
|
||||
|
||||
topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
|
||||
topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
|
||||
topology->nb_nodes = (size_t*) MALLOC(sizeof(size_t)*topology->nb_levels);
|
||||
|
||||
for( i = 0 ; i < topology->nb_levels ; i++ ){
|
||||
/* printf("n= %lld, arity := %d\n",n, topology->arity[i]); */
|
||||
topology->nb_nodes[i] = n;
|
||||
topology->node_id[i] = (int*)MALLOC(sizeof(long int)*n);
|
||||
topology->node_rank[i] = (int*)MALLOC(sizeof(long int)*n);
|
||||
if ( !topology->node_id[i] ){
|
||||
if(tm_get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Cannot allocate level %d (of size %ld) of the topology\n", i, (unsigned long int)n);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
|
||||
if (i == topology->nb_levels-1){
|
||||
topology->node_rank = (int*)MALLOC(sizeof(int)*n);
|
||||
topology->node_id = (int*)MALLOC(sizeof(int)*n);
|
||||
if ( !topology->node_id ){
|
||||
if(tm_get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Cannot allocate last level (of size %ld) of the topology\n", (unsigned long int)n);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
topology->nb_constraints = n;
|
||||
topology->nb_proc_units = n;
|
||||
|
||||
for( j = 0 ; j < n ; j++ ){
|
||||
topology->node_id[j] = j;
|
||||
topology->node_rank[j] = j;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
for( j = 0 ; j < n ; j++ ){
|
||||
topology->node_id[i][j] = j;
|
||||
topology->node_rank[i][j] = j;
|
||||
}
|
||||
n *= topology->arity[i];
|
||||
}
|
||||
|
||||
@ -827,6 +860,7 @@ void build_synthetic_proc_id(tm_topology_t *topology)
|
||||
void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact){
|
||||
{
|
||||
int i,j,n;
|
||||
int *node_id, *node_rank;
|
||||
|
||||
if(oversub_fact <=1)
|
||||
return;
|
||||
@ -834,8 +868,6 @@ void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fac
|
||||
topology -> nb_levels ++;
|
||||
topology -> arity = (int*) REALLOC(topology->arity, sizeof(int)*topology->nb_levels);
|
||||
topology -> cost = (double*) REALLOC(topology->cost, sizeof(double)*topology->nb_levels);
|
||||
topology -> node_id = (int**) REALLOC(topology->node_id, sizeof(int*)*topology->nb_levels);
|
||||
topology -> node_rank = (int**) REALLOC(topology->node_rank, sizeof(int*)*topology->nb_levels);
|
||||
topology -> nb_nodes = (size_t *)REALLOC(topology->nb_nodes, sizeof(size_t)*topology->nb_levels);
|
||||
topology -> oversub_fact = oversub_fact;
|
||||
|
||||
@ -843,15 +875,19 @@ void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fac
|
||||
n = topology->nb_nodes[i-1] * oversub_fact;
|
||||
topology->arity[i-1] = oversub_fact;
|
||||
topology->cost[i-1] = 0;
|
||||
topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
|
||||
topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
|
||||
node_id = (int*)MALLOC(sizeof(int)*n);
|
||||
node_rank = (int*)MALLOC(sizeof(int)*n);
|
||||
topology->nb_nodes[i] = n;
|
||||
|
||||
for( j = 0 ; j < n ; j++ ){
|
||||
int id = topology->node_id[i-1][j/oversub_fact];
|
||||
topology->node_id[i][j] = id;
|
||||
topology->node_rank[i][id] = j;
|
||||
int id = topology->node_id[j/oversub_fact];
|
||||
node_id[j] = id;
|
||||
node_rank[id] = j;
|
||||
}
|
||||
FREE(topology->node_id);
|
||||
FREE(topology->node_rank);
|
||||
topology->node_id = node_id;
|
||||
topology->node_rank = node_rank;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -19,4 +19,5 @@ void topology_constraints(tm_topology_t *topology,int **constraints,int *nb_cons
|
||||
void topology_cost(tm_topology_t *topology,double **cost);
|
||||
void topology_numbering(tm_topology_t *topology,int **numbering,int *nb_nodes);
|
||||
double ** topology_to_arch(hwloc_topology_t topology);
|
||||
int nb_processing_units(tm_topology_t *topology);
|
||||
|
||||
|
@ -5,7 +5,6 @@
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "treematch.h"
|
||||
#include "tm_tree.h"
|
||||
#include "tm_mapping.h"
|
||||
#include "tm_timings.h"
|
||||
@ -88,6 +87,17 @@ int int_cmp_inc(const void* x1, const void* x2);
|
||||
|
||||
|
||||
|
||||
/*
 * Compute the binomial coefficient C(n, k) in floating point as the product
 * of (n-i)/(k-i) for i in [0, k).
 *
 * Returns 1 when k <= 0 (the loop body is never executed).
 */
double choose (long n, long k)
{
  double res = 1;
  long i; /* same type as k, so the counter cannot overflow before reaching k */

  for( i = 0 ; i < k ; i++ ){
    res *= ((double)(n-i)/(double)(k-i));
  }
  return res;
}
|
||||
|
||||
|
||||
void tm_set_exhaustive_search_flag(int new_val){
|
||||
@ -105,8 +115,6 @@ void free_affinity_mat(tm_affinity_mat_t *aff_mat){
|
||||
FREE(aff_mat);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void free_list_child(tm_tree_t *tree)
|
||||
{
|
||||
int i;
|
||||
@ -116,13 +124,14 @@ void free_list_child(tm_tree_t *tree)
|
||||
free_list_child(tree->child[i]);
|
||||
|
||||
FREE(tree->child);
|
||||
if(tree->dumb)
|
||||
if(tree->dumb) /*in dumb subtrees internal nodes have been allocated individually, they need to bee freed one by one*/
|
||||
FREE(tree);
|
||||
}
|
||||
}
|
||||
void free_tab_child(tm_tree_t *tree)
|
||||
{
|
||||
if(tree){
|
||||
/* in a non-constraint tree, internal nodes are allocated in an array and stored in tab_child: they are freed globally here */
|
||||
free_tab_child(tree->tab_child);
|
||||
FREE(tree->tab_child);
|
||||
}
|
||||
@ -130,20 +139,26 @@ void free_tab_child(tm_tree_t *tree)
|
||||
|
||||
/*
 * Free a tree that was built without binding constraints, starting from its
 * root node.
 *
 * The root must not be a dumb node: in that case an error is reported
 * (when the verbosity level is at least CRITICAL) and the program exits
 * with -1.
 *
 * Order matters: free_list_child() walks the tree through the child
 * pointers, so it has to run before free_tab_child() releases the tab_child
 * arrays, and the root itself is released last.
 */
void free_non_constraint_tree(tm_tree_t *tree)
{
  if(tree->dumb){
    if(tm_get_verbose_level() >= CRITICAL){
      fprintf(stderr,"Error trying to free a dumb tree!\n. This should never be done like this: the root of a non-constraint tree cannot be a dumb one!\n");
    }
    exit(-1);
  }

  free_list_child(tree); /* free the tree->child arrays recursively and the nodes of dumb subtrees */
  free_tab_child(tree);  /* free the tree->tab_child arrays of the non dumb subtrees */
  FREE(tree);
}
|
||||
|
||||
void free_constraint_tree(tm_tree_t *tree)
|
||||
{
|
||||
int i;
|
||||
|
||||
if(tree){
|
||||
for(i=0;i<tree->arity;i++)
|
||||
free_constraint_tree(tree->child[i]);
|
||||
/* tab_child field is NULL for all nodes in the constraint tree*/
|
||||
FREE(tree->child);
|
||||
FREE(tree);
|
||||
}
|
||||
@ -155,20 +170,9 @@ void tm_free_tree(tm_tree_t *tree)
|
||||
if(tree->constraint)
|
||||
free_constraint_tree(tree);
|
||||
else
|
||||
free_non_constraint_tree(tree);
|
||||
free_non_constraint_tree(tree); /* tab_child field is NULL for all nodes in the tree*/
|
||||
}
|
||||
|
||||
/*
 * Compute the binomial coefficient C(n, k) in floating point as the product
 * of (n-i)/(k-i) for i in [0, k).
 *
 * Returns 1 when k <= 0 (the loop body is never executed).
 */
double choose (long n, long k)
{
  double res = 1;
  long i; /* same type as k, so the counter cannot overflow before reaching k */

  for( i = 0 ; i < k ; i++ ){
    res *= ((double)(n-i)/(double)(k-i));
  }
  return res;
}
|
||||
|
||||
void set_node(tm_tree_t *node, tm_tree_t ** child, int arity, tm_tree_t *parent,
|
||||
int id, double val, tm_tree_t *tab_child, int depth)
|
||||
@ -239,13 +243,14 @@ void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){
|
||||
int M = *(int*)args[4];
|
||||
double **mat = (double**)args[5];
|
||||
double *sum_row = (double*)args[6];
|
||||
long int *nnz = (long int *)args[7];
|
||||
int i, j, i1, j1;
|
||||
int id1, id2;
|
||||
|
||||
|
||||
if(nb_args != 7){
|
||||
if(nb_args != 8){
|
||||
if(verbose_level >= ERROR)
|
||||
fprintf(stderr, "Thread %d: Wrong number of args in %s: %d\n", thread_id, __func__, nb_args);
|
||||
fprintf(stderr, "Thread %d: Wrong number of args in %s: %d\n", thread_id, __FUNCTION__, nb_args);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
@ -262,6 +267,9 @@ void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){
|
||||
mat[i][j] += old_mat[id1][id2];
|
||||
/* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n", i, j, id1, id2, old_mat[id1][id2]);*/
|
||||
}
|
||||
}
|
||||
if(mat[i][j]){
|
||||
(*nnz)++;
|
||||
sum_row[i] += mat[i][j];
|
||||
}
|
||||
}
|
||||
@ -269,12 +277,13 @@ void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){
|
||||
}
|
||||
|
||||
|
||||
static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int M)
|
||||
tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int M)
|
||||
{
|
||||
int i, j, i1, j1, id1, id2;
|
||||
double **new_mat = NULL, **old_mat = aff_mat->mat;
|
||||
double *sum_row = NULL;
|
||||
|
||||
long int nnz = 0;
|
||||
|
||||
new_mat = (double**)MALLOC(M*sizeof(double*));
|
||||
for( i = 0 ; i < M ; i++ )
|
||||
new_mat[i] = (double*)CALLOC((M), sizeof(double));
|
||||
@ -287,16 +296,19 @@ static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat
|
||||
work_t **works;
|
||||
int *inf;
|
||||
int *sup;
|
||||
long int *nnz_tab;
|
||||
|
||||
nb_threads = MIN(M/512, get_nb_threads());
|
||||
works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
|
||||
inf = (int*)MALLOC(sizeof(int)*nb_threads);
|
||||
sup = (int*)MALLOC(sizeof(int)*nb_threads);
|
||||
nnz_tab = (long int*)MALLOC(sizeof(long int)*nb_threads);
|
||||
for(id=0;id<nb_threads;id++){
|
||||
void **args=(void**)MALLOC(sizeof(void*)*7);
|
||||
void **args=(void**)MALLOC(sizeof(void*)*8);
|
||||
inf[id]=id*M/nb_threads;
|
||||
sup[id]=(id+1)*M/nb_threads;
|
||||
if(id == nb_threads-1) sup[id]=M;
|
||||
nnz_tab[id] = 0;
|
||||
args[0]=(void*)(inf+id);
|
||||
args[1]=(void*)(sup+id);
|
||||
args[2]=(void*)old_mat;
|
||||
@ -304,8 +316,9 @@ static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat
|
||||
args[4]=&M;
|
||||
args[5]=(void*)new_mat;
|
||||
args[6]=(void*)sum_row;
|
||||
args[7]=(void*)(nnz_tab+id);
|
||||
|
||||
works[id]= create_work(7, args, partial_aggregate_aff_mat);
|
||||
works[id]= create_work(8, args, partial_aggregate_aff_mat);
|
||||
if(verbose_level >= DEBUG)
|
||||
printf("Executing %p\n", (void *)works[id]);
|
||||
|
||||
@ -315,13 +328,16 @@ static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat
|
||||
for(id=0;id<nb_threads;id++){
|
||||
wait_work_completion(works[id]);
|
||||
FREE(works[id]->args);
|
||||
nnz += nnz_tab[id];
|
||||
destroy_work(works[id]);
|
||||
}
|
||||
|
||||
|
||||
FREE(inf);
|
||||
FREE(sup);
|
||||
FREE(works);
|
||||
FREE(nnz_tab);
|
||||
|
||||
|
||||
}else{
|
||||
for( i = 0 ; i < M ; i++ )
|
||||
for( j = 0 ; j < M ; j++ ){
|
||||
@ -333,12 +349,16 @@ static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat
|
||||
new_mat[i][j] += old_mat[id1][id2];
|
||||
/* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n", i, j, id1, id2, old_mat[id1][id2]);*/
|
||||
}
|
||||
}
|
||||
if(new_mat[i][j]){
|
||||
nnz ++;
|
||||
sum_row[i] += new_mat[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return new_affinity_mat(new_mat, sum_row, M);
|
||||
|
||||
return new_affinity_mat(new_mat, sum_row, M, nnz);
|
||||
}
|
||||
|
||||
void free_tab_double(double**tab, int mat_order)
|
||||
@ -703,7 +723,7 @@ int select_independent_groups(group_list_t **tab_group, int n, int arity, int M
|
||||
}
|
||||
|
||||
|
||||
static int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int arity){
|
||||
int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int arity){
|
||||
int i, j, ii, jj;
|
||||
int8_t **indep_mat = (int8_t **)MALLOC(sizeof(int8_t*) *n);
|
||||
|
||||
@ -731,7 +751,7 @@ static int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int
|
||||
return indep_mat;
|
||||
}
|
||||
|
||||
static int independent_groups_mat(group_list_t **selection, int selection_size, group_list_t *elem, int8_t **indep_mat)
|
||||
int independent_groups_mat(group_list_t **selection, int selection_size, group_list_t *elem, int8_t **indep_mat)
|
||||
{
|
||||
int i;
|
||||
int id_elem = elem->id;
|
||||
@ -754,7 +774,7 @@ static int independent_groups_mat(group_list_t **selection, int selection_size,
|
||||
static long int y=0;
|
||||
|
||||
|
||||
static int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, int nb_groups, int arity, int depth, int solution_size,
|
||||
int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, int nb_groups, int arity, int depth, int solution_size,
|
||||
double val, double *best_val, group_list_t **selection, group_list_t **best_selection,
|
||||
int8_t **indep_mat, pthread_mutex_t *lock, int thread_id, int *tab_i, int start_depth){
|
||||
|
||||
@ -842,8 +862,8 @@ static int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, in
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static group_list_t * group_dup(group_list_t *group, int nb_groups){
|
||||
|
||||
group_list_t * group_dup(group_list_t *group, int nb_groups){
|
||||
group_list_t *elem = NULL;
|
||||
/* tm_tree_t **tab = NULL; */
|
||||
double *bound;
|
||||
@ -867,10 +887,8 @@ static group_list_t * group_dup(group_list_t *group, int nb_groups){
|
||||
return elem;
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
static group_list_t ** tab_group_dup(group_list_t **tab_group, int nb_groups){
|
||||
group_list_t ** tab_group_dup(group_list_t **tab_group, int nb_groups){
|
||||
group_list_t **res;
|
||||
int i;
|
||||
|
||||
@ -884,10 +902,8 @@ static group_list_t ** tab_group_dup(group_list_t **tab_group, int nb_groups){
|
||||
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
static int8_t **indep_mat_dup(int8_t** mat, int n){
|
||||
int8_t **indep_mat_dup(int8_t** mat, int n){
|
||||
int i;
|
||||
int8_t ** res = (int8_t**)MALLOC(sizeof(int8_t*)*n);
|
||||
int row_len;
|
||||
@ -900,9 +916,9 @@ static int8_t **indep_mat_dup(int8_t** mat, int n){
|
||||
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void partial_exhaustive_search(int nb_args, void **args, int thread_id){
|
||||
|
||||
void partial_exhaustive_search(int nb_args, void **args, int thread_id){
|
||||
int i, j;
|
||||
group_list_t **selection = NULL;
|
||||
double val;
|
||||
@ -918,7 +934,7 @@ static void partial_exhaustive_search(int nb_args, void **args, int thread_id){
|
||||
work_unit_t *work = (work_unit_t *) args[7];
|
||||
pthread_mutex_t *lock = (pthread_mutex_t *) args[8];
|
||||
int *tab_i;
|
||||
int id = 0, id1, id2;
|
||||
int id = -1, id1, id2;
|
||||
int total_work = work->nb_work;
|
||||
int cur_work = 0;
|
||||
|
||||
@ -926,7 +942,7 @@ static void partial_exhaustive_search(int nb_args, void **args, int thread_id){
|
||||
|
||||
if(nb_args!=9){
|
||||
if(verbose_level>=ERROR){
|
||||
fprintf(stderr, "Id: %d: bad number of argument for function %s: %d instead of 9\n", thread_id, __func__, nb_args);
|
||||
fprintf(stderr, "Id: %d: bad number of argument for function %s: %d instead of 9\n", thread_id, __FUNCTION__, nb_args);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -1009,20 +1025,19 @@ static void partial_exhaustive_search(int nb_args, void **args, int thread_id){
|
||||
}
|
||||
|
||||
|
||||
#if 0
|
||||
static int dbl_cmp_dec(const void* x1,const void* x2)
|
||||
|
||||
/*
 * qsort()-style comparator that orders doubles in decreasing order.
 *
 * x1, x2: pointers to the two doubles to compare.
 * Returns -1 if *x1 > *x2, 1 if *x1 < *x2 and 0 when neither holds
 * (equal values). Returning 0 for equal keys keeps the comparator
 * consistent, as qsort() requires: cmp(a,b) and cmp(b,a) must not both
 * claim that their first argument sorts last.
 */
int dbl_cmp_dec(const void* x1,const void* x2)
{
  const double a = *(const double *)x1;
  const double b = *(const double *)x2;

  return (a < b) - (a > b);
}
|
||||
#endif
|
||||
static int dbl_cmp_inc(const void* x1,const void* x2)
|
||||
/*
 * qsort()-style comparator that orders doubles in increasing order.
 *
 * x1, x2: pointers to the two doubles to compare.
 * Returns -1 if *x1 < *x2, 1 if *x1 > *x2 and 0 when neither holds
 * (equal values). Returning 0 for equal keys keeps the comparator
 * consistent, as qsort() requires: cmp(a,b) and cmp(b,a) must not both
 * claim that their first argument sorts last.
 */
int dbl_cmp_inc(const void* x1,const void* x2)
{
  const double a = *(const double *)x1;
  const double b = *(const double *)x2;

  return (a > b) - (a < b);
}
|
||||
|
||||
|
||||
|
||||
static double *build_bound_array(double *tab, int n){
|
||||
double *build_bound_array(double *tab, int n){
|
||||
int i;
|
||||
double *bound;
|
||||
|
||||
@ -1051,7 +1066,7 @@ static double *build_bound_array(double *tab, int n){
|
||||
return bound;
|
||||
}
|
||||
|
||||
static work_unit_t *create_work_unit(work_unit_t *cur, int *tab,int size){
|
||||
work_unit_t *create_work_unit(work_unit_t *cur, int *tab,int size){
|
||||
work_unit_t *res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t));
|
||||
int *tab_group = MALLOC(size*sizeof(int));
|
||||
memcpy(tab_group, tab, size*sizeof(int));
|
||||
@ -1062,7 +1077,7 @@ static work_unit_t *create_work_unit(work_unit_t *cur, int *tab,int size){
|
||||
return res;
|
||||
}
|
||||
|
||||
static work_unit_t *generate_work_units(work_unit_t *cur, int i, int id, int *tab_group,int size, int id_max){
|
||||
work_unit_t *generate_work_units(work_unit_t *cur, int i, int id, int *tab_group,int size, int id_max){
|
||||
|
||||
tab_group[i] = id;
|
||||
if(i==size-1){
|
||||
@ -1082,7 +1097,7 @@ static work_unit_t *generate_work_units(work_unit_t *cur, int i, int id, int *t
|
||||
}
|
||||
|
||||
|
||||
static work_unit_t *create_tab_work(int n){
|
||||
work_unit_t *create_tab_work(int n){
|
||||
int work_size = 4;
|
||||
int i;
|
||||
work_unit_t *cur,*res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t));
|
||||
@ -1106,7 +1121,7 @@ static work_unit_t *create_tab_work(int n){
|
||||
}
|
||||
|
||||
|
||||
static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int arity, int solution_size, double *best_val,
|
||||
int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int arity, int solution_size, double *best_val,
|
||||
group_list_t **best_selection){
|
||||
|
||||
pthread_mutex_t lock;
|
||||
@ -1181,6 +1196,7 @@ static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int
|
||||
for(id=0;id<nb_threads;id++){
|
||||
wait_work_completion(works[id]);
|
||||
FREE(works[id]->args);
|
||||
destroy_work(works[id]);
|
||||
}
|
||||
|
||||
exit(-1);
|
||||
@ -1209,8 +1225,8 @@ static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat)
|
||||
|
||||
int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat)
|
||||
{
|
||||
group_list_t *elem = NULL;
|
||||
|
||||
@ -1249,10 +1265,10 @@ static int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int ar
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
static int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat, int* tab_i)
|
||||
|
||||
|
||||
int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat, int* tab_i)
|
||||
{
|
||||
group_list_t *elem = NULL;
|
||||
|
||||
@ -1302,10 +1318,10 @@ static int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity,
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
static int exhaustive_search(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val,
|
||||
|
||||
|
||||
int exhaustive_search(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val,
|
||||
group_list_t **best_selection)
|
||||
{
|
||||
int i, j;
|
||||
@ -1365,7 +1381,7 @@ static int exhaustive_search(group_list_t **tab_group, int n, int arity, int so
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
int select_independent_groups_by_largest_index(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val, group_list_t **best_selection, int bound, double max_duration)
|
||||
@ -1566,7 +1582,7 @@ double fast_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t
|
||||
return val;
|
||||
}
|
||||
|
||||
static double k_partition_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size) {
|
||||
double k_partition_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size) {
|
||||
int *partition = NULL;
|
||||
int n = aff_mat->order;
|
||||
com_mat_t com_mat;
|
||||
@ -1695,7 +1711,8 @@ tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *aff_mat, double* obj_wei
|
||||
double **old_mat;
|
||||
double avg;
|
||||
int i, j, mat_order;
|
||||
|
||||
long int nnz = 0;
|
||||
|
||||
if(!obj_weight)
|
||||
return aff_mat;
|
||||
|
||||
@ -1727,8 +1744,9 @@ tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *aff_mat, double* obj_wei
|
||||
mat[i][j] = 1e-4*old_mat[i][j]/comm_speed-fabs(avg-(obj_weight[i]+obj_weight[j])/2);
|
||||
sum_row[i] += mat[i][j];
|
||||
}
|
||||
if(mat[i][j]) nnz++;
|
||||
}
|
||||
return new_affinity_mat(mat, sum_row, mat_order);
|
||||
return new_affinity_mat(mat, sum_row, mat_order,nnz);
|
||||
|
||||
}
|
||||
|
||||
@ -1952,7 +1970,7 @@ void complete_aff_mat(tm_affinity_mat_t **aff_mat , int mat_order, int K)
|
||||
sum_row[i] = (*aff_mat)->sum_row[i];
|
||||
}
|
||||
|
||||
*aff_mat = new_affinity_mat(new_mat, sum_row, M);
|
||||
*aff_mat = new_affinity_mat(new_mat, sum_row, M, (*aff_mat)->nnz);
|
||||
}
|
||||
|
||||
void complete_obj_weight(double **tab, int mat_order, int K)
|
||||
@ -2001,7 +2019,9 @@ void create_dumb_tree(tm_tree_t *node, int depth, tm_topology_t *topology)
|
||||
list_child[i]->dumb = 1;
|
||||
}
|
||||
|
||||
set_node(node, list_child, arity, NULL, -1, 0, list_child[0], depth);
|
||||
/* list_child => node->child ; list_child[0] => node->tab_child */
|
||||
/* printf("list_child[0] = %p\n",list_child[0]); */
|
||||
set_node(node, list_child, arity, NULL, -1, 0, NULL, depth);
|
||||
}
|
||||
void complete_tab_node(tm_tree_t **tab, int mat_order, int K, int depth, tm_topology_t *topology)
|
||||
{
|
||||
@ -2080,6 +2100,8 @@ tm_tree_t *build_level_topology(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat,
|
||||
TIC;
|
||||
K = arity*((mat_order/arity)+1)-mat_order;
|
||||
/*printf("****mat_order=%d arity=%d K=%d\n", mat_order, arity, K); */
|
||||
if(verbose_level >= INFO)
|
||||
printf("****mat_order=%d arity=%d K=%d\n", mat_order, arity, K);
|
||||
/*display_tab(tab, mat_order);*/
|
||||
/* add K rows and columns to comm_matrix*/
|
||||
complete_aff_mat(&aff_mat, mat_order, K);
|
||||
@ -2106,8 +2128,8 @@ tm_tree_t *build_level_topology(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat,
|
||||
for( i = 0 ; i < M ; i++ ){
|
||||
tm_tree_t **list_child = NULL;
|
||||
list_child = (tm_tree_t**)CALLOC(arity, sizeof(tm_tree_t*));
|
||||
set_node(&new_tab_node[i], list_child, arity, NULL, i, 0, tab_node, depth);
|
||||
}
|
||||
set_node(&new_tab_node[i], list_child, arity, NULL, i, 0, tab_node, depth);
|
||||
}
|
||||
duration = TOC;
|
||||
if(verbose_level >= INFO)
|
||||
printf("New nodes creation= %fs\n ", duration);
|
||||
@ -2224,7 +2246,7 @@ int check_constraints(tm_topology_t *topology, int **constraints)
|
||||
In order to have all the ranks of a given id we need to shift them as follows:
|
||||
*/
|
||||
shift = 1 + i%topology->oversub_fact - topology->oversub_fact;
|
||||
(*constraints)[i] = topology->node_rank[topology->nb_levels-1][topology->constraints[i/topology->oversub_fact]] +shift;
|
||||
(*constraints)[i] = topology->node_rank[topology->constraints[i/topology->oversub_fact]] +shift;
|
||||
if((*constraints)[i] < last)
|
||||
sorted = 0;
|
||||
last = (*constraints)[i];
|
||||
|
@ -21,6 +21,11 @@ typedef enum{
|
||||
TM_METRIC_HOP_BYTE = 3
|
||||
} tm_metric_t;
|
||||
|
||||
/* Core numbering scheme: logical or physical */
|
||||
typedef enum{
|
||||
TM_NUMBERING_LOGICAL = 0,
|
||||
TM_NUMBERING_PHYSICAL = 1
|
||||
} tm_numbering_t;
|
||||
|
||||
/********* TreeMatch Public Structures **********/
|
||||
|
||||
@ -30,39 +35,40 @@ typedef struct _job_info_t{
|
||||
int finish_date;
|
||||
} tm_job_info_t;
|
||||
|
||||
typedef struct _tree_t{
|
||||
typedef struct _tm_tree_t{
|
||||
int constraint; /* tells if the tree has been constructed with constraints on the nodes or not.
|
||||
Useful for freeing it. Needs to be set on the root only */
|
||||
struct _tree_t **child;
|
||||
struct _tree_t *parent;
|
||||
struct _tree_t *tab_child; /*the pointer to be freed*/
|
||||
struct _tm_tree_t **child;
|
||||
struct _tm_tree_t *parent;
|
||||
struct _tm_tree_t *tab_child; /* The pointer to be freed */
|
||||
double val;
|
||||
int arity;
|
||||
int depth;
|
||||
int id;
|
||||
int uniq;
|
||||
int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately*/
|
||||
int id; /* id of the node or the leaf. Ids are unique only within a given level */
|
||||
int uniq; /* uniq id in the whole tree */
|
||||
int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately */
|
||||
tm_job_info_t *job_info;
|
||||
int nb_processes; /* number of grouped processes (i.e. the order of the affinity matrix). Set at the root only*/
|
||||
}tm_tree_t; /* FT : changer le nom : tm_grouap_hierachy_t ?*/
|
||||
int nb_processes; /* number of grouped processes (i.e. the order of the affinity matrix). Set at the root only */
|
||||
}tm_tree_t; /* FT : changer le nom : tm_grouap_hierachy_t ? */
|
||||
|
||||
/* Maximum number of levels in the tree*/
|
||||
#define TM_MAX_LEVELS 100
|
||||
|
||||
typedef struct {
|
||||
int *arity; /* arity of the nodes of each level*/
|
||||
int nb_levels; /*number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
|
||||
size_t *nb_nodes; /*nb of nodes of each level*/
|
||||
int **node_id; /*ID of the nodes of the tree for each level*/
|
||||
int **node_rank ; /*rank of the nodes of the tree for each level given its ID: this is the inverse tab of node_id*/
|
||||
size_t *nb_free_nodes; /*nb of available nodes of each level*/
|
||||
int **free_nodes; /*tab of node that are free: useful to simulate batch scheduler*/
|
||||
double *cost; /*cost of the communication depending on the distance:
|
||||
cost[i] is the cost for communicating at distance nb_levels-i*/
|
||||
int *constraints; /* array of constraints: id of the nodes where it is possible to map processes */
|
||||
int nb_constraints; /* Size of the above array */
|
||||
int oversub_fact; /* maximum number of processes to be mapped on a given node */
|
||||
int nb_proc_units; /* the real number of units used for computation */
|
||||
int *arity; /* Arity of the nodes of each level*/
|
||||
int nb_levels; /* Number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
|
||||
size_t *nb_nodes; /* Number of nodes of each level*/
|
||||
int physical_num; /* Flag set to !=0 if we use physical numbering and set to 0 if we use logical numbering */
|
||||
int *node_id; /* ID of the nodes of the tree of the last level*/
|
||||
int *node_rank ; /* Rank of the nodes of the tree for the last level given its ID: this is the inverse tab of node_id*/
|
||||
size_t *nb_free_nodes; /* Nb of available nodes of each level*/
|
||||
int **free_nodes; /* array of node that are free: useful to simulate batch scheduler*/
|
||||
double *cost; /* Cost of the communication depending on the distance:
|
||||
cost[i] is the cost for communicating at distance nb_levels-i*/
|
||||
int *constraints; /* Array of constraints: id of the nodes where it is possible to map processes */
|
||||
int nb_constraints; /* Size of the above array */
|
||||
int oversub_fact; /* Maximum number of processes to be mapped on a given node */
|
||||
int nb_proc_units; /* The real number of units used for computation */
|
||||
}tm_topology_t;
|
||||
|
||||
|
||||
@ -70,17 +76,18 @@ typedef struct {
|
||||
double ** mat;
|
||||
double * sum_row;
|
||||
int order;
|
||||
long int nnz; /* number of non zero entries */
|
||||
} tm_affinity_mat_t;
|
||||
|
||||
/*
|
||||
sigma_i is such that process i is mapped on core sigma_i
|
||||
k_i is such that core i exectutes process k_i_j (0<=j<<=oversubscribing factor - 1)
|
||||
sigma[i] is such that process i is mapped on core sigma[i]
|
||||
k[i][j] is such that core i executes process k[i][j] (0 <= j <= oversubscribing factor - 1)
|
||||
|
||||
size of sigma is the number of processes (nb_objs)
|
||||
size of k is the number of cores/nodes (nb_compute_units)
|
||||
size of k[i] is the number of processes we can execute per node (1 if no oversubscribing)
|
||||
|
||||
We must have numbe of process<=number of cores
|
||||
We must have number of processes <= number of cores
|
||||
|
||||
k[i] == NULL if no process is mapped on core i
|
||||
*/
|
||||
@ -95,8 +102,10 @@ typedef struct {
|
||||
|
||||
|
||||
/************ TreeMatch Public API ************/
|
||||
/* construct topology from local one using hwloc */
|
||||
tm_topology_t* tm_get_local_topology_with_hwloc(void);
|
||||
|
||||
/* load XML or TGT topology */
|
||||
/* Alternatively, load XML or TGT topology */
|
||||
tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type);
|
||||
/*
|
||||
Alternatively, build a synthetic balanced topology.
|
||||
@ -120,14 +129,12 @@ tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_ty
|
||||
|
||||
double cost[5] = {500,100,50,10,0};
|
||||
int arity[5] = {16,2,2,2,0};
|
||||
int cn[5]={0,1};
|
||||
int cn[2]={0,1};
|
||||
|
||||
topology = tm_build_synthetic_topology(arity,cost,5,cn,2);
|
||||
|
||||
*/
|
||||
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
|
||||
/* load affinity matrix */
|
||||
tm_affinity_mat_t *tm_load_aff_mat(char *com_filename);
|
||||
/*
|
||||
Alternatively, build the affinity matrix from an array of arrays (a matrix) of size order by order
|
||||
For performance reason mat is not copied.
|
||||
@ -153,7 +160,7 @@ void tm_optimize_topology(tm_topology_t **topology);
|
||||
void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact);
|
||||
/* core of the treematch: compute the solution tree */
|
||||
tm_tree_t *tm_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, double *obj_weight, double *com_speed);
|
||||
/* compute the mapping according to teh tree an dthe core numbering*/
|
||||
/* compute the mapping according to the tree and the core numbering*/
|
||||
tm_solution_t *tm_compute_mapping(tm_topology_t *topology, tm_tree_t *comm_tree);
|
||||
/* display the solution*/
|
||||
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol, tm_metric_t metric);
|
||||
@ -168,7 +175,6 @@ void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
|
||||
void tm_set_verbose_level(unsigned int level);
|
||||
unsigned int tm_get_verbose_level(void);
|
||||
/* finalize treematch :check memory if necessary, and free internal variables (thread pool)*/
|
||||
void tm_finalize(void);
|
||||
|
||||
/*
|
||||
Ask for exhaustive search: may be very long
|
||||
@ -178,10 +184,21 @@ Ask for exhaustive search: may be very long
|
||||
void tm_set_exhaustive_search_flag(int new_val);
|
||||
int tm_get_exhaustive_search_flag(void);
|
||||
|
||||
/*
|
||||
Ask for greedy k-partitioning even if scotch is available
|
||||
new_val == 0 : no greedy k-partitioning
|
||||
new_val != 0 : greedy k-partitioning
|
||||
*/
|
||||
void tm_set_greedy_flag(int new_val);
|
||||
int tm_get_greedy_flag(void);
|
||||
|
||||
|
||||
/* Setting the maximum number of threads you want to use in parallel parts of TreeMatch */
|
||||
void tm_set_max_nb_threads(unsigned int val);
|
||||
|
||||
/* managing the usage of physical vs. logical core numbering when using hwloc/xml files */
|
||||
void tm_set_numbering(tm_numbering_t new_val); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
|
||||
tm_numbering_t tm_get_numbering(void); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
|
||||
|
||||
#include "tm_malloc.h"
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user