diff --git a/ompi/mca/topo/treematch/treematch/tm_bucket.c b/ompi/mca/topo/treematch/treematch/tm_bucket.c index 88719cf925..69b912348b 100644 --- a/ompi/mca/topo/treematch/treematch/tm_bucket.c +++ b/ompi/mca/topo/treematch/treematch/tm_bucket.c @@ -199,7 +199,7 @@ void add_to_bucket(int id,int i,int j,bucket_list_t bucket_list) /* display_bucket(bucket);*/ if(verbose_level >= DEBUG){ printf("Extending bucket %d (%p) from size %d to size %d!\n", - id, (void*)bucket->bucket, bucket->nb_elem, bucket->nb_elem+size); + id,(void*)bucket->bucket, bucket->nb_elem, bucket->nb_elem+size); } bucket->bucket = (coord*)REALLOC(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len)); @@ -525,7 +525,7 @@ void partial_update_val (int nb_args, void **args, int thread_id){ if(nb_args != 5){ if(verbose_level >= ERROR) - fprintf(stderr,"(Thread: %d) Wrong number of args in %s: %d\n",thread_id, __func__, nb_args); + fprintf(stderr,"(Thread: %d) Wrong number of args in %s: %d\n",thread_id, __FUNCTION__, nb_args); exit(-1); } @@ -648,6 +648,7 @@ double bucket_grouping(tm_affinity_mat_t *aff_mat,tm_tree_t *tab_node, tm_tree_t wait_work_completion(works[id]); val+=tab_val[id]; FREE(works[id]->args); + destroy_work(works[id]); } diff --git a/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c b/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c index 4f56b49d69..052dbcf070 100644 --- a/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c +++ b/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c @@ -6,6 +6,11 @@ #include #include "config.h" +#if HAVE_LIBSCOTCH +#include +#endif + + #define USE_KL_KPART 0 #define KL_KPART_GREEDY_TRIALS 0 @@ -33,6 +38,253 @@ void free_const_tab(constraint_t *,int); void kpartition_build_level_topology(tm_tree_t *,com_mat_t *,int,int,tm_topology_t *, int *,int *,int,double *,double *); +static int greedy_flag = 0; + +void tm_set_greedy_flag(int new_val){ + greedy_flag = new_val; +} + +int tm_get_greedy_flag(){ + return greedy_flag; +} + + +#if 
HAVE_LIBSCOTCH + +SCOTCH_Graph* com_mat_to_scotch_graph(com_mat_t *com_mat, int n){ + double **mat = com_mat->comm; + SCOTCH_Num vertnbr = n; // number of vertices + SCOTCH_Num edgenbr = vertnbr*vertnbr; // number of edges + /* adjacency list */ + SCOTCH_Num *verttab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * (vertnbr+1)); + /* loads of vertices */ + /* SCOTCH_Num *velotab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * vertnbr); */ + /* id of the neighbors */ + SCOTCH_Num *edgetab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * edgenbr); + /* number of bytes exchanged */ + SCOTCH_Num *edlotab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * edgenbr); + SCOTCH_Graph *graphptr = SCOTCH_graphAlloc(); + + int edgeNum = 0; + int i,j; + + /* Building with the communication matrix */ + for(i = 0; i < com_mat->n ; i++) { + verttab[i] = edgeNum; + for(j = 0; j < i; j++) { + if(mat[i][j]){ + edgetab[edgeNum] = j; + edlotab[edgeNum] = (SCOTCH_Num)mat[i][j]; + edgeNum++; + } + } + /* ensure i!=j. Hence, avoid to test it...*/ + for(j = i+1 ; j < com_mat->n ; j++) { + if(mat[i][j]){ + edgetab[edgeNum] = j; + edlotab[edgeNum] = (SCOTCH_Num)mat[i][j]; + edgeNum++; + } + } + } + + + /* for(i = baseval; i < com_mat->n ; i++) { */ + /* verttab[i] = edgeNum; */ + /* /\* velotab[i] = (SCOTCH_Num) ceil(ogr->vertices[i].getVertexLoad() * ratio); *\/ */ + /* for(j = baseval; j < com_mat->n ; j++) { */ + /* if((mat[i][j] || mat[j][i]) && (i!=j)){ */ + /* edgetab[edgeNum] = j; */ + /* edlotab[edgeNum] = (SCOTCH_Num) ((mat[i][j] + mat[j][i])/2); */ + /* edgeNum++; */ + /* } */ + /* } */ + /* } */ + + /* adding the dumb vertices: they have no neighbor*/ + for(i = com_mat->n ; i=DEBUG){ + printf("Graph converted to Scotch format: edgeNum=%d, edgenbr = %lld, vertnbr = %lld\n",edgeNum, (long long int)edgenbr, (long long int)vertnbr); + } + + assert(edgeNum <= edgenbr); + edgenbr = edgeNum; + + SCOTCH_graphInit(graphptr); + SCOTCH_graphBuild(graphptr, 0, vertnbr, verttab, verttab+1, NULL, NULL, edgenbr, 
edgetab, edlotab); + + return graphptr; +} + + + +int check_partition(SCOTCH_Num *parttab, int k, int n){ + int *count = CALLOC(sizeof(int), k); + int i; + for(i=0; i=INFO) + fprintf(stdout, "Error in partition: %d vertices in partition %d while expecting %d vertices\n",count[i], i, target); + FREE(count); + return 0; + } + } + + FREE(count); + return 1; +} + + +/* n is the number of element in teh graoh with dumlb_vertices + comm_mat->n is the nulber of processes (i.e. the size of teh graph without dumb veritcies*/ +int *kpartition_scotch(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints){ + SCOTCH_Num partnbr = (SCOTCH_Num) k; + SCOTCH_Graph* graphptr; + SCOTCH_Strat strat; + SCOTCH_Num straval; + SCOTCH_Num *parttab = (SCOTCH_Num *)MALLOC(sizeof(SCOTCH_Num) * n); + int *partition = (int *)MALLOC(sizeof(int) * n); + int i, j; + int *nb_dumb = (int *)MALLOC(sizeof(int) * k); /*number of dumb vertices per partition */ + int dumb_id, min_nb_dumb = n, sum_dumb = 0, p; + /* if(SCOTCH_graphCheck(graphptr) == 1){ */ + /* fprintf(stderr,"Bad scotch graph! Exiting program...\n"); */ + /* exit(-1); */ + /* } */ + + /* printf("Correct scotch graph (%d, %d)!\n", SCOTCH_numSizeof(), sizeof(SCOTCH_Num)); */ + + for(i=0;i= max_val) + break; + end++; + } + /* now end - start is the number of constraints for the ith subtree + hence the number of dumb vertices in partition i is the differences between the + number of leaves of the subtree (n/k) and the number of constraints + */ + nb_dumb[i] = n/k - (end-start); + sum_dumb += nb_dumb[i]; + if(nb_dumb[i] < min_nb_dumb){ + min_nb_dumb = nb_dumb[i]; + } + start=end; + } + + /* Imagine we have n=12, k=3, nb_dumb[0] = 3, nb_dumb[1] = 2, nb_dumb[2] = 3, hence min_nb_dumb = 2 and sum_dumb = 8 + So, we have 8 fix vertices and 12-8 = 4 free vertices + We want scotch to allocate the 6 free vertices such that the whole partition is balanced (4 vertex in each) : + 1 in parttion 0, 2 in partition 1 and 1 in partition 2. 
+ To do so we can fill partab as follows: + {-1, -1, -1, -1, 0, 0, 0, 1, 1, 2, 2, 2} and call scotch with a n=12 vertices graph with SCOTCH_STRATBALANCE + dumb_id = n - sum_dumb; + for(i = 0;i4) + straval = SCOTCH_STRATSPEED; + SCOTCH_stratGraphMapBuild (&strat, straval, partnbr, 0); + + + if(tm_get_verbose_level()>=DEBUG){ + printf("Before Scotch (p=%d, n=%d): \n", p, n); + for(i = 0 ; i < n; i++){ + printf("%d ",(int)parttab[i]); + } + printf("\n"); + } + + if(SCOTCH_graphPartFixed(graphptr, partnbr, &strat, parttab) == 0){ + if(tm_get_verbose_level()>=DEBUG){ + printf("After Scotch: \n"); + for(i = 0 ; i < n; i++){ + printf("%d ",(int)parttab[i]); + } + printf("\n"); + } + }else{ + if(tm_get_verbose_level()>=CRITICAL){ + fprintf(stderr,"Scotch Partitionning failed\n"); + } + exit(-1); + } + + if(!check_partition(parttab, partnbr, n)){ + if(tm_get_verbose_level()>=INFO){ + printf("falling from Scotch to greedy partionning\n"); + } + FREE(partition); + partition = kpartition_greedy(k, com_mat, n, constraints, nb_constraints); + }else{ + for(i=0;i= DEBUG) + printf("Using Scotch\n"); + res = kpartition_scotch(k, com_mat, n, constraints, nb_constraints); + }else{ + if(verbose_level >= DEBUG) + printf("Using greedy partitionning\n"); + res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints); + } #else - /*printf("Using default\n");*/ + if(verbose_level >= DEBUG) + printf("Using greedy partitionning\n"); res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints); #endif return res; @@ -242,7 +503,7 @@ constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm const_tab = (constraint_t *)CALLOC(k,sizeof(constraint_t)); /* nb_leaves is the number of leaves of the current subtree - this will help to detremine where to split constraints and how to shift values + this will help to determine where to split constraints and how to shift values */ nb_leaves = compute_nb_leaves_from_level( depth + 1, topology ); @@ -251,8 +512,6 @@ 
constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm */ start = 0; - - for( i = 0; i < k; i++ ){ /*returns the indice in constraints that contains the smallest value not copied end is used to compute the number of copied elements (end-size) and is used as the next staring indices*/ @@ -294,7 +553,7 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition) printf("Partition: "); print_1D_tab(partition,n); display_tab(com_mat->comm,com_mat->n); printf("m=%d,n=%d,k=%d\n",m,n,k); - printf("perm=%p\n", (void*)perm); + printf("perm=%p\n", (void *)perm); } perm = (int*)MALLOC(sizeof(int)*m); @@ -425,8 +684,8 @@ void free_const_tab(constraint_t *const_tab, int k) FREE(const_tab); } -#if 0 -static void check_com_mat(com_mat_t *com_mat){ + +void check_com_mat(com_mat_t *com_mat){ int i,j; for( i = 0 ; i < com_mat->n ; i++ ) @@ -435,8 +694,29 @@ static void check_com_mat(com_mat_t *com_mat){ printf("com_mat->comm[%d][%d]= %f\n",i,j,com_mat->comm[i][j]); exit(-1); } + + +} + +void print_tab(int n){ + for(;n;n--) + fprintf(stdout,"\t"); +} + +void display_partition(int *partition, int *local_vertices, int n, int depth, int k){ + int cur_part, j; + print_tab(depth);fprintf(stdout,"Partitions at depth=%d\n",depth); + for( cur_part = 0; cur_part < k ; cur_part ++){ + print_tab(depth); fprintf(stdout,"%d :",cur_part); + for( j = 0; j < n; j ++){ + if ( partition[j] == cur_part ){ + if(local_vertices[j]!=-1) + fprintf(stdout,"%d ",local_vertices[j]); + } + } + fprintf(stdout,"\n"); + } } -#endif void kpartition_build_level_topology(tm_tree_t *cur_node, com_mat_t *com_mat, int N, int depth, tm_topology_t *topology, int *local_vertices, @@ -471,6 +751,10 @@ void kpartition_build_level_topology(tm_tree_t *cur_node, com_mat_t *com_mat, in /* partition the com_matrix in k partitions*/ partition = kpartition(k, com_mat, N, constraints, nb_constraints); + if(verbose_level>=INFO) + display_partition(partition, local_vertices, N, depth, k); + 
+ /* exit(-1); */ /* split the communication matrix in k parts according to the partition just found above */ tab_com_mat = split_com_mat( com_mat, N, k, partition); @@ -558,7 +842,7 @@ tm_tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double ** the value of this array will be used to number the leaves of the tm_tree_t tree that start at "root" - min(N,nb_contraints) is used to takle the case where thre is less processes than constraints + min(N,nb_contraints) is used to tackle the case where there is less processes than constraints */ diff --git a/ompi/mca/topo/treematch/treematch/tm_malloc.c b/ompi/mca/topo/treematch/treematch/tm_malloc.c index 66fae50621..3e86bbed85 100644 --- a/ompi/mca/topo/treematch/treematch/tm_malloc.c +++ b/ompi/mca/topo/treematch/treematch/tm_malloc.c @@ -36,7 +36,7 @@ static void init_extra_data(void); -static char *my_strdup(char* string){ +char *my_strdup(char* string){ int size = 1+strlen(string); char *res = (char*)malloc(size*sizeof(char)); @@ -55,7 +55,7 @@ void save_ptr(void *ptr, size_t size, char *file, int line) { elem -> line = line; elem -> file = my_strdup(file); if(tm_get_verbose_level() >= DEBUG) - printf("Storing (%p,%ld)\n",ptr,size); + printf("Storing (%p,%ld)\n", (void *)ptr,size); HASH_ADD_PTR( size_hash, key, elem ); } @@ -66,14 +66,14 @@ size_t retreive_size(void *someaddr){ HASH_FIND_PTR(size_hash, &someaddr, elem); if(!elem){ if(tm_get_verbose_level() >= CRITICAL) - fprintf(stderr,"Cannot find ptr %p to free!\n",someaddr); + fprintf(stderr,"Cannot find ptr %p to free!\n", (void *)someaddr); abort(); return 0; } res = elem->size; if(tm_get_verbose_level()>=DEBUG) - printf("Retreiving (%p,%ld)\n",someaddr, res); + printf("Retreiving (%p,%ld)\n",(void *)someaddr, res); free(elem->file); HASH_DEL( size_hash, elem); @@ -86,7 +86,7 @@ void tm_mem_check(void){ int nb_errors = 0; for(s=size_hash; s != NULL; s=s->hh.next) { if(tm_get_verbose_level()>=ERROR) - printf("pointer %p of size %ld (%s: %d) has 
not been freed!\n", s->key, s->size, s->file, s->line); + printf("pointer %p of size %ld (%s: %d) has not been freed!\n", (void *)s->key + EXTRA_BYTE, s->size, s->file, s->line); nb_errors ++; } @@ -119,7 +119,7 @@ void *tm_malloc(size_t size, char *file, int line){ ptr = malloc(size); if(tm_get_verbose_level()>=DEBUG) - printf("tm_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE,(void*)ptr,file,line); + printf("tm_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE, (void *)ptr,file,line); save_ptr(ptr, size, file, line); @@ -128,7 +128,7 @@ void *tm_malloc(size_t size, char *file, int line){ if(tm_get_verbose_level()>=DEBUG) - printf("tm_malloc returning: %p\n",(void*)(ptr+EXTRA_BYTE)); + printf("tm_malloc returning: %p\n",(void *)(ptr+EXTRA_BYTE)); return (void *)(ptr + EXTRA_BYTE); } @@ -147,14 +147,14 @@ void *tm_calloc(size_t count, size_t size, char *file, int line){ save_ptr(ptr, full_size, file, line); if(tm_get_verbose_level()>=DEBUG) - printf("tm_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,(void*)ptr, file, line); + printf("tm_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,(void *)ptr, file, line); memcpy(ptr, extra_data, EXTRA_BYTE); memcpy(ptr + full_size - EXTRA_BYTE, extra_data, EXTRA_BYTE); if(tm_get_verbose_level()>=DEBUG) - printf("tm_calloc returning: %p\n", (void*)(ptr+EXTRA_BYTE)); + printf("tm_calloc returning: %p\n",(void *)(ptr+EXTRA_BYTE)); return (void *)(ptr+EXTRA_BYTE); } @@ -172,7 +172,7 @@ void *tm_realloc(void *old_ptr, size_t size, char *file, int line){ save_ptr(ptr, full_size, file, line); if(tm_get_verbose_level()>=DEBUG) - printf("tm_realloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE, (void*)ptr, file, line); + printf("tm_realloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE, (void *)ptr, file, line); memcpy(ptr, extra_data, EXTRA_BYTE); @@ -185,17 +185,17 @@ void *tm_realloc(void *old_ptr, size_t size, char *file, int line){ memcpy(ptr + EXTRA_BYTE, old_ptr, MIN(old_ptr_size - 2 * 
EXTRA_BYTE, size)); if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){ - fprintf(stderr,"Realloc: cannot find special string ***before*** %p!\n", (void*)original_ptr); + fprintf(stderr,"Realloc: cannot find special string ***before*** %p!\n", (void *)original_ptr); fprintf(stderr,"memory is probably corrupted here!\n"); } if((bcmp(original_ptr + old_ptr_size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){ - fprintf(stderr,"Realloc: cannot find special string ***after*** %p!\n", (void*)original_ptr); + fprintf(stderr,"Realloc: cannot find special string ***after*** %p!\n", (void *)original_ptr); fprintf(stderr,"memory is probably corrupted here!\n"); } if(tm_get_verbose_level()>=DEBUG) - printf("tm_free freeing: %p\n", (void*)original_ptr); + printf("tm_free freeing: %p\n",(void *)original_ptr); free(original_ptr); @@ -203,7 +203,7 @@ void *tm_realloc(void *old_ptr, size_t size, char *file, int line){ if(tm_get_verbose_level()>=DEBUG) - printf("tm_realloc returning: %p (----- %p)\n",(void*)(ptr+EXTRA_BYTE),(void*)(((byte *)ptr) - EXTRA_BYTE)); + printf("tm_realloc returning: %p (----- %p)\n", (void *)(ptr+EXTRA_BYTE), (void *)(ptr - EXTRA_BYTE)); return (void *)(ptr+EXTRA_BYTE); @@ -219,17 +219,17 @@ void tm_free(void *ptr){ size = retreive_size(original_ptr); if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){ - fprintf(stderr,"Free: cannot find special string ***before*** %p!\n", (void*)original_ptr); + fprintf(stderr,"Free: cannot find special string ***before*** %p!\n", (void *)original_ptr); fprintf(stderr,"memory is probably corrupted here!\n"); } if((bcmp(original_ptr + size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){ - fprintf(stderr,"Free: cannot find special string ***after*** %p!\n", (void*)original_ptr); + fprintf(stderr,"Free: cannot find special string ***after*** %p!\n", (void *)original_ptr); fprintf(stderr,"memory is 
probably corrupted here!\n"); } if(tm_get_verbose_level()>=DEBUG) - printf("tm_free freeing: %p\n", (void*)original_ptr); + printf("tm_free freeing: %p\n", (void *)original_ptr); free(original_ptr); diff --git a/ompi/mca/topo/treematch/treematch/tm_mapping.c b/ompi/mca/topo/treematch/treematch/tm_mapping.c index 3472b4a998..597c7babfe 100644 --- a/ompi/mca/topo/treematch/treematch/tm_mapping.c +++ b/ompi/mca/topo/treematch/treematch/tm_mapping.c @@ -1,3 +1,7 @@ +#include +#include +#include +#include #include #include #include @@ -18,6 +22,15 @@ #include #endif +#if HAVE_LIBSCOTCH +#include +#endif + +#include + + +#define MIN(a,b) (a)<(b)?(a):(b) + #define TEST_ERROR(n) do{ \ if( (n) != 0 ){ \ fprintf(stderr,"Error %d Line %d\n",n,__LINE__); \ @@ -34,6 +47,8 @@ typedef struct { } hash2_t; +static tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order); + /* compute the number of leaves of any subtree starting froma node of depth depth*/ int compute_nb_leaves_from_level(int depth,tm_topology_t *topology) { @@ -45,15 +60,11 @@ int compute_nb_leaves_from_level(int depth,tm_topology_t *topology) return res; } -void tm_finalize(){ +void tm_finalize(void){ terminate_thread_pool(); tm_mem_check(); } -int nb_processing_units(tm_topology_t *topology) -{ - return topology->nb_proc_units; -} void print_1D_tab(int *tab,int N) @@ -89,14 +100,15 @@ int nb_lines(char *filename) return N; } -void init_mat(char *filename,int N, double **mat, double *sum_row) -{ + + +long int init_mat(char *filename,int N, double **mat, double *sum_row){ FILE *pf = NULL; char *ptr= NULL; char line[LINE_SIZE]; int i,j; unsigned int vl = tm_get_verbose_level(); - + long int nnz = 0; if(!(pf=fopen(filename,"r"))){ if(vl >= CRITICAL) @@ -107,7 +119,6 @@ void init_mat(char *filename,int N, double **mat, double *sum_row) j = -1; i = 0; - while(fgets(line,LINE_SIZE,pf)){ char *l = line; j = 0; @@ -116,6 +127,7 @@ void init_mat(char *filename,int N, double **mat, double *sum_row) l = NULL; 
if((ptr[0]!='\n')&&(!isspace(ptr[0]))&&(*ptr)){ mat[i][j] = atof(ptr); + if(mat[i][j]) nnz++; sum_row[i] += mat [i][j]; if(mat[i][j]<0){ if(vl >= WARNING) @@ -140,15 +152,124 @@ void init_mat(char *filename,int N, double **mat, double *sum_row) } fclose (pf); + return nnz; } -tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order){ + +size_t get_filesize(char* filename) { + struct stat st; + stat(filename, &st); + return st.st_size; +} + + +char *parse_line(int i, double **mat, double *sum_row, int N, char *data, char *filename, long int *nnz){ + /* now parse the buffer byte per byte for the current line i until we reach '\n'*/ + unsigned int vl = tm_get_verbose_level(); + long val; + sum_row[i] = 0; + int j = 0; + while(*data != '\n'){ + while(*data ==' ' || *data == '\t') + data++; + if(*data != '\n'){ + val = 0; + while(*data !=' ' && *data != '\t' && *data != '\n'){ + val = val*10 + *data-'0'; + data++; + } + mat[i][j] = val; + /* printf("mat[%d][%d] = %ld\n",i,j, val); */ + if (val){ + (*nnz)++; + sum_row[i] += val; + } + j++; + } + } + if( j != N){ + if(vl >= CRITICAL) + fprintf(stderr,"Error at %d %d (%d!=%d). 
Wrong number of columns line %d for file %s\n",i ,j ,j ,N ,i+1, filename); + exit(-1); + } + data++; + return data; +} + + + +/* buffered read with mmap of teh file */ +long int init_mat_mmap(char *filename,int N, double **mat, double *sum_row){ + int i; + unsigned int vl = tm_get_verbose_level(); + size_t filesize = get_filesize(filename); + int fd = open(filename, O_RDONLY, 0); + long int nnz = 0; + + if(fd == -1){ + if(vl >= CRITICAL) + fprintf(stderr,"Cannot open %s\n",filename); + exit(-1); + } + + char* data = (char*) mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0); + + if(data == MAP_FAILED){ + if(vl >= CRITICAL) + fprintf(stderr,"Cannot mmap %s\n",filename); + exit(-1); + } + + i = 0; + while(i= CRITICAL) + fprintf(stderr,"Cannot open %s\n",filename); + exit(-1); + } + + i = 0; + while(i mat = mat; aff_mat -> sum_row = sum_row; aff_mat -> order = order; + aff_mat -> nnz = nnz; return aff_mat; } @@ -157,15 +278,20 @@ tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order){ tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order){ double *sum_row = NULL; int i,j; + long int nnz = 0; sum_row = (double*)MALLOC(order*sizeof(double)); for( i = 0 ; i < order ; i++){ sum_row[i] = 0; - for(j = 0 ; j < order ; j++) - sum_row[i] += mat [i][j]; + for(j = 0 ; j < order ; j++){ + if(mat[i][j]){ + nnz++; + sum_row[i] += mat [i][j]; + } + } } - return new_affinity_mat(mat, sum_row, order); + return new_affinity_mat(mat, sum_row, order, nnz); } @@ -190,7 +316,8 @@ tm_affinity_mat_t *tm_load_aff_mat(char *filename) double **mat = NULL; double *sum_row = NULL; int i, order; - + long int nnz; + if(tm_get_verbose_level() >= INFO) printf("Reading matrix file: %s\n",filename); @@ -201,13 +328,34 @@ tm_affinity_mat_t *tm_load_aff_mat(char *filename) for( i = 0 ; i < order ; i++) /* the last column stores the sum of the line*/ mat[i] = (double*)MALLOC((order)*sizeof(double)); - init_mat(filename,order, mat, sum_row); + /* on my mac parsing large 
file is better done with fopen than mmap */ + #ifdef __MACH__ + if (get_filesize(filename) > 1024*1024*1014) { + nnz = init_mat_long(filename,order, mat, sum_row); + if(tm_get_verbose_level() >= DEBUG) + printf("New parser\n"); + }else{ + nnz = init_mat_mmap(filename,order, mat, sum_row); + if(tm_get_verbose_level() >= DEBUG) + printf("MMap parser\n"); + } + #else + nnz = init_mat_mmap(filename,order, mat, sum_row); + if(tm_get_verbose_level() >= DEBUG) + printf("MMap parser\n"); + #endif + + /* TIC; */ + /* init_mat(filename,order, mat, sum_row); */ + /* double duration_fl = TOC; */ + /* printf("Old parser = %.3f\n",duration_fl); */ - if(tm_get_verbose_level() >= INFO) + + if(tm_get_verbose_level() >= INFO) printf("Affinity matrix built from %s!\n",filename); - return new_affinity_mat(mat, sum_row, order); + return new_affinity_mat(mat, sum_row, order, nnz); } @@ -261,7 +409,7 @@ int nb_leaves(tm_tree_t *comm_tree) } /* find the first '-1 in the array of size n and put the value there*/ -static void set_val(int *tab, int val, int n){ +void set_val(int *tab, int val, int n){ int i = 0; while (i < n ){ @@ -300,7 +448,7 @@ void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level, unsigned int vl = tm_get_verbose_level(); M = nb_leaves(comm_tree); - nodes_id = topology->node_id[level]; + nodes_id = topology->node_id; N = topology->nb_nodes[level]; if(vl >= INFO){ diff --git a/ompi/mca/topo/treematch/treematch/tm_mapping.h b/ompi/mca/topo/treematch/treematch/tm_mapping.h index 97b3a728a7..cc8bcbd681 100644 --- a/ompi/mca/topo/treematch/treematch/tm_mapping.h +++ b/ompi/mca/topo/treematch/treematch/tm_mapping.h @@ -5,13 +5,13 @@ #include "tm_timings.h" #include "tm_verbose.h" -tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order); +tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order, long int nnz); void build_synthetic_proc_id(tm_topology_t *topology); tm_topology_t *build_synthetic_topology(int *arity, 
int nb_levels, int *core_numbering, int nb_core_per_nodes); int compute_nb_leaves_from_level(int depth,tm_topology_t *topology); void depth_first(tm_tree_t *comm_tree, int *proc_list,int *i); int fill_tab(int **new_tab,int *tab, int n, int start, int max_val, int shift); -void init_mat(char *filename,int N, double **mat, double *sum_row); +long int init_mat(char *filename,int N, double **mat, double *sum_row); void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level, int *sigma, int nb_processes, int **k, int nb_compute_units); int nb_leaves(tm_tree_t *comm_tree); @@ -19,7 +19,9 @@ int nb_lines(char *filename); int nb_processing_units(tm_topology_t *topology); void print_1D_tab(int *tab,int N); tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree); +void tm_finalize(void); void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat); +/* load affinity matrix */ tm_affinity_mat_t *tm_load_aff_mat(char *filename); void update_comm_speed(double **comm_speed,int old_size,int new_size); diff --git a/ompi/mca/topo/treematch/treematch/tm_solution.c b/ompi/mca/topo/treematch/treematch/tm_solution.c index a0fde41e29..7af294c9e4 100644 --- a/ompi/mca/topo/treematch/treematch/tm_solution.c +++ b/ompi/mca/topo/treematch/treematch/tm_solution.c @@ -2,7 +2,7 @@ #include #include "tm_solution.h" #include "tm_mt.h" -#include "tm_mapping.h" +#include "tm_topology.h" typedef struct { int val; @@ -10,6 +10,27 @@ typedef struct { } hash_t; + +void tm_free_solution(tm_solution_t *sol); +int distance(tm_topology_t *topology,int i, int j); +double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma); + double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric); +double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol, + tm_metric_t metric); +void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, 
tm_metric_t metric); +int in_tab(int *tab, int n, int val); +void map_Packed(tm_topology_t *topology, int N, int *sigma); +void map_RR(tm_topology_t * topology, int N, int *sigma); +int hash_asc(const void* x1,const void* x2); +int *generate_random_sol(tm_topology_t *topology,int N, int seed); +double eval_sol(int *sol,int N,double **comm, double **arch); +void exchange(int *sol,int i,int j); +double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch); +void select_max(int *l,int *m,double **gain,int N,int *state); +void compute_gain(int *sol,int N,double **gain,double **comm, double **arch); +void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch); + + void tm_free_solution(tm_solution_t *sol){ int i,n; @@ -41,8 +62,8 @@ int distance(tm_topology_t *topology,int i, int j) int vl = tm_get_verbose_level(); int depth = topology->nb_levels-1; - f_i = topology->node_rank[depth][i]; - f_j = topology->node_rank[depth][j]; + f_i = topology->node_rank[i]; + f_j = topology->node_rank[j]; if(vl >= DEBUG) printf("i=%d, j=%d Level = %d f=(%d,%d)\n",i ,j, level, f_i, f_j); @@ -58,7 +79,7 @@ int distance(tm_topology_t *topology,int i, int j) } while((f_i!=f_j) && (level < depth)); if(vl >= DEBUG) - printf("distance(%d,%d):%d\n",topology->node_rank[depth][i], topology->node_rank[depth][j], level); + printf("distance(%d,%d):%d\n",topology->node_rank[i], topology->node_rank[j], level); /* exit(-1); */ return level; } @@ -85,7 +106,7 @@ double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, a = cost[depth-distance(topology,sigma[i],sigma[j])]; if(tm_get_verbose_level() >= DEBUG) printf("T_%d_%d %f*%f=%f\n",i,j,c,a,c*a); - sol += c*a; + sol += c*a; } for (i = 0; i < N; i++) { @@ -99,7 +120,7 @@ double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, } -static double display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma) +double 
display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma) { double a,c,sol; int i,j; @@ -135,7 +156,7 @@ static double display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *af return sol; } -static double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma) +double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma) { double c,sol; int nb_hops; @@ -150,7 +171,7 @@ static double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *a nb_hops = 2*distance(topology,sigma[i],sigma[j]); if(tm_get_verbose_level() >= DEBUG) printf("T_%d_%d %f*%d=%f\n",i,j,c,nb_hops,c*nb_hops); - sol += c*nb_hops; + sol += c*nb_hops; } for (i = 0; i < N; i++) { @@ -257,7 +278,7 @@ int in_tab(int *tab, int n, int val){ if(tab[i] == val) return 1; - return 0; + return 0; } void map_Packed(tm_topology_t *topology, int N, int *sigma) @@ -270,10 +291,10 @@ void map_Packed(tm_topology_t *topology, int N, int *sigma) for( i = 0 ; i < topology->nb_nodes[depth] ; i++){ /* printf ("%d -> %d\n",objs[i]->os_index,i); */ - if((!topology->constraints) || (in_tab(topology->constraints, topology->nb_constraints, topology->node_id[depth][i]))){ + if((!topology->constraints) || (in_tab(topology->constraints, topology->nb_constraints, topology->node_id[i]))){ if(vl >= DEBUG) - printf ("%lu: %d -> %d\n", i, j, topology->node_id[depth][i]); - sigma[j++]=topology->node_id[depth][i]; + printf ("%lu: %d -> %d\n", i, j, topology->node_id[i]); + sigma[j++]=topology->node_id[i]; if(j == N) break; } @@ -306,14 +327,14 @@ int hash_asc(const void* x1,const void* x2) } -int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed) +int *generate_random_sol(tm_topology_t *topology,int N, int seed) { hash_t *hash_tab = NULL; int *sol = NULL; int *nodes_id= NULL; int i; - nodes_id = topology->node_id[level]; + nodes_id = topology->node_id; hash_tab = (hash_t*)MALLOC(sizeof(hash_t)*N); sol = 
(int*)MALLOC(sizeof(int)*N); @@ -428,7 +449,7 @@ void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **com state = (int*)MALLOC(sizeof(int)*N); temp = (double*)MALLOC(sizeof(double)*N); - sol = generate_random_sol(topology,N,topology->nb_levels-1,seed++); + sol = generate_random_sol(topology, N, seed++); for( i = 0 ; i < N ; i++) sigma[i] = sol[i]; @@ -488,7 +509,7 @@ void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **com } }while( max > 0 ); FREE(sol); - sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++); + sol=generate_random_sol(topology, N, seed++); } diff --git a/ompi/mca/topo/treematch/treematch/tm_solution.h b/ompi/mca/topo/treematch/treematch/tm_solution.h index 5ed62b7022..8cc38a8755 100644 --- a/ompi/mca/topo/treematch/treematch/tm_solution.h +++ b/ompi/mca/topo/treematch/treematch/tm_solution.h @@ -14,7 +14,7 @@ int in_tab(int *tab, int n, int val); void map_Packed(tm_topology_t *topology, int N, int *sigma); void map_RR(tm_topology_t *topology, int N, int *sigma); int hash_asc(const void* x1,const void* x2); -int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed); +int *generate_random_sol(tm_topology_t *topology,int N, int seed); double eval_sol(int *sol,int N,double **comm, double **arch); void exchange(int *sol,int i,int j); double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch); diff --git a/ompi/mca/topo/treematch/treematch/tm_thread_pool.c b/ompi/mca/topo/treematch/treematch/tm_thread_pool.c index ef9ccbf68d..a9b13fbaf3 100644 --- a/ompi/mca/topo/treematch/treematch/tm_thread_pool.c +++ b/ompi/mca/topo/treematch/treematch/tm_thread_pool.c @@ -23,7 +23,6 @@ static thread_pool_t *create_threads(void); static void f1 (int nb_args, void **args, int thread_id); static void f2 (int nb_args, void **args, int thread_id); -static void destroy_work(work_t *work); #define MIN(a, b) ((a)<(b)?(a):(b)) #define MAX(a, b) ((a)>(b)?(a):(b)) diff 
--git a/ompi/mca/topo/treematch/treematch/tm_thread_pool.h b/ompi/mca/topo/treematch/treematch/tm_thread_pool.h index 26279977fb..3499d261c0 100644 --- a/ompi/mca/topo/treematch/treematch/tm_thread_pool.h +++ b/ompi/mca/topo/treematch/treematch/tm_thread_pool.h @@ -41,6 +41,7 @@ void wait_work_completion(work_t *work); void terminate_thread_pool(void); work_t *create_work(int nb_args, void **args, void (int, void **, int)); int test_main(void); +void destroy_work(work_t *work); diff --git a/ompi/mca/topo/treematch/treematch/tm_topology.c b/ompi/mca/topo/treematch/treematch/tm_topology.c index 1ecf51657b..e800384ccc 100644 --- a/ompi/mca/topo/treematch/treematch/tm_topology.c +++ b/ompi/mca/topo/treematch/treematch/tm_topology.c @@ -7,7 +7,7 @@ #include "tm_solution.h" -tm_topology_t* get_local_topo_with_hwloc(void); +tm_topology_t* tm_get_local_topo_with_hwloc(void); tm_topology_t* hwloc_to_tm(char *filename); int int_cmp_inc(const void* x1,const void* x2); void optimize_arity(int **arity, double **cost, int *nb_levels,int n); @@ -27,11 +27,25 @@ void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_node double ** topology_to_arch(hwloc_topology_t topology); void build_synthetic_proc_id(tm_topology_t *topology); tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes); +void tm_set_numbering(tm_numbering_t new_val); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */ +tm_numbering_t tm_get_numbering(); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */ #define LINE_SIZE (1000000) +static tm_numbering_t numbering = TM_NUMBERING_LOGICAL; + +void tm_set_numbering(tm_numbering_t new_val){ + numbering = new_val; +} + +tm_numbering_t tm_get_numbering(){ + return numbering; +} + + + /* transform a tgt scotch file into a topology file*/ tm_topology_t * tgt_to_tm(char *filename) { @@ -101,6 +115,13 @@ tm_topology_t * tgt_to_tm(char *filename) return topology; } + + +int 
nb_processing_units(tm_topology_t *topology) +{ + return topology->nb_proc_units; +} + int topo_nb_proc(hwloc_topology_t topology,int N) { hwloc_obj_t *objs = NULL; @@ -115,7 +136,7 @@ int topo_nb_proc(hwloc_topology_t topology,int N) -static double link_cost(int depth) +double link_cost(int depth) { /* Bertha values @@ -184,6 +205,46 @@ int symetric(hwloc_topology_t topology) return 1; } +void build_process_tab_id(tm_topology_t *topology, hwloc_obj_t *objs, char* filename){ + unsigned int i,j; + unsigned int nb_nodes = topology->nb_proc_units; + int vl = tm_get_verbose_level(); + + /* Build process id tab */ + if(numbering == TM_NUMBERING_LOGICAL){ + for (i = 0; i < nb_nodes; i++){ + topology->node_id[i] = i; + topology->node_rank[i] = i; + } + }else if(numbering == TM_NUMBERING_PHYSICAL){ + for (i = 0; i < nb_nodes; i++){ + if(objs[i]->os_index > nb_nodes){ + if(vl >= CRITICAL){ + fprintf(stderr, "Cannot use forced physical numbering!\n\tIndex of PU %d is %d and larger than number of nodes : %d\n", + i, objs[i]->os_index, nb_nodes); + } + exit(-1); + } + for(j = 0; j < i; j++){ + if((unsigned int)topology->node_id[j] == objs[i]->os_index){ + if(vl >= CRITICAL){ + fprintf(stderr, "Cannot use forced physical numbering!\n\tDuplicated physical number of some PUs in %s.\n\tPU %d and PU %d have the same physical number: (os_index[%d] = %d) == (os_index[%d] = %d)\n", filename, j, i, j, objs[j]->os_index, i, objs[i]->os_index); + } + exit(-1); + } + } + topology->node_id[i] = objs[i]->os_index; + topology->node_rank[objs[i]->os_index] = i; + } + }else{ + if(vl >= CRITICAL){ + fprintf(stderr, "Unknown numbering %d\n", (int)numbering); + } + exit(-1); + } +} + + tm_topology_t* hwloc_to_tm(char *filename) { hwloc_topology_t topology; @@ -193,43 +254,46 @@ tm_topology_t* hwloc_to_tm(char *filename) unsigned int nb_nodes; double *cost; int err, l; - unsigned int i; int vl = tm_get_verbose_level(); /* Build the topology */ hwloc_topology_init(&topology); - err = 
hwloc_topology_set_xml(topology,filename); + err = hwloc_topology_set_xml(topology, filename); if(err == -1){ if(vl >= CRITICAL) fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename); exit(-1); } -#if HWLOC_API_VERSION >= 0x00020000 - hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE); -#else /* HWLOC_API_VERSION >= 0x00020000 */ +#if HWLOC_API_VERSION < 0x20000 hwloc_topology_ignore_all_keep_structure(topology); -#endif /* HWLOC_API_VERSION >= 0x00020000 */ - hwloc_topology_load(topology); +#else + hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE); +#endif + + err = hwloc_topology_load(topology); + if(err == -1){ + if(vl >= CRITICAL) + fprintf(stderr,"Error: the content of the xml topology file %s is not compatible with the version installed on this machine.\nPlease use compatible versions to generate the file and to use it!\n",filename); + exit(-1); + } /* Test if symetric */ if(!symetric(topology)){ - if(tm_get_verbose_level() >= CRITICAL) + if(vl >= CRITICAL) fprintf(stderr,"%s not symetric!\n",filename); exit(-1); } /* work on depth */ topodepth = hwloc_topology_get_depth(topology); - + res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t)); res->oversub_fact = 1; res->nb_constraints = 0; res->constraints = NULL; res->nb_levels = topodepth; - res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels); - res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels); res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels); res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels); @@ -240,35 +304,24 @@ tm_topology_t* hwloc_to_tm(char *filename) for( depth = 0 ; depth < topodepth ; depth++ ){ nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth); res->nb_nodes[depth] = nb_nodes; - res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); - res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); - objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes); - objs[0] = 
hwloc_get_next_obj_by_depth(topology,depth,NULL); - hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1); + objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes); + objs[0] = hwloc_get_next_obj_by_depth(topology, depth, NULL); + hwloc_get_closest_objs(topology, objs[0], objs+1, nb_nodes-1); res->arity[depth] = objs[0]->arity; - - if (depth == topodepth -1){ - res->nb_constraints = nb_nodes; - res->nb_proc_units = nb_nodes; - } - + if(vl >= DEBUG) printf("\n--%d(%d) **%d**:--\n",res->arity[depth],nb_nodes,res->arity[0]); - /* Build process id tab */ - for (i = 0; i < nb_nodes; i++){ - if(objs[i]->os_index > nb_nodes){ - if(vl >= CRITICAL){ - fprintf(stderr, "Index of object %d of level %d is %d and larger than number of nodes : %d\n", - i, depth, objs[i]->os_index, nb_nodes); - } - exit(-1); - } - - res->node_id[depth][i] = objs[i]->os_index; - res->node_rank[depth][objs[i]->os_index] = i; - /* if(depth==topodepth-1) */ + + if (depth == topodepth -1){ + res->nb_constraints = nb_nodes; + res->nb_proc_units = nb_nodes; + res->node_id = (int*)MALLOC(sizeof(int)*nb_nodes); + res->node_rank = (int*)MALLOC(sizeof(int)*nb_nodes); + + build_process_tab_id(res, objs, filename); + } FREE(objs); @@ -292,21 +345,23 @@ tm_topology_t* hwloc_to_tm(char *filename) return res; } -tm_topology_t* get_local_topo_with_hwloc(void) +tm_topology_t* tm_get_local_topology_with_hwloc(void) { hwloc_topology_t topology; tm_topology_t *res = NULL; hwloc_obj_t *objs = NULL; unsigned topodepth,depth; - int nb_nodes,i; + int nb_nodes; /* Build the topology */ hwloc_topology_init(&topology); -#if HWLOC_API_VERSION >= 0x00020000 - hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE); -#else /* HWLOC_API_VERSION >= 0x00020000 */ + +#if HWLOC_API_VERSION < 0x20000 hwloc_topology_ignore_all_keep_structure(topology); -#endif /* HWLOC_API_VERSION >= 0x00020000 */ +#else + hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE); +#endif + 
hwloc_topology_load(topology); /* Test if symetric */ @@ -323,17 +378,15 @@ tm_topology_t* get_local_topo_with_hwloc(void) res->nb_constraints = 0; res->constraints = NULL; res->nb_levels = topodepth; - res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels); - res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels); res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels); res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels); + res->oversub_fact = 1; //defaut + res->cost = NULL; /* Build TreeMatch topology */ for( depth = 0 ; depth < topodepth ; depth++ ){ nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth); res->nb_nodes[depth] = nb_nodes; - res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); - res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes); objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL); @@ -342,15 +395,14 @@ tm_topology_t* get_local_topo_with_hwloc(void) if (depth == topodepth -1){ res->nb_constraints = nb_nodes; - res->nb_proc_units = nb_nodes; - } + res->nb_proc_units = nb_nodes; + res->node_id = (int*)MALLOC(sizeof(int)*nb_nodes); + res->node_rank = (int*)MALLOC(sizeof(int)*nb_nodes); /* printf("%d:",res->arity[depth]); */ - /* Build process id tab */ - for (i = 0; i < nb_nodes; i++){ - res->node_id[depth][i] = objs[i]->os_index; - res->node_rank[depth][objs[i]->os_index] = i; - /* if(depth==topodepth-1) */ + /* Build process id tab */ + + build_process_tab_id(res, objs, "Local node topology"); } FREE(objs); } @@ -367,15 +419,9 @@ tm_topology_t* get_local_topo_with_hwloc(void) void tm_free_topology(tm_topology_t *topology) { - int i; - for( i = 0 ; i < topology->nb_levels ; i++ ){ - FREE(topology->node_id[i]); - FREE(topology->node_rank[i]); - } - - FREE(topology->constraints); FREE(topology->node_id); FREE(topology->node_rank); + FREE(topology->constraints); FREE(topology->nb_nodes); FREE(topology->arity); FREE(topology->cost); @@ -400,18 +446,15 @@ 
tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_ty void tm_display_topology(tm_topology_t *topology) { int i; - unsigned int j; unsigned long id; for( i = 0 ; i < topology->nb_levels ; i++ ){ - printf("%d: ",i); - for( j = 0 ; j < topology->nb_nodes[i] ; j++) - printf("%d ",topology->node_id[i][j]); + printf("Level %d with arity %d ", i, topology->arity[i]); printf("\n"); } printf("Last level: "); for(id = 0; id < topology->nb_nodes[topology->nb_levels-1]/topology->oversub_fact; id++) - printf("%d ",topology->node_rank[topology->nb_levels-1][id]); + printf("%d ",topology->node_rank[id]); printf("\n"); @@ -430,9 +473,13 @@ void tm_display_topology(tm_topology_t *topology) void tm_display_arity(tm_topology_t *topology){ int depth; - for(depth=0; depth < topology->nb_levels; depth++) - printf("%d(%lf): ",topology->arity[depth], topology->cost[depth]); - + for(depth=0; depth < topology->nb_levels; depth++){ + printf("%d",topology->arity[depth]); + if(topology->cost) + printf("(%lf)",topology->cost[depth]); + else + printf(":"); + } printf("\n"); } @@ -442,12 +489,12 @@ int int_cmp_inc(const void* x1,const void* x2) } -static int topo_check_constraints(tm_topology_t *topology){ +int topo_check_constraints(tm_topology_t *topology){ int n = topology->nb_constraints; int i; int depth = topology->nb_levels-1; for (i=0;inode_id[depth], topology->nb_nodes[depth], topology->constraints[i])){ + if(!in_tab(topology->node_id, topology->nb_nodes[depth], topology->constraints[i])){ if(tm_get_verbose_level() >= CRITICAL){ fprintf(stderr,"Error! Incompatible constraint with the topology: rank %d in the constraints is not a valid id of any nodes of the topology.\n",topology->constraints[i]); } @@ -462,7 +509,7 @@ static int topo_check_constraints(tm_topology_t *topology){ /* cpy flag tells if we need to copy the array. 
Set to 1 when called from the application level and 0 when called from inside the library*/ -static int tm_topology_set_binding_constraints_cpy(int *constraints, int nb_constraints, tm_topology_t *topology, int cpy_flag){ +int tm_topology_set_binding_constraints_cpy(int *constraints, int nb_constraints, tm_topology_t *topology, int cpy_flag){ topology -> nb_constraints = nb_constraints; if(cpy_flag){ @@ -548,7 +595,7 @@ void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_node if(vl >= INFO) printf("nb_nodes=%d\n",*nb_nodes); *numbering = (int*)MALLOC(sizeof(int)*(*nb_nodes)); - memcpy(*numbering,topology->node_id[nb_levels-1],sizeof(int)*(*nb_nodes)); + memcpy(*numbering,topology->node_id,sizeof(int)*(*nb_nodes)); } void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels) @@ -701,7 +748,7 @@ void tm_optimize_topology(tm_topology_t **topology){ FREE(arity); FREE(numbering); tm_free_topology(*topology); - + *topology = new_topo; /* exit(-1); */ @@ -738,8 +785,6 @@ tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_lev topology->constraints = NULL; topology->nb_levels = nb_levels; topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels); - topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels); - topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels); topology->nb_nodes = (size_t *)MALLOC(sizeof(size_t)*topology->nb_levels); if(cost) topology->cost = (double*)CALLOC(topology->nb_levels,sizeof(double)); @@ -753,27 +798,17 @@ tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_lev n = 1; for( i = 0 ; i < topology->nb_levels ; i++ ){ topology->nb_nodes[i] = n; - topology->node_id[i] = (int*)MALLOC(sizeof(int)*n); - topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n); - if( i < topology->nb_levels-1){ - for( j = 0 ; j < n ; j++ ){ - topology->node_id[i][j] = j; - topology->node_rank[i][j]=j; - } - }else{ + if (i == topology->nb_levels-1){ + 
topology->node_id = (int*)MALLOC(sizeof(int)*n); + topology->node_rank = (int*)MALLOC(sizeof(int)*n); + topology->nb_constraints = n; + topology->nb_proc_units = n; for( j = 0 ; j < n ; j++ ){ int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes); - topology->node_id[i][j] = id; - topology->node_rank[i][id] = j; + topology->node_id[j] = id; + topology->node_rank[id] = j; } } - - - if (i == topology->nb_levels-1){ - topology->nb_constraints = n; - topology->nb_proc_units = n; - } - n *= topology->arity[i]; } if(cost){ @@ -791,32 +826,30 @@ void build_synthetic_proc_id(tm_topology_t *topology) int i; size_t j,n = 1; - topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels); - topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels); topology->nb_nodes = (size_t*) MALLOC(sizeof(size_t)*topology->nb_levels); for( i = 0 ; i < topology->nb_levels ; i++ ){ /* printf("n= %lld, arity := %d\n",n, topology->arity[i]); */ topology->nb_nodes[i] = n; - topology->node_id[i] = (int*)MALLOC(sizeof(long int)*n); - topology->node_rank[i] = (int*)MALLOC(sizeof(long int)*n); - if ( !topology->node_id[i] ){ - if(tm_get_verbose_level() >= CRITICAL) - fprintf(stderr,"Cannot allocate level %d (of size %ld) of the topology\n", i, (unsigned long int)n); - exit(-1); - } - + if (i == topology->nb_levels-1){ + topology->node_rank = (int*)MALLOC(sizeof(int)*n); + topology->node_id = (int*)MALLOC(sizeof(int)*n); + if ( !topology->node_id ){ + if(tm_get_verbose_level() >= CRITICAL) + fprintf(stderr,"Cannot allocate last level (of size %ld) of the topology\n", (unsigned long int)n); + exit(-1); + } + topology->nb_constraints = n; topology->nb_proc_units = n; + + for( j = 0 ; j < n ; j++ ){ + topology->node_id[j] = j; + topology->node_rank[j] = j; + } } - - - for( j = 0 ; j < n ; j++ ){ - topology->node_id[i][j] = j; - topology->node_rank[i][j] = j; - } n *= topology->arity[i]; } @@ -827,6 +860,7 @@ void 
build_synthetic_proc_id(tm_topology_t *topology) void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact){ { int i,j,n; + int *node_id, *node_rank; if(oversub_fact <=1) return; @@ -834,8 +868,6 @@ void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fac topology -> nb_levels ++; topology -> arity = (int*) REALLOC(topology->arity, sizeof(int)*topology->nb_levels); topology -> cost = (double*) REALLOC(topology->cost, sizeof(double)*topology->nb_levels); - topology -> node_id = (int**) REALLOC(topology->node_id, sizeof(int*)*topology->nb_levels); - topology -> node_rank = (int**) REALLOC(topology->node_rank, sizeof(int*)*topology->nb_levels); topology -> nb_nodes = (size_t *)REALLOC(topology->nb_nodes, sizeof(size_t)*topology->nb_levels); topology -> oversub_fact = oversub_fact; @@ -843,15 +875,19 @@ void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fac n = topology->nb_nodes[i-1] * oversub_fact; topology->arity[i-1] = oversub_fact; topology->cost[i-1] = 0; - topology->node_id[i] = (int*)MALLOC(sizeof(int)*n); - topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n); + node_id = (int*)MALLOC(sizeof(int)*n); + node_rank = (int*)MALLOC(sizeof(int)*n); topology->nb_nodes[i] = n; for( j = 0 ; j < n ; j++ ){ - int id = topology->node_id[i-1][j/oversub_fact]; - topology->node_id[i][j] = id; - topology->node_rank[i][id] = j; + int id = topology->node_id[j/oversub_fact]; + node_id[j] = id; + node_rank[id] = j; } + FREE(topology->node_id); + FREE(topology->node_rank); + topology->node_id = node_id; + topology->node_rank = node_rank; } } diff --git a/ompi/mca/topo/treematch/treematch/tm_topology.h b/ompi/mca/topo/treematch/treematch/tm_topology.h index 1cd0c5b417..a7b04dee74 100644 --- a/ompi/mca/topo/treematch/treematch/tm_topology.h +++ b/ompi/mca/topo/treematch/treematch/tm_topology.h @@ -19,4 +19,5 @@ void topology_constraints(tm_topology_t *topology,int **constraints,int *nb_cons void 
topology_cost(tm_topology_t *topology,double **cost); void topology_numbering(tm_topology_t *topology,int **numbering,int *nb_nodes); double ** topology_to_arch(hwloc_topology_t topology); +int nb_processing_units(tm_topology_t *topology); diff --git a/ompi/mca/topo/treematch/treematch/tm_tree.c b/ompi/mca/topo/treematch/treematch/tm_tree.c index 35fc2aa2fe..55379dc43c 100644 --- a/ompi/mca/topo/treematch/treematch/tm_tree.c +++ b/ompi/mca/topo/treematch/treematch/tm_tree.c @@ -5,7 +5,6 @@ #include #include -#include "treematch.h" #include "tm_tree.h" #include "tm_mapping.h" #include "tm_timings.h" @@ -88,6 +87,17 @@ int int_cmp_inc(const void* x1, const void* x2); +double choose (long n, long k) +{ + /* compute C_n_k */ + double res = 1; + int i; + + for( i = 0 ; i < k ; i++ ){ + res *= ((double)(n-i)/(double)(k-i)); + } + return res; +} void tm_set_exhaustive_search_flag(int new_val){ @@ -105,8 +115,6 @@ void free_affinity_mat(tm_affinity_mat_t *aff_mat){ FREE(aff_mat); } - - void free_list_child(tm_tree_t *tree) { int i; @@ -116,13 +124,14 @@ void free_list_child(tm_tree_t *tree) free_list_child(tree->child[i]); FREE(tree->child); - if(tree->dumb) + if(tree->dumb) /*in dumb subtrees internal nodes have been allocated individually, they need to bee freed one by one*/ FREE(tree); } } void free_tab_child(tm_tree_t *tree) { if(tree){ + /*in a non constaint tree internal node are allocated in an array an stored ib tab_child : they are freed globaly here */ free_tab_child(tree->tab_child); FREE(tree->tab_child); } @@ -130,20 +139,26 @@ void free_tab_child(tm_tree_t *tree) void free_non_constraint_tree(tm_tree_t *tree) { - int d = tree->dumb; + if(tree->dumb){ + if(tm_get_verbose_level() <= CRITICAL){ + fprintf(stderr,"Error trying to free a dumb tree!\n. 
This should never be done like this: the root of a non-constraint tree cannot be a dumb one!\n"); + } + exit(-1); + } - free_tab_child(tree); - free_list_child(tree); - if(!d) - FREE(tree); + free_list_child(tree); /* free the tree->child array recursively and the nodes in dumb subtree*/ + free_tab_child(tree); /* free the tree->tab_child array that correspond of all the child nodes of a given node in non dumb subtrees */ + FREE(tree); } void free_constraint_tree(tm_tree_t *tree) { int i; + if(tree){ for(i=0;iarity;i++) free_constraint_tree(tree->child[i]); + /* tab_child field is NULL for all nodes in the constraint tree*/ FREE(tree->child); FREE(tree); } @@ -155,20 +170,9 @@ void tm_free_tree(tm_tree_t *tree) if(tree->constraint) free_constraint_tree(tree); else - free_non_constraint_tree(tree); + free_non_constraint_tree(tree); /* tab_child field is NULL for all nodes in the tree*/ } -double choose (long n, long k) -{ - /* compute C_n_k */ - double res = 1; - int i; - - for( i = 0 ; i < k ; i++ ){ - res *= ((double)(n-i)/(double)(k-i)); - } - return res; -} void set_node(tm_tree_t *node, tm_tree_t ** child, int arity, tm_tree_t *parent, int id, double val, tm_tree_t *tab_child, int depth) @@ -239,13 +243,14 @@ void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){ int M = *(int*)args[4]; double **mat = (double**)args[5]; double *sum_row = (double*)args[6]; + long int *nnz = (long int *)args[7]; int i, j, i1, j1; int id1, id2; - if(nb_args != 7){ + if(nb_args != 8){ if(verbose_level >= ERROR) - fprintf(stderr, "Thread %d: Wrong number of args in %s: %d\n", thread_id, __func__, nb_args); + fprintf(stderr, "Thread %d: Wrong number of args in %s: %d\n", thread_id, __FUNCTION__, nb_args); exit(-1); } @@ -262,6 +267,9 @@ void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){ mat[i][j] += old_mat[id1][id2]; /* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n", i, j, id1, id2, old_mat[id1][id2]);*/ } + } + if(mat[i][j]){ + (*nnz)++; 
sum_row[i] += mat[i][j]; } } @@ -269,12 +277,13 @@ void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){ } -static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int M) +tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int M) { int i, j, i1, j1, id1, id2; double **new_mat = NULL, **old_mat = aff_mat->mat; double *sum_row = NULL; - + long int nnz = 0; + new_mat = (double**)MALLOC(M*sizeof(double*)); for( i = 0 ; i < M ; i++ ) new_mat[i] = (double*)CALLOC((M), sizeof(double)); @@ -287,16 +296,19 @@ static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat work_t **works; int *inf; int *sup; + long int *nnz_tab; nb_threads = MIN(M/512, get_nb_threads()); works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads); inf = (int*)MALLOC(sizeof(int)*nb_threads); sup = (int*)MALLOC(sizeof(int)*nb_threads); + nnz_tab = (long int*)MALLOC(sizeof(long int)*nb_threads); for(id=0;id= DEBUG) printf("Executing %p\n", (void *)works[id]); @@ -315,13 +328,16 @@ static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat for(id=0;idargs); + nnz += nnz_tab[id]; + destroy_work(works[id]); } - FREE(inf); FREE(sup); FREE(works); + FREE(nnz_tab); + }else{ for( i = 0 ; i < M ; i++ ) for( j = 0 ; j < M ; j++ ){ @@ -333,12 +349,16 @@ static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat new_mat[i][j] += old_mat[id1][id2]; /* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n", i, j, id1, id2, old_mat[id1][id2]);*/ } + } + if(new_mat[i][j]){ + nnz ++; sum_row[i] += new_mat[i][j]; } } } } - return new_affinity_mat(new_mat, sum_row, M); + + return new_affinity_mat(new_mat, sum_row, M, nnz); } void free_tab_double(double**tab, int mat_order) @@ -703,7 +723,7 @@ int select_independent_groups(group_list_t **tab_group, int n, int arity, int M } -static int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int arity){ +int8_t** 
init_independent_group_mat(int n, group_list_t **tab_group, int arity){ int i, j, ii, jj; int8_t **indep_mat = (int8_t **)MALLOC(sizeof(int8_t*) *n); @@ -731,7 +751,7 @@ static int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int return indep_mat; } -static int independent_groups_mat(group_list_t **selection, int selection_size, group_list_t *elem, int8_t **indep_mat) +int independent_groups_mat(group_list_t **selection, int selection_size, group_list_t *elem, int8_t **indep_mat) { int i; int id_elem = elem->id; @@ -754,7 +774,7 @@ static int independent_groups_mat(group_list_t **selection, int selection_size, static long int y=0; -static int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, int nb_groups, int arity, int depth, int solution_size, +int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, int nb_groups, int arity, int depth, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat, pthread_mutex_t *lock, int thread_id, int *tab_i, int start_depth){ @@ -842,8 +862,8 @@ static int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, in return 0; } -#if 0 -static group_list_t * group_dup(group_list_t *group, int nb_groups){ + +group_list_t * group_dup(group_list_t *group, int nb_groups){ group_list_t *elem = NULL; /* tm_tree_t **tab = NULL; */ double *bound; @@ -867,10 +887,8 @@ static group_list_t * group_dup(group_list_t *group, int nb_groups){ return elem; } -#endif -#if 0 -static group_list_t ** tab_group_dup(group_list_t **tab_group, int nb_groups){ +group_list_t ** tab_group_dup(group_list_t **tab_group, int nb_groups){ group_list_t **res; int i; @@ -884,10 +902,8 @@ static group_list_t ** tab_group_dup(group_list_t **tab_group, int nb_groups){ return res; } -#endif -#if 0 -static int8_t **indep_mat_dup(int8_t** mat, int n){ +int8_t **indep_mat_dup(int8_t** mat, int n){ int i; int8_t ** res = 
(int8_t**)MALLOC(sizeof(int8_t*)*n); int row_len; @@ -900,9 +916,9 @@ static int8_t **indep_mat_dup(int8_t** mat, int n){ return res; } -#endif -static void partial_exhaustive_search(int nb_args, void **args, int thread_id){ + +void partial_exhaustive_search(int nb_args, void **args, int thread_id){ int i, j; group_list_t **selection = NULL; double val; @@ -918,7 +934,7 @@ static void partial_exhaustive_search(int nb_args, void **args, int thread_id){ work_unit_t *work = (work_unit_t *) args[7]; pthread_mutex_t *lock = (pthread_mutex_t *) args[8]; int *tab_i; - int id = 0, id1, id2; + int id = -1, id1, id2; int total_work = work->nb_work; int cur_work = 0; @@ -926,7 +942,7 @@ static void partial_exhaustive_search(int nb_args, void **args, int thread_id){ if(nb_args!=9){ if(verbose_level>=ERROR){ - fprintf(stderr, "Id: %d: bad number of argument for function %s: %d instead of 9\n", thread_id, __func__, nb_args); + fprintf(stderr, "Id: %d: bad number of argument for function %s: %d instead of 9\n", thread_id, __FUNCTION__, nb_args); return; } } @@ -1009,20 +1025,19 @@ static void partial_exhaustive_search(int nb_args, void **args, int thread_id){ } -#if 0 -static int dbl_cmp_dec(const void* x1,const void* x2) + +int dbl_cmp_dec(const void* x1,const void* x2) { return *((double *)x1) > *((double *)x2) ? -1 : 1; } -#endif -static int dbl_cmp_inc(const void* x1,const void* x2) +int dbl_cmp_inc(const void* x1,const void* x2) { return *((double *)x1) < *((double *)x2) ? 
-1 : 1; } -static double *build_bound_array(double *tab, int n){ +double *build_bound_array(double *tab, int n){ int i; double *bound; @@ -1051,7 +1066,7 @@ static double *build_bound_array(double *tab, int n){ return bound; } -static work_unit_t *create_work_unit(work_unit_t *cur, int *tab,int size){ +work_unit_t *create_work_unit(work_unit_t *cur, int *tab,int size){ work_unit_t *res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t)); int *tab_group = MALLOC(size*sizeof(int)); memcpy(tab_group, tab, size*sizeof(int)); @@ -1062,7 +1077,7 @@ static work_unit_t *create_work_unit(work_unit_t *cur, int *tab,int size){ return res; } -static work_unit_t *generate_work_units(work_unit_t *cur, int i, int id, int *tab_group,int size, int id_max){ +work_unit_t *generate_work_units(work_unit_t *cur, int i, int id, int *tab_group,int size, int id_max){ tab_group[i] = id; if(i==size-1){ @@ -1082,7 +1097,7 @@ static work_unit_t *generate_work_units(work_unit_t *cur, int i, int id, int *t } -static work_unit_t *create_tab_work(int n){ +work_unit_t *create_tab_work(int n){ int work_size = 4; int i; work_unit_t *cur,*res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t)); @@ -1106,7 +1121,7 @@ static work_unit_t *create_tab_work(int n){ } -static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int arity, int solution_size, double *best_val, +int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int arity, int solution_size, double *best_val, group_list_t **best_selection){ pthread_mutex_t lock; @@ -1181,6 +1196,7 @@ static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int for(id=0;idargs); + destroy_work(works[id]); } exit(-1); @@ -1209,8 +1225,8 @@ static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int return 0; } -#if 0 -static int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t 
**best_selection, int8_t **indep_mat) + +int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat) { group_list_t *elem = NULL; @@ -1249,10 +1265,10 @@ static int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int ar return 0; } -#endif -#if 0 -static int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat, int* tab_i) + + +int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat, int* tab_i) { group_list_t *elem = NULL; @@ -1302,10 +1318,10 @@ static int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, return 0; } -#endif -#if 0 -static int exhaustive_search(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val, + + +int exhaustive_search(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val, group_list_t **best_selection) { int i, j; @@ -1365,7 +1381,7 @@ static int exhaustive_search(group_list_t **tab_group, int n, int arity, int so return 0; } -#endif + int select_independent_groups_by_largest_index(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val, group_list_t **best_selection, int bound, double max_duration) @@ -1566,7 +1582,7 @@ double fast_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t return val; } -static double k_partition_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size) { +double k_partition_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size) { int 
*partition = NULL; int n = aff_mat->order; com_mat_t com_mat; @@ -1695,7 +1711,8 @@ tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *aff_mat, double* obj_wei double **old_mat; double avg; int i, j, mat_order; - + long int nnz = 0; + if(!obj_weight) return aff_mat; @@ -1727,8 +1744,9 @@ tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *aff_mat, double* obj_wei mat[i][j] = 1e-4*old_mat[i][j]/comm_speed-fabs(avg-(obj_weight[i]+obj_weight[j])/2); sum_row[i] += mat[i][j]; } + if(mat[i][j]) nnz++; } - return new_affinity_mat(mat, sum_row, mat_order); + return new_affinity_mat(mat, sum_row, mat_order,nnz); } @@ -1952,7 +1970,7 @@ void complete_aff_mat(tm_affinity_mat_t **aff_mat , int mat_order, int K) sum_row[i] = (*aff_mat)->sum_row[i]; } - *aff_mat = new_affinity_mat(new_mat, sum_row, M); + *aff_mat = new_affinity_mat(new_mat, sum_row, M, (*aff_mat)->nnz); } void complete_obj_weight(double **tab, int mat_order, int K) @@ -2001,7 +2019,9 @@ void create_dumb_tree(tm_tree_t *node, int depth, tm_topology_t *topology) list_child[i]->dumb = 1; } - set_node(node, list_child, arity, NULL, -1, 0, list_child[0], depth); + /* list_child => node->child ; list_child[0] => node->tab_child */ + /* printf("list_child[0] = %p\n",list_child[0]); */ + set_node(node, list_child, arity, NULL, -1, 0, NULL, depth); } void complete_tab_node(tm_tree_t **tab, int mat_order, int K, int depth, tm_topology_t *topology) { @@ -2080,6 +2100,8 @@ tm_tree_t *build_level_topology(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, TIC; K = arity*((mat_order/arity)+1)-mat_order; /*printf("****mat_order=%d arity=%d K=%d\n", mat_order, arity, K); */ + if(verbose_level >= INFO) + printf("****mat_order=%d arity=%d K=%d\n", mat_order, arity, K); /*display_tab(tab, mat_order);*/ /* add K rows and columns to comm_matrix*/ complete_aff_mat(&aff_mat, mat_order, K); @@ -2106,8 +2128,8 @@ tm_tree_t *build_level_topology(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, for( i = 0 ; i < M ; i++ ){ tm_tree_t 
**list_child = NULL; list_child = (tm_tree_t**)CALLOC(arity, sizeof(tm_tree_t*)); - set_node(&new_tab_node[i], list_child, arity, NULL, i, 0, tab_node, depth); - } + set_node(&new_tab_node[i], list_child, arity, NULL, i, 0, tab_node, depth); + } duration = TOC; if(verbose_level >= INFO) printf("New nodes creation= %fs\n ", duration); @@ -2224,7 +2246,7 @@ int check_constraints(tm_topology_t *topology, int **constraints) In order to have all the ranks of a given id we need to shift them as follows: */ shift = 1 + i%topology->oversub_fact - topology->oversub_fact; - (*constraints)[i] = topology->node_rank[topology->nb_levels-1][topology->constraints[i/topology->oversub_fact]] +shift; + (*constraints)[i] = topology->node_rank[topology->constraints[i/topology->oversub_fact]] +shift; if((*constraints)[i] < last) sorted = 0; last = (*constraints)[i]; diff --git a/ompi/mca/topo/treematch/treematch/treematch.h b/ompi/mca/topo/treematch/treematch/treematch.h index 8891c819d0..d5466c4c9f 100644 --- a/ompi/mca/topo/treematch/treematch/treematch.h +++ b/ompi/mca/topo/treematch/treematch/treematch.h @@ -21,6 +21,11 @@ typedef enum{ TM_METRIC_HOP_BYTE = 3 } tm_metric_t; +/* numbering */ +typedef enum{ + TM_NUMBERING_LOGICAL = 0, + TM_NUMBERING_PHYSICAL = 1 +} tm_numbering_t; /********* TreeMatch Public Structures **********/ @@ -30,39 +35,40 @@ typedef struct _job_info_t{ int finish_date; } tm_job_info_t; -typedef struct _tree_t{ +typedef struct _tm_tree_t{ int constraint; /* tells if the tree has been constructed with constraints on the nodes or not. Usefull for freeing it. 
needs to be set on the root only*/ - struct _tree_t **child; - struct _tree_t *parent; - struct _tree_t *tab_child; /*the pointer to be freed*/ + struct _tm_tree_t **child; + struct _tm_tree_t *parent; + struct _tm_tree_t *tab_child; /* The pointer to be freed */ double val; int arity; int depth; - int id; - int uniq; - int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately*/ + int id; /* id of the node or the leaf. Ids are different only on a given level */ + int uniq; /* unique id in the whole tree */ + int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately */ tm_job_info_t *job_info; - int nb_processes; /* number of grouped processes (i.e. the order of the affinity matrix). Set at the root only*/ -}tm_tree_t; /* FT : changer le nom : tm_grouap_hierachy_t ?*/ + int nb_processes; /* number of grouped processes (i.e. the order of the affinity matrix). Set at the root only */ +}tm_tree_t; /* FT: rename to tm_group_hierarchy_t? */ /* Maximum number of levels in the tree*/ #define TM_MAX_LEVELS 100 typedef struct { - int *arity; /* arity of the nodes of each level*/ - int nb_levels; /*number of levels of the tree.
Levels are numbered from top to bottom starting at 0*/ - size_t *nb_nodes; /*nb of nodes of each level*/ - int **node_id; /*ID of the nodes of the tree for each level*/ - int **node_rank ; /*rank of the nodes of the tree for each level given its ID: this is the inverse tab of node_id*/ - size_t *nb_free_nodes; /*nb of available nodes of each level*/ - int **free_nodes; /*tab of node that are free: useful to simulate batch scheduler*/ - double *cost; /*cost of the communication depending on the distance: - cost[i] is the cost for communicating at distance nb_levels-i*/ - int *constraints; /* array of constraints: id of the nodes where it is possible to map processes */ - int nb_constraints; /* Size of the above array */ - int oversub_fact; /* maximum number of processes to be mapped on a given node */ - int nb_proc_units; /* the real number of units used for computation */ + int *arity; /* Arity of the nodes of each level*/ + int nb_levels; /* Number of levels of the tree. Levels are numbered from top to bottom starting at 0*/ + size_t *nb_nodes; /* Number of nodes of each level*/ + int physical_num; /* Flag set to !=0 if we use physical numbering and set to 0 if we use logical numbering */ + int *node_id; /* ID of the nodes of the tree of the last level*/ + int *node_rank ; /* Rank of the nodes of the tree for the last level given its ID: this is the inverse tab of node_id*/ + size_t *nb_free_nodes; /* Nb of available nodes of each level*/ + int **free_nodes; /* Array of nodes that are free: useful to simulate batch scheduler*/ + double *cost; /* Cost of the communication depending on the distance: + cost[i] is the cost for communicating at distance nb_levels-i*/ + int *constraints; /* Array of constraints: id of the nodes where it is possible to map processes */ + int nb_constraints; /* Size of the above array */ + int oversub_fact; /* Maximum number of processes to be mapped on a given node */ + int nb_proc_units; /* The real number of units used for computation */
}tm_topology_t; @@ -70,17 +76,18 @@ typedef struct { double ** mat; double * sum_row; int order; + long int nnz; /* number of non zero entries */ } tm_affinity_mat_t; /* - sigma_i is such that process i is mapped on core sigma_i - k_i is such that core i exectutes process k_i_j (0<=j<<=oversubscribing factor - 1) + sigma[i] is such that process i is mapped on core sigma[i] + k[i][j] is such that core i executes process k[i][j] (0 <= j <= oversubscribing factor - 1) size of sigma is the number of processes (nb_objs) size of k is the number of cores/nodes (nb_compute_units) size of k[i] is the number of process we can execute per nodes (1 if no oversubscribing) - We must have numbe of process<=number of cores + We must have number of processes <= number of cores k[i] == NULL if no process is mapped on core i */ @@ -95,8 +102,10 @@ typedef struct { /************ TreeMatch Public API ************/ +/* construct topology from local one using hwloc */ +tm_topology_t* tm_get_local_topology_with_hwloc(void); -/* load XML or TGT topology */ +/* Alternatively, load XML or TGT topology */ tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type); /* Alternatively, build a synthetic balanced topology. @@ -120,14 +129,12 @@ tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_ty double cost[5] = {500,100,50,10,0}; int arity[5] = {16,2,2,2,0}; - int cn[5]={0,1}; + int cn[2]={0,1}; topology = tm_build_synthetic_topology(arity,cost,5,cn,2); */ tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes); -/* load affinity matrix */ -tm_affinity_mat_t *tm_load_aff_mat(char *com_filename); /* Alternativelly, build the affinity matrix from a array of array of matrix of size order by order For performance reason mat is not copied.
@@ -153,7 +160,7 @@ void tm_optimize_topology(tm_topology_t **topology); void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact); /* core of the treematch: compute the solution tree */ tm_tree_t *tm_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, double *obj_weight, double *com_speed); -/* compute the mapping according to teh tree an dthe core numbering*/ +/* compute the mapping according to the tree and the core numbering*/ tm_solution_t *tm_compute_mapping(tm_topology_t *topology, tm_tree_t *comm_tree); /* display the solution*/ double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol, tm_metric_t metric); @@ -168,7 +175,6 @@ void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat); void tm_set_verbose_level(unsigned int level); unsigned int tm_get_verbose_level(void); /* finalize treematch :check memory if necessary, and free internal variables (thread pool)*/ -void tm_finalize(void); /* Ask for exhaustive search: may be very long @@ -178,10 +184,21 @@ Ask for exhaustive search: may be very long void tm_set_exhaustive_search_flag(int new_val); int tm_get_exhaustive_search_flag(void); +/* +Ask for greedy k-partitioning even if scotch is available + new_val == 0 : no greedy k-partitioning + new_val != 0 : greedy k-partitioning +*/ +void tm_set_greedy_flag(int new_val); +int tm_get_greedy_flag(void); + /* Setting the maximum number of threads you want to use in parallel parts of TreeMatch */ void tm_set_max_nb_threads(unsigned int val); +/* managing the usage of physical vs. logical core numbering when using hwloc/xml files */ +void tm_set_numbering(tm_numbering_t new_val); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */ +tm_numbering_t tm_get_numbering(void); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */ #include "tm_malloc.h"