1
1

First step of the integration with the new TreeMatch.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
George Bosilca 2018-05-25 02:05:14 -04:00
родитель f784ce3459
Коммит c6f73e8883
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 09C926752C9F09B1
13 изменённых файлов: 819 добавлений и 287 удалений

Просмотреть файл

@ -199,7 +199,7 @@ void add_to_bucket(int id,int i,int j,bucket_list_t bucket_list)
/* display_bucket(bucket);*/
if(verbose_level >= DEBUG){
printf("Extending bucket %d (%p) from size %d to size %d!\n",
id, (void*)bucket->bucket, bucket->nb_elem, bucket->nb_elem+size);
id,(void*)bucket->bucket, bucket->nb_elem, bucket->nb_elem+size);
}
bucket->bucket = (coord*)REALLOC(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len));
@ -525,7 +525,7 @@ void partial_update_val (int nb_args, void **args, int thread_id){
if(nb_args != 5){
if(verbose_level >= ERROR)
fprintf(stderr,"(Thread: %d) Wrong number of args in %s: %d\n",thread_id, __func__, nb_args);
fprintf(stderr,"(Thread: %d) Wrong number of args in %s: %d\n",thread_id, __FUNCTION__, nb_args);
exit(-1);
}
@ -648,6 +648,7 @@ double bucket_grouping(tm_affinity_mat_t *aff_mat,tm_tree_t *tab_node, tm_tree_t
wait_work_completion(works[id]);
val+=tab_val[id];
FREE(works[id]->args);
destroy_work(works[id]);
}

Просмотреть файл

@ -6,6 +6,11 @@
#include <stdio.h>
#include "config.h"
#if HAVE_LIBSCOTCH
#include <scotch.h>
#endif
#define USE_KL_KPART 0
#define KL_KPART_GREEDY_TRIALS 0
@ -33,6 +38,253 @@ void free_const_tab(constraint_t *,int);
void kpartition_build_level_topology(tm_tree_t *,com_mat_t *,int,int,tm_topology_t *,
int *,int *,int,double *,double *);
/* Partitioning backend selector: kpartition() uses Scotch when this is zero
   (and HAVE_LIBSCOTCH is set) and the greedy heuristic otherwise. */
static int greedy_flag = 0;

/* Set the partitioning backend selector (non-zero forces greedy partitioning). */
void tm_set_greedy_flag(int new_val){
    greedy_flag = new_val;
}

/* Return the current value of the partitioning backend selector. */
int tm_get_greedy_flag(void){
    return greedy_flag;
}
#if HAVE_LIBSCOTCH
/* Build a Scotch graph with n vertices from the communication matrix.
   The first com_mat->n vertices get one arc per non-zero off-diagonal entry
   of their row, weighted by that entry truncated to SCOTCH_Num.  Vertices
   from com_mat->n up to n-1 ("dumb" vertices) get no neighbor.
   The adjacency arrays are allocated here with malloc and handed to
   SCOTCH_graphBuild; they are not freed in this function.
   Returns the graph allocated with SCOTCH_graphAlloc. */
SCOTCH_Graph* com_mat_to_scotch_graph(com_mat_t *com_mat, int n){
    double **comm = com_mat->comm;
    SCOTCH_Num vertnbr = n;               /* number of vertices */
    SCOTCH_Num edgenbr = vertnbr*vertnbr; /* upper bound on the number of arcs */
    /* verttab[v] is the index in edgetab of the first neighbor of v */
    SCOTCH_Num *verttab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * (vertnbr+1));
    /* ids of the neighbors */
    SCOTCH_Num *edgetab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * edgenbr);
    /* load of each arc (amount of communication) */
    SCOTCH_Num *edlotab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * edgenbr);
    SCOTCH_Graph *graphptr = SCOTCH_graphAlloc();
    int edgeNum = 0;
    int i, col;

    /* one adjacency list per row of the communication matrix */
    for(i = 0; i < com_mat->n; i++){
        verttab[i] = edgeNum;
        for(col = 0; col < com_mat->n; col++){
            /* no self loop, and no arc for a null entry */
            if(col == i || !comm[i][col])
                continue;
            edgetab[edgeNum] = col;
            edlotab[edgeNum] = (SCOTCH_Num)comm[i][col];
            edgeNum++;
        }
    }

    /* the dumb vertices have an empty adjacency list */
    for(i = com_mat->n; i < vertnbr; i++)
        verttab[i] = edgeNum;
    /* closing entry: end of the last adjacency list */
    verttab[i] = edgeNum;

    if(tm_get_verbose_level() >=DEBUG){
        printf("Graph converted to Scotch format: edgeNum=%d, edgenbr = %lld, vertnbr = %lld\n",edgeNum, (long long int)edgenbr, (long long int)vertnbr);
    }

    assert(edgeNum <= edgenbr);
    edgenbr = edgeNum;

    SCOTCH_graphInit(graphptr);
    SCOTCH_graphBuild(graphptr, 0, vertnbr, verttab, verttab+1, NULL, NULL, edgenbr, edgetab, edlotab);
    return graphptr;
}
/* Check that parttab is a perfectly balanced partition of n vertices into k parts.
   parttab[i] is the part assigned to vertex i.
   Returns 1 when every part holds exactly n/k vertices, 0 otherwise.
   An entry outside [0, k-1] (for instance a vertex left at -1) or k <= 0
   also yields 0 instead of indexing outside the counting array. */
int check_partition(SCOTCH_Num *parttab, int k, int n){
    int *count;
    int i, target;

    if(k <= 0)
        return 0;

    count = (int *)CALLOC(k, sizeof(int));
    target = n/k;

    for(i = 0; i < n; i++){
        if(parttab[i] < 0 || parttab[i] >= k){
            if(tm_get_verbose_level()>=INFO)
                fprintf(stdout, "Error in partition: vertex %d is assigned to invalid partition %lld\n", i, (long long int)parttab[i]);
            FREE(count);
            return 0;
        }
        count[parttab[i]]++;
    }

    for(i = 0; i < k; i++){
        if(count[i] != target){
            if(tm_get_verbose_level()>=INFO)
                fprintf(stdout, "Error in partition: %d vertices in partition %d while expecting %d vertices\n",count[i], i, target);
            FREE(count);
            return 0;
        }
    }

    FREE(count);
    return 1;
}
/* n is the number of elements in the graph including the dumb vertices;
com_mat->n is the number of processes (i.e. the size of the graph without dumb vertices) */
/* Partition n vertices into k parts with Scotch.
   k              : number of parts
   com_mat        : communication matrix used to build the Scotch graph
   n              : number of vertices to partition
   constraints    : array of nb_constraints values, scanned in increasing index order
                    against the bounds (i+1)*(n/k)
   nb_constraints : number of entries in constraints (0 means no constraint)
   Returns a newly allocated array of n ints giving the part of each vertex.
   If the Scotch result is not perfectly balanced (see check_partition) the
   result of kpartition_greedy is returned instead.  The process exits if
   SCOTCH_graphPartFixed fails.
   NOTE(review): the arrays malloc'ed by com_mat_to_scotch_graph do not appear
   to be released here -- confirm whether this is a leak. */
int *kpartition_scotch(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints){
SCOTCH_Num partnbr = (SCOTCH_Num) k;
SCOTCH_Graph* graphptr;
SCOTCH_Strat strat;
SCOTCH_Num straval;
SCOTCH_Num *parttab = (SCOTCH_Num *)MALLOC(sizeof(SCOTCH_Num) * n);
int *partition = (int *)MALLOC(sizeof(int) * n);
int i, j;
int *nb_dumb = (int *)MALLOC(sizeof(int) * k); /*number of dumb vertices per partition */
int dumb_id, min_nb_dumb = n, sum_dumb = 0, p;
/* if(SCOTCH_graphCheck(graphptr) == 1){ */
/* fprintf(stderr,"Bad scotch graph! Exiting program...\n"); */
/* exit(-1); */
/* } */
/* printf("Correct scotch graph (%d, %d)!\n", SCOTCH_numSizeof(), sizeof(SCOTCH_Num)); */
/* -1 marks a vertex whose part is left free for Scotch to choose */
for(i=0;i<n;i++)
parttab[i] = -1;
/* put "dumb" vertices in the correct partition if there are any*/
/*constraints are leaves that can be used */
if (nb_constraints){
int end, start = 0;
for( i = 0 ; i < k ; i ++){
/* constraints below max_val belong to one of the subtrees 0..i */
int max_val = (i+1)* (n/k);
end = start;
while( end < nb_constraints){
if(constraints[end] >= max_val)
break;
end++;
}
/* now end - start is the number of constraints for the ith subtree
hence the number of dumb vertices in partition i is the difference between the
number of leaves of the subtree (n/k) and the number of constraints
*/
nb_dumb[i] = n/k - (end-start);
sum_dumb += nb_dumb[i];
if(nb_dumb[i] < min_nb_dumb){
min_nb_dumb = nb_dumb[i];
}
start=end;
}
/* Imagine we have n=12, k=3, nb_dumb[0] = 3, nb_dumb[1] = 2, nb_dumb[2] = 3, hence min_nb_dumb = 2 and sum_dumb = 8
So, we have 8 fixed vertices and 12-8 = 4 free vertices
We want Scotch to allocate the 4 free vertices such that the whole partition is balanced (4 vertices in each):
1 in partition 0, 2 in partition 1 and 1 in partition 2.
To do so we can fill parttab as follows:
{-1, -1, -1, -1, 0, 0, 0, 1, 1, 2, 2, 2} and call Scotch with a n=12 vertices graph with SCOTCH_STRATBALANCE
dumb_id = n - sum_dumb;
for(i = 0;i<k;i++){
for( j = 0; j < nb_dumb[i]; j ++ ){
parttab[dumb_id] = i;
dumb_id++;
}
}
A more efficient solution is to fill parttab as follows
{-1, -1, -1, -1, 0, 2, 0, 0, 1, 1, 2, 2} and call Scotch with
a p = 6 (n-sum_dumb+ sum_{i}(nb_dumb[i]-min_nb_dumb)) vertices graph.
Scotch will then only use the first p = 6 elements of parttab
*/
dumb_id = n - sum_dumb; /* now dumb_id is the number of free vertices*/
/* first the dumb vertices in excess of min_nb_dumb: they are part of the graph given to Scotch */
for(i = 0 ; i < k ; i++){
for( j = 0; j < nb_dumb[i] - min_nb_dumb; j ++ ){
parttab[dumb_id] = i;
dumb_id++;
}
}
/* p is the number of vertices of the graph handed to Scotch */
p = dumb_id;
/* then min_nb_dumb dumb vertices per part, stored after the first p entries */
for(i = 0 ; i < k ; i++){
for( j = 0 ; j < min_nb_dumb ; j ++ ){
parttab[dumb_id] = i;
dumb_id++;
}
}
}else{
p=n; /* if no constraint use n vertices */
}
graphptr = com_mat_to_scotch_graph(com_mat, p);
SCOTCH_stratInit (&strat);
/* balance-oriented strategy by default, speed-oriented one when k > 4 */
straval = SCOTCH_STRATBALANCE;
if(k>4)
straval = SCOTCH_STRATSPEED;
SCOTCH_stratGraphMapBuild (&strat, straval, partnbr, 0);
if(tm_get_verbose_level()>=DEBUG){
printf("Before Scotch (p=%d, n=%d): \n", p, n);
for(i = 0 ; i < n; i++){
printf("%d ",(int)parttab[i]);
}
printf("\n");
}
if(SCOTCH_graphPartFixed(graphptr, partnbr, &strat, parttab) == 0){
if(tm_get_verbose_level()>=DEBUG){
printf("After Scotch: \n");
for(i = 0 ; i < n; i++){
printf("%d ",(int)parttab[i]);
}
printf("\n");
}
}else{
if(tm_get_verbose_level()>=CRITICAL){
fprintf(stderr,"Scotch Partitionning failed\n");
}
exit(-1);
}
/* an unbalanced Scotch result is discarded in favor of the greedy partitioning */
if(!check_partition(parttab, partnbr, n)){
if(tm_get_verbose_level()>=INFO){
printf("falling from Scotch to greedy partionning\n");
}
FREE(partition);
partition = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
}else{
for(i=0;i<n;i++)
partition[i] = parttab [i];
}
SCOTCH_stratExit (&strat);
SCOTCH_graphExit(graphptr);
SCOTCH_memFree(graphptr);
FREE(parttab);
FREE(nb_dumb);
return partition;
}
#endif /* HAVE_LIBSCOTCH */
void allocate_vertex(int u, int *res, com_mat_t *com_mat, int n, int *size, int max_size)
@ -128,6 +380,7 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int
/* put "dumb" vertices in the correct partition if there are any*/
/*constraints are leaves that can be used */
if (nb_constraints){
start = 0;
dumb_id = n-1;
@ -139,7 +392,7 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int
break;
end++;
}
/* now end - start is the number of constarints for the ith subtree
/* now end - start is the number of constraints for the ith subtree
hence the number of dumb vertices is the differences between the
number of leaves of the subtree (n/k) and the number of constraints
*/
@ -223,10 +476,18 @@ int *kpartition(int k, com_mat_t *com_mat, int n, int *constraints, int nb_const
#if HAVE_LIBSCOTCH
/*printf("Using Scotch\n");*/
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
if(!greedy_flag){
if(verbose_level >= DEBUG)
printf("Using Scotch\n");
res = kpartition_scotch(k, com_mat, n, constraints, nb_constraints);
}else{
if(verbose_level >= DEBUG)
printf("Using greedy partitionning\n");
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
}
#else
/*printf("Using default\n");*/
if(verbose_level >= DEBUG)
printf("Using greedy partitionning\n");
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
#endif
return res;
@ -242,7 +503,7 @@ constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm
const_tab = (constraint_t *)CALLOC(k,sizeof(constraint_t));
/* nb_leaves is the number of leaves of the current subtree
this will help to detremine where to split constraints and how to shift values
this will help to determine where to split constraints and how to shift values
*/
nb_leaves = compute_nb_leaves_from_level( depth + 1, topology );
@ -251,8 +512,6 @@ constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm
*/
start = 0;
for( i = 0; i < k; i++ ){
/*returns the indice in constraints that contains the smallest value not copied
end is used to compute the number of copied elements (end-size) and is used as the next staring indices*/
@ -294,7 +553,7 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
printf("Partition: "); print_1D_tab(partition,n);
display_tab(com_mat->comm,com_mat->n);
printf("m=%d,n=%d,k=%d\n",m,n,k);
printf("perm=%p\n", (void*)perm);
printf("perm=%p\n", (void *)perm);
}
perm = (int*)MALLOC(sizeof(int)*m);
@ -425,8 +684,8 @@ void free_const_tab(constraint_t *const_tab, int k)
FREE(const_tab);
}
#if 0
static void check_com_mat(com_mat_t *com_mat){
void check_com_mat(com_mat_t *com_mat){
int i,j;
for( i = 0 ; i < com_mat->n ; i++ )
@ -435,8 +694,29 @@ static void check_com_mat(com_mat_t *com_mat){
printf("com_mat->comm[%d][%d]= %f\n",i,j,com_mat->comm[i][j]);
exit(-1);
}
}
/* Print n tabulation characters on stdout.
   Nothing is printed when n <= 0 (a negative n used to loop without end). */
void print_tab(int n){
    for(; n > 0; n--)
        fprintf(stdout,"\t");
}
/* Print on stdout the content of each of the k partitions.
   partition[v] is the part of vertex v (0 <= v < n); the value printed for a
   vertex is local_vertices[v], and vertices whose local id is -1 are not shown.
   Every output line is indented with depth tabulations. */
void display_partition(int *partition, int *local_vertices, int n, int depth, int k){
    int part;

    print_tab(depth);
    fprintf(stdout,"Partitions at depth=%d\n",depth);

    for(part = 0; part < k; part++){
        int v;

        print_tab(depth);
        fprintf(stdout,"%d :",part);
        for(v = 0; v < n; v++){
            /* skip vertices of the other parts and vertices without a local id */
            if(partition[v] != part || local_vertices[v] == -1)
                continue;
            fprintf(stdout,"%d ",local_vertices[v]);
        }
        fprintf(stdout,"\n");
    }
}
#endif
void kpartition_build_level_topology(tm_tree_t *cur_node, com_mat_t *com_mat, int N, int depth,
tm_topology_t *topology, int *local_vertices,
@ -471,6 +751,10 @@ void kpartition_build_level_topology(tm_tree_t *cur_node, com_mat_t *com_mat, in
/* partition the com_matrix in k partitions*/
partition = kpartition(k, com_mat, N, constraints, nb_constraints);
if(verbose_level>=INFO)
display_partition(partition, local_vertices, N, depth, k);
/* exit(-1); */
/* split the communication matrix in k parts according to the partition just found above */
tab_com_mat = split_com_mat( com_mat, N, k, partition);
@ -558,7 +842,7 @@ tm_tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **
the value of this array will be used to number the leaves of the tm_tree_t tree
that start at "root"
min(N,nb_contraints) is used to takle the case where thre is less processes than constraints
min(N,nb_contraints) is used to tackle the case where there is less processes than constraints
*/

Просмотреть файл

@ -36,7 +36,7 @@ static void init_extra_data(void);
static char *my_strdup(char* string){
char *my_strdup(char* string){
int size = 1+strlen(string);
char *res = (char*)malloc(size*sizeof(char));
@ -55,7 +55,7 @@ void save_ptr(void *ptr, size_t size, char *file, int line) {
elem -> line = line;
elem -> file = my_strdup(file);
if(tm_get_verbose_level() >= DEBUG)
printf("Storing (%p,%ld)\n",ptr,size);
printf("Storing (%p,%ld)\n", (void *)ptr,size);
HASH_ADD_PTR( size_hash, key, elem );
}
@ -66,14 +66,14 @@ size_t retreive_size(void *someaddr){
HASH_FIND_PTR(size_hash, &someaddr, elem);
if(!elem){
if(tm_get_verbose_level() >= CRITICAL)
fprintf(stderr,"Cannot find ptr %p to free!\n",someaddr);
fprintf(stderr,"Cannot find ptr %p to free!\n", (void *)someaddr);
abort();
return 0;
}
res = elem->size;
if(tm_get_verbose_level()>=DEBUG)
printf("Retreiving (%p,%ld)\n",someaddr, res);
printf("Retreiving (%p,%ld)\n",(void *)someaddr, res);
free(elem->file);
HASH_DEL( size_hash, elem);
@ -86,7 +86,7 @@ void tm_mem_check(void){
int nb_errors = 0;
for(s=size_hash; s != NULL; s=s->hh.next) {
if(tm_get_verbose_level()>=ERROR)
printf("pointer %p of size %ld (%s: %d) has not been freed!\n", s->key, s->size, s->file, s->line);
printf("pointer %p of size %ld (%s: %d) has not been freed!\n", (void *)s->key + EXTRA_BYTE, s->size, s->file, s->line);
nb_errors ++;
}
@ -119,7 +119,7 @@ void *tm_malloc(size_t size, char *file, int line){
ptr = malloc(size);
if(tm_get_verbose_level()>=DEBUG)
printf("tm_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE,(void*)ptr,file,line);
printf("tm_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE, (void *)ptr,file,line);
save_ptr(ptr, size, file, line);
@ -128,7 +128,7 @@ void *tm_malloc(size_t size, char *file, int line){
if(tm_get_verbose_level()>=DEBUG)
printf("tm_malloc returning: %p\n",(void*)(ptr+EXTRA_BYTE));
printf("tm_malloc returning: %p\n",(void *)(ptr+EXTRA_BYTE));
return (void *)(ptr + EXTRA_BYTE);
}
@ -147,14 +147,14 @@ void *tm_calloc(size_t count, size_t size, char *file, int line){
save_ptr(ptr, full_size, file, line);
if(tm_get_verbose_level()>=DEBUG)
printf("tm_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,(void*)ptr, file, line);
printf("tm_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,(void *)ptr, file, line);
memcpy(ptr, extra_data, EXTRA_BYTE);
memcpy(ptr + full_size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
if(tm_get_verbose_level()>=DEBUG)
printf("tm_calloc returning: %p\n", (void*)(ptr+EXTRA_BYTE));
printf("tm_calloc returning: %p\n",(void *)(ptr+EXTRA_BYTE));
return (void *)(ptr+EXTRA_BYTE);
}
@ -172,7 +172,7 @@ void *tm_realloc(void *old_ptr, size_t size, char *file, int line){
save_ptr(ptr, full_size, file, line);
if(tm_get_verbose_level()>=DEBUG)
printf("tm_realloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE, (void*)ptr, file, line);
printf("tm_realloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE, (void *)ptr, file, line);
memcpy(ptr, extra_data, EXTRA_BYTE);
@ -185,17 +185,17 @@ void *tm_realloc(void *old_ptr, size_t size, char *file, int line){
memcpy(ptr + EXTRA_BYTE, old_ptr, MIN(old_ptr_size - 2 * EXTRA_BYTE, size));
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
fprintf(stderr,"Realloc: cannot find special string ***before*** %p!\n", (void*)original_ptr);
fprintf(stderr,"Realloc: cannot find special string ***before*** %p!\n", (void *)original_ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if((bcmp(original_ptr + old_ptr_size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
fprintf(stderr,"Realloc: cannot find special string ***after*** %p!\n", (void*)original_ptr);
fprintf(stderr,"Realloc: cannot find special string ***after*** %p!\n", (void *)original_ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if(tm_get_verbose_level()>=DEBUG)
printf("tm_free freeing: %p\n", (void*)original_ptr);
printf("tm_free freeing: %p\n",(void *)original_ptr);
free(original_ptr);
@ -203,7 +203,7 @@ void *tm_realloc(void *old_ptr, size_t size, char *file, int line){
if(tm_get_verbose_level()>=DEBUG)
printf("tm_realloc returning: %p (----- %p)\n",(void*)(ptr+EXTRA_BYTE),(void*)(((byte *)ptr) - EXTRA_BYTE));
printf("tm_realloc returning: %p (----- %p)\n", (void *)(ptr+EXTRA_BYTE), (void *)(ptr - EXTRA_BYTE));
return (void *)(ptr+EXTRA_BYTE);
@ -219,17 +219,17 @@ void tm_free(void *ptr){
size = retreive_size(original_ptr);
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
fprintf(stderr,"Free: cannot find special string ***before*** %p!\n", (void*)original_ptr);
fprintf(stderr,"Free: cannot find special string ***before*** %p!\n", (void *)original_ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if((bcmp(original_ptr + size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
fprintf(stderr,"Free: cannot find special string ***after*** %p!\n", (void*)original_ptr);
fprintf(stderr,"Free: cannot find special string ***after*** %p!\n", (void *)original_ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if(tm_get_verbose_level()>=DEBUG)
printf("tm_free freeing: %p\n", (void*)original_ptr);
printf("tm_free freeing: %p\n", (void *)original_ptr);
free(original_ptr);

Просмотреть файл

@ -1,3 +1,7 @@
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
@ -18,6 +22,15 @@
#include <winbase.h>
#endif
#if HAVE_LIBSCOTCH
#include <scotch.h>
#endif
#include <sys/mman.h>
/* Smaller of a and b.  The whole expansion is parenthesized so that the macro
   can be used inside a larger expression; each argument may be evaluated twice. */
#define MIN(a,b) ((a)<(b)?(a):(b))
#define TEST_ERROR(n) do{ \
if( (n) != 0 ){ \
fprintf(stderr,"Error %d Line %d\n",n,__LINE__); \
@ -34,6 +47,8 @@ typedef struct {
} hash2_t;
static tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order);
/* compute the number of leaves of any subtree starting from a node at depth 'depth' */
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology)
{
@ -45,15 +60,11 @@ int compute_nb_leaves_from_level(int depth,tm_topology_t *topology)
return res;
}
void tm_finalize(){
void tm_finalize(void){
terminate_thread_pool();
tm_mem_check();
}
int nb_processing_units(tm_topology_t *topology)
{
return topology->nb_proc_units;
}
void print_1D_tab(int *tab,int N)
@ -89,14 +100,15 @@ int nb_lines(char *filename)
return N;
}
void init_mat(char *filename,int N, double **mat, double *sum_row)
{
long int init_mat(char *filename,int N, double **mat, double *sum_row){
FILE *pf = NULL;
char *ptr= NULL;
char line[LINE_SIZE];
int i,j;
unsigned int vl = tm_get_verbose_level();
long int nnz = 0;
if(!(pf=fopen(filename,"r"))){
if(vl >= CRITICAL)
@ -107,7 +119,6 @@ void init_mat(char *filename,int N, double **mat, double *sum_row)
j = -1;
i = 0;
while(fgets(line,LINE_SIZE,pf)){
char *l = line;
j = 0;
@ -116,6 +127,7 @@ void init_mat(char *filename,int N, double **mat, double *sum_row)
l = NULL;
if((ptr[0]!='\n')&&(!isspace(ptr[0]))&&(*ptr)){
mat[i][j] = atof(ptr);
if(mat[i][j]) nnz++;
sum_row[i] += mat [i][j];
if(mat[i][j]<0){
if(vl >= WARNING)
@ -140,15 +152,124 @@ void init_mat(char *filename,int N, double **mat, double *sum_row)
}
fclose (pf);
return nnz;
}
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order){
/* Return the size in bytes of the file named filename.
   Returns 0 when stat() fails (e.g. the file does not exist) instead of
   reading an uninitialized struct stat. */
size_t get_filesize(char* filename) {
    struct stat st;

    if(stat(filename, &st) != 0)
        return 0;
    return (size_t)st.st_size;
}
/* Parse line i of a matrix from the text buffer data.
   Values are separated by spaces or tabs and are read digit by digit as
   non-negative decimal integers (characters are not validated).
   i        : index of the row being filled
   mat      : matrix; mat[i][0..N-1] receives the parsed values
   sum_row  : sum_row[i] receives the sum of the row
   N        : expected number of columns
   data     : start of the line in the buffer
   filename : only used in the error message
   nnz      : incremented once per non-zero value
   Returns a pointer just after the '\n' ending the line (or to the NUL byte
   if the buffer ends first).  Exits if the line does not hold exactly N values.
   Values beyond column N-1 are counted but never stored, so a line that is too
   long cannot write outside mat[i]. */
char *parse_line(int i, double **mat, double *sum_row, int N, char *data, char *filename, long int *nnz){
    unsigned int vl = tm_get_verbose_level();
    long val;
    int j = 0;

    sum_row[i] = 0;
    while(*data != '\n' && *data != '\0'){
        /* skip the separators before the next value */
        while(*data == ' ' || *data == '\t')
            data++;
        if(*data == '\n' || *data == '\0')
            break;

        val = 0;
        while(*data != ' ' && *data != '\t' && *data != '\n' && *data != '\0'){
            val = val*10 + *data-'0';
            data++;
        }

        if(j < N){
            mat[i][j] = val;
            if(val){
                (*nnz)++;
                sum_row[i] += val;
            }
        }
        j++;
    }

    if(j != N){
        if(vl >= CRITICAL)
            fprintf(stderr,"Error at %d %d (%d!=%d). Wrong number of columns line %d for file %s\n",i ,j ,j ,N ,i+1, filename);
        exit(-1);
    }

    /* step over the end of line, but never over the end of the buffer */
    if(*data == '\n')
        data++;
    return data;
}
/* Read an N x N matrix from filename through a read-only mmap of the file.
   mat[i][j] and sum_row[i] are filled by parse_line for each of the N lines.
   Returns the number of non-zero entries.  Exits if the file cannot be opened
   or mapped.  The mapping is released with the address returned by mmap (the
   parsing cursor moves forward and must not be given to munmap). */
long int init_mat_mmap(char *filename,int N, double **mat, double *sum_row){
    int i;
    unsigned int vl = tm_get_verbose_level();
    size_t filesize = get_filesize(filename);
    int fd = open(filename, O_RDONLY, 0);
    long int nnz = 0;
    char *base;
    char *cursor;

    if(fd == -1){
        if(vl >= CRITICAL)
            fprintf(stderr,"Cannot open %s\n",filename);
        exit(-1);
    }

    base = (char*) mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);
    if(base == MAP_FAILED){
        if(vl >= CRITICAL)
            fprintf(stderr,"Cannot mmap %s\n",filename);
        close(fd);
        exit(-1);
    }

    cursor = base;
    for(i = 0; i < N; i++)
        cursor = parse_line(i, mat, sum_row, N, cursor, filename, &nnz);

    munmap(base, filesize);
    /* fprintf(stderr,"DONE!\n"); */
    close (fd);
    return nnz;
}
/* Read an N x N matrix from filename line by line with fgets.
   mat[i][j] and sum_row[i] are filled by parse_line for each of the N lines.
   Returns the number of non-zero entries.  Exits if the file cannot be opened
   or holds fewer than N lines (the line buffer would otherwise be parsed
   again with stale content). */
long int init_mat_long(char *filename,int N, double **mat, double *sum_row){
    int i;
    unsigned int vl = tm_get_verbose_level();
    char line[LINE_SIZE];
    FILE *pf;
    long int nnz = 0;

    if(!(pf=fopen(filename,"r"))){
        if(vl >= CRITICAL)
            fprintf(stderr,"Cannot open %s\n",filename);
        exit(-1);
    }

    for(i = 0; i < N; i++){
        if(!fgets(line,LINE_SIZE,pf)){
            if(vl >= CRITICAL)
                fprintf(stderr,"Error: %s ends before line %d (%d lines expected)\n",filename, i+1, N);
            exit(-1);
        }
        parse_line(i, mat, sum_row, N, line, filename, &nnz);
    }

    /* fprintf(stderr,"DONE!\n"); */
    fclose (pf);
    return nnz;
}
/* Allocate an affinity matrix descriptor and fill it with the given values.
   mat and sum_row are stored as given (the pointers are kept, nothing is copied).
   order   : stored in the 'order' field
   nnz     : stored in the 'nnz' field
   Returns the descriptor allocated with MALLOC. */
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order, long int nnz){
    tm_affinity_mat_t *res = (tm_affinity_mat_t *) MALLOC(sizeof(tm_affinity_mat_t));

    res->order   = order;
    res->nnz     = nnz;
    res->mat     = mat;
    res->sum_row = sum_row;

    return res;
}
@ -157,15 +278,20 @@ tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order){
tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order){
double *sum_row = NULL;
int i,j;
long int nnz = 0;
sum_row = (double*)MALLOC(order*sizeof(double));
for( i = 0 ; i < order ; i++){
sum_row[i] = 0;
for(j = 0 ; j < order ; j++)
sum_row[i] += mat [i][j];
for(j = 0 ; j < order ; j++){
if(mat[i][j]){
nnz++;
sum_row[i] += mat [i][j];
}
}
}
return new_affinity_mat(mat, sum_row, order);
return new_affinity_mat(mat, sum_row, order, nnz);
}
@ -190,7 +316,8 @@ tm_affinity_mat_t *tm_load_aff_mat(char *filename)
double **mat = NULL;
double *sum_row = NULL;
int i, order;
long int nnz;
if(tm_get_verbose_level() >= INFO)
printf("Reading matrix file: %s\n",filename);
@ -201,13 +328,34 @@ tm_affinity_mat_t *tm_load_aff_mat(char *filename)
for( i = 0 ; i < order ; i++)
/* the last column stores the sum of the line*/
mat[i] = (double*)MALLOC((order)*sizeof(double));
init_mat(filename,order, mat, sum_row);
/* on my mac parsing large file is better done with fopen than mmap */
#ifdef __MACH__
if (get_filesize(filename) > 1024*1024*1014) {
nnz = init_mat_long(filename,order, mat, sum_row);
if(tm_get_verbose_level() >= DEBUG)
printf("New parser\n");
}else{
nnz = init_mat_mmap(filename,order, mat, sum_row);
if(tm_get_verbose_level() >= DEBUG)
printf("MMap parser\n");
}
#else
nnz = init_mat_mmap(filename,order, mat, sum_row);
if(tm_get_verbose_level() >= DEBUG)
printf("MMap parser\n");
#endif
/* TIC; */
/* init_mat(filename,order, mat, sum_row); */
/* double duration_fl = TOC; */
/* printf("Old parser = %.3f\n",duration_fl); */
if(tm_get_verbose_level() >= INFO)
if(tm_get_verbose_level() >= INFO)
printf("Affinity matrix built from %s!\n",filename);
return new_affinity_mat(mat, sum_row, order);
return new_affinity_mat(mat, sum_row, order, nnz);
}
@ -261,7 +409,7 @@ int nb_leaves(tm_tree_t *comm_tree)
}
/* find the first '-1 in the array of size n and put the value there*/
static void set_val(int *tab, int val, int n){
void set_val(int *tab, int val, int n){
int i = 0;
while (i < n ){
@ -300,7 +448,7 @@ void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level,
unsigned int vl = tm_get_verbose_level();
M = nb_leaves(comm_tree);
nodes_id = topology->node_id[level];
nodes_id = topology->node_id;
N = topology->nb_nodes[level];
if(vl >= INFO){

Просмотреть файл

@ -5,13 +5,13 @@
#include "tm_timings.h"
#include "tm_verbose.h"
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order);
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order, long int nnz);
void build_synthetic_proc_id(tm_topology_t *topology);
tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_nodes);
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology);
void depth_first(tm_tree_t *comm_tree, int *proc_list,int *i);
int fill_tab(int **new_tab,int *tab, int n, int start, int max_val, int shift);
void init_mat(char *filename,int N, double **mat, double *sum_row);
long int init_mat(char *filename,int N, double **mat, double *sum_row);
void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level,
int *sigma, int nb_processes, int **k, int nb_compute_units);
int nb_leaves(tm_tree_t *comm_tree);
@ -19,7 +19,9 @@ int nb_lines(char *filename);
int nb_processing_units(tm_topology_t *topology);
void print_1D_tab(int *tab,int N);
tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree);
void tm_finalize(void);
void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
/* load affinity matrix */
tm_affinity_mat_t *tm_load_aff_mat(char *filename);
void update_comm_speed(double **comm_speed,int old_size,int new_size);

Просмотреть файл

@ -2,7 +2,7 @@
#include <float.h>
#include "tm_solution.h"
#include "tm_mt.h"
#include "tm_mapping.h"
#include "tm_topology.h"
typedef struct {
int val;
@ -10,6 +10,27 @@ typedef struct {
} hash_t;
void tm_free_solution(tm_solution_t *sol);
int distance(tm_topology_t *topology,int i, int j);
double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma);
double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric);
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol,
tm_metric_t metric);
void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric);
int in_tab(int *tab, int n, int val);
void map_Packed(tm_topology_t *topology, int N, int *sigma);
void map_RR(tm_topology_t * topology, int N, int *sigma);
int hash_asc(const void* x1,const void* x2);
int *generate_random_sol(tm_topology_t *topology,int N, int seed);
double eval_sol(int *sol,int N,double **comm, double **arch);
void exchange(int *sol,int i,int j);
double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch);
void select_max(int *l,int *m,double **gain,int N,int *state);
void compute_gain(int *sol,int N,double **gain,double **comm, double **arch);
void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch);
void tm_free_solution(tm_solution_t *sol){
int i,n;
@ -41,8 +62,8 @@ int distance(tm_topology_t *topology,int i, int j)
int vl = tm_get_verbose_level();
int depth = topology->nb_levels-1;
f_i = topology->node_rank[depth][i];
f_j = topology->node_rank[depth][j];
f_i = topology->node_rank[i];
f_j = topology->node_rank[j];
if(vl >= DEBUG)
printf("i=%d, j=%d Level = %d f=(%d,%d)\n",i ,j, level, f_i, f_j);
@ -58,7 +79,7 @@ int distance(tm_topology_t *topology,int i, int j)
} while((f_i!=f_j) && (level < depth));
if(vl >= DEBUG)
printf("distance(%d,%d):%d\n",topology->node_rank[depth][i], topology->node_rank[depth][j], level);
printf("distance(%d,%d):%d\n",topology->node_rank[i], topology->node_rank[j], level);
/* exit(-1); */
return level;
}
@ -85,7 +106,7 @@ double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat,
a = cost[depth-distance(topology,sigma[i],sigma[j])];
if(tm_get_verbose_level() >= DEBUG)
printf("T_%d_%d %f*%f=%f\n",i,j,c,a,c*a);
sol += c*a;
sol += c*a;
}
for (i = 0; i < N; i++) {
@ -99,7 +120,7 @@ double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat,
}
static double display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
double display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
{
double a,c,sol;
int i,j;
@ -135,7 +156,7 @@ static double display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *af
return sol;
}
static double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
{
double c,sol;
int nb_hops;
@ -150,7 +171,7 @@ static double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *a
nb_hops = 2*distance(topology,sigma[i],sigma[j]);
if(tm_get_verbose_level() >= DEBUG)
printf("T_%d_%d %f*%d=%f\n",i,j,c,nb_hops,c*nb_hops);
sol += c*nb_hops;
sol += c*nb_hops;
}
for (i = 0; i < N; i++) {
@ -257,7 +278,7 @@ int in_tab(int *tab, int n, int val){
if(tab[i] == val)
return 1;
return 0;
return 0;
}
void map_Packed(tm_topology_t *topology, int N, int *sigma)
@ -270,10 +291,10 @@ void map_Packed(tm_topology_t *topology, int N, int *sigma)
for( i = 0 ; i < topology->nb_nodes[depth] ; i++){
/* printf ("%d -> %d\n",objs[i]->os_index,i); */
if((!topology->constraints) || (in_tab(topology->constraints, topology->nb_constraints, topology->node_id[depth][i]))){
if((!topology->constraints) || (in_tab(topology->constraints, topology->nb_constraints, topology->node_id[i]))){
if(vl >= DEBUG)
printf ("%lu: %d -> %d\n", i, j, topology->node_id[depth][i]);
sigma[j++]=topology->node_id[depth][i];
printf ("%lu: %d -> %d\n", i, j, topology->node_id[i]);
sigma[j++]=topology->node_id[i];
if(j == N)
break;
}
@ -306,14 +327,14 @@ int hash_asc(const void* x1,const void* x2)
}
int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed)
int *generate_random_sol(tm_topology_t *topology,int N, int seed)
{
hash_t *hash_tab = NULL;
int *sol = NULL;
int *nodes_id= NULL;
int i;
nodes_id = topology->node_id[level];
nodes_id = topology->node_id;
hash_tab = (hash_t*)MALLOC(sizeof(hash_t)*N);
sol = (int*)MALLOC(sizeof(int)*N);
@ -428,7 +449,7 @@ void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **com
state = (int*)MALLOC(sizeof(int)*N);
temp = (double*)MALLOC(sizeof(double)*N);
sol = generate_random_sol(topology,N,topology->nb_levels-1,seed++);
sol = generate_random_sol(topology, N, seed++);
for( i = 0 ; i < N ; i++)
sigma[i] = sol[i];
@ -488,7 +509,7 @@ void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **com
}
}while( max > 0 );
FREE(sol);
sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++);
sol=generate_random_sol(topology, N, seed++);
}

Просмотреть файл

@ -14,7 +14,7 @@ int in_tab(int *tab, int n, int val);
void map_Packed(tm_topology_t *topology, int N, int *sigma);
void map_RR(tm_topology_t *topology, int N, int *sigma);
int hash_asc(const void* x1,const void* x2);
int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed);
int *generate_random_sol(tm_topology_t *topology,int N, int seed);
double eval_sol(int *sol,int N,double **comm, double **arch);
void exchange(int *sol,int i,int j);
double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch);

Просмотреть файл

@ -23,7 +23,6 @@ static thread_pool_t *create_threads(void);
static void f1 (int nb_args, void **args, int thread_id);
static void f2 (int nb_args, void **args, int thread_id);
static void destroy_work(work_t *work);
#define MIN(a, b) ((a)<(b)?(a):(b))
#define MAX(a, b) ((a)>(b)?(a):(b))

Просмотреть файл

@ -41,6 +41,7 @@ void wait_work_completion(work_t *work);
void terminate_thread_pool(void);
work_t *create_work(int nb_args, void **args, void (int, void **, int));
int test_main(void);
void destroy_work(work_t *work);

Просмотреть файл

@ -7,7 +7,7 @@
#include "tm_solution.h"
tm_topology_t* get_local_topo_with_hwloc(void);
/* Older spelling of the prototype; kept so existing references still see a
   declaration. No definition with this name is visible in this file. */
tm_topology_t* tm_get_local_topo_with_hwloc(void);
/* Prototype matching the function actually defined in this file. */
tm_topology_t* tm_get_local_topology_with_hwloc(void);
tm_topology_t* hwloc_to_tm(char *filename);
int int_cmp_inc(const void* x1,const void* x2);
void optimize_arity(int **arity, double **cost, int *nb_levels,int n);
@ -27,11 +27,25 @@ void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_node
double ** topology_to_arch(hwloc_topology_t topology);
void build_synthetic_proc_id(tm_topology_t *topology);
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
void tm_set_numbering(tm_numbering_t new_val); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
tm_numbering_t tm_get_numbering(); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
#define LINE_SIZE (1000000)
/* Numbering mode consulted by build_process_tab_id().
   Logical numbering is the default until tm_set_numbering() is called. */
static tm_numbering_t numbering = TM_NUMBERING_LOGICAL;

/* Select the numbering mode (TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL).
   The value is stored as-is; build_process_tab_id() exits on any other value. */
void tm_set_numbering(tm_numbering_t new_val){
  numbering = new_val;
}

/* Return the numbering mode currently in effect.
   Declared with (void) so the definition is a real prototype and calls that
   pass arguments are diagnosed by the compiler. */
tm_numbering_t tm_get_numbering(void){
  return numbering;
}
/* transform a tgt scotch file into a topology file*/
tm_topology_t * tgt_to_tm(char *filename)
{
@ -101,6 +115,13 @@ tm_topology_t * tgt_to_tm(char *filename)
return topology;
}
/* Accessor: number of processing units recorded in the topology
   (the nb_proc_units field). */
int nb_processing_units(tm_topology_t *topology)
{
  const int nb_units = topology->nb_proc_units;

  return nb_units;
}
int topo_nb_proc(hwloc_topology_t topology,int N)
{
hwloc_obj_t *objs = NULL;
@ -115,7 +136,7 @@ int topo_nb_proc(hwloc_topology_t topology,int N)
static double link_cost(int depth)
double link_cost(int depth)
{
/*
Bertha values
@ -184,6 +205,46 @@ int symetric(hwloc_topology_t topology)
return 1;
}
/*
 * Fill topology->node_id and topology->node_rank for the processing units
 * listed in objs, according to the file-level numbering mode.
 *
 *   topology : topology whose nb_proc_units field is already set and whose
 *              node_id / node_rank arrays are already allocated (the callers
 *              in this file allocate them with nb_proc_units entries).
 *   objs     : hwloc objects of the processing units; only os_index is read.
 *   filename : name used in the duplicate-index error message.
 *
 * TM_NUMBERING_LOGICAL  : node_id[i] = node_rank[i] = i.
 * TM_NUMBERING_PHYSICAL : node_id[i] = os_index of objs[i], and node_rank is
 *                         the inverse mapping.
 * The process exits with -1 if an os_index is out of range or duplicated, or
 * if the numbering mode is unknown.
 */
void build_process_tab_id(tm_topology_t *topology, hwloc_obj_t *objs, char* filename){
  unsigned int i,j;
  unsigned int nb_nodes = topology->nb_proc_units;
  int vl = tm_get_verbose_level();

  /* Build process id tab */
  if(numbering == TM_NUMBERING_LOGICAL){
    for (i = 0; i < nb_nodes; i++){
      topology->node_id[i]   = i;
      topology->node_rank[i] = i;
    }
  }else if(numbering == TM_NUMBERING_PHYSICAL){
    for (i = 0; i < nb_nodes; i++){
      /* os_index is used below as an index into node_rank, so the valid
         range is [0, nb_nodes-1]: os_index == nb_nodes must be rejected too,
         otherwise the store writes one element past the array. */
      if(objs[i]->os_index >= nb_nodes){
        if(vl >= CRITICAL){
          fprintf(stderr, "Cannot use forced physical numbering!\n\tIndex of PU %u is %u and not smaller than number of nodes : %u\n",
                  i, objs[i]->os_index, nb_nodes);
        }
        exit(-1);
      }
      /* Two PUs sharing one physical index would make node_rank ambiguous. */
      for(j = 0; j < i; j++){
        if((unsigned int)topology->node_id[j] == objs[i]->os_index){
          if(vl >= CRITICAL){
            fprintf(stderr, "Cannot use forced physical numbering!\n\tDuplicated physical number of some PUs in %s.\n\tPU %u and PU %u have the same physical number: (os_index[%u] = %u) == (os_index[%u] = %u)\n", filename, j, i, j, objs[j]->os_index, i, objs[i]->os_index);
          }
          exit(-1);
        }
      }
      topology->node_id[i] = objs[i]->os_index;
      topology->node_rank[objs[i]->os_index] = i;
    }
  }else{
    if(vl >= CRITICAL){
      fprintf(stderr, "Unknown numbering %d\n", (int)numbering);
    }
    exit(-1);
  }
}
tm_topology_t* hwloc_to_tm(char *filename)
{
hwloc_topology_t topology;
@ -193,43 +254,46 @@ tm_topology_t* hwloc_to_tm(char *filename)
unsigned int nb_nodes;
double *cost;
int err, l;
unsigned int i;
int vl = tm_get_verbose_level();
/* Build the topology */
hwloc_topology_init(&topology);
err = hwloc_topology_set_xml(topology,filename);
err = hwloc_topology_set_xml(topology, filename);
if(err == -1){
if(vl >= CRITICAL)
fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename);
exit(-1);
}
#if HWLOC_API_VERSION >= 0x00020000
hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
#else /* HWLOC_API_VERSION >= 0x00020000 */
#if HWLOC_API_VERSION < 0x20000
hwloc_topology_ignore_all_keep_structure(topology);
#endif /* HWLOC_API_VERSION >= 0x00020000 */
hwloc_topology_load(topology);
#else
hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
#endif
err = hwloc_topology_load(topology);
if(err == -1){
if(vl >= CRITICAL)
fprintf(stderr,"Error: the content of the xml topology file %s is not compatible with the version installed on this machine.\nPlease use compatible versions to generate the file and to use it!\n",filename);
exit(-1);
}
/* Test if symetric */
if(!symetric(topology)){
if(tm_get_verbose_level() >= CRITICAL)
if(vl >= CRITICAL)
fprintf(stderr,"%s not symetric!\n",filename);
exit(-1);
}
/* work on depth */
topodepth = hwloc_topology_get_depth(topology);
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
res->oversub_fact = 1;
res->nb_constraints = 0;
res->constraints = NULL;
res->nb_levels = topodepth;
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
@ -240,35 +304,24 @@ tm_topology_t* hwloc_to_tm(char *filename)
for( depth = 0 ; depth < topodepth ; depth++ ){
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
res->nb_nodes[depth] = nb_nodes;
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
objs[0] = hwloc_get_next_obj_by_depth(topology, depth, NULL);
hwloc_get_closest_objs(topology, objs[0], objs+1, nb_nodes-1);
res->arity[depth] = objs[0]->arity;
if (depth == topodepth -1){
res->nb_constraints = nb_nodes;
res->nb_proc_units = nb_nodes;
}
if(vl >= DEBUG)
printf("\n--%d(%d) **%d**:--\n",res->arity[depth],nb_nodes,res->arity[0]);
/* Build process id tab */
for (i = 0; i < nb_nodes; i++){
if(objs[i]->os_index > nb_nodes){
if(vl >= CRITICAL){
fprintf(stderr, "Index of object %d of level %d is %d and larger than number of nodes : %d\n",
i, depth, objs[i]->os_index, nb_nodes);
}
exit(-1);
}
res->node_id[depth][i] = objs[i]->os_index;
res->node_rank[depth][objs[i]->os_index] = i;
/* if(depth==topodepth-1) */
if (depth == topodepth -1){
res->nb_constraints = nb_nodes;
res->nb_proc_units = nb_nodes;
res->node_id = (int*)MALLOC(sizeof(int)*nb_nodes);
res->node_rank = (int*)MALLOC(sizeof(int)*nb_nodes);
build_process_tab_id(res, objs, filename);
}
FREE(objs);
@ -292,21 +345,23 @@ tm_topology_t* hwloc_to_tm(char *filename)
return res;
}
tm_topology_t* get_local_topo_with_hwloc(void)
tm_topology_t* tm_get_local_topology_with_hwloc(void)
{
hwloc_topology_t topology;
tm_topology_t *res = NULL;
hwloc_obj_t *objs = NULL;
unsigned topodepth,depth;
int nb_nodes,i;
int nb_nodes;
/* Build the topology */
hwloc_topology_init(&topology);
#if HWLOC_API_VERSION >= 0x00020000
hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
#else /* HWLOC_API_VERSION >= 0x00020000 */
#if HWLOC_API_VERSION < 0x20000
hwloc_topology_ignore_all_keep_structure(topology);
#endif /* HWLOC_API_VERSION >= 0x00020000 */
#else
hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
#endif
hwloc_topology_load(topology);
/* Test if symetric */
@ -323,17 +378,15 @@ tm_topology_t* get_local_topo_with_hwloc(void)
res->nb_constraints = 0;
res->constraints = NULL;
res->nb_levels = topodepth;
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
res->oversub_fact = 1; //defaut
res->cost = NULL;
/* Build TreeMatch topology */
for( depth = 0 ; depth < topodepth ; depth++ ){
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
res->nb_nodes[depth] = nb_nodes;
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
@ -342,15 +395,14 @@ tm_topology_t* get_local_topo_with_hwloc(void)
if (depth == topodepth -1){
res->nb_constraints = nb_nodes;
res->nb_proc_units = nb_nodes;
}
res->nb_proc_units = nb_nodes;
res->node_id = (int*)MALLOC(sizeof(int)*nb_nodes);
res->node_rank = (int*)MALLOC(sizeof(int)*nb_nodes);
/* printf("%d:",res->arity[depth]); */
/* Build process id tab */
for (i = 0; i < nb_nodes; i++){
res->node_id[depth][i] = objs[i]->os_index;
res->node_rank[depth][objs[i]->os_index] = i;
/* if(depth==topodepth-1) */
/* Build process id tab */
build_process_tab_id(res, objs, "Local node topology");
}
FREE(objs);
}
@ -367,15 +419,9 @@ tm_topology_t* get_local_topo_with_hwloc(void)
void tm_free_topology(tm_topology_t *topology)
{
int i;
for( i = 0 ; i < topology->nb_levels ; i++ ){
FREE(topology->node_id[i]);
FREE(topology->node_rank[i]);
}
FREE(topology->constraints);
FREE(topology->node_id);
FREE(topology->node_rank);
FREE(topology->constraints);
FREE(topology->nb_nodes);
FREE(topology->arity);
FREE(topology->cost);
@ -400,18 +446,15 @@ tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_ty
void tm_display_topology(tm_topology_t *topology)
{
int i;
unsigned int j;
unsigned long id;
for( i = 0 ; i < topology->nb_levels ; i++ ){
printf("%d: ",i);
for( j = 0 ; j < topology->nb_nodes[i] ; j++)
printf("%d ",topology->node_id[i][j]);
printf("Level %d with arity %d ", i, topology->arity[i]);
printf("\n");
}
printf("Last level: ");
for(id = 0; id < topology->nb_nodes[topology->nb_levels-1]/topology->oversub_fact; id++)
printf("%d ",topology->node_rank[topology->nb_levels-1][id]);
printf("%d ",topology->node_rank[id]);
printf("\n");
@ -430,9 +473,13 @@ void tm_display_topology(tm_topology_t *topology)
void tm_display_arity(tm_topology_t *topology){
int depth;
for(depth=0; depth < topology->nb_levels; depth++)
printf("%d(%lf): ",topology->arity[depth], topology->cost[depth]);
for(depth=0; depth < topology->nb_levels; depth++){
printf("%d",topology->arity[depth]);
if(topology->cost)
printf("(%lf)",topology->cost[depth]);
else
printf(":");
}
printf("\n");
}
@ -442,12 +489,12 @@ int int_cmp_inc(const void* x1,const void* x2)
}
static int topo_check_constraints(tm_topology_t *topology){
int topo_check_constraints(tm_topology_t *topology){
int n = topology->nb_constraints;
int i;
int depth = topology->nb_levels-1;
for (i=0;i<n;i++){
if(!in_tab(topology->node_id[depth], topology->nb_nodes[depth], topology->constraints[i])){
if(!in_tab(topology->node_id, topology->nb_nodes[depth], topology->constraints[i])){
if(tm_get_verbose_level() >= CRITICAL){
fprintf(stderr,"Error! Incompatible constraint with the topology: rank %d in the constraints is not a valid id of any nodes of the topology.\n",topology->constraints[i]);
}
@ -462,7 +509,7 @@ static int topo_check_constraints(tm_topology_t *topology){
/* cpy flag tells if we need to copy the array.
Set to 1 when called from the application level and 0 when called from inside the library*/
static int tm_topology_set_binding_constraints_cpy(int *constraints, int nb_constraints, tm_topology_t *topology, int cpy_flag){
int tm_topology_set_binding_constraints_cpy(int *constraints, int nb_constraints, tm_topology_t *topology, int cpy_flag){
topology -> nb_constraints = nb_constraints;
if(cpy_flag){
@ -548,7 +595,7 @@ void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_node
if(vl >= INFO)
printf("nb_nodes=%d\n",*nb_nodes);
*numbering = (int*)MALLOC(sizeof(int)*(*nb_nodes));
memcpy(*numbering,topology->node_id[nb_levels-1],sizeof(int)*(*nb_nodes));
memcpy(*numbering,topology->node_id,sizeof(int)*(*nb_nodes));
}
void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels)
@ -701,7 +748,7 @@ void tm_optimize_topology(tm_topology_t **topology){
FREE(arity);
FREE(numbering);
tm_free_topology(*topology);
*topology = new_topo;
/* exit(-1); */
@ -738,8 +785,6 @@ tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_lev
topology->constraints = NULL;
topology->nb_levels = nb_levels;
topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
topology->nb_nodes = (size_t *)MALLOC(sizeof(size_t)*topology->nb_levels);
if(cost)
topology->cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
@ -753,27 +798,17 @@ tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_lev
n = 1;
for( i = 0 ; i < topology->nb_levels ; i++ ){
topology->nb_nodes[i] = n;
topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
if( i < topology->nb_levels-1){
for( j = 0 ; j < n ; j++ ){
topology->node_id[i][j] = j;
topology->node_rank[i][j]=j;
}
}else{
if (i == topology->nb_levels-1){
topology->node_id = (int*)MALLOC(sizeof(int)*n);
topology->node_rank = (int*)MALLOC(sizeof(int)*n);
topology->nb_constraints = n;
topology->nb_proc_units = n;
for( j = 0 ; j < n ; j++ ){
int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
topology->node_id[i][j] = id;
topology->node_rank[i][id] = j;
topology->node_id[j] = id;
topology->node_rank[id] = j;
}
}
if (i == topology->nb_levels-1){
topology->nb_constraints = n;
topology->nb_proc_units = n;
}
n *= topology->arity[i];
}
if(cost){
@ -791,32 +826,30 @@ void build_synthetic_proc_id(tm_topology_t *topology)
int i;
size_t j,n = 1;
topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
topology->nb_nodes = (size_t*) MALLOC(sizeof(size_t)*topology->nb_levels);
for( i = 0 ; i < topology->nb_levels ; i++ ){
/* printf("n= %lld, arity := %d\n",n, topology->arity[i]); */
topology->nb_nodes[i] = n;
topology->node_id[i] = (int*)MALLOC(sizeof(long int)*n);
topology->node_rank[i] = (int*)MALLOC(sizeof(long int)*n);
if ( !topology->node_id[i] ){
if(tm_get_verbose_level() >= CRITICAL)
fprintf(stderr,"Cannot allocate level %d (of size %ld) of the topology\n", i, (unsigned long int)n);
exit(-1);
}
if (i == topology->nb_levels-1){
topology->node_rank = (int*)MALLOC(sizeof(int)*n);
topology->node_id = (int*)MALLOC(sizeof(int)*n);
if ( !topology->node_id ){
if(tm_get_verbose_level() >= CRITICAL)
fprintf(stderr,"Cannot allocate last level (of size %ld) of the topology\n", (unsigned long int)n);
exit(-1);
}
topology->nb_constraints = n;
topology->nb_proc_units = n;
for( j = 0 ; j < n ; j++ ){
topology->node_id[j] = j;
topology->node_rank[j] = j;
}
}
for( j = 0 ; j < n ; j++ ){
topology->node_id[i][j] = j;
topology->node_rank[i][j] = j;
}
n *= topology->arity[i];
}
@ -827,6 +860,7 @@ void build_synthetic_proc_id(tm_topology_t *topology)
void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact){
{
int i,j,n;
int *node_id, *node_rank;
if(oversub_fact <=1)
return;
@ -834,8 +868,6 @@ void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fac
topology -> nb_levels ++;
topology -> arity = (int*) REALLOC(topology->arity, sizeof(int)*topology->nb_levels);
topology -> cost = (double*) REALLOC(topology->cost, sizeof(double)*topology->nb_levels);
topology -> node_id = (int**) REALLOC(topology->node_id, sizeof(int*)*topology->nb_levels);
topology -> node_rank = (int**) REALLOC(topology->node_rank, sizeof(int*)*topology->nb_levels);
topology -> nb_nodes = (size_t *)REALLOC(topology->nb_nodes, sizeof(size_t)*topology->nb_levels);
topology -> oversub_fact = oversub_fact;
@ -843,15 +875,19 @@ void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fac
n = topology->nb_nodes[i-1] * oversub_fact;
topology->arity[i-1] = oversub_fact;
topology->cost[i-1] = 0;
topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
node_id = (int*)MALLOC(sizeof(int)*n);
node_rank = (int*)MALLOC(sizeof(int)*n);
topology->nb_nodes[i] = n;
for( j = 0 ; j < n ; j++ ){
int id = topology->node_id[i-1][j/oversub_fact];
topology->node_id[i][j] = id;
topology->node_rank[i][id] = j;
int id = topology->node_id[j/oversub_fact];
node_id[j] = id;
node_rank[id] = j;
}
FREE(topology->node_id);
FREE(topology->node_rank);
topology->node_id = node_id;
topology->node_rank = node_rank;
}
}

Просмотреть файл

@ -19,4 +19,5 @@ void topology_constraints(tm_topology_t *topology,int **constraints,int *nb_cons
void topology_cost(tm_topology_t *topology,double **cost);
void topology_numbering(tm_topology_t *topology,int **numbering,int *nb_nodes);
double ** topology_to_arch(hwloc_topology_t topology);
int nb_processing_units(tm_topology_t *topology);

Просмотреть файл

@ -5,7 +5,6 @@
#include <assert.h>
#include <pthread.h>
#include "treematch.h"
#include "tm_tree.h"
#include "tm_mapping.h"
#include "tm_timings.h"
@ -88,6 +87,17 @@ int int_cmp_inc(const void* x1, const void* x2);
/*
 * Compute the binomial coefficient C(n, k) as a double.
 *
 * The product is accumulated factor by factor as (n-i)/(k-i) for
 * i = 0 .. k-1. When k <= 0 the loop does not run and 1 is returned.
 */
double choose (long n, long k)
{
  /* compute C_n_k */
  double res = 1;
  long i; /* same type as k so the index cannot overflow before reaching k */

  for( i = 0 ; i < k ; i++ ){
    res *= ((double)(n-i)/(double)(k-i));
  }

  return res;
}
void tm_set_exhaustive_search_flag(int new_val){
@ -105,8 +115,6 @@ void free_affinity_mat(tm_affinity_mat_t *aff_mat){
FREE(aff_mat);
}
void free_list_child(tm_tree_t *tree)
{
int i;
@ -116,13 +124,14 @@ void free_list_child(tm_tree_t *tree)
free_list_child(tree->child[i]);
FREE(tree->child);
if(tree->dumb)
if(tree->dumb) /*in dumb subtrees internal nodes have been allocated individually, they need to bee freed one by one*/
FREE(tree);
}
}
void free_tab_child(tm_tree_t *tree)
{
if(tree){
/*in a non constaint tree internal node are allocated in an array an stored ib tab_child : they are freed globaly here */
free_tab_child(tree->tab_child);
FREE(tree->tab_child);
}
@ -130,20 +139,26 @@ void free_tab_child(tm_tree_t *tree)
void free_non_constraint_tree(tm_tree_t *tree)
{
int d = tree->dumb;
if(tree->dumb){
if(tm_get_verbose_level() <= CRITICAL){
fprintf(stderr,"Error trying to free a dumb tree!\n. This should never be done like this: the root of a non-constraint tree cannot be a dumb one!\n");
}
exit(-1);
}
free_tab_child(tree);
free_list_child(tree);
if(!d)
FREE(tree);
free_list_child(tree); /* free the tree->child array recursively and the nodes in dumb subtree*/
free_tab_child(tree); /* free the tree->tab_child array that correspond of all the child nodes of a given node in non dumb subtrees */
FREE(tree);
}
void free_constraint_tree(tm_tree_t *tree)
{
int i;
if(tree){
for(i=0;i<tree->arity;i++)
free_constraint_tree(tree->child[i]);
/* tab_child field is NULL for all nodes in the constraint tree*/
FREE(tree->child);
FREE(tree);
}
@ -155,20 +170,9 @@ void tm_free_tree(tm_tree_t *tree)
if(tree->constraint)
free_constraint_tree(tree);
else
free_non_constraint_tree(tree);
free_non_constraint_tree(tree); /* tab_child field is NULL for all nodes in the tree*/
}
/*
 * Compute the binomial coefficient C(n, k) as a double.
 *
 * The product is accumulated factor by factor as (n-i)/(k-i) for
 * i = 0 .. k-1. When k <= 0 the loop does not run and 1 is returned.
 */
double choose (long n, long k)
{
  /* compute C_n_k */
  double res = 1;
  long i; /* same type as k so the index cannot overflow before reaching k */

  for( i = 0 ; i < k ; i++ ){
    res *= ((double)(n-i)/(double)(k-i));
  }

  return res;
}
void set_node(tm_tree_t *node, tm_tree_t ** child, int arity, tm_tree_t *parent,
int id, double val, tm_tree_t *tab_child, int depth)
@ -239,13 +243,14 @@ void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){
int M = *(int*)args[4];
double **mat = (double**)args[5];
double *sum_row = (double*)args[6];
long int *nnz = (long int *)args[7];
int i, j, i1, j1;
int id1, id2;
if(nb_args != 7){
if(nb_args != 8){
if(verbose_level >= ERROR)
fprintf(stderr, "Thread %d: Wrong number of args in %s: %d\n", thread_id, __func__, nb_args);
fprintf(stderr, "Thread %d: Wrong number of args in %s: %d\n", thread_id, __FUNCTION__, nb_args);
exit(-1);
}
@ -262,6 +267,9 @@ void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){
mat[i][j] += old_mat[id1][id2];
/* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n", i, j, id1, id2, old_mat[id1][id2]);*/
}
}
if(mat[i][j]){
(*nnz)++;
sum_row[i] += mat[i][j];
}
}
@ -269,12 +277,13 @@ void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){
}
static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int M)
tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int M)
{
int i, j, i1, j1, id1, id2;
double **new_mat = NULL, **old_mat = aff_mat->mat;
double *sum_row = NULL;
long int nnz = 0;
new_mat = (double**)MALLOC(M*sizeof(double*));
for( i = 0 ; i < M ; i++ )
new_mat[i] = (double*)CALLOC((M), sizeof(double));
@ -287,16 +296,19 @@ static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat
work_t **works;
int *inf;
int *sup;
long int *nnz_tab;
nb_threads = MIN(M/512, get_nb_threads());
works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
inf = (int*)MALLOC(sizeof(int)*nb_threads);
sup = (int*)MALLOC(sizeof(int)*nb_threads);
nnz_tab = (long int*)MALLOC(sizeof(long int)*nb_threads);
for(id=0;id<nb_threads;id++){
void **args=(void**)MALLOC(sizeof(void*)*7);
void **args=(void**)MALLOC(sizeof(void*)*8);
inf[id]=id*M/nb_threads;
sup[id]=(id+1)*M/nb_threads;
if(id == nb_threads-1) sup[id]=M;
nnz_tab[id] = 0;
args[0]=(void*)(inf+id);
args[1]=(void*)(sup+id);
args[2]=(void*)old_mat;
@ -304,8 +316,9 @@ static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat
args[4]=&M;
args[5]=(void*)new_mat;
args[6]=(void*)sum_row;
args[7]=(void*)(nnz_tab+id);
works[id]= create_work(7, args, partial_aggregate_aff_mat);
works[id]= create_work(8, args, partial_aggregate_aff_mat);
if(verbose_level >= DEBUG)
printf("Executing %p\n", (void *)works[id]);
@ -315,13 +328,16 @@ static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat
for(id=0;id<nb_threads;id++){
wait_work_completion(works[id]);
FREE(works[id]->args);
nnz += nnz_tab[id];
destroy_work(works[id]);
}
FREE(inf);
FREE(sup);
FREE(works);
FREE(nnz_tab);
}else{
for( i = 0 ; i < M ; i++ )
for( j = 0 ; j < M ; j++ ){
@ -333,12 +349,16 @@ static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat
new_mat[i][j] += old_mat[id1][id2];
/* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n", i, j, id1, id2, old_mat[id1][id2]);*/
}
}
if(new_mat[i][j]){
nnz ++;
sum_row[i] += new_mat[i][j];
}
}
}
}
return new_affinity_mat(new_mat, sum_row, M);
return new_affinity_mat(new_mat, sum_row, M, nnz);
}
void free_tab_double(double**tab, int mat_order)
@ -703,7 +723,7 @@ int select_independent_groups(group_list_t **tab_group, int n, int arity, int M
}
static int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int arity){
int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int arity){
int i, j, ii, jj;
int8_t **indep_mat = (int8_t **)MALLOC(sizeof(int8_t*) *n);
@ -731,7 +751,7 @@ static int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int
return indep_mat;
}
static int independent_groups_mat(group_list_t **selection, int selection_size, group_list_t *elem, int8_t **indep_mat)
int independent_groups_mat(group_list_t **selection, int selection_size, group_list_t *elem, int8_t **indep_mat)
{
int i;
int id_elem = elem->id;
@ -754,7 +774,7 @@ static int independent_groups_mat(group_list_t **selection, int selection_size,
static long int y=0;
static int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, int nb_groups, int arity, int depth, int solution_size,
int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, int nb_groups, int arity, int depth, int solution_size,
double val, double *best_val, group_list_t **selection, group_list_t **best_selection,
int8_t **indep_mat, pthread_mutex_t *lock, int thread_id, int *tab_i, int start_depth){
@ -842,8 +862,8 @@ static int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, in
return 0;
}
#if 0
static group_list_t * group_dup(group_list_t *group, int nb_groups){
group_list_t * group_dup(group_list_t *group, int nb_groups){
group_list_t *elem = NULL;
/* tm_tree_t **tab = NULL; */
double *bound;
@ -867,10 +887,8 @@ static group_list_t * group_dup(group_list_t *group, int nb_groups){
return elem;
}
#endif
#if 0
static group_list_t ** tab_group_dup(group_list_t **tab_group, int nb_groups){
group_list_t ** tab_group_dup(group_list_t **tab_group, int nb_groups){
group_list_t **res;
int i;
@ -884,10 +902,8 @@ static group_list_t ** tab_group_dup(group_list_t **tab_group, int nb_groups){
return res;
}
#endif
#if 0
static int8_t **indep_mat_dup(int8_t** mat, int n){
int8_t **indep_mat_dup(int8_t** mat, int n){
int i;
int8_t ** res = (int8_t**)MALLOC(sizeof(int8_t*)*n);
int row_len;
@ -900,9 +916,9 @@ static int8_t **indep_mat_dup(int8_t** mat, int n){
return res;
}
#endif
static void partial_exhaustive_search(int nb_args, void **args, int thread_id){
void partial_exhaustive_search(int nb_args, void **args, int thread_id){
int i, j;
group_list_t **selection = NULL;
double val;
@ -918,7 +934,7 @@ static void partial_exhaustive_search(int nb_args, void **args, int thread_id){
work_unit_t *work = (work_unit_t *) args[7];
pthread_mutex_t *lock = (pthread_mutex_t *) args[8];
int *tab_i;
int id = 0, id1, id2;
int id = -1, id1, id2;
int total_work = work->nb_work;
int cur_work = 0;
@ -926,7 +942,7 @@ static void partial_exhaustive_search(int nb_args, void **args, int thread_id){
if(nb_args!=9){
if(verbose_level>=ERROR){
fprintf(stderr, "Id: %d: bad number of argument for function %s: %d instead of 9\n", thread_id, __func__, nb_args);
fprintf(stderr, "Id: %d: bad number of argument for function %s: %d instead of 9\n", thread_id, __FUNCTION__, nb_args);
return;
}
}
@ -1009,20 +1025,19 @@ static void partial_exhaustive_search(int nb_args, void **args, int thread_id){
}
#if 0
static int dbl_cmp_dec(const void* x1,const void* x2)
int dbl_cmp_dec(const void* x1,const void* x2)
{
return *((double *)x1) > *((double *)x2) ? -1 : 1;
}
#endif
static int dbl_cmp_inc(const void* x1,const void* x2)
int dbl_cmp_inc(const void* x1,const void* x2)
{
return *((double *)x1) < *((double *)x2) ? -1 : 1;
}
static double *build_bound_array(double *tab, int n){
double *build_bound_array(double *tab, int n){
int i;
double *bound;
@ -1051,7 +1066,7 @@ static double *build_bound_array(double *tab, int n){
return bound;
}
static work_unit_t *create_work_unit(work_unit_t *cur, int *tab,int size){
work_unit_t *create_work_unit(work_unit_t *cur, int *tab,int size){
work_unit_t *res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t));
int *tab_group = MALLOC(size*sizeof(int));
memcpy(tab_group, tab, size*sizeof(int));
@ -1062,7 +1077,7 @@ static work_unit_t *create_work_unit(work_unit_t *cur, int *tab,int size){
return res;
}
static work_unit_t *generate_work_units(work_unit_t *cur, int i, int id, int *tab_group,int size, int id_max){
work_unit_t *generate_work_units(work_unit_t *cur, int i, int id, int *tab_group,int size, int id_max){
tab_group[i] = id;
if(i==size-1){
@ -1082,7 +1097,7 @@ static work_unit_t *generate_work_units(work_unit_t *cur, int i, int id, int *t
}
static work_unit_t *create_tab_work(int n){
work_unit_t *create_tab_work(int n){
int work_size = 4;
int i;
work_unit_t *cur,*res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t));
@ -1106,7 +1121,7 @@ static work_unit_t *create_tab_work(int n){
}
static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int arity, int solution_size, double *best_val,
int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int arity, int solution_size, double *best_val,
group_list_t **best_selection){
pthread_mutex_t lock;
@ -1181,6 +1196,7 @@ static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int
for(id=0;id<nb_threads;id++){
wait_work_completion(works[id]);
FREE(works[id]->args);
destroy_work(works[id]);
}
exit(-1);
@ -1209,8 +1225,8 @@ static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int
return 0;
}
#if 0
static int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat)
int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat)
{
group_list_t *elem = NULL;
@ -1249,10 +1265,10 @@ static int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int ar
return 0;
}
#endif
#if 0
static int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat, int* tab_i)
int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat, int* tab_i)
{
group_list_t *elem = NULL;
@ -1302,10 +1318,10 @@ static int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity,
return 0;
}
#endif
#if 0
static int exhaustive_search(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val,
int exhaustive_search(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val,
group_list_t **best_selection)
{
int i, j;
@ -1365,7 +1381,7 @@ static int exhaustive_search(group_list_t **tab_group, int n, int arity, int so
return 0;
}
#endif
int select_independent_groups_by_largest_index(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val, group_list_t **best_selection, int bound, double max_duration)
@ -1566,7 +1582,7 @@ double fast_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t
return val;
}
static double k_partition_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size) {
double k_partition_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size) {
int *partition = NULL;
int n = aff_mat->order;
com_mat_t com_mat;
@ -1695,7 +1711,8 @@ tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *aff_mat, double* obj_wei
double **old_mat;
double avg;
int i, j, mat_order;
long int nnz = 0;
if(!obj_weight)
return aff_mat;
@ -1727,8 +1744,9 @@ tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *aff_mat, double* obj_wei
mat[i][j] = 1e-4*old_mat[i][j]/comm_speed-fabs(avg-(obj_weight[i]+obj_weight[j])/2);
sum_row[i] += mat[i][j];
}
if(mat[i][j]) nnz++;
}
return new_affinity_mat(mat, sum_row, mat_order);
return new_affinity_mat(mat, sum_row, mat_order,nnz);
}
@ -1952,7 +1970,7 @@ void complete_aff_mat(tm_affinity_mat_t **aff_mat , int mat_order, int K)
sum_row[i] = (*aff_mat)->sum_row[i];
}
*aff_mat = new_affinity_mat(new_mat, sum_row, M);
*aff_mat = new_affinity_mat(new_mat, sum_row, M, (*aff_mat)->nnz);
}
void complete_obj_weight(double **tab, int mat_order, int K)
@ -2001,7 +2019,9 @@ void create_dumb_tree(tm_tree_t *node, int depth, tm_topology_t *topology)
list_child[i]->dumb = 1;
}
set_node(node, list_child, arity, NULL, -1, 0, list_child[0], depth);
/* list_child => node->child ; list_child[0] => node->tab_child */
/* printf("list_child[0] = %p\n",list_child[0]); */
set_node(node, list_child, arity, NULL, -1, 0, NULL, depth);
}
void complete_tab_node(tm_tree_t **tab, int mat_order, int K, int depth, tm_topology_t *topology)
{
@ -2080,6 +2100,8 @@ tm_tree_t *build_level_topology(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat,
TIC;
K = arity*((mat_order/arity)+1)-mat_order;
/*printf("****mat_order=%d arity=%d K=%d\n", mat_order, arity, K); */
if(verbose_level >= INFO)
printf("****mat_order=%d arity=%d K=%d\n", mat_order, arity, K);
/*display_tab(tab, mat_order);*/
/* add K rows and columns to comm_matrix*/
complete_aff_mat(&aff_mat, mat_order, K);
@ -2106,8 +2128,8 @@ tm_tree_t *build_level_topology(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat,
for( i = 0 ; i < M ; i++ ){
tm_tree_t **list_child = NULL;
list_child = (tm_tree_t**)CALLOC(arity, sizeof(tm_tree_t*));
set_node(&new_tab_node[i], list_child, arity, NULL, i, 0, tab_node, depth);
}
set_node(&new_tab_node[i], list_child, arity, NULL, i, 0, tab_node, depth);
}
duration = TOC;
if(verbose_level >= INFO)
printf("New nodes creation= %fs\n ", duration);
@ -2224,7 +2246,7 @@ int check_constraints(tm_topology_t *topology, int **constraints)
In order to have all the ranks of a given id we need to shift them as follows:
*/
shift = 1 + i%topology->oversub_fact - topology->oversub_fact;
(*constraints)[i] = topology->node_rank[topology->nb_levels-1][topology->constraints[i/topology->oversub_fact]] +shift;
(*constraints)[i] = topology->node_rank[topology->constraints[i/topology->oversub_fact]] +shift;
if((*constraints)[i] < last)
sorted = 0;
last = (*constraints)[i];

Просмотреть файл

@ -21,6 +21,11 @@ typedef enum{
TM_METRIC_HOP_BYTE = 3
} tm_metric_t;
/* Core numbering scheme used with hwloc/xml topology files.
   Set with tm_set_numbering() and read back with tm_get_numbering(). */
typedef enum{
TM_NUMBERING_LOGICAL = 0, /* use logical core numbering */
TM_NUMBERING_PHYSICAL = 1 /* use physical core numbering */
} tm_numbering_t;
/********* TreeMatch Public Structures **********/
@ -30,39 +35,40 @@ typedef struct _job_info_t{
int finish_date;
} tm_job_info_t;
typedef struct _tree_t{
typedef struct _tm_tree_t{
int constraint; /* tells if the tree has been constructed with constraints on the nodes or not.
Useful for freeing it. Needs to be set on the root only. */
struct _tree_t **child;
struct _tree_t *parent;
struct _tree_t *tab_child; /*the pointer to be freed*/
struct _tm_tree_t **child;
struct _tm_tree_t *parent;
struct _tm_tree_t *tab_child; /* The pointer to be freed */
double val;
int arity;
int depth;
int id;
int uniq;
int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately*/
int id; /* id of the node or the leaf. Ids are different only on a given level */
int uniq; /* uniq id in the whole tree */
int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately */
tm_job_info_t *job_info;
int nb_processes; /* number of grouped processes (i.e. the order of the affinity matrix). Set at the root only*/
}tm_tree_t; /* FT : changer le nom : tm_grouap_hierachy_t ?*/
int nb_processes; /* number of grouped processes (i.e. the order of the affinity matrix). Set at the root only */
}tm_tree_t; /* FT: rename to tm_group_hierarchy_t? */
/* Maximum number of levels in the tree*/
#define TM_MAX_LEVELS 100
typedef struct {
int *arity; /* arity of the nodes of each level*/
int nb_levels; /*number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
size_t *nb_nodes; /*nb of nodes of each level*/
int **node_id; /*ID of the nodes of the tree for each level*/
int **node_rank ; /*rank of the nodes of the tree for each level given its ID: this is the inverse tab of node_id*/
size_t *nb_free_nodes; /*nb of available nodes of each level*/
int **free_nodes; /*tab of node that are free: useful to simulate batch scheduler*/
double *cost; /*cost of the communication depending on the distance:
cost[i] is the cost for communicating at distance nb_levels-i*/
int *constraints; /* array of constraints: id of the nodes where it is possible to map processes */
int nb_constraints; /* Size of the above array */
int oversub_fact; /* maximum number of processes to be mapped on a given node */
int nb_proc_units; /* the real number of units used for computation */
int *arity; /* Arity of the nodes of each level*/
int nb_levels; /* Number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
size_t *nb_nodes; /* Number of nodes of each level*/
int physical_num; /* Flag set to !=0 if we use physical numbering and set to 0 if we use logical numbering */
int *node_id; /* ID of the nodes of the tree of the last level*/
int *node_rank ; /* Rank of the nodes of the tree for the last level given its ID: this is the inverse tab of node_id*/
size_t *nb_free_nodes; /* Nb of available nodes of each level*/
int **free_nodes; /* array of node that are free: useful to simulate batch scheduler*/
double *cost; /* Cost of the communication depending on the distance:
cost[i] is the cost for communicating at distance nb_levels-i*/
int *constraints; /* Array of constraints: id of the nodes where it is possible to map processes */
int nb_constraints; /* Size of the above array */
int oversub_fact; /* Maximum number of processes to be mapped on a given node */
int nb_proc_units; /* The real number of units used for computation */
}tm_topology_t;
@ -70,17 +76,18 @@ typedef struct {
double ** mat;
double * sum_row;
int order;
long int nnz; /* number of non zero entries */
} tm_affinity_mat_t;
/*
sigma_i is such that process i is mapped on core sigma_i
k_i is such that core i exectutes process k_i_j (0<=j<<=oversubscribing factor - 1)
sigma[i] is such that process i is mapped on core sigma[i]
k[i][j] is such that core i executes process k[i][j] (0 <= j <= oversubscribing factor - 1)
size of sigma is the number of processes (nb_objs)
size of k is the number of cores/nodes (nb_compute_units)
size of k[i] is the number of process we can execute per nodes (1 if no oversubscribing)
We must have numbe of process<=number of cores
We must have number of process<=number of cores
k[i] == NULL if no process is mapped on core i
*/
@ -95,8 +102,10 @@ typedef struct {
/************ TreeMatch Public API ************/
/* construct topology from local one using hwloc */
tm_topology_t* tm_get_local_topology_with_hwloc(void);
/* load XML or TGT topology */
/* Alternatively, load XML or TGT topology */
tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type);
/*
Alternatively, build a synthetic balanced topology.
@ -120,14 +129,12 @@ tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_ty
double cost[5] = {500,100,50,10,0};
int arity[5] = {16,2,2,2,0};
int cn[5]={0,1};
int cn[2]={0,1};
topology = tm_build_synthetic_topology(arity,cost,5,cn,2);
*/
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
/* load affinity matrix */
tm_affinity_mat_t *tm_load_aff_mat(char *com_filename);
/*
Alternatively, build the affinity matrix from an array of arrays representing a matrix of size order by order.
For performance reasons, mat is not copied.
@ -153,7 +160,7 @@ void tm_optimize_topology(tm_topology_t **topology);
void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact);
/* core of the treematch: compute the solution tree */
tm_tree_t *tm_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, double *obj_weight, double *com_speed);
/* compute the mapping according to teh tree an dthe core numbering*/
/* compute the mapping according to the tree and the core numbering*/
tm_solution_t *tm_compute_mapping(tm_topology_t *topology, tm_tree_t *comm_tree);
/* display the solution*/
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol, tm_metric_t metric);
@ -168,7 +175,6 @@ void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
void tm_set_verbose_level(unsigned int level);
unsigned int tm_get_verbose_level(void);
/* finalize treematch :check memory if necessary, and free internal variables (thread pool)*/
void tm_finalize(void);
/*
Ask for exhaustive search: may be very long
@ -178,10 +184,21 @@ Ask for exhaustive search: may be very long
void tm_set_exhaustive_search_flag(int new_val);
int tm_get_exhaustive_search_flag(void);
/*
Ask for greedy k-partitioning even if scotch is available
new_val == 0 : no greedy k-partitioning
new_val != 0 : greedy k-partitioning
*/
void tm_set_greedy_flag(int new_val);
int tm_get_greedy_flag(void);
/* Setting the maximum number of threads you want to use in parallel parts of TreeMatch */
void tm_set_max_nb_threads(unsigned int val);
/* managing the usage of physical vs. logical core numbering when using hwloc/xml files */
void tm_set_numbering(tm_numbering_t new_val); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
tm_numbering_t tm_get_numbering(void); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
#include "tm_malloc.h"