1
1
George Bosilca fbe6c22b90
Make sure the gather is called in all cases, and not
simply based on some local state. This is the second
part of the patch proposed for open-mpi/ompi#1183.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
2017-07-26 11:52:47 -04:00

1653 строки
44 KiB
C

#include <float.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <assert.h>
#include "tm_tree.h"
#include "tm_timings.h"
#include "tm_bucket.h"
#include "tm_kpartitioning.h"
#include "tm_mapping.h"
#include "tm_verbose.h"
#include "tm_thread_pool.h"
#if !defined(MIN)
#define MIN(a,b) ((a)<(b)?(a):(b))
#endif
#if !defined(MAX)
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif
#ifndef __CHARMC__
#define __CHARMC__ 0
#endif
#if __CHARMC__
#include "converse.h"
#else
/*
 * Return the number of right shifts needed to bring val down to 0,
 * i.e. its bit length (0 -> 0, 1 -> 1, 8 -> 4).
 *
 * Non-positive values yield 0: right-shifting a negative int is
 * implementation-defined and, with an arithmetic shift, never reaches 0.
 */
static int ilog2(int val)
{
    int nb_bits = 0;

    if (val <= 0)
        return 0;

    while (val != 0) {
        val >>= 1;
        nb_bits++;
    }
    return nb_bits;
}
#define CmiLog2(VAL) ilog2((int)(VAL))
#endif
static int verbose_level = ERROR;
void FREE_list_child(tree_t *);
void FREE_tab_child(tree_t *);
unsigned long int choose (long,long);
void display_node(tree_t *);
void clone_tree(tree_t *,tree_t *);
double *aggregate_obj_weight(tree_t *,double *,int);
affinity_mat_t *aggregate_com_mat(tree_t *,affinity_mat_t *,int);
double eval_grouping(affinity_mat_t *,tree_t **,int);
group_list_t *new_group_list(tree_t **,double,group_list_t *);
void add_to_list(group_list_t *,tree_t **,int,double);
void list_all_possible_groups(affinity_mat_t *,tree_t *,int,int,int,tree_t **,group_list_t *);
int independent_groups(group_list_t **,int,group_list_t *,int);
void display_selection (group_list_t**,int,int,double);
void display_grouping (tree_t *,int,int,double);
int recurs_select_independent_groups(group_list_t **,int,int,int,int,
int,double,double *,group_list_t **,group_list_t **);
int test_independent_groups(group_list_t **,int,int,int,int,int,double,double *,
group_list_t **,group_list_t **);
void delete_group_list(group_list_t *);
int group_list_id(const void*,const void*);
int group_list_asc(const void*,const void*);
int group_list_dsc(const void*,const void*);
int weighted_degree_asc(const void*,const void*);
int weighted_degree_dsc(const void*,const void*);
int select_independent_groups(group_list_t **,int,int,int,double *,group_list_t **,int,double);
int select_independent_groups_by_largest_index(group_list_t **,int,int,int,double *,
group_list_t **,int,double);
void list_to_tab(group_list_t *,group_list_t **,int);
void display_tab_group(group_list_t **,int,int);
int independent_tab(tree_t **,tree_t **,int);
void compute_weighted_degree(group_list_t **,int,int);
void group(affinity_mat_t *,tree_t *,tree_t *,int,int,int,double *,tree_t **);
void fast_group(affinity_mat_t *,tree_t *,tree_t *,int,int,int,double *,tree_t **, int *, int);
int adjacency_asc(const void*,const void*);
int adjacency_dsc(const void*,const void*);
void super_fast_grouping(affinity_mat_t *,tree_t *,tree_t *,int, int);
affinity_mat_t *build_cost_matrix(affinity_mat_t *,double *,double);
void group_nodes(affinity_mat_t *,tree_t *,tree_t *,int ,int,double*,double);
void fast_grouping(affinity_mat_t *,tree_t *,tree_t *,int,int,long int);
void complete_aff_mat(affinity_mat_t **,int,int);
void complete_obj_weight(double **,int,int);
void create_dumb_tree(tree_t *,int,tm_topology_t *);
void complete_tab_node(tree_t **,int,int,int,tm_topology_t *);
void set_deb_tab_child(tree_t *,tree_t *,int);
tree_t *build_level_topology(tree_t *,affinity_mat_t *,int,int,tm_topology_t *,double *,double *);
int check_constraints(tm_topology_t *,int **);
tree_t *bottom_up_build_tree_from_topology(tm_topology_t *,double **, int ,double *,double *);
void FREE_non_constraint_tree(tree_t *);
void FREE_constraint_tree(tree_t *);
void FREE_tab_double(double**,int);
void FREE_tab_int(int**,int );
void partial_aggregate_com_mat (int, void **);
affinity_mat_t *new_affinity_mat(double **, double *, int);
void partial_aggregate_aff_mat (int, void **);
affinity_mat_t *aggregate_aff_mat(tree_t *, affinity_mat_t *, int);
affinity_mat_t * build_affinity_mat(double **, int);
/*
 * Allocate an affinity_mat_t and record the given matrix, row-sum array
 * and order in it. The arrays are stored by reference, not copied.
 */
affinity_mat_t *new_affinity_mat(double **mat, double *sum_row, int order){
    affinity_mat_t *aff = (affinity_mat_t *) MALLOC (sizeof(affinity_mat_t));

    aff->order   = order;
    aff->sum_row = sum_row;
    aff->mat     = mat;

    return aff;
}
/*
 * Recursively release the child pointer arrays of the subtree rooted at
 * tree. A node flagged as dumb is itself freed as well. NULL is accepted.
 */
void FREE_list_child(tree_t *tree)
{
    int c;

    if (tree != NULL) {
        /* Visit every child before releasing the array that lists them */
        for (c = 0; c < tree->arity; c++) {
            FREE_list_child(tree->child[c]);
        }
        FREE(tree->child);

        if (tree->dumb) {
            FREE(tree);
        }
    }
}
/*
 * Follow the tab_child chain starting at tree and free every tab_child
 * pointer on it, deepest first. NULL is accepted.
 */
void FREE_tab_child(tree_t *tree)
{
    if (NULL == tree)
        return;

    FREE_tab_child(tree->tab_child);
    FREE(tree->tab_child);
}
/*
 * Release a tree that was built without constraints: first the tab_child
 * chain, then the child lists. NULL is accepted.
 *
 * FREE_list_child() already frees `tree` itself when tree->dumb is set,
 * so the root must not be freed again here (that was a double free).
 * NOTE(review): a non-dumb root is not freed by this function, exactly as
 * before this fix - confirm who owns it.
 */
void FREE_non_constraint_tree(tree_t *tree)
{
    if (NULL == tree)
        return;

    FREE_tab_child(tree);
    FREE_list_child(tree);
}
/*
 * Recursively free a tree built with constraints: every child subtree,
 * then the child array, then the node itself. NULL is accepted.
 */
void FREE_constraint_tree(tree_t *tree)
{
    int c;

    if (NULL == tree)
        return;

    for (c = 0; c < tree->arity; c++) {
        FREE_constraint_tree(tree->child[c]);
    }
    FREE(tree->child);
    FREE(tree);
}
/*
 * Free a tree, dispatching on its constraint flag to the matching
 * release routine. tree must not be NULL.
 */
void FREE_tree(tree_t *tree)
{
    if (!tree->constraint) {
        FREE_non_constraint_tree(tree);
        return;
    }
    FREE_constraint_tree(tree);
}
/*
 * Compute the binomial coefficient C(n, k).
 *
 * The product is accumulated in double precision, so the result is
 * rounded to the nearest integer instead of truncated (a value such as
 * 34.999999 must give 35, not 34). Results that do not fit in an
 * unsigned long saturate at the largest unsigned long value, and a
 * non-positive (or NaN) product yields 0, which avoids the undefined
 * out-of-range floating-point to integer conversion.
 *
 * k <= 0 gives 1; 0 <= n < k gives 0.
 */
unsigned long int choose (long n,long k)
{
    const unsigned long int max_val = ~0UL;
    double res = 1;
    long i;

    for( i = 0 ; i < k ; i++ )
        res *= (double)(n-i)/(double)(k-i);

    if( !(res > 0) )
        return 0;
    if( res >= (double)max_val )
        return max_val;

    return (unsigned long int)(res + 0.5);
}
/*
 * Initialise every field of node from the given values.
 * The uniq field receives the next value of a function-local counter that
 * is incremented on every call; the dumb flag is cleared.
 */
void set_node(tree_t *node,tree_t ** child, int arity,tree_t *parent,
              int id,double val,tree_t *tab_child,int depth)
{
    static int next_uniq = 0;

    node->id        = id;
    node->val       = val;
    node->depth     = depth;
    node->arity     = arity;
    node->child     = child;
    node->parent    = parent;
    node->tab_child = tab_child;
    node->uniq      = next_uniq++;
    node->dumb      = 0;
}
/*
 * Print the main fields of node on stdout; does nothing when
 * verbose_level is below DEBUG.
 */
void display_node(tree_t *node)
{
    if (verbose_level < DEBUG)
        return;

    printf("child : %p\narity : %d\nparent : %p\nid : %d\nval : %f\nuniq : %d\n\n",
           (void *)(node->child),node->arity,(void *)(node->parent),node->id,node->val,node->uniq);
}
/*
 * Shallow-copy the node old into new (child array and tab_child are
 * shared, not duplicated), then re-parent every child onto new.
 */
void clone_tree(tree_t *new,tree_t *old)
{
    int c;

    new->id        = old->id;
    new->uniq      = old->uniq;
    new->dumb      = old->dumb;
    new->val       = old->val;
    new->depth     = old->depth;
    new->arity     = old->arity;
    new->parent    = old->parent;
    new->child     = old->child;
    new->tab_child = old->tab_child;

    /* The children must now point at the copy */
    for (c = 0; c < new->arity; c++) {
        tree_t *kid = new->child[c];
        kid->parent = new;
    }
}
/*
 * Build the weight array of the M grouped nodes: entry g is the sum of
 * tab[] over the ids of the children of new_tab_node[g].
 * Returns NULL when tab is NULL, otherwise a newly allocated array of
 * M doubles.
 */
double *aggregate_obj_weight(tree_t *new_tab_node, double *tab, int M)
{
    double *agg;
    int g, c;

    if (NULL == tab)
        return NULL;

    agg = (double*)MALLOC(M*sizeof(double));
    for (g = 0; g < M; g++) {
        tree_t *grp = &new_tab_node[g];

        agg[g] = 0.0;
        for (c = 0; c < grp->arity; c++)
            agg[g] += tab[grp->child[c]->id];
    }
    return agg;
}
/*
 * Work item for the thread pool: aggregate rows [inf, sup) of the grouped
 * affinity matrix.
 *
 * Expects exactly 7 arguments, in the order packed by aggregate_aff_mat():
 *   args[0] int*      inf, first row to process
 *   args[1] int*      sup, one past the last row to process
 *   args[2] double**  old_mat, matrix of the level being grouped
 *   args[3] tree_t*   tab_node, the grouped nodes
 *   args[4] int*      M, order of the new matrix
 *   args[5] double**  mat, new matrix (accumulated into)
 *   args[6] double*   sum_row, row sums of the new matrix (accumulated into)
 *
 * Exits the process when the argument count is wrong. The count is
 * validated before args[] is dereferenced; the previous test compared
 * against 6 although 7 arguments are passed and read, so every parallel
 * run aborted.
 */
void partial_aggregate_aff_mat (int nb_args, void **args){
    int inf, sup, M;
    double **old_mat, **mat;
    double *sum_row;
    tree_t *tab_node;
    int i,j,i1,j1;
    int id1, id2;

    if(nb_args != 7){
        if(verbose_level >= ERROR)
            fprintf(stderr,"Wrong number of args in %s: %d\n",__func__, nb_args);
        exit(-1);
    }

    inf      = *(int*)args[0];
    sup      = *(int*)args[1];
    old_mat  = (double**)args[2];
    tab_node = (tree_t*)args[3];
    M        = *(int*)args[4];
    mat      = (double**)args[5];
    sum_row  = (double*)args[6];

    if(verbose_level >= INFO)
        printf("Aggregate in parallel (%d-%d)\n",inf,sup-1);

    for( i = inf ; i < sup ; i++ )
        for( j = 0 ; j < M ; j++ ){
            if(i != j){
                for( i1 = 0 ; i1 < tab_node[i].arity ; i1++ ){
                    id1 = tab_node[i].child[i1]->id;
                    for( j1 = 0 ; j1 < tab_node[j].arity ; j1++ ){
                        id2 = tab_node[j].child[j1]->id;
                        mat[i][j] += old_mat[id1][id2];
                    }
                    /* NOTE(review): this adds the partial mat[i][j] once per
                       child i1, as the serial path in aggregate_aff_mat()
                       does; it looks like it over-counts the row sum -
                       confirm before changing both places together. */
                    sum_row[i] += mat[i][j];
                }
            }
        }
}
/*
 * Build the affinity matrix of order M for the grouped nodes in tab_node.
 * Entry [i][j] (i != j) accumulates old_mat[id1][id2] over every child id1
 * of group i and every child id2 of group j. Diagonal entries stay 0.
 *
 * When M > 512 the rows are split into contiguous slices that are handed
 * to the thread pool (partial_aggregate_aff_mat); otherwise the same loop
 * nest is run inline.
 *
 * Returns a newly allocated affinity_mat_t wrapping the new matrix and its
 * sum_row array.
 */
affinity_mat_t *aggregate_aff_mat(tree_t *tab_node, affinity_mat_t *aff_mat, int M)
{
    double **old_mat = aff_mat->mat;
    double **new_mat = (double**)MALLOC(M*sizeof(double*));
    double *sum_row = NULL;
    int row;

    for (row = 0; row < M; row++)
        new_mat[row] = (double*)CALLOC((M),sizeof(double));
    sum_row = (double*)CALLOC(M,sizeof(double));

    if (M <= 512) {
        /* Small matrix: aggregate inline */
        int col, ci, cj;

        for (row = 0; row < M; row++) {
            for (col = 0; col < M; col++) {
                if (row == col)
                    continue;
                for (ci = 0; ci < tab_node[row].arity; ci++) {
                    int src_row = tab_node[row].child[ci]->id;

                    for (cj = 0; cj < tab_node[col].arity; cj++) {
                        int src_col = tab_node[col].child[cj]->id;
                        new_mat[row][col] += old_mat[src_row][src_col];
                    }
                    sum_row[row] += new_mat[row][col];
                }
            }
        }
    } else {
        /* Large matrix: one slice of rows per worker */
        int t;
        int nb_threads = MIN(M/512,get_nb_threads());
        work_t **works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
        int *inf = (int*)MALLOC(sizeof(int)*nb_threads);
        int *sup = (int*)MALLOC(sizeof(int)*nb_threads);

        for (t = 0; t < nb_threads; t++) {
            void **args = (void**)MALLOC(sizeof(void*)*7);

            inf[t] = t*M/nb_threads;
            sup[t] = (t+1)*M/nb_threads;
            if (t == nb_threads-1)
                sup[t] = M;     /* the last slice runs to the final row */

            args[0] = (void*)(inf+t);
            args[1] = (void*)(sup+t);
            args[2] = (void*)old_mat;
            args[3] = (void*)tab_node;
            args[4] = &M;
            args[5] = (void*)new_mat;
            args[6] = (void*)sum_row;

            works[t] = create_work(7,args,partial_aggregate_aff_mat);
            if (verbose_level >= DEBUG)
                printf("Executing %p\n",(void *)works[t]);
            submit_work( works[t], t);
        }

        /* Wait for every slice, then release its argument vector */
        for (t = 0; t < nb_threads; t++) {
            wait_work_completion(works[t]);
            FREE(works[t]->args);
        }

        FREE(inf);
        FREE(sup);
        FREE(works);
    }

    return new_affinity_mat(new_mat,sum_row,M);
}
/* Free the N rows of a double matrix, then the row-pointer array itself. */
void FREE_tab_double(double**tab,int N)
{
    int row = 0;

    while (row < N) {
        FREE(tab[row]);
        row++;
    }
    FREE(tab);
}
/* Free the N rows of an int matrix, then the row-pointer array itself. */
void FREE_tab_int(int**tab,int N)
{
    int row = 0;

    while (row < N) {
        FREE(tab[row]);
        row++;
    }
    FREE(tab);
}
/*
 * Print an N x N matrix of doubles on stdout, one row per line, each
 * value formatted with "%g" and followed by a space.
 */
void display_tab(double **tab,int N)
{
    int r, c;

    for (r = 0; r < N; r++) {
        const double *row = tab[r];

        for (c = 0; c < N; c++)
            printf("%g ",row[c]);
        printf("\n");
    }
}
/*
 * Evaluate a candidate group of `arity` nodes: the sum of sum_row[] over
 * the members' ids, minus every mat[id1][id2] entry for all ordered pairs
 * of members (including id1 == id2).
 */
double eval_grouping(affinity_mat_t *aff_mat,tree_t **cur_group,int arity)
{
    double **mat = aff_mat->mat;
    double *sum_row = aff_mat->sum_row;
    double cost = 0;
    int a, b;

    /* Sum of the members' row sums */
    for (a = 0; a < arity; a++)
        cost += sum_row[cur_group[a]->id];

    /* Remove the entries between members of the group */
    for (a = 0; a < arity; a++) {
        const double *row = mat[cur_group[a]->id];

        for (b = 0; b < arity; b++)
            cost -= row[cur_group[b]->id];
    }

    return cost;
}
/*
 * Allocate a group list cell holding the group `tab` with value `val`,
 * linked in front of `next`. sum_neighbour starts at 0; no other field
 * is initialised here.
 */
group_list_t *new_group_list(tree_t **tab,double val,group_list_t *next)
{
    group_list_t *cell = (group_list_t *)MALLOC(sizeof(group_list_t));

    cell->next = next;
    cell->tab = tab;
    cell->val = val;
    cell->sum_neighbour = 0;

    return cell;
}
/*
 * Insert a copy of cur_group (arity entries) with value val right after
 * the head cell `list`, and increment list->val, which the head uses as
 * the element counter. The group is traced on stdout at INFO verbosity.
 */
void add_to_list(group_list_t *list,tree_t **cur_group, int arity, double val)
{
    tree_t **copy = (tree_t **)MALLOC(sizeof(tree_t *)*arity);
    int k;

    for (k = 0; k < arity; k++) {
        copy[k] = cur_group[k];
        if (verbose_level>=INFO)
            printf("cur_group[%d]=%d ",k,cur_group[k]->id);
    }
    if (verbose_level>=INFO)
        printf(": %f\n",val);

    list->next = new_group_list(copy,val,list->next);
    list->val++;
}
/*
 * Recursively enumerate the groups of `arity` nodes taken from tab_node
 * (indices >= id, nodes that already have a parent are skipped) and add
 * each complete group, with its eval_grouping() value, to `list`.
 * `depth` is the number of slots of cur_group already filled.
 */
void list_all_possible_groups(affinity_mat_t *aff_mat,tree_t *tab_node,int id,int arity, int depth,
                              tree_t **cur_group, group_list_t *list)
{
    int N = aff_mat->order;
    int idx;

    if (depth == arity) {
        /* cur_group is complete: record it */
        double val = eval_grouping(aff_mat,cur_group,arity);
        add_to_list(list,cur_group,arity,val);
        return;
    }

    /* Not enough remaining indices to complete a group */
    if ((N+depth) < (arity+id))
        return;

    for (idx = id; idx < N; idx++) {
        if (tab_node[idx].parent)
            continue;
        cur_group[depth] = &tab_node[idx];
        if (verbose_level>=INFO)
            printf("%d<-%d\n",depth,idx);
        list_all_possible_groups(aff_mat,tab_node,idx+1,arity,depth+1,cur_group,list);
    }
}
/*
 * Recompute parent->val as the eval_grouping() value of its current
 * children under aff_mat.
 */
void update_val(affinity_mat_t *aff_mat,tree_t *parent)
{
    double group_val = eval_grouping(aff_mat,parent->child,parent->arity);

    parent->val = group_val;
}
/*
 * Return 1 when the group `elem` shares no node id with any of the first
 * d groups of `selection`, 0 otherwise. An empty selection (d == 0) is
 * always independent.
 */
int independent_groups(group_list_t **selection,int d,group_list_t *elem,int arity)
{
    int s, a, b;

    for (s = 0; s < d; s++) {
        tree_t **chosen = selection[s]->tab;

        for (a = 0; a < arity; a++) {
            int id = elem->tab[a]->id;

            for (b = 0; b < arity; b++) {
                if (chosen[b]->id == id)
                    return 0;
            }
        }
    }
    return 1;
}
/*
 * At INFO verbosity or above, print the ids of the M selected groups
 * (arity ids per group) followed by the selection value.
 */
void display_selection (group_list_t** selection,int M,int arity,double val)
{
    int g, c;

    if (verbose_level >= INFO) {
        for (g = 0; g < M; g++) {
            tree_t **members = selection[g]->tab;

            for (c = 0; c < arity; c++)
                printf("%d ",members[c]->id);
            printf("-- ");
        }
        printf(":%f\n",val);
    }
}
/*
 * At INFO verbosity or above, print the child ids of each of the M parent
 * nodes in `father` (arity ids per parent) followed by the value val.
 */
void display_grouping (tree_t *father,int M,int arity,double val)
{
    int g, c;

    if (verbose_level >= INFO) {
        printf("Grouping : ");
        for (g = 0; g < M; g++) {
            tree_t **kids = father[g].child;

            for (c = 0; c < arity; c++)
                printf("%d ",kids[c]->id);
            printf("-- ");
        }
        printf(":%f\n",val);
    }
}
/*
 * Greedily extend a partial selection of d groups up to M groups.
 * Starting at index i of tab (n entries), the first group independent of
 * the current selection is appended and the search continues from the
 * next index; no backtracking is performed.
 *
 * When M groups are selected, the selection is copied to best_selection
 * and *best_val updated if its accumulated value val is strictly smaller.
 * Returns 1 when best_selection was updated, 0 otherwise.
 */
int recurs_select_independent_groups(group_list_t **tab,int i,int n,int arity,int d,int M,double val,double *best_val,group_list_t **selection,group_list_t **best_selection)
{
    int s;

    if (d == M) {
        if (verbose_level>=INFO)
            display_selection(selection,M,arity,val);

        if (!(val < *best_val))
            return 0;

        *best_val = val;
        for (s = 0; s < M; s++)
            best_selection[s] = selection[s];
        return 1;
    }

    for (; i < n; i++) {
        group_list_t *cand = tab[i];

        if (!independent_groups(selection,d,cand,arity))
            continue;

        if (verbose_level>=INFO)
            printf("%d: %d\n",d,i);
        selection[d] = cand;
        val += cand->val;
        return recurs_select_independent_groups(tab,i+1,n,arity,d+1,M,val,best_val,selection,best_selection);
    }
    return 0;
}
/*
 * Same first step as recurs_select_independent_groups() but without any
 * tracing and without touching best_selection when d already equals M
 * (returns 1 in that case). Otherwise the first group of tab[i..n)
 * independent of the selection is appended and the search is delegated to
 * recurs_select_independent_groups(). Returns 0 when no group fits.
 */
int test_independent_groups(group_list_t **tab,int i,int n,int arity,int d,int M,double val,double *best_val,group_list_t **selection,group_list_t **best_selection)
{
    if (d == M)
        return 1;

    for (; i < n; i++) {
        group_list_t *cand = tab[i];

        if (!independent_groups(selection,d,cand,arity))
            continue;

        selection[d] = cand;
        val += cand->val;
        return recurs_select_independent_groups(tab,i+1,n,arity,d+1,M,val,best_val,selection,best_selection);
    }
    return 0;
}
/*
 * Free every cell of the group list starting at `list`, together with the
 * group array each cell holds. NULL is accepted.
 */
void delete_group_list(group_list_t *list)
{
    while (list) {
        group_list_t *following = list->next;

        FREE(list->tab);
        FREE(list);
        list = following;
    }
}
int group_list_id(const void* x1,const void* x2)
{
group_list_t *e1 = NULL,*e2= NULL;
e1 = *((group_list_t**)x1);
e2 = *((group_list_t**)x2);
return (e1->tab[0]->id < e2->tab[0]->id) ? - 1 : 1;
}
int group_list_asc(const void* x1,const void* x2)
{
group_list_t *e1 = NULL,*e2 = NULL;
e1 = *((group_list_t**)x1);
e2 = *((group_list_t**)x2);
return (e1->val < e2->val) ? - 1 : 1;
}
int group_list_dsc(const void* x1,const void* x2)
{
group_list_t *e1 = NULL,*e2 = NULL;
e1 = *((group_list_t**)x1);
e2 = *((group_list_t**)x2);
return (e1->val > e2->val) ? -1 : 1;
}
int weighted_degree_asc(const void* x1,const void* x2)
{
group_list_t *e1= NULL,*e2 = NULL;
e1 = *((group_list_t**)x1);
e2 = *((group_list_t**)x2);
return (e1->wg > e2->wg) ? 1 : -1;
}
int weighted_degree_dsc(const void* x1,const void* x2)
{
group_list_t *e1 = NULL,*e2 = NULL;
e1 = *((group_list_t**)x1);
e2 = *((group_list_t**)x2);
return (e1->wg > e2->wg) ? - 1 : 1;
}
/*
 * Try each of the first min(bound, n) groups of tab_group as the first
 * element of a selection of M independent groups, completing it greedily
 * with recurs_select_independent_groups(), which keeps the best selection
 * found in best_selection / *best_val.
 *
 * When max_duration > 0 the elapsed time is checked on every fifth start
 * index; the function returns 1 as soon as it exceeds max_duration.
 * Returns 0 when all start indices were tried.
 */
int select_independent_groups(group_list_t **tab_group,int n,int arity,int M,double *best_val,
                              group_list_t **best_selection,int bound,double max_duration)
{
    group_list_t **selection = NULL;
    CLOCK_T time1,time0;
    double val,duration;
    int start, c;
    int limit = MIN(bound,n);

    /* Trace the candidate groups */
    if (verbose_level>=INFO) {
        for (start = 0; start < n; start++) {
            for (c = 0; c < arity; c++) {
                printf("%d ",tab_group[start]->tab[c]->id);
            }
            printf(" : %f\n",tab_group[start]->val);
        }
    }

    selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*M);
    CLOCK(time0);

    for (start = 0; start < limit; start++) {
        selection[0] = tab_group[start];
        val = tab_group[start]->val;
        recurs_select_independent_groups(tab_group,start+1,n,arity,1,M,val,best_val,selection,best_selection);

        /* Time budget check, once every five iterations */
        if ((start%5) != 0 || !(max_duration>0))
            continue;

        CLOCK(time1);
        duration = CLOCK_DIFF(time1,time0);
        if (duration>max_duration) {
            FREE(selection);
            return 1;
        }
    }

    FREE(selection);
    if (verbose_level>=INFO)
        display_selection(best_selection,M,arity,*best_val);
    return 0;
}
/*
 * Walk tab_group from its last index downwards, in steps of dec*dec with
 * dec = max(n/10000, 2), using each visited group as the first element of
 * a selection completed through test_independent_groups().
 *
 * Stops and returns 0 once `bound` successful attempts were counted or
 * the walk is over; returns 1 when max_duration > 0 and the elapsed time
 * (checked when the index is a multiple of 5) exceeds it.
 */
int select_independent_groups_by_largest_index(group_list_t **tab_group,int n,int arity,int M,double *best_val,group_list_t **best_selection,int bound,double max_duration)
{
    group_list_t **selection = NULL;
    CLOCK_T time1,time0;
    double val,duration;
    int idx, dec, step;
    int nb_groups = 0;

    selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*M);
    CLOCK(time0);

    dec = MAX(n/10000,2);
    step = dec*dec;

    for (idx = n-1; idx >= 0; idx -= step) {
        selection[0] = tab_group[idx];
        val = tab_group[idx]->val;
        nb_groups += test_independent_groups(tab_group,idx+1,n,arity,1,M,val,best_val,selection,best_selection);
        if (verbose_level>=DEBUG)
            printf("%d:%d\n",idx,nb_groups);

        if (nb_groups >= bound) {
            FREE(selection);
            return 0;
        }

        if ((idx%5) != 0 || !(max_duration>0))
            continue;

        CLOCK(time1);
        duration=CLOCK_DIFF(time1,time0);
        if (duration>max_duration) {
            FREE(selection);
            return 1;
        }
    }

    FREE(selection);
    return 0;
}
/*
 * Store the n cells of the linked list `list` into tab in reverse order
 * (first cell at tab[n-1], last cell at tab[0]).
 *
 * The list must hold exactly n cells: the process exits with -1 when it
 * is shorter or longer. Both fatal messages are now reported at the same
 * verbosity threshold (CRITICAL); the "too many elements" case used to be
 * printed only at DEBUG, so the process could exit without any message.
 */
void list_to_tab(group_list_t *list,group_list_t **tab,int n)
{
    int i;

    for( i = 0 ; i < n ; i++ ){
        if(!list){
            if(verbose_level>=CRITICAL)
                fprintf(stderr,"Error not enough elements. Only %d on %d\n",i,n);
            exit(-1);
        }
        tab[n-i-1] = list;
        list = list->next;
    }

    if(list){
        if(verbose_level>=CRITICAL)
            fprintf(stderr,"Error too many elements\n");
        exit(-1);
    }
}
/*
 * At DEBUG verbosity or above, print for each of the n groups its node
 * ids followed by its val and wg fields.
 */
void display_tab_group(group_list_t **tab, int n,int arity)
{
    int g, c;

    if (verbose_level >= DEBUG) {
        for (g = 0; g < n; g++) {
            group_list_t *grp = tab[g];

            for (c = 0; c < arity; c++)
                printf("%d ",grp->tab[c]->id);
            printf(": %.2f %.2f\n",grp->val,grp->wg);
        }
    }
}
/*
 * Return 0 when the two arrays of n nodes share an id, 1 otherwise.
 * The arrays are scanned with two cursors, advancing the one pointing to
 * the smaller id.
 * NOTE(review): this scan presumably relies on both arrays being sorted
 * by increasing id - confirm against the callers.
 */
int independent_tab(tree_t **tab1,tree_t **tab2,int n)
{
    int a = 0;
    int b = 0;

    while (a < n && b < n) {
        int id_a = tab1[a]->id;
        int id_b = tab2[b]->id;

        if (id_a == id_b)
            return 0;
        if (id_a > id_b)
            b++;
        else
            a++;
    }
    return 1;
}
/*
 * For each of the n groups compute sum_neighbour, the sum of the val of
 * every other group that is not independent from it (independent_tab()),
 * and wg = sum_neighbour / val, forced to 0 when sum_neighbour is 0.
 */
void compute_weighted_degree(group_list_t **tab, int n,int arity)
{
    int cur, other;

    for (cur = 0; cur < n; cur++)
        tab[cur]->sum_neighbour = 0;

    for (cur = 0; cur < n; cur++) {
        group_list_t *grp = tab[cur];

        /* Pairs (cur, other) with other < cur were handled earlier */
        for (other = cur+1; other < n; other++) {
            group_list_t *peer = tab[other];

            if (independent_tab(grp->tab,peer->tab,arity))
                continue;
            grp->sum_neighbour += peer->val;
            peer->sum_neighbour += grp->val;
        }

        grp->wg = grp->sum_neighbour/grp->val;
        if (grp->sum_neighbour == 0)
            grp->wg = 0;
    }
}
/*
  Very slow: explores all the possibilities.
  aff_mat : the affinity matrix at the considered level (used to evaluate a grouping)
  tab_node: array of the nodes to group
  parent: node to which the computed group is attached
  id: index of the last node of tab_node placed in the group (candidates start at id+1)
  arity: number of children of parent, i.e. size of the group to compute
  n: number of nodes already placed in cur_group
  best_val: value of the best grouping found so far
  cur_group: grouping under construction
*/
void group(affinity_mat_t *aff_mat,tree_t *tab_node,tree_t *parent,int id,int arity, int n,double *best_val,tree_t **cur_group)
{
    int N = aff_mat->order;
    int idx;

    if (n == arity) {
        /* The group is complete: evaluate it and keep it if it is better */
        double val = eval_grouping(aff_mat,cur_group,arity);

        if (val < *best_val) {
            int c;

            *best_val = val;
            for (c = 0; c < arity; c++)
                parent->child[c] = cur_group[c];
            parent->arity = arity;
        }
        return;
    }

    /* More nodes are needed: try every remaining node that is still free */
    idx = id+1;
    while (idx < N) {
        tree_t *cand = &tab_node[idx];

        if (!cand->parent) {
            cur_group[n] = cand;
            group(aff_mat,tab_node,parent,idx,arity,n+1,best_val,cur_group);
        }
        idx++;
    }
}
/*
  Bounded variant of group(): the exploration stops once more than
  max_groups complete groups have been evaluated.
  aff_mat : the affinity matrix at the considered level (used to evaluate a grouping)
  tab_node: array of the nodes to group
  parent: node to which the computed group is attached
  id: index of the last node of tab_node placed in the group (candidates start at id+1)
  arity: number of children of parent, i.e. size of the group to compute
  n: number of nodes already placed in cur_group
  best_val: value of the best grouping found so far
  cur_group: grouping under construction
  nb_groups: counter of the complete groups evaluated so far
  max_groups: bound on nb_groups
*/
void fast_group(affinity_mat_t *aff_mat,tree_t *tab_node,tree_t *parent,int id,int arity, int n,
                double *best_val,tree_t **cur_group, int *nb_groups,int max_groups)
{
    int N = aff_mat->order;
    int idx;

    if (n == arity) {
        /* The group is complete: count it, evaluate it, keep it if better */
        double val;

        (*nb_groups)++;
        val = eval_grouping(aff_mat,cur_group,arity);
        if (val < *best_val) {
            int c;

            *best_val = val;
            for (c = 0; c < arity; c++)
                parent->child[c] = cur_group[c];
            parent->arity = arity;
        }
        return;
    }

    /* More nodes are needed: try the remaining nodes that are still free */
    for (idx = id+1; idx < N; idx++) {
        tree_t *cand = &tab_node[idx];

        if (cand->parent)
            continue;

        cur_group[n] = cand;
        fast_group(aff_mat,tab_node,parent,idx,arity,n+1,best_val,cur_group,nb_groups,max_groups);
        if (*nb_groups > max_groups)
            return;
    }
}
/*
 * Build the M parents of new_tab_node one after the other: for each one,
 * fast_group() picks the best group among a bounded number of candidates,
 * the chosen children are attached to the parent and its value updated.
 * The bound is max(1, 50 - CmiLog2(k) - M/10).
 */
void fast_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node, int arity, int M,long int k)
{
    tree_t **cur_group = (tree_t**)MALLOC(sizeof(tree_t*)*arity);
    int max_groups = MAX(1,(int)(50-CmiLog2(k))-M/10);
    double best_val, val = 0;
    int p, c, nb_groups;

    for (p = 0; p < M; p++) {
        tree_t *parent = &new_tab_node[p];

        best_val = DBL_MAX;
        nb_groups = 0;
        fast_group(aff_mat,tab_node,parent,-1,arity,0,&best_val,cur_group,&nb_groups,max_groups);
        val += best_val;

        /* Mark the selected nodes as taken */
        for (c = 0; c < parent->arity; c++)
            parent->child[c]->parent = parent;
        update_val(aff_mat,parent);
    }

    FREE(cur_group);

    if (verbose_level>=INFO) {
        printf("val=%f\n",val);
        display_grouping(new_tab_node,M,arity,val);
    }
}
/*
 * qsort() comparator on adjacency_t elements: ascending val.
 * Returns 0 for equal values so that the ordering is consistent, as
 * qsort() requires (the previous version returned 1 for both orders of a
 * tie).
 */
int adjacency_asc(const void* x1,const void* x2)
{
    const adjacency_t *e1 = (const adjacency_t*)x1;
    const adjacency_t *e2 = (const adjacency_t*)x2;

    return (e1->val > e2->val) - (e1->val < e2->val);
}
/*
 * qsort() comparator on adjacency_t elements: descending val.
 * Returns 0 for equal values so that the ordering is consistent, as
 * qsort() requires (the previous version returned 1 for both orders of a
 * tie).
 */
int adjacency_dsc(const void* x1,const void* x2)
{
    const adjacency_t *e1 = (const adjacency_t*)x1;
    const adjacency_t *e2 = (const adjacency_t*)x2;

    return (e1->val < e2->val) - (e1->val > e2->val);
}
/*
 * Pair grouping (arity must be 2): list every pair (i, j), i < j, of the
 * affinity matrix as an edge, sort the edges by decreasing value and
 * feed them in that order to try_add_edge() until M parents are filled
 * or the edges are exhausted. The value of each parent is then updated
 * and the grouping displayed.
 *
 * Each TIC is now matched by exactly one TOC (the grouping phase used to
 * issue TIC twice for a single TOC).
 * NOTE(review): (N*N-N)/2 is computed in int and can overflow for large
 * N - confirm the expected range of N.
 */
void super_fast_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node, int arity, int M)
{
    double val = 0,duration;
    adjacency_t *graph;
    int i,j,e,l,nb_groups;
    int N = aff_mat->order;
    double **mat = aff_mat->mat;

    assert( 2 == arity);

    /* Linearise the upper triangle of the matrix into an edge array */
    TIC;
    graph = (adjacency_t*)MALLOC(sizeof(adjacency_t)*((N*N-N)/2));
    e = 0;
    for( i = 0 ; i < N ; i++ )
        for( j = i+1 ; j < N ; j++){
            graph[e].i = i;
            graph[e].j = j;
            graph[e].val = mat[i][j];
            e++;
        }
    duration = TOC;
    if(verbose_level>=DEBUG)
        printf("linearization=%fs\n",duration);
    assert( e == (N*N-N)/2);

    /* Heaviest edges first */
    TIC;
    qsort(graph,e,sizeof(adjacency_t),adjacency_dsc);
    duration = TOC;
    if(verbose_level>=DEBUG)
        printf("sorting=%fs\n",duration);

    /* Fill the parents with the accepted edges */
    TIC;
    l = 0;
    nb_groups = 0;
    for( i = 0 ; (i < e) && (l < M) ; i++ )
        if(try_add_edge(tab_node,&new_tab_node[l],arity,graph[i].i,graph[i].j,&nb_groups))
            l++;

    for( l = 0 ; l < M ; l++ ){
        update_val(aff_mat,&new_tab_node[l]);
        val += new_tab_node[l].val;
    }
    duration = TOC;
    if(verbose_level>=DEBUG)
        printf("Grouping=%fs\n",duration);
    if(verbose_level>=DEBUG)
        printf("val=%f\n",val);

    display_grouping(new_tab_node,M,arity,val);
    FREE(graph);
}
/*
 * Build the cost matrix that combines the affinity matrix with the object
 * weights:
 *   cost[i][j] = 1e-4*aff[i][j]/comm_speed - |avg - (w[i]+w[j])/2|, i != j
 *   cost[i][i] = 0
 * where avg is the mean of obj_weight. sum_row[i] accumulates the
 * off-diagonal entries of row i.
 *
 * Returns aff_mat itself (no copy) when obj_weight is NULL, otherwise a
 * newly allocated affinity_mat_t.
 */
affinity_mat_t *build_cost_matrix(affinity_mat_t *aff_mat, double* obj_weight, double comm_speed)
{
    double **old_mat, **mat;
    double *sum_row;
    double avg = 0;
    int r, c, N;

    if (NULL == obj_weight)
        return aff_mat;

    N = aff_mat->order;
    old_mat = aff_mat->mat;

    mat = (double**)MALLOC(N*sizeof(double*));
    for (r = 0; r < N; r++)
        mat[r] = (double*)MALLOC(N*sizeof(double));
    sum_row = (double*)CALLOC(N,sizeof(double));

    /* Mean object weight */
    for (r = 0; r < N; r++)
        avg += obj_weight[r];
    avg /= N;
    if (verbose_level>=DEBUG)
        printf("avg=%f\n",avg);

    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            if (r == c) {
                mat[r][c] = 0;
                continue;
            }
            mat[r][c] = 1e-4*old_mat[r][c]/comm_speed-fabs(avg-(obj_weight[r]+obj_weight[c])/2);
            sum_row[r] += mat[r][c];
        }
    }

    return new_affinity_mat(mat,sum_row,N);
}
/*
  Group the nodes of tab_node into the M parents of new_tab_node.

  aff_mat: affinity matrix at the considered level (used to evaluate a grouping)
  tab_node: array of the nodes to group
  new_tab_node: array of nodes at the next level (the parents of the nodes in tab_node once the grouping is done)
  arity: number of children of each parent, i.e. size of the groups to compute
  M: size of new_tab_node, i.e. the number of parents
  obj_weigth: weight of the objects, or NULL (the affinity matrix is then used directly as cost matrix)
  comm_speed: communication speed, forwarded to build_cost_matrix()

  When the number k of possible groups exceeds 30000 a fast heuristic is
  used (bucket_grouping() for arity <= 2, fast_grouping() otherwise).
  Otherwise every group is enumerated and several selection orders are
  tried in turn, the best selection found being kept.

  The number of groups is an unsigned long and is printed with %lu (it
  used to be printed with %ld).
*/
void group_nodes(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node, int arity, int M, double* obj_weigth, double comm_speed)
{
    /*
      N: size of tab and tab_node, i.e. number of nodes at the considered level.
      Hence we have: M*arity=N
    */
    int N = aff_mat -> order;
    tree_t **cur_group = NULL;
    int j,l;
    unsigned int n;
    unsigned long int k;
    group_list_t list,**best_selection = NULL,**tab_group = NULL;
    double best_val,last_best;
    int timeout;
    affinity_mat_t *cost_mat = NULL; /* cost matrix taking into account the communication cost but also the weight of the objects */
    double duration;

    TIC;

    /* might return aff_mat (if obj_weight==NULL): do not FREE this tab in this case */
    cost_mat = build_cost_matrix(aff_mat,obj_weigth,comm_speed);

    k = choose(N,arity);
    if(verbose_level>=INFO)
        printf("Number of groups:%lu\n",k);

    /* Todo: check if the depth is a criterion for speeding up the computation */
    if( k > 30000 ) {
        double fast_duration;

        TIC;
        if( arity <= 2 ) {
            if(verbose_level >= INFO )
                printf("Bucket Grouping...\n");
            bucket_grouping(cost_mat,tab_node,new_tab_node,arity,M);
        } else {
            if(verbose_level >= INFO)
                printf("Fast Grouping...\n");
            fast_grouping(cost_mat,tab_node,new_tab_node,arity,M,k);
        }
        fast_duration = TOC;

        if(verbose_level>=INFO)
            printf("Fast grouping duration=%f\n",fast_duration);
        if(verbose_level>=DEBUG)
            display_grouping(new_tab_node,M,arity,-1);
    } else {
        if(verbose_level>=INFO)
            printf("Grouping nodes...\n");

        list.next = NULL;
        list.val = 0; /* number of elements in the list */
        cur_group = (tree_t**)MALLOC(sizeof(tree_t*)*arity);
        best_selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*M);

        list_all_possible_groups(cost_mat,tab_node,0,arity,0,cur_group,&list);
        n = (int)list.val;
        assert( n == k );

        tab_group = (group_list_t**)MALLOC(sizeof(group_list_t*)*n);
        list_to_tab(list.next,tab_group,n);
        if(verbose_level>=INFO)
            printf("List to tab done\n");

        best_val = DBL_MAX;

        /* perform the pack mapping first */
        timeout = select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,1,100);
        if((verbose_level>=INFO) && timeout)
            printf("Packed mapping timeout!\n");

        /* give this mapping an extra credit (in general MPI applications are made such that
           neighbour processes communicate more than distant ones) */
        best_val /= 1.001;
        if(verbose_level>=INFO)
            printf("Packing computed\n");

        /* perform a mapping trying to use the groups that cost less first */
        qsort(tab_group,n,sizeof(group_list_t*),group_list_asc);
        last_best = best_val;
        timeout = select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,10,0.1);
        if(verbose_level>=INFO){
            if(timeout) {
                printf("Cost less first timeout!\n");
            } else if(last_best>best_val) {
                printf("Cost less first Impoved solution\n");
            }
            printf("----\n");
        }

        /* perform a mapping trying to minimize the use of groups that cost a lot */
        qsort(tab_group,n,sizeof(group_list_t*),group_list_dsc);
        last_best=best_val;
        timeout=select_independent_groups_by_largest_index(tab_group,n,arity,M,&best_val,best_selection,10,0.1);
        if(verbose_level>=DEBUG) {
            if(timeout)
                printf("Cost most last timeout!\n");
            else if(last_best>best_val)
                printf("Cost most last impoved solution\n");
        }

        if( n < 10000 ){
            /* perform a mapping in the weighted degree order */
            if(verbose_level>=INFO)
                printf("----WG----\n");

            compute_weighted_degree(tab_group,n,arity);

            if(verbose_level>=INFO)
                printf("Weigted degree computed\n");

            qsort(tab_group,n,sizeof(group_list_t*),weighted_degree_dsc);

            last_best = best_val;
            timeout = select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,10,0.1);
            if(verbose_level>=DEBUG){
                if(timeout)
                    printf("WG timeout!\n");
                else if(last_best>best_val)
                    printf("WG impoved solution\n");
            }
        }

        /* Attach the groups of the best selection to the parents, ordered by first node id */
        qsort(best_selection,M,sizeof(group_list_t*),group_list_id);

        for( l = 0 ; l < M ; l++ ){
            for( j = 0 ; j < arity ; j++ ){
                new_tab_node[l].child[j] = best_selection[l]->tab[j];
                new_tab_node[l].child[j]->parent = &new_tab_node[l];
            }
            new_tab_node[l].arity = arity;
            update_val(cost_mat,&new_tab_node[l]);
        }

        delete_group_list((&list)->next);
        FREE(best_selection);
        FREE(tab_group);
        FREE(cur_group);
    }

    /* The cost matrix is only ours to free when it is not aff_mat itself */
    if(cost_mat != aff_mat){
        FREE_tab_double(cost_mat->mat,N);
        FREE(cost_mat->sum_row);
        FREE(cost_mat);
    }

    duration = TOC;

    if(verbose_level>=INFO)
        display_grouping(new_tab_node,M,arity,-1);

    if(verbose_level>=INFO)
        printf("Grouping done in %.4fs!\n",duration);
}
/*
 * Replace *aff_mat by a new affinity matrix of order N+K whose top-left
 * N x N corner and first N row sums are copied from the current one; the
 * K added rows and columns are zero. The previous affinity_mat_t is not
 * freed here.
 */
void complete_aff_mat(affinity_mat_t **aff_mat ,int N, int K)
{
    affinity_mat_t *old = *aff_mat;
    double **old_mat = old->mat;
    double **new_mat = NULL;
    double *sum_row = NULL;
    int M = N+K;
    int r;

    new_mat = (double**)MALLOC(M*sizeof(double*));
    for (r = 0; r < M; r++)
        new_mat[r] = (double*)CALLOC((M),sizeof(double));
    sum_row = (double*) CALLOC(M,sizeof(double));

    for (r = 0; r < N; r++) {
        memcpy(new_mat[r],old_mat[r],N*sizeof(double));
        sum_row[r] = old->sum_row[r];
    }

    *aff_mat = new_affinity_mat(new_mat,sum_row,M);
}
/*
 * Replace *tab by a new array of N+K weights: the first N entries are
 * copied from the current array and the K added entries are set to the
 * mean of those N values. Does nothing when *tab is NULL. The previous
 * array is not freed here.
 */
void complete_obj_weight(double **tab,int N, int K)
{
    double *old_tab = *tab;
    double *new_tab = NULL;
    double avg = 0;
    int M, i;

    if (NULL == old_tab)
        return;

    for (i = 0; i < N; i++)
        avg += old_tab[i];
    avg /= N;

    M = N+K;
    new_tab = (double*)MALLOC(M*sizeof(double));
    *tab = new_tab;

    for (i = 0; i < M; i++)
        new_tab[i] = (i < N) ? old_tab[i] : avg;
}
/*
 * Initialise node as the root of a complete placeholder subtree following
 * the arities of topology from `depth` down to its last level. Every
 * allocated descendant is flagged dumb; node itself is initialised by
 * set_node() with id -1 and no parent.
 */
void create_dumb_tree(tree_t *node,int depth,tm_topology_t *topology)
{
    tree_t **list_child = NULL;
    int arity, c;

    if (depth != topology->nb_levels-1) {
        arity = topology->arity[depth];
        assert(arity>0);

        list_child = (tree_t**)CALLOC(arity,sizeof(tree_t*));
        for (c = 0; c < arity; c++) {
            tree_t *kid = (tree_t*)MALLOC(sizeof(tree_t));

            list_child[c] = kid;
            create_dumb_tree(kid,depth+1,topology);
            /* set_node() cleared these two fields: set them afterwards */
            kid->parent = node;
            kid->dumb = 1;
        }

        set_node(node,list_child,arity,NULL,-1,0,list_child[0], depth);
        return;
    }

    /* Last level: a leaf without children */
    set_node(node,NULL,0,NULL,-1,0,NULL,depth);
}
/*
 * Extend the node array *tab from N to N+K nodes. The first N nodes are
 * cloned from the current array (their children are re-parented by
 * clone_tree()); the K added nodes are placeholder trees whose id is
 * their index. The previous array is freed. Does nothing when K is 0.
 */
void complete_tab_node(tree_t **tab,int N, int K,int depth,tm_topology_t *topology)
{
    tree_t *old_tab = NULL;
    tree_t *new_tab = NULL;
    int M, i;

    if (0 == K)
        return;

    old_tab = *tab;
    M = N+K;
    new_tab = (tree_t*)MALLOC(M*sizeof(tree_t));
    *tab = new_tab;

    for (i = 0; i < M; i++) {
        if (i >= N) {
            create_dumb_tree(&new_tab[i],depth,topology);
            new_tab[i].id = i;
            continue;
        }
        clone_tree(&new_tab[i],&old_tab[i]);
    }

    /* do not suppress tab if you are at the depth-most level: it will be used at the mapping stage */
    FREE(old_tab);
}
/*
 * Follow the tab_child chain `depth` steps down from tree and set the
 * tab_child of the node reached to child.
 */
void set_deb_tab_child(tree_t *tree, tree_t *child,int depth)
{
    while (depth > 0) {
        tree = tree->tab_child;
        depth--;
    }
    tree->tab_child = child;
}
/*
Build the tree of the matching. It is a bottom-up algorithm: it starts from the bottom of the tree and proceeds by decreasing the depth.
It groups the nodes of the matrix tab and links these groups to the nodes of the level below.
Then it calls the function recursively to perform the grouping at the level above.
tab_node: array of nodes of the level below.
aff_mat: local affinity matrix
arity: arity of the nodes of the level above.
depth: current depth of the algorithm
topology: description of the hardware topology.
constraints: set of constraints: core ids where to bind the processes
*/
tree_t *build_level_topology(tree_t *tab_node, affinity_mat_t *aff_mat,int arity,int depth,tm_topology_t *topology,
double *obj_weight, double *comm_speed)
{
/* N: number of nodes. Order of com_mat, size of obj_weight */
int N=aff_mat->order ;
int i,K=0,M; /*M = N/Arity: number the groups*/
tree_t *new_tab_node = NULL; /*array of node for this level (of size M): there will be linked to the nodes of tab_nodes*/
affinity_mat_t * new_aff_mat= NULL; /*New communication matrix (after grouyping nodes together)*/
tree_t *res = NULL; /*resulting tree*/
int completed = 0;
double speed; /* communication speed at this level*/
double *new_obj_weight = NULL;
double duration;
if( 0 == depth ){
if((1 == N) && (0 == depth))
return &tab_node[0];
else {
if(verbose_level >= CRITICAL)
fprintf(stderr,"Error: matrix size: %d and depth:%d (should be 1 and -1 respectively)\n",N,depth);
exit(-1);
}
}
/* If the number of nodes does not divide the arity: we add K nodes */
if( N%arity != 0 ){
TIC;
K = arity*((N/arity)+1)-N;
/*printf("****N=%d arity=%d K=%d\n",N,arity,K); */
/*display_tab(tab,N);*/
/* add K rows and columns to comm_matrix*/
complete_aff_mat(&aff_mat,N,K);
/* add K element to the object weight*/
complete_obj_weight(&obj_weight,N,K);
/*display_tab(tab,N+K);*/
/* add a dumb tree to the K new "virtual nodes"*/
complete_tab_node(&tab_node,N,K,depth,topology);
completed = 1; /*flag this addition*/
N += K; /*increase the number of nodes accordingly*/
duration = TOC;
if(verbose_level >= INFO)
fprintf(stderr,"Completing matrix duration= %fs\n ", duration);
} /*display_tab(tab,N);*/
M = N/arity;
if(verbose_level >= INFO)
printf("Depth=%d\tnb_nodes=%d\tnb_groups=%d\tsize of groups(arity)=%d\n",depth,N,M,arity);
TIC;
/*create the new nodes*/
new_tab_node = (tree_t*)MALLOC(sizeof(tree_t)*M);
/*intitialize each node*/
for( i = 0 ; i < M ; i++ ){
tree_t **list_child = NULL;
list_child = (tree_t**)CALLOC(arity,sizeof(tree_t*));
set_node(&new_tab_node[i],list_child,arity,NULL,i,0,tab_node,depth);
}
duration = TOC;
if(verbose_level >= INFO)
printf("New nodes creation= %fs\n ", duration);
/*Core of the algorithm: perfrom the grouping*/
if(comm_speed)
speed = comm_speed[depth];
else
speed = -1;
group_nodes(aff_mat, tab_node, new_tab_node, arity, M, obj_weight, speed);
TIC;
/*based on that grouping aggregate the communication matrix*/
new_aff_mat = aggregate_aff_mat(new_tab_node,aff_mat,M);
duration = TOC;
if(verbose_level >= INFO)
printf("Aggregate_com_mat= %fs\n", duration);
TIC;
/*based on that grouping aggregate the object weight matrix*/
new_obj_weight = aggregate_obj_weight(new_tab_node,obj_weight,M);
duration = TOC;
if(verbose_level >= INFO)
printf("Aggregate obj_weight= %fs\n ", duration);
/* set ID of virtual nodes to -1*/
for( i = N-K ; i < N ; i++ )
tab_node[i].id = -1;
/*
for(i=0;i<N;i++)
display_node(&tab_node[i]);
display_tab(new_com_mat,M);
*/
/* decrease depth and compute arity of the above level*/
depth--;
if(depth > 0)
arity = topology->arity[depth-1];
else
arity = 1;
/* assume all objects have the same arity*/
res = build_level_topology(new_tab_node, new_aff_mat, arity, depth,topology, new_obj_weight, comm_speed);
set_deb_tab_child(res,tab_node,depth);
/* if we have extended the matrix with zero, free the data here as they are local to this recursive step only*/
if(completed){
FREE_tab_double(aff_mat->mat,aff_mat->order);
FREE(aff_mat->sum_row);
FREE(aff_mat);
FREE(obj_weight);
}
FREE_tab_double(new_aff_mat->mat,new_aff_mat->order);
FREE(new_aff_mat->sum_row);
FREE(new_aff_mat);
FREE(new_obj_weight);
return res;
}
/*
  Default communication speed associated with a depth of the tree.
  depth: level, must be in [0, 10] (the size of the table below)
  Returns 1.0/tab[depth], i.e. 1/1024 at depth 0 up to 1 at depth 10.
  A depth outside the table is a programming error: it is caught by an
  assert instead of silently reading outside the array.
*/
double speed(int depth)
{
  /*
    Other sets of values that have been tried:
    Bertha values
    double tab[5]={21,9,4.5,2.5,0.001};
    double tab[5]={1,1,1,1,1};
    double tab[6]={100000,10000,1000,500,100,10};
    Other formulas that have been tried:
    return 10*log(depth+2);
    return (depth+1);
    return (long int)pow(100,depth);
  */
  static const double tab[] = {1024,512,256,128,64,32,16,8,4,2,1};

  /* guard the table lookup: the bound is derived from the table itself */
  assert(depth >= 0 && depth < (int)(sizeof(tab)/sizeof(tab[0])));

  return 1.0/tab[depth];
}
/* Check the leaf numbering of the topology.
   The numbers are read from the last level of topology->node_id.
   Each number must be either -1 (the leaf cannot be used for the mapping)
   or lie between 0 and n-1, n being the number of processing units.
   topology:    topology whose leaves are inspected
   constraints: out, receives a newly allocated array holding, in increasing
                order, the positions of the usable leaves; set to NULL when
                the function returns 0
   Returns the number of usable leaves, or 0 when there is none or when the
   numbering is invalid.
*/
int check_constraints(tm_topology_t *topology, int **constraints)
{
  int nb_units = nb_processing_units(topology);
  int *leaf_ids = NULL;   /* numbers of the leaves, -1 when the leaf is forbidden */
  int *used = NULL;       /* used[p] != 0 when the leaf at position p is usable */
  int *sorted = NULL;     /* result: positions of the usable leaves */
  int nb_used = 0, filled = 0, pos;

  leaf_ids = topology->node_id[topology->nb_levels-1];
  used = (int *)CALLOC(nb_units,sizeof(int));

  for( pos = 0 ; pos < nb_units ; pos++ ){
    int id = leaf_ids[pos];

    if( -1 == id )
      continue;

    if( (id < 0) || (id >= nb_units) ){
      if(verbose_level >= ERROR)
        fprintf(stderr, "*** Error: Core numbering not between 0 and %d: tab_node[%d]=%d\n", nb_units , pos, id);
      *constraints = NULL;
      FREE(used);
      return 0;
    }

    /* The numbering is assumed to be logical from 0 to n: a number
       different from -1 means that the core at this position is used,
       hence the position (not the number) is recorded */
    used[pos]++;
    nb_used++;
  }

  if( 0 == nb_used ){
    FREE(used);
    *constraints = NULL;
    return 0;
  }

  sorted = (int*) MALLOC(sizeof(int)*nb_used);

  /* counting-sort style scan: positions come out in increasing order, in linear time */
  for( pos = 0 ; pos < nb_units ; pos++ )
    if( used[pos] != 0 )
      sorted[filled++] = pos;

  /* Consistency check between the two passes.
     NOTE(review): each used[pos] is incremented at most once above, so
     filled always equals nb_used here -- confirm whether a real duplicate
     detection on the numbers was intended */
  if( filled != nb_used ){
    if(verbose_level >= ERROR)
      fprintf(stderr,"*** Error: Duplicate numbering: j=%d, nb_constraints= %d\n",filled, nb_used);
    FREE(sorted);
    FREE(used);
    *constraints = NULL;
    return 0;
  }

  /* release the scratch array and hand the result over to the caller */
  FREE(used);
  *constraints = sorted;
  return nb_used;
}
/*
  Wrap a square matrix into an affinity matrix object.
  mat:   matrix of 'order' rows and 'order' columns; the pointer is passed
         as is to new_affinity_mat
  order: order of the matrix
  The sum of each row is computed into a newly allocated array which is
  passed to new_affinity_mat together with mat and order.
*/
affinity_mat_t * build_affinity_mat(double **mat, int order){
  double *row_totals = (double*) CALLOC (order, sizeof(double));
  int r, c;

  for( r = 0 ; r < order ; r++ ){
    double *row = mat[r];
    /* accumulate directly in the zero-initialised slot of this row */
    for( c = 0 ; c < order ; c++ )
      row_totals[r] += row[c];
  }

  return new_affinity_mat(mat,row_totals,order);
}
/*
  Build the mapping tree with the bottom-up grouping algorithm.
  topology:   description of the hardware topology
  com_mat:    communication matrix of order N
  N:          number of leaves created (one per row of com_mat)
  obj_weight: forwarded unchanged to build_level_topology
  comm_speed: forwarded unchanged to build_level_topology
  Returns the tree produced by build_level_topology, with its constraint
  field set to 0.
*/
tree_t *bottom_up_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int N, double *obj_weight, double *comm_speed)
{
  tree_t *leaves = NULL, *root = NULL;
  affinity_mat_t *affinity = NULL;
  int nb_levels, k;

  /* one leaf per row of the matrix, identified by its index */
  leaves = (tree_t*)MALLOC(sizeof(tree_t)*N);
  nb_levels = topology->nb_levels;
  k = 0;
  while( k < N ){
    set_node(&leaves[k],NULL,0,NULL,k,0,NULL,nb_levels);
    k++;
  }

  affinity = build_affinity_mat(com_mat,N);

  if(verbose_level >= INFO)
    printf("nb_levels=%d\n",nb_levels);

  /* all the objects are assumed to have the same arity */
  root = build_level_topology(leaves, affinity, topology->arity[nb_levels-2], nb_levels-1, topology, obj_weight, comm_speed);

  if(verbose_level >= INFO)
    printf("Build (top down) tree done!\n");

  /* tell the system it is not a constraint tree, this is useful for freeing pointers */
  root->constraint = 0;

  /* only the wrapper and its row sums are released: the matrix itself is com_mat, given by the caller */
  FREE(affinity->sum_row);
  FREE(affinity);

  return root;
}
/*
  Entry point: build the mapping tree for a communication matrix on a topology.
  topology:   description of the hardware topology
  com_mat:    communication matrix of order N
  N:          order of com_mat (number of processes)
  obj_weight: forwarded to the selected tree builder
  com_speed:  forwarded to the selected tree builder
  The number of usable leaves is obtained from check_constraints. When it
  equals the number of processing units, the bottom-up algorithm is used,
  otherwise the k-partitioning one is called with the list of constraints.
  The process exits with -1 when N is larger than the number of usable leaves.
  Side effect: refreshes the file-level verbose_level.
*/
tree_t * build_tree_from_topology(tm_topology_t *topology, double **com_mat, int N, double *obj_weight, double *com_speed)
{
  int *constraints = NULL;
  int nb_constraints;
  tree_t *tree = NULL;

  verbose_level = get_verbose_level();
  nb_constraints = check_constraints (topology, &constraints);

  if(verbose_level>=INFO)
    printf("nb_constraints = %d, N= %d; nb_processing units = %d\n",nb_constraints, N, nb_processing_units(topology));

  /* not enough usable leaves to host every process */
  if( nb_constraints < N ){
    if(verbose_level >= CRITICAL){
      printf("Error : More processes (%d) than number of constraints (%d)!\n",N ,nb_constraints);
    }
    exit(-1);
  }

  if(verbose_level >= INFO){
    printf("Com matrix size: %d\n",N);
    printf("nb_constraints: %d\n",nb_constraints);
  }

  /* every processing unit is usable: the constraint list is dropped */
  if( nb_processing_units(topology) == nb_constraints ){
    FREE(constraints);
    nb_constraints = 0;
  }

  if( 0 == nb_constraints ){
    if(verbose_level >= INFO){
      printf("Partitionning without constraints\n");
    }
    return bottom_up_build_tree_from_topology(topology, com_mat, N, obj_weight, com_speed);
  }

  if(verbose_level >= INFO){
    printf("Partitionning with constraints\n");
  }
  tree = kpartition_build_tree_from_topology(topology, com_mat, N, constraints, nb_constraints, obj_weight, com_speed);
  FREE(constraints);
  return tree;
}