
Merge pull request #725 from bosilca/treematch

Add a new topo module: Treematch
Jeff Squyres 2015-07-31 15:17:54 -04:00
Parents: 8649a9f6ef e239de581b
Commit: 047eccef8d
34 changed files, 8400 additions and 1 deletion

README (5 changed lines)

@@ -1,7 +1,7 @@
Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
University Research and Technology
Corporation. All rights reserved.
Copyright (c) 2004-2007 The University of Tennessee and The University
Copyright (c) 2004-2015 The University of Tennessee and The University
of Tennessee Research Foundation. All rights
reserved.
Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
@@ -436,6 +436,9 @@ General Run-Time Support Notes
MPI Functionality and Features
------------------------------
- Rank reordering support is available using the TreeMatch library. It is activated
for the graph and dist_graph topologies.
- All MPI-3 functionality is supported.
- When using MPI deprecated functions, some compilers will emit
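The README addition above mentions that rank reordering is activated for the graph and dist_graph topologies. As an illustration only (not part of this commit), here is a minimal sketch of an application call that exercises the dist_graph path handled by this new component; the ring neighborhood and weights are hypothetical:

/* Hypothetical sketch (not from this commit): create a distributed graph
 * topology with reorder=1 so that the selected topo component -- treematch
 * here -- may remap ranks according to the declared communication pattern. */
#include <mpi.h>

int main(int argc, char *argv[])
{
    MPI_Comm newcomm;
    int rank, size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Simple ring: each rank exchanges data with its two neighbors. */
    int neighbors[2] = { (rank + size - 1) % size, (rank + 1) % size };
    int weights[2]   = { 1, 1 };

    MPI_Dist_graph_create_adjacent(MPI_COMM_WORLD,
                                   2, neighbors, weights,  /* sources */
                                   2, neighbors, weights,  /* destinations */
                                   MPI_INFO_NULL,
                                   1 /* reorder */, &newcomm);

    /* ... communicate on newcomm, where ranks may have been permuted ... */
    MPI_Comm_free(&newcomm);
    MPI_Finalize();
    return 0;
}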

ompi/mca/topo/treematch/Makefile.am (new file, 62 lines)

@@ -0,0 +1,62 @@
#
# Copyright (c) 2011-2015 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2011-2015 INRIA. All rights reserved.
# Copyright (c) 2011-2015 Université Bordeaux 1
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
if topo_treematch_local
extra_treematch_files = treematch/tm_bucket.h \
treematch/tm_hwloc.h treematch/tm_mapping.h \
treematch/tm_timings.h treematch/tm_tree.h \
treematch/tm_kpartitioning.h treematch/uthash.h\
treematch/IntConstantInitializedVector.h \
treematch/tm_mt.h \
treematch/tm_thread_pool.h treematch/tm_verbose.h \
treematch/tm_malloc.h \
treematch/IntConstantInitializedVector.c \
treematch/tm_mt.c \
treematch/tm_thread_pool.c treematch/tm_verbose.c \
treematch/tm_malloc.c \
treematch/tm_mapping.c treematch/tm_timings.c \
treematch/tm_bucket.c treematch/tm_tree.c \
treematch/tm_hwloc.c treematch/tm_kpartitioning.c
endif
sources = \
topo_treematch.h \
topo_treematch_module.c \
topo_treematch_component.c \
topo_treematch_dist_graph_create.c $(extra_treematch_files)
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_ompi_topo_treematch_DSO
lib =
lib_sources =
component = mca_topo_treematch.la
component_sources = $(sources)
else
lib = libmca_topo_treematch.la
lib_sources = $(sources)
component =
component_sources =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_topo_treematch_la_SOURCES = $(component_sources)
mca_topo_treematch_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(lib)
libmca_topo_treematch_la_SOURCES = $(lib_sources)
libmca_topo_treematch_la_LDFLAGS = -module -avoid-version

ompi/mca/topo/treematch/configure.m4 (new file, 87 lines)

@@ -0,0 +1,87 @@
# -*- shell-script -*-
#
# Copyright (c) 2011-2015 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2011-2015 INRIA. All rights reserved.
# Copyright (c) 2011-2015 Universite Bordeaux 1
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ompi_topo_treematch_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# -------------------------------------------
AC_DEFUN([MCA_ompi_topo_treematch_CONFIG], [
AC_REQUIRE([MCA_opal_hwloc_CONFIG_REQUIRE])
AC_ARG_WITH([treematch],
[AC_HELP_STRING([--with-treematch(=DIR)],
[Build TreeMatch topology support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])],
[],
[with_treematch=yes])
AC_ARG_WITH([treematch-include],
[AC_HELP_STRING([--with-treematch-include(=DIR)],
["Search for TreeMatch headers in DIR"])])
AC_ARG_WITH([treematch-libdir],
[AC_HELP_STRING([--with-treematch-libdir(=DIR)],
["Search for TreeMatch libraries in DIR"])])
treematch_files_local="no"
ompi_check_treematch_dir=$srcdir
ompi_check_treematch_libdir=""
ompi_check_treematch_happy="no"
AS_IF([test "x$with_treematch" != xno],
[AC_MSG_CHECKING([TreeMatch headers])
AS_IF([test "x$with_treematch_include" = x],
[AS_IF([test "x$with_treematch" = xyes],
[treematch_files_local="yes"
with_treematch_include=$OMPI_TOP_SRCDIR/ompi/mca/topo/treematch/treematch],
[with_treematch_include=$with_treematch/include])])
AS_IF([test -f $with_treematch_include/tm_tree.h],
[AS_IF([test "x$with_treematch" = xyes],
[AC_MSG_RESULT([in the source])],
[AC_MSG_RESULT([user provided])])
opal_check_treematch_dir=$with_treematch_include
ompi_check_treematch_happy="yes"],
[AC_MSG_ERROR([missing tm_tree.h (${with_treematch}:${with_treematch_include})])])])
AS_IF([test "$ompi_check_treematch_happy" = "yes"],
[AC_MSG_CHECKING([TreeMatch library])
OPAL_CHECK_WITHDIR([treematch], [$with_treematch_include], [tm_tree.h])
AS_IF([test "x$with_treematch_libdir" = x],
[AS_IF([test "x$with_treematch" != xyes],
[with_treematch_libdir=$with_treematch/lib],
[with_treematch_libdir=$OMPI_TOP_SRCDIR/ompi/mca/topo/treematch/treematch])])
AS_IF([test "x$treematch_files_local" = xno],
[OPAL_CHECK_WITHDIR([treematch-libdir], [$with_treematch_libdir], [libtreematch.*])
AS_IF([test "x$with_treematch" != xno -a "x$with_treematch" != xyes],
[AS_IF([test ! -z "$with_treematch" -a "$with_treematch" != "yes"],
[ompi_check_treematch_dir="$with_treematch"])
AS_IF([test ! -z "$with_treematch_libdir" -a "$with_treematch_libdir" != "yes"],
[ompi_check_treematch_libdir="$with_treematch_libdir"])
OPAL_CHECK_PACKAGE([topo_treematch],
[tm_tree.h],
[treematch],
[build_tree],
[],
[$with_treematch_include],
[$with_treematch_libdir],
[ompi_check_treematch_happy="yes"],
[ompi_check_treematch_happy="no"])],
[ompi_check_treematch_happy="no"])])])
AS_IF([test "$ompi_check_treematch_happy" = "yes"],
[$1],
[AS_IF([test ! -z "$with_treematch" -a "$with_treematch" != "no"],
[AC_MSG_ERROR([TreeMatch support requested but not found. Aborting])])
$2])
AC_CONFIG_FILES([ompi/mca/topo/treematch/Makefile])
AM_CONDITIONAL(topo_treematch_local,
[test "x$treematch_files_local" = "xyes"])
])
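For reference (paths are hypothetical), this macro supports three usage patterns: a bare --with-treematch (also the default) builds against the TreeMatch sources bundled under ompi/mca/topo/treematch/treematch; --with-treematch=DIR looks for tm_tree.h in DIR/include and the TreeMatch library in DIR/lib; and --with-treematch-include=DIR together with --with-treematch-libdir=DIR point at the headers and the library separately.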

ompi/mca/topo/treematch/topo_treematch.h (new file, 80 lines)

@@ -0,0 +1,80 @@
/*
* Copyright (c) 2011-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2015 INRIA. All rights reserved.
* Copyright (c) 2011-2015 Bordeaux Polytechnic Institute
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_TOPO_TREEMATCH_H
#define MCA_TOPO_TREEMATCH_H
#include "ompi_config.h"
#include "ompi/mca/topo/topo.h"
/*
* ******************************************************************
* ******** functions which provide MCA interface compliance *******
* ******************************************************************
* These functions are:
* - mca_topo_treematch_module_open
* - mca_topo_treematch_module_close
* - mca_topo_treematch_module_query
* - mca_topo_treematch_module_finalize
* These functions are always found on the mca_topo_treematch_module
* structure. They are the "meta" functions to ensure smooth operation.
* ******************************************************************
*/
BEGIN_C_DECLS
/*
* Public component instance
*/
typedef struct mca_topo_treematch_component_2_2_0_t {
mca_topo_base_component_2_2_0_t super;
int reorder_mode;
} mca_topo_treematch_component_2_2_0_t;
OMPI_MODULE_DECLSPEC extern mca_topo_treematch_component_2_2_0_t
mca_topo_treematch_component;
/*
* A unique module class for the module so that we can both cache
* module-specific information on the module and have a
* module-specific constructor and destructor.
*/
typedef struct {
mca_topo_base_module_t super;
/* Modules can add their own information here */
} mca_topo_treematch_module_t;
OBJ_CLASS_DECLARATION(mca_topo_treematch_module_t);
/*
* Module functions
*/
int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* module,
ompi_communicator_t *comm_old,
int n, int nodes[],
int degrees[], int targets[],
int weights[],
struct ompi_info_t *info, int reorder,
ompi_communicator_t **newcomm);
/*
* ******************************************************************
* ************ functions implemented in this module end ************
* ******************************************************************
*/
END_C_DECLS
#endif /* MCA_TOPO_TREEMATCH_H */

ompi/mca/topo/treematch/topo_treematch_component.c (new file, 100 lines)

@@ -0,0 +1,100 @@
/*
* Copyright (c) 2011-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2015 INRIA. All rights reserved.
* Copyright (c) 2011-2015 Université Bordeaux 1
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/topo/treematch/topo_treematch.h"
/*
* Public string showing the topo treematch module version number
*/
const char *mca_topo_treematch_component_version_string =
"Open MPI treematch topology MCA component version" OMPI_VERSION;
/*
* Local functions
*/
static int init_query(bool enable_progress_threads, bool enable_mpi_threads);
static struct mca_topo_base_module_t *
comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type);
static int mca_topo_treematch_component_register(void);
/*
* Public component structure
*/
mca_topo_treematch_component_2_2_0_t mca_topo_treematch_component =
{
{
{
MCA_TOPO_BASE_VERSION_2_2_0,
"treematch",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
NULL, /* component open */
NULL, /* component close */
NULL, /* component query */
mca_topo_treematch_component_register, /* component register */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
init_query,
comm_query
},
0 /* reorder: by default centralized */
};
static int init_query(bool enable_progress_threads, bool enable_mpi_threads)
{
if(NULL == opal_hwloc_topology) {
return OPAL_ERR_NOT_SUPPORTED;
}
return OMPI_SUCCESS;
}
static struct mca_topo_base_module_t *
comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type)
{
mca_topo_treematch_module_t *treematch;
if( OMPI_COMM_DIST_GRAPH != type ) {
return NULL;
}
treematch = OBJ_NEW(mca_topo_treematch_module_t);
if (NULL == treematch) {
return NULL;
}
treematch->super.topo.dist_graph.dist_graph_create = mca_topo_treematch_dist_graph_create;
/* This component has a very low priority -- it's treematch, after all! */
*priority = 42;
treematch->super.type = OMPI_COMM_DIST_GRAPH;
return &(treematch->super);
}
static int mca_topo_treematch_component_register(void)
{
(void)mca_base_component_var_register(&mca_topo_treematch_component.super.topoc_version,
"reorder_mode", "If set the reordering will be done in a partially distributed way (default=0). If partially-distributed only local knowledge will be used, possibly leading to less accurate reordering.", MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_2,
MCA_BASE_VAR_SCOPE_READONLY, &mca_topo_treematch_component.reorder_mode);
return OMPI_SUCCESS;
}
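Usage note (assuming Open MPI's usual <framework>_<component>_<variable> naming for registered MCA parameters): the variable registered above should be reachable as topo_treematch_reorder_mode, e.g. mpirun --mca topo_treematch_reorder_mode 1 ./app to request the partially distributed reordering instead of the default centralized one.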

ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c (new file, 907 lines)

@@ -0,0 +1,907 @@
/*
* Copyright (c) 2011-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2015 INRIA. All rights reserved.
* Copyright (c) 2012-2015 Bordeaux Polytechnic Institute
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/constants.h"
#if defined(OPAL_HAVE_HWLOC)
#include "opal/mca/hwloc/hwloc.h"
#endif /* defined(OPAL_HAVE_HWLOC) */
#include "ompi/mca/topo/treematch/topo_treematch.h"
#include "ompi/mca/topo/treematch/treematch/tm_mapping.h"
#include "ompi/mca/topo/base/base.h"
#include "ompi/communicator/communicator.h"
#include "ompi/info/info.h"
#include "ompi/mca/pml/pml.h"
#include "opal/mca/dstore/dstore.h"
#define ERR_EXIT(ERR) \
do { free(local_pattern); \
return (ERR); } \
while(0);
#define FALLBACK() \
do { free(nodes_roots); \
free(local_procs); \
hwloc_bitmap_free(set); \
goto fallback; } \
while(0);
#define MY_STRING_SIZE 64
/*#define __DEBUG__ 1 */
static int check_oversubscribing(int rank,
int num_nodes,
int num_objs_in_node,
int num_procs_in_node,
int *nodes_roots,
int *local_procs,
ompi_communicator_t *comm_old)
{
int oversubscribed = 0;
int local_oversub = 0;
int err;
if (rank == local_procs[0])
if(num_objs_in_node < num_procs_in_node)
local_oversub = 1;
if (rank == 0) {
MPI_Request *reqs = (MPI_Request *)calloc(num_nodes-1, sizeof(MPI_Request));
int *oversub = (int *)calloc(num_nodes, sizeof(int));
int i;
oversub[0] = local_oversub;
for(i = 1; i < num_nodes; i++)
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(&oversub[i], 1, MPI_INT,
nodes_roots[i], 111, comm_old, &reqs[i-1]))))
return err;
if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes-1,
reqs, MPI_STATUSES_IGNORE)))
return err;
for(i = 0; i < num_nodes; i++)
oversubscribed += oversub[i];
free(oversub);
free(reqs);
} else {
if (rank == local_procs[0])
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(&local_oversub, 1, MPI_INT, 0,
111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
return err;
}
if (OMPI_SUCCESS != (err = comm_old->c_coll.coll_bcast(&oversubscribed, 1,
MPI_INT, 0, comm_old,
comm_old->c_coll.coll_bcast_module)))
return err;
return oversubscribed;
}
int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
ompi_communicator_t *comm_old,
int n, int nodes[],
int degrees[], int targets[],
int weights[],
struct ompi_info_t *info, int reorder,
ompi_communicator_t **newcomm)
{
int err;
if (OMPI_SUCCESS != (err = mca_topo_base_dist_graph_distribute(topo_module, comm_old,n,nodes,
degrees,targets,weights,
&(topo_module->mtc.dist_graph))))
return err;
if(!reorder) { /* No reorder. Create a new communicator, then */
/* jump out to attach the dist_graph and return */
fallback:
if( OMPI_SUCCESS == (err = ompi_comm_create(comm_old,
comm_old->c_local_group,
newcomm))){
/* Attach the dist_graph to the newly created communicator */
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
(*newcomm)->c_topo = topo_module;
(*newcomm)->c_topo->reorder = reorder;
}
return err;
} else { /* reorder == yes */
mca_topo_base_comm_dist_graph_2_2_0_t *topo = NULL;
ompi_proc_t *proc = NULL;
MPI_Request *reqs = NULL;
hwloc_cpuset_t set;
hwloc_obj_t object,root_obj;
hwloc_obj_t *tracker = NULL;
double *local_pattern = NULL;
int *vpids, *colors = NULL;
int *local_procs = NULL;
int *nodes_roots = NULL;
int *localrank_to_objnum = NULL;
int depth, effective_depth, obj_rank = -1;
int num_objs_in_node = 0;
int num_pus_in_node = 0;
int numlevels = 0;
int num_nodes = 0;
int num_procs_in_node = 0;
int rank, size;
int hwloc_err;
int oversubscribing_objs = 0;
int i, j, idx;
uint32_t val, *pval;
topo = topo_module->mtc.dist_graph;
rank = ompi_comm_rank(comm_old);
size = ompi_comm_size(comm_old);
#ifdef __DEBUG__
fprintf(stdout,"Process rank is : %i\n",rank);
#endif
/* Determine the number of local procs */
/* and the number of ext procs */
for(i = 0 ; i < size ; i++){
proc = ompi_group_peer_lookup(comm_old->c_local_group, i);
if (( i == rank ) ||
(OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)))
num_procs_in_node++;
}
/* Get the ranks of the local procs in comm_old */
local_procs = (int *)malloc(num_procs_in_node * sizeof(int));
for(i = idx = 0 ; i < size ; i++){
proc = ompi_group_peer_lookup(comm_old->c_local_group, i);
if (( i == rank ) ||
(OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)))
local_procs[idx++] = i;
}
vpids = (int *)malloc(size * sizeof(int));
colors = (int *)malloc(size * sizeof(int));
for(i = 0; i < size ; i++) {
proc = ompi_group_peer_lookup(comm_old->c_local_group, i);
pval = &val;
OPAL_MODEX_RECV_VALUE(err, OPAL_DSTORE_NODEID, &(proc->super), &pval, OPAL_UINT32);
if( OPAL_SUCCESS != err ) {
opal_output(0, "Unable to extract peer %s nodeid from the modex.\n",
OMPI_NAME_PRINT(&(proc->super)));
vpids[i] = colors[i] = -1;
continue;
}
vpids[i] = colors[i] = (int)val;
}
#ifdef __DEBUG__
fprintf(stdout,"Process rank (2) is : %i \n",rank);
if ( 0 == rank ){
fprintf(stdout,"local_procs : ");
for(i = 0; i < num_procs_in_node ; i++)
fprintf(stdout," [%i:%i] ",i,local_procs[i]);
fprintf(stdout,"\n");
fprintf(stdout,"Vpids : ");
for(i = 0; i < size ; i++)
fprintf(stdout," [%i:%i] ",i,vpids[i]);
fprintf(stdout,"\n");
}
#endif
/* clean-up dupes in the array */
for(i = 0; i < size ; i++)
if ( -1 == vpids[i] )
continue;
else
for(j = i+1 ; j < size ; j++)
if( vpids[j] != -1 )
if( vpids[i] == vpids[j] )
vpids[j] = -1;
/* compute number of nodes */
for(i = 0; i < size ; i++)
if( vpids[i] != -1 )
num_nodes++;
/* compute local roots ranks in comm_old */
/* Only the global root needs to do this */
if(0 == rank) {
nodes_roots = (int *)calloc(num_nodes,sizeof(int));
for(i = idx = 0; i < size ; i++)
if( vpids[i] != -1 )
nodes_roots[idx++] = i;
#ifdef __DEBUG__
fprintf(stdout,"num nodes is %i\n",num_nodes);
fprintf(stdout,"Root nodes are :\n");
for(i = 0; i < num_nodes ; i++)
fprintf(stdout," [root %i : %i] ",i,nodes_roots[i]);
fprintf(stdout,"\n");
#endif
}
free(vpids);
/* Then, we need to know if the processes are bound. */
/* We assume that all processes are in the same state: */
/* either all bound or none bound. */
hwloc_err = hwloc_topology_init(&opal_hwloc_topology);
if (-1 == hwloc_err) goto fallback;
hwloc_err = hwloc_topology_load(opal_hwloc_topology);
if (-1 == hwloc_err) goto fallback;
root_obj = hwloc_get_root_obj(opal_hwloc_topology);
if (NULL == root_obj) goto fallback;
/* if cpubind returns an error, it will be full anyway */
set = hwloc_bitmap_alloc_full();
hwloc_get_cpubind(opal_hwloc_topology,set,0);
num_pus_in_node = hwloc_get_nbobjs_by_type(opal_hwloc_topology, HWLOC_OBJ_PU);
if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){
/* processes are not bound on the machine */
#ifdef __DEBUG__
if (0 == rank)
fprintf(stdout,">>>>>>>>>>>>> Process Not bound <<<<<<<<<<<<<<<\n");
#endif /* __DEBUG__ */
/* we try to bind to cores or above objects if enough are present */
/* Not sure that cores are present in ALL nodes */
depth = hwloc_get_type_or_above_depth(opal_hwloc_topology,HWLOC_OBJ_CORE);
num_objs_in_node = hwloc_get_nbobjs_by_depth(opal_hwloc_topology,depth);
/* Check for oversubscribing */
oversubscribing_objs = check_oversubscribing(rank,num_nodes,
num_objs_in_node,num_procs_in_node,
nodes_roots,local_procs,comm_old);
if(oversubscribing_objs) {
#ifdef __DEBUG__
fprintf(stdout,"Oversubscribing OBJ/CORES resources => Trying to use PUs \n");
#endif
int oversubscribed_pus = check_oversubscribing(rank,num_nodes,
num_pus_in_node,num_procs_in_node,
nodes_roots,local_procs,comm_old);
if (oversubscribed_pus){
#ifdef __DEBUG__
fprintf(stdout,"Oversubscribing PUs resources => Rank Reordering Impossible \n");
#endif
FALLBACK();
} else {
obj_rank = ompi_process_info.my_local_rank%num_pus_in_node;
effective_depth = hwloc_topology_get_depth(opal_hwloc_topology) - 1;
num_objs_in_node = num_pus_in_node;
#ifdef __DEBUG__
fprintf(stdout,"Process not bound : binding on PU#%i \n",obj_rank);
#endif
}
} else {
obj_rank = ompi_process_info.my_local_rank%num_objs_in_node;
effective_depth = depth;
object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,obj_rank);
if( NULL == object) FALLBACK();
hwloc_bitmap_copy(set,object->cpuset);
hwloc_bitmap_singlify(set); /* we don't want the process to move */
hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0);
if( -1 == hwloc_err) FALLBACK();
#ifdef __DEBUG__
fprintf(stdout,"Process not bound : binding on OBJ#%i \n",obj_rank);
#endif
}
} else { /* the processes are already bound */
object = hwloc_get_obj_covering_cpuset(opal_hwloc_topology,set);
obj_rank = object->logical_index;
effective_depth = object->depth;
num_objs_in_node = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, effective_depth);
/* Check for oversubscribing */
oversubscribing_objs = check_oversubscribing(rank,num_nodes,
num_objs_in_node,num_procs_in_node,
nodes_roots,local_procs,comm_old);
if(oversubscribing_objs) {
#ifdef __DEBUG__
fprintf(stdout,"Oversubscribing OBJ/CORES resources => Rank Reordering Impossible\n");
#endif
FALLBACK();
}
#ifdef __DEBUG__
fprintf(stdout,"Process %i bound on OBJ #%i \n",rank,obj_rank);
fprintf(stdout,"=====> Num obj in node : %i | num pus in node : %i\n",num_objs_in_node,num_pus_in_node);
#endif
}
reqs = (MPI_Request *)calloc(num_procs_in_node-1,sizeof(MPI_Request));
if( rank == local_procs[0] ) {
/* we need to find the right elements of the hierarchy */
/* and remove the unneeded elements */
/* Only local masters need to do this */
int array_size = effective_depth + 1;
int *myhierarchy = (int *)calloc(array_size,sizeof(int));
for (i = 0; i < array_size ; i++)
myhierarchy[i] = hwloc_get_nbobjs_by_depth(opal_hwloc_topology,i);
numlevels = 1;
for (i = 1; i < array_size; i++)
if ((myhierarchy[i] != 0) && (myhierarchy[i] != myhierarchy[i-1]))
numlevels++;
tracker = (hwloc_obj_t *)calloc(numlevels,sizeof(hwloc_obj_t));
idx = 0;
tracker[idx++] = root_obj;
i = 1;
while (i < array_size){
if ( myhierarchy[i] != myhierarchy[i-1]) {
j = i;
while(myhierarchy[j] == myhierarchy[i])
if (++j > effective_depth)
break;
tracker[idx++] = hwloc_get_obj_by_depth(opal_hwloc_topology,j-1,0);
i = j;
} else i++;
}
free(myhierarchy);
#ifdef __DEBUG__
fprintf(stdout,">>>>>>>>>>>>>>>>>>>>> Effective depth is : %i (total depth %i)| num_levels %i\n",
effective_depth,hwloc_topology_get_depth(opal_hwloc_topology),numlevels);
for(i = 0 ; i < numlevels ; i++)
fprintf(stdout,"tracker[%i] : arity %i | depth %i\n",i,tracker[i]->arity,tracker[i]->depth);
#endif
/* get the obj number */
localrank_to_objnum = (int *)calloc(num_procs_in_node,sizeof(int));
localrank_to_objnum[0] = obj_rank;
for(i = 1; i < num_procs_in_node; i++) {
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(&localrank_to_objnum[i],1,MPI_INT,
local_procs[i],111, comm_old,&reqs[i-1]))))
return err;
}
if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_procs_in_node-1,
reqs,MPI_STATUSES_IGNORE)))
return err;
} else {
/* sending my core number to my local master on the node */
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(&obj_rank, 1, MPI_INT, local_procs[0],
111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
return err;
}
free(reqs);
/* Centralized Reordering */
if (0 == mca_topo_treematch_component.reorder_mode) {
int *k = NULL;
int *obj_mapping = NULL;
int newrank = -1;
int num_objs_total = 0;
/* Gather comm pattern
* If weights have been provided, take them into account. Otherwise rely
* solely on HWLOC information.
*/
if(0 == rank) {
fprintf(stderr,"========== Centralized Reordering ========= \n");
local_pattern = (double *)calloc(size*size,sizeof(double));
if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
local_pattern[topo->in[i]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
local_pattern[topo->out[i]] += topo->outw[i];
if (OMPI_SUCCESS != (err = comm_old->c_coll.coll_gather(MPI_IN_PLACE, size, MPI_DOUBLE,
local_pattern, size, MPI_DOUBLE,
0, comm_old,
comm_old->c_coll.coll_gather_module)))
return err;
}
} else {
local_pattern = (double *)calloc(size,sizeof(double));
if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
local_pattern[topo->in[i]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
local_pattern[topo->out[i]] += topo->outw[i];
if (OMPI_SUCCESS != (err = comm_old->c_coll.coll_gather(local_pattern, size, MPI_DOUBLE,
NULL,0,0,
0, comm_old,
comm_old->c_coll.coll_gather_module)))
return err;
}
}
if( rank == local_procs[0]) {
tm_topology_t *tm_topology = NULL;
tm_topology_t *tm_opt_topology = NULL;
int *obj_to_rank_in_comm = NULL;
int *hierarchies = NULL;
int hierarchy[MAX_LEVELS+1];
int min;
/* create a table that derives the rank in comm_old from the object number */
obj_to_rank_in_comm = (int *)malloc(num_objs_in_node*sizeof(int));
for(i = 0 ; i < num_objs_in_node ; i++)
obj_to_rank_in_comm[i] = -1;
for(i = 0 ; i < num_objs_in_node ; i++) {
object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,i);
for( j = 0; j < num_procs_in_node ; j++ )
if(localrank_to_objnum[j] == (int)(object->logical_index))
break;
if(j == num_procs_in_node)
obj_to_rank_in_comm[i] = -1;
else {
int k;
for(k = 0; k < size ; k++)
if (k == local_procs[j])
break;
obj_to_rank_in_comm[i] = k;
}
}
/* the global master gathers info from local_masters */
if ( 0 == rank ) {
if ( num_nodes > 1 ) {
int *objs_per_node = NULL ;
int *displs = NULL;
objs_per_node = (int *)calloc(num_nodes,sizeof(int));
reqs = (MPI_Request *)calloc(num_nodes-1,sizeof(MPI_Request));
objs_per_node[0] = num_objs_in_node;
for(i = 1; i < num_nodes ; i++)
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(objs_per_node + i, 1, MPI_INT,
nodes_roots[i],111,comm_old,&reqs[i-1]))))
ERR_EXIT(err);
if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes - 1,
reqs,MPI_STATUSES_IGNORE)))
ERR_EXIT(err);
for(i = 0; i < num_nodes; i++)
num_objs_total += objs_per_node[i];
obj_mapping = (int *)calloc(num_objs_total,sizeof(int));
displs = (int *)calloc(num_objs_total,sizeof(int));
displs[0] = 0;
for(i = 1; i < num_nodes ; i++)
displs[i] = displs[i-1] + objs_per_node[i-1];
memset(reqs,0,(num_nodes-1)*sizeof(MPI_Request));
memcpy(obj_mapping,obj_to_rank_in_comm,objs_per_node[0]*sizeof(int));
for(i = 1; i < num_nodes ; i++)
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(obj_mapping + displs[i], objs_per_node[i], MPI_INT,
nodes_roots[i],111,comm_old,&reqs[i-1]))))
ERR_EXIT(err);
if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes - 1,
reqs,MPI_STATUSES_IGNORE)))
ERR_EXIT(err);
free(displs);
free(objs_per_node);
} else {
/* if num_nodes == 1, then it's easy to get the obj mapping */
num_objs_total = num_objs_in_node;
obj_mapping = (int *)calloc(num_objs_total,sizeof(int));
memcpy(obj_mapping,obj_to_rank_in_comm,num_objs_total*sizeof(int));
}
#ifdef __DEBUG__
fprintf(stdout,"Obj mapping : ");
for(i = 0 ; i < num_objs_total ; i++)
fprintf(stdout," [%i:%i] ",i,obj_mapping[i]);
fprintf(stdout,"\n");
#endif
} else {
if ( num_nodes > 1 ) {
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(&num_objs_in_node, 1, MPI_INT,
0, 111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
ERR_EXIT(err);
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(obj_to_rank_in_comm, num_objs_in_node, MPI_INT,
0, 111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
ERR_EXIT(err);
}
}
free(obj_to_rank_in_comm);
for(i = 0 ; i < (MAX_LEVELS+1) ; i++)
hierarchy[i] = -1;
hierarchy[0] = numlevels;
assert(numlevels < MAX_LEVELS);
for(i = 0 ; i < hierarchy[0] ; i++)
hierarchy[i+1] = tracker[i]->arity;
if( 0 == rank ) {
hierarchies = (int *)malloc(num_nodes*(MAX_LEVELS+1)*sizeof(int));
for(i = 0 ; i < num_nodes*(MAX_LEVELS+1) ; i++)
hierarchies[i] = -1;
}
/* gather hierarchies iff more than 1 node! */
if ( num_nodes > 1 ) {
if(rank != 0) {
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(hierarchy,(MAX_LEVELS+1), MPI_INT, 0,
111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
ERR_EXIT(err);
} else {
memset(reqs,0,(num_nodes-1)*sizeof(MPI_Request));
for(i = 1; i < num_nodes ; i++)
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(hierarchies+i*(MAX_LEVELS+1),(MAX_LEVELS+1),MPI_INT,
nodes_roots[i],111,comm_old,&reqs[i-1])))){
free(hierarchies);
ERR_EXIT(err);
}
if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes - 1,
reqs,MPI_STATUSES_IGNORE))) {
free(hierarchies);
ERR_EXIT(err);
}
free(reqs);
}
}
if ( 0 == rank ) {
tree_t *comm_tree = NULL;
double **comm_pattern = NULL;
int *matching = NULL;
memcpy(hierarchies,hierarchy,(MAX_LEVELS+1)*sizeof(int));
#ifdef __DEBUG__
fprintf(stdout,"hierarchies : ");
for(i = 0 ; i < num_nodes*(MAX_LEVELS+1) ; i++)
fprintf(stdout," [%i] ",hierarchies[i]);
fprintf(stdout,"\n");
#endif
tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t));
tm_topology->nb_levels = hierarchies[0];
/* extract min depth */
for(i = 1 ; i < num_nodes ; i++)
if (hierarchies[i*(MAX_LEVELS+1)] < tm_topology->nb_levels)
tm_topology->nb_levels = hierarchies[i*(MAX_LEVELS+1)];
/* Collapse levels in hierarchies that are too deep (i.e. > tm_topology->nb_levels) */
for(i = 0; i < num_nodes ; i++) {
int *base_ptr = hierarchies + i*(MAX_LEVELS+1) ;
int suppl = *base_ptr - tm_topology->nb_levels;
for(j = 1 ; j <= suppl ; j++)
*(base_ptr + tm_topology->nb_levels) *= *(base_ptr + tm_topology->nb_levels + j);
}
if( num_nodes > 1){
/* We aggregate all topos => +1 level!*/
tm_topology->nb_levels += 1;
tm_topology->arity = (int *)calloc(tm_topology->nb_levels,sizeof(int));
tm_topology->arity[0] = num_nodes;
for(i = 0; i < (tm_topology->nb_levels - 1); i++) {
min = *(hierarchies + 1 + i);
for(j = 1; j < num_nodes ; j++)
if( hierarchies[j*(MAX_LEVELS+1) + 1 + i] < min)
min = hierarchies[j*(MAX_LEVELS+1) + 1 + i];
tm_topology->arity[i+1] = min;
}
}else{
tm_topology->arity = (int *)calloc(tm_topology->nb_levels,sizeof(int));
for(i = 0; i < tm_topology->nb_levels; i++)
tm_topology->arity[i] = hierarchies[i+1];
}
free(hierarchies);
/* compute the number of processing elements */
tm_topology->nb_nodes = (int *)calloc(tm_topology->nb_levels,sizeof(int));
tm_topology->nb_nodes[0] = 1;
for(i = 1 ; i < tm_topology->nb_levels; i++)
tm_topology->nb_nodes[i] = tm_topology->nb_nodes[i-1]*tm_topology->arity[i-1];
comm_pattern = (double **)malloc(size*sizeof(double *));
for(i = 0 ; i < size ; i++)
comm_pattern[i] = local_pattern + i*size;
/* matrix needs to be symmetric */
for( i = 0 ; i < size ; i++)
for(j = i ; j < size ; j++) {
comm_pattern[i][j] += comm_pattern[j][i];
comm_pattern[j][i] = comm_pattern[i][j];
}
for( i = 0 ; i < size ; i++)
for(j = 0 ; j < size ; j++)
comm_pattern[i][j] /= 2;
#ifdef __DEBUG__
fprintf(stdout,"==== COMM PATTERN ====\n");
for( i = 0 ; i < size ; i++){
for(j = 0 ; j < size ; j++)
fprintf(stdout," %f ",comm_pattern[i][j]);
fprintf(stdout,"\n");
}
#endif
/* Build process id tab */
tm_topology->node_id = (int **)calloc(tm_topology->nb_levels,sizeof(int*));
for(i = 0; i < tm_topology->nb_levels ; i++) {
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i],sizeof(int));
for (j = 0; j < tm_topology->nb_nodes[i] ; j++)
tm_topology->node_id[i][j] = obj_mapping[j];
}
#ifdef __DEBUG__
for(i = 0; i < tm_topology->nb_levels ; i++) {
fprintf(stdout,"tm topo node_id for level [%i] : ",i);
for(j = 0 ; j < tm_topology->nb_nodes[i] ; j++)
fprintf(stdout," [%i:%i] ",j,obj_mapping[j]);
fprintf(stdout,"\n");
}
display_topology(tm_topology);
#endif
k = (int *)calloc(num_objs_total,sizeof(int));
matching = (int *)calloc(size,sizeof(int));
tm_opt_topology = optimize_topology(tm_topology);
comm_tree = build_tree_from_topology(tm_opt_topology,comm_pattern,size,NULL,NULL);
map_topology_simple(tm_opt_topology,comm_tree,matching,size,k);
#ifdef __DEBUG__
fprintf(stdout,"====> nb levels : %i\n",tm_topology->nb_levels);
fprintf(stdout,"Rank permutation sigma/k : ");
for(i = 0 ; i < num_objs_total ; i++)
fprintf(stdout," [%i:%i] ",i,k[i]);
fprintf(stdout,"\n");
fprintf(stdout,"Matching : ");
for(i = 0 ; i < size ; i++)
fprintf(stdout," [%i:%i] ",i,matching[i]);
fprintf(stdout,"\n");
#endif
free(comm_pattern);
free(comm_tree);
free(matching);
free(obj_mapping);
for(i = 0 ; i < tm_topology->nb_levels ; i++)
free(tm_topology->node_id[i]);
free(tm_topology->node_id);
free(tm_topology->nb_nodes);
free(tm_topology->arity);
free(tm_topology);
FREE_topology(tm_opt_topology);
}
}
/* Todo : Bcast + group creation */
/* scatter the ranks */
if (OMPI_SUCCESS != (err = comm_old->c_coll.coll_scatter(k, 1, MPI_INT,
&newrank, 1, MPI_INT,
0, comm_old,comm_old->c_coll.coll_scatter_module)))
ERR_EXIT(err);
if ( 0 == rank )
free(k);
/* this needs to be optimized but will do for now */
if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old, 0, newrank,newcomm, false)))
ERR_EXIT(err);
/* end of TODO */
/* Attach the dist_graph to the newly created communicator */
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
(*newcomm)->c_topo = topo_module;
(*newcomm)->c_topo->reorder = reorder;
} else { /* partially distributed reordering */
ompi_communicator_t *localcomm = NULL;
int *matching = (int *)calloc(num_procs_in_node,sizeof(int));
int *lrank_to_grank = (int *)calloc(num_procs_in_node,sizeof(int));
int *grank_to_lrank = (int *)calloc(size,sizeof(int));
hwloc_obj_t object;
opal_hwloc_locality_t locality;
char set_as_string[64];
opal_value_t kv;
if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old,colors[rank],ompi_process_info.my_local_rank,&localcomm, false)))
return err;
for(i = 0 ; i < num_procs_in_node ; i++)
lrank_to_grank[i] = -1;
lrank_to_grank[ompi_process_info.my_local_rank] = rank;
for(i = 0 ; i < size ; i++)
grank_to_lrank[i] = -1;
if (OMPI_SUCCESS != (err = localcomm->c_coll.coll_allgather(&rank,1,MPI_INT,
lrank_to_grank,1,MPI_INT,
localcomm,
localcomm->c_coll.coll_allgather_module)))
return err;
for(i = 0 ; i < num_procs_in_node ; i++)
grank_to_lrank[lrank_to_grank[i]] = i;
if (rank == local_procs[0]){
tm_topology_t *tm_topology = NULL;
tm_topology_t *tm_opt_topology = NULL;
tree_t *comm_tree = NULL;
double **comm_pattern = NULL;
#ifdef __DEBUG__
fprintf(stderr,"========== Partially Distributed Reordering ========= \n");
#endif
local_pattern = (double *)calloc(num_procs_in_node*num_procs_in_node,sizeof(double));
for(i = 0 ; i < num_procs_in_node*num_procs_in_node ; i++)
local_pattern[i] = 0.0;
if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
if (grank_to_lrank[topo->in[i]] != -1)
local_pattern[grank_to_lrank[topo->in[i]]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
if (grank_to_lrank[topo->out[i]] != -1)
local_pattern[grank_to_lrank[topo->out[i]]] += topo->outw[i];
if (OMPI_SUCCESS != (err = localcomm->c_coll.coll_gather(MPI_IN_PLACE, num_procs_in_node, MPI_DOUBLE,
local_pattern, num_procs_in_node, MPI_DOUBLE,
0,localcomm,
localcomm->c_coll.coll_gather_module)))
ERR_EXIT(err);
}
comm_pattern = (double **)malloc(num_procs_in_node*sizeof(double *));
for(i = 0 ; i < num_procs_in_node ; i++){
comm_pattern[i] = (double *)calloc(num_procs_in_node,sizeof(double));
memcpy((void *)comm_pattern[i],(void *)(local_pattern + i*num_procs_in_node),num_procs_in_node*sizeof(double));
}
/* Matrix needs to be symmetric */
for( i = 0 ; i < num_procs_in_node ; i++)
for(j = i ; j < num_procs_in_node ; j++){
comm_pattern[i][j] += comm_pattern[j][i];
comm_pattern[j][i] = comm_pattern[i][j];
}
for( i = 0 ; i < num_procs_in_node ; i++)
for(j = 0 ; j < num_procs_in_node ; j++)
comm_pattern[i][j] /= 2;
#ifdef __DEBUG__
fprintf(stdout,"========== COMM PATTERN ============= \n");
for(i = 0 ; i < num_procs_in_node ; i++){
fprintf(stdout," %i : ",i);
for(j = 0; j < num_procs_in_node ; j++)
fprintf(stdout," %f ",comm_pattern[i][j]);
fprintf(stdout,"\n");
}
fprintf(stdout,"======================= \n");
#endif
tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t));
tm_topology->nb_levels = numlevels;
tm_topology->arity = (int *)calloc(tm_topology->nb_levels,sizeof(int));
tm_topology->nb_nodes = (int *)calloc(tm_topology->nb_levels,sizeof(int));
tm_topology->node_id = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
for(i = 0 ; i < tm_topology->nb_levels ; i++){
int nb_objs = hwloc_get_nbobjs_by_depth(opal_hwloc_topology,tracker[i]->depth);
tm_topology->nb_nodes[i] = nb_objs;
tm_topology->node_id[i] = (int*)malloc(sizeof(int)*nb_objs);
tm_topology->arity[i] = tracker[i]->arity;
for(j = 0 ; j < nb_objs ; j++)
tm_topology->node_id[i][j] = -1;
for(j = 0 ; j < nb_objs ; j++)
if ( j < num_procs_in_node )
tm_topology->node_id[i][j] = localrank_to_objnum[j];
}
#ifdef __DEBUG__
fprintf(stdout,"Levels in topo : %i | num procs in node : %i\n",tm_topology->nb_levels,num_procs_in_node);
for(i = 0; i < tm_topology->nb_levels ; i++){
fprintf(stdout,"Nb objs for level %i : %i | arity %i\n ",i,tm_topology->nb_nodes[i],tm_topology->arity[i]);
for(j = 0; j < tm_topology->nb_nodes[i] ; j++)
fprintf(stdout,"Obj id : %i |",tm_topology->node_id[i][j]);
fprintf(stdout,"\n");
}
display_topology(tm_topology);
#endif
tm_opt_topology = optimize_topology(tm_topology);
comm_tree = build_tree_from_topology(tm_opt_topology,comm_pattern,num_procs_in_node,NULL,NULL);
map_topology_simple(tm_opt_topology,comm_tree,matching,num_procs_in_node,NULL);
#ifdef __DEBUG__
fprintf(stdout,"Matching :");
for(i = 0 ; i < num_procs_in_node ; i++)
fprintf(stdout," %i ",matching[i]);
fprintf(stdout,"\n");
#endif
for(i = 0 ; i < num_procs_in_node ; i++)
free(comm_pattern[i]);
free(comm_pattern);
for(i = 0; i < tm_topology->nb_levels ; i++)
free(tm_topology->node_id[i]);
free(tm_topology->node_id);
free(tm_topology->nb_nodes);
free(tm_topology->arity);
free(tm_topology);
FREE_topology(tm_opt_topology);
} else {
local_pattern = (double *)calloc(num_procs_in_node,sizeof(double));
for(i = 0 ; i < num_procs_in_node ; i++)
local_pattern[i] = 0.0;
if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
if (grank_to_lrank[topo->in[i]] != -1)
local_pattern[grank_to_lrank[topo->in[i]]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
if (grank_to_lrank[topo->out[i]] != -1)
local_pattern[grank_to_lrank[topo->out[i]]] += topo->outw[i];
if (OMPI_SUCCESS != (err = localcomm->c_coll.coll_gather(local_pattern, num_procs_in_node, MPI_DOUBLE,
NULL,0,0,
0,localcomm,
localcomm->c_coll.coll_gather_module)))
ERR_EXIT(err);
}
}
if (OMPI_SUCCESS != (err = localcomm->c_coll.coll_bcast(matching, num_procs_in_node,
MPI_INT,0,localcomm,
localcomm->c_coll.coll_bcast_module)))
ERR_EXIT(err);
object = hwloc_get_obj_by_depth(opal_hwloc_topology,
effective_depth,matching[ompi_process_info.my_local_rank]);
if( NULL == object) goto fallback;
hwloc_bitmap_copy(set,object->cpuset);
hwloc_bitmap_singlify(set);
hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0);
if( -1 == hwloc_err) goto fallback;
/* Report new binding to ORTE/OPAL */
/* hwloc_bitmap_list_asprintf(&orte_process_info.cpuset,set); */
err = hwloc_bitmap_snprintf (set_as_string,64,set);
#ifdef __DEBUG__
fprintf(stdout,"Bitmap str size : %i\n",err);
#endif
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_DSTORE_CPUSET);
kv.type = OPAL_STRING;
kv.data.string = strdup(set_as_string);
(void)opal_dstore.store(opal_dstore_internal, (opal_process_name_t*)ORTE_PROC_MY_NAME, &kv);
OBJ_DESTRUCT(&kv);
locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
orte_process_info.cpuset,set_as_string);
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_DSTORE_LOCALITY);
kv.type = OPAL_UINT16;
kv.data.uint16 = locality;
(void)opal_dstore.store(opal_dstore_internal, (opal_process_name_t*)ORTE_PROC_MY_NAME, &kv);
OBJ_DESTRUCT(&kv);
if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old,
comm_old->c_local_group,
newcomm))){
ERR_EXIT(err);
} else {
/* Attach the dist_graph to the newly created communicator */
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
(*newcomm)->c_topo = topo_module;
(*newcomm)->c_topo->reorder = reorder;
}
free(matching);
free(grank_to_lrank);
free(lrank_to_grank);
} /* distributed reordering end */
if(rank == local_procs[0])
free(tracker);
free(nodes_roots);
free(local_procs);
free(local_pattern);
free(localrank_to_objnum);
free(colors);
hwloc_bitmap_free(set);
} /* reorder == yes */
return err;
}
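To summarize the flow implemented above: in the centralized mode (reorder_mode == 0), rank 0 gathers the full size x size communication matrix and the per-node hardware hierarchies, lets TreeMatch compute a permutation over all processing elements (optimize_topology, build_tree_from_topology, map_topology_simple), scatters one entry of that permutation to each rank, and uses it as the key of ompi_comm_split() to build the reordered communicator. In the partially distributed mode, each node's local master solves the same problem with only the on-node portion of the pattern, and the processes are re-bound to the matched hardware objects (hwloc_set_cpubind), with the new cpuset and locality pushed to the dstore before the new communicator is created.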

ompi/mca/topo/treematch/topo_treematch_module.c (new file, 45 lines)

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2011-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2015 INRIA. All rights reserved.
* Copyright (c) 2011-2015 Université Bordeaux 1
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include "mpi.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/topo/topo.h"
#include "ompi/mca/topo/base/base.h"
#include "ompi/mca/topo/treematch/topo_treematch.h"
/*
* Local functions
*/
static void treematch_module_constructor(mca_topo_treematch_module_t *u);
static void treematch_module_destructor(mca_topo_treematch_module_t *u);
OBJ_CLASS_INSTANCE(mca_topo_treematch_module_t, mca_topo_base_module_t,
treematch_module_constructor, treematch_module_destructor);
static void treematch_module_constructor(mca_topo_treematch_module_t *u)
{
mca_topo_base_module_t *m = &(u->super);
memset(&m->topo, 0, sizeof(m->topo));
}
static void treematch_module_destructor(mca_topo_treematch_module_t *u)
{
/* Do whatever is necessary to clean up / destroy the module */
}

ompi/mca/topo/treematch/treematch/COPYING (new file, 8 lines)

@@ -0,0 +1,8 @@
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of Inria nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.c (new file, 61 lines)

@@ -0,0 +1,61 @@
#include <stdlib.h>
#include <stdio.h>
#include "IntConstantInitializedVector.h"
int intCIV_isInitialized(int_CIVector * v, int i)
{
if(v->top == 0)
return 0;
if(v->from[i] >= 0)
if(v->from[i] < v->top && v->to[v->from[i]] == i)
return 1;
return 0;
}
void intCIV_init(int_CIVector * v, int size, int init_value)
{
v->init_value = init_value;
v->size = size;
v->top = 0;
v->to = malloc(sizeof(int)*size);
v->from = malloc(sizeof(int)*size);
v->vec = malloc(sizeof(int)*size);
}
void intCIV_exit(int_CIVector * v)
{
free(v->to);
free(v->from);
free(v->vec);
}
int intCIV_set(int_CIVector * v, int i, int val)
{
if(v == NULL)
return -1;
if(i < 0 || i >= v->size)
return -1;
if(!intCIV_isInitialized(v,i))
{
v->from[i] = v->top;
v->to[v->top] = i;
v->top++;
}
v->vec[i] = val;
return 0;
}
int intCIV_get(int_CIVector * v, int i)
{
if(v == NULL)
return -1;
if(i < 0 || i >= v->size)
return -1;
if(intCIV_isInitialized(v,i))
return v->vec[i];
return v->init_value;
}

ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.h (new file, 16 lines)

@@ -0,0 +1,16 @@
#ifndef INTEGER_CONSTANT_INITIALIZED_VECTOR
#define INTEGER_CONSTANT_INITIALIZED_VECTOR
typedef struct int_CIVector_
{
int init_value, size, top, *to, *from, *vec;
} int_CIVector;
int intCIV_isInitialized(int_CIVector * v, int i);
void intCIV_init(int_CIVector * v, int size, int init_value);
void intCIV_exit(int_CIVector * v);
int intCIV_set(int_CIVector * v, int i, int val);
int intCIV_get(int_CIVector * v, int i);
#endif /*INTEGER_CONSTANT_INITIALIZED_VECTOR*/
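A minimal usage sketch (hypothetical, not part of this commit) of the constant-initialized vector declared above: reads return init_value until an index has been explicitly set, and only the touched entries are tracked through the to/from cross-references, so the backing arrays never need to be cleared.

#include <assert.h>
#include "IntConstantInitializedVector.h"

int main(void)
{
    int_CIVector v;

    intCIV_init(&v, 1000, -1);          /* logical size 1000, default value -1 */
    assert(intCIV_get(&v, 42) == -1);   /* never set: reads back init_value */
    intCIV_set(&v, 42, 7);
    assert(intCIV_get(&v, 42) == 7);    /* set entries return their stored value */
    intCIV_exit(&v);
    return 0;
}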

ompi/mca/topo/treematch/treematch/LICENSE (new file, 515 lines)

@@ -0,0 +1,515 @@
CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
Notice
This Agreement is a Free Software license agreement that is the result
of discussions between its authors in order to ensure compliance with
the two main principles guiding its drafting:
* firstly, compliance with the principles governing the distribution
of Free Software: access to source code, broad rights granted to
users,
* secondly, the election of a governing law, French law, with which
it is conformant, both as regards the law of torts and
intellectual property law, and the protection that it offers to
both authors and holders of the economic rights over software.
The authors of the CeCILL-B (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
license are:
Commissariat à l'Energie Atomique - CEA, a public scientific, technical
and industrial research establishment, having its principal place of
business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.
Centre National de la Recherche Scientifique - CNRS, a public scientific
and technological establishment, having its principal place of business
at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
Institut National de Recherche en Informatique et en Automatique -
INRIA, a public scientific and technological establishment, having its
principal place of business at Domaine de Voluceau, Rocquencourt, BP
105, 78153 Le Chesnay cedex, France.
Preamble
This Agreement is an open source software license intended to give users
significant freedom to modify and redistribute the software licensed
hereunder.
The exercising of this freedom is conditional upon a strong obligation
of giving credits for everybody that distributes a software
incorporating a software ruled by the current license so as all
contributions to be properly identified and acknowledged.
In consideration of access to the source code and the rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors only have limited liability.
In this respect, the risks associated with loading, using, modifying
and/or developing or reproducing the software by the user are brought to
the user's attention, given its Free Software status, which may make it
complicated to use, with the result that its use is reserved for
developers and experienced professionals having in-depth computer
knowledge. Users are therefore encouraged to load and test the
suitability of the software as regards their requirements in conditions
enabling the security of their systems and/or data to be ensured and,
more generally, to use and operate it in the same conditions of
security. This Agreement may be freely reproduced and published,
provided it is not altered, and that no provisions are either added or
removed herefrom.
This Agreement may apply to any or all software for which the holder of
the economic rights decides to submit the use thereof to its provisions.
Article 1 - DEFINITIONS
For the purpose of this Agreement, when the following expressions
commence with a capital letter, they shall have the following meaning:
Agreement: means this license agreement, and its possible subsequent
versions and annexes.
Software: means the software in its Object Code and/or Source Code form
and, where applicable, its documentation, "as is" when the Licensee
accepts the Agreement.
Initial Software: means the Software in its Source Code and possibly its
Object Code form and, where applicable, its documentation, "as is" when
it is first distributed under the terms and conditions of the Agreement.
Modified Software: means the Software modified by at least one
Contribution.
Source Code: means all the Software's instructions and program lines to
which access is required so as to modify the Software.
Object Code: means the binary files originating from the compilation of
the Source Code.
Holder: means the holder(s) of the economic rights over the Initial
Software.
Licensee: means the Software user(s) having accepted the Agreement.
Contributor: means a Licensee having made at least one Contribution.
Licensor: means the Holder, or any other individual or legal entity, who
distributes the Software under the Agreement.
Contribution: means any or all modifications, corrections, translations,
adaptations and/or new functions integrated into the Software by any or
all Contributors, as well as any or all Internal Modules.
Module: means a set of sources files including their documentation that
enables supplementary functions or services in addition to those offered
by the Software.
External Module: means any or all Modules, not derived from the
Software, so that this Module and the Software run in separate address
spaces, with one calling the other when they are run.
Internal Module: means any or all Module, connected to the Software so
that they both execute in the same address space.
Parties: mean both the Licensee and the Licensor.
These expressions may be used both in singular and plural form.
Article 2 - PURPOSE
The purpose of the Agreement is the grant by the Licensor to the
Licensee of a non-exclusive, transferable and worldwide license for the
Software as set forth in Article 5 hereinafter for the whole term of the
protection granted by the rights over said Software.
Article 3 - ACCEPTANCE
3.1 The Licensee shall be deemed as having accepted the terms and
conditions of this Agreement upon the occurrence of the first of the
following events:
* (i) loading the Software by any or all means, notably, by
downloading from a remote server, or by loading from a physical
medium;
* (ii) the first time the Licensee exercises any of the rights
granted hereunder.
3.2 One copy of the Agreement, containing a notice relating to the
characteristics of the Software, to the limited warranty, and to the
fact that its use is restricted to experienced users has been provided
to the Licensee prior to its acceptance as set forth in Article 3.1
hereinabove, and the Licensee hereby acknowledges that it has read and
understood it.
Article 4 - EFFECTIVE DATE AND TERM
4.1 EFFECTIVE DATE
The Agreement shall become effective on the date when it is accepted by
the Licensee as set forth in Article 3.1.
4.2 TERM
The Agreement shall remain in force for the entire legal term of
protection of the economic rights over the Software.
Article 5 - SCOPE OF RIGHTS GRANTED
The Licensor hereby grants to the Licensee, who accepts, the following
rights over the Software for any or all use, and for the term of the
Agreement, on the basis of the terms and conditions set forth hereinafter.
Besides, if the Licensor owns or comes to own one or more patents
protecting all or part of the functions of the Software or of its
components, the Licensor undertakes not to enforce the rights granted by
these patents against successive Licensees using, exploiting or
modifying the Software. If these patents are transferred, the Licensor
undertakes to have the transferees subscribe to the obligations set
forth in this paragraph.
5.1 RIGHT OF USE
The Licensee is authorized to use the Software, without any limitation
as to its fields of application, with it being hereinafter specified
that this comprises:
1. permanent or temporary reproduction of all or part of the Software
by any or all means and in any or all form.
2. loading, displaying, running, or storing the Software on any or
all medium.
3. entitlement to observe, study or test its operation so as to
determine the ideas and principles behind any or all constituent
elements of said Software. This shall apply when the Licensee
carries out any or all loading, displaying, running, transmission
or storage operation as regards the Software, that it is entitled
to carry out hereunder.
5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
The right to make Contributions includes the right to translate, adapt,
arrange, or make any or all modifications to the Software, and the right
to reproduce the resulting software.
The Licensee is authorized to make any or all Contributions to the
Software provided that it includes an explicit notice that it is the
author of said Contribution and indicates the date of the creation thereof.
5.3 RIGHT OF DISTRIBUTION
In particular, the right of distribution includes the right to publish,
transmit and communicate the Software to the general public on any or
all medium, and by any or all means, and the right to market, either in
consideration of a fee, or free of charge, one or more copies of the
Software by any means.
The Licensee is further authorized to distribute copies of the modified
or unmodified Software to third parties according to the terms and
conditions set forth hereinafter.
5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
The Licensee is authorized to distribute true copies of the Software in
Source Code or Object Code form, provided that said distribution
complies with all the provisions of the Agreement and is accompanied by:
1. a copy of the Agreement,
2. a notice relating to the limitation of both the Licensor's
warranty and liability as set forth in Articles 8 and 9,
and that, in the event that only the Object Code of the Software is
redistributed, the Licensee allows effective access to the full Source
Code of the Software at a minimum during the entire period of its
distribution of the Software, it being understood that the additional
cost of acquiring the Source Code shall not exceed the cost of
transferring the data.
5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
If the Licensee makes any Contribution to the Software, the resulting
Modified Software may be distributed under a license agreement other
than this Agreement subject to compliance with the provisions of Article
5.3.4.
5.3.3 DISTRIBUTION OF EXTERNAL MODULES
When the Licensee has developed an External Module, the terms and
conditions of this Agreement do not apply to said External Module, that
may be distributed under a separate license agreement.
5.3.4 CREDITS
Any Licensee who may distribute a Modified Software hereby expressly
agrees to:
1. indicate in the related documentation that it is based on the
Software licensed hereunder, and reproduce the intellectual
property notice for the Software,
2. ensure that written indications of the Software intended use,
intellectual property notice and license hereunder are included in
easily accessible format from the Modified Software interface,
3. mention, on a freely accessible website describing the Modified
Software, at least throughout the distribution term thereof, that
it is based on the Software licensed hereunder, and reproduce the
Software intellectual property notice,
4. where it is distributed to a third party that may distribute a
Modified Software without having to make its source code
available, make its best efforts to ensure that said third party
agrees to comply with the obligations set forth in this Article .
If the Software, whether or not modified, is distributed with an
External Module designed for use in connection with the Software, the
Licensee shall submit said External Module to the foregoing obligations.
5.3.5 COMPATIBILITY WITH THE CeCILL AND CeCILL-C LICENSES
Where a Modified Software contains a Contribution subject to the CeCILL
license, the provisions set forth in Article 5.3.4 shall be optional.
A Modified Software may be distributed under the CeCILL-C license. In
such a case the provisions set forth in Article 5.3.4 shall be optional.
Article 6 - INTELLECTUAL PROPERTY
6.1 OVER THE INITIAL SOFTWARE
The Holder owns the economic rights over the Initial Software. Any or
all use of the Initial Software is subject to compliance with the terms
and conditions under which the Holder has elected to distribute its work
and no one shall be entitled to modify the terms and conditions for the
distribution of said Initial Software.
The Holder undertakes that the Initial Software will remain ruled at
least by this Agreement, for the duration set forth in Article 4.2.
6.2 OVER THE CONTRIBUTIONS
The Licensee who develops a Contribution is the owner of the
intellectual property rights over this Contribution as defined by
applicable law.
6.3 OVER THE EXTERNAL MODULES
The Licensee who develops an External Module is the owner of the
intellectual property rights over this External Module as defined by
applicable law and is free to choose the type of agreement that shall
govern its distribution.
6.4 JOINT PROVISIONS
The Licensee expressly undertakes:
1. not to remove, or modify, in any manner, the intellectual property
notices attached to the Software;
2. to reproduce said notices, in an identical manner, in the copies
of the Software modified or not.
The Licensee undertakes not to directly or indirectly infringe the
intellectual property rights of the Holder and/or Contributors on the
Software and to take, where applicable, vis-à-vis its staff, any and all
measures required to ensure respect of said intellectual property rights
of the Holder and/or Contributors.
Article 7 - RELATED SERVICES
7.1 Under no circumstances shall the Agreement oblige the Licensor to
provide technical assistance or maintenance services for the Software.
However, the Licensor is entitled to offer this type of services. The
terms and conditions of such technical assistance, and/or such
maintenance, shall be set forth in a separate instrument. Only the
Licensor offering said maintenance and/or technical assistance services
shall incur liability therefor.
7.2 Similarly, any Licensor is entitled to offer to its licensees, under
its sole responsibility, a warranty, that shall only be binding upon
itself, for the redistribution of the Software and/or the Modified
Software, under terms and conditions that it is free to decide. Said
warranty, and the financial terms and conditions of its application,
shall be subject of a separate instrument executed between the Licensor
and the Licensee.
Article 8 - LIABILITY
8.1 Subject to the provisions of Article 8.2, the Licensee shall be
entitled to claim compensation for any direct loss it may have suffered
from the Software as a result of a fault on the part of the relevant
Licensor, subject to providing evidence thereof.
8.2 The Licensor's liability is limited to the commitments made under
this Agreement and shall not be incurred as a result of in particular:
(i) loss due to the Licensee's total or partial failure to fulfill its
obligations, (ii) direct or consequential loss that is suffered by the
Licensee due to the use or performance of the Software, and (iii) more
generally, any consequential loss. In particular the Parties expressly
agree that any or all pecuniary or business loss (i.e. loss of data,
loss of profits, operating loss, loss of customers or orders,
opportunity cost, any disturbance to business activities) or any or all
legal proceedings instituted against the Licensee by a third party,
shall constitute consequential loss and shall not provide entitlement to
any or all compensation from the Licensor.
Article 9 - WARRANTY
9.1 The Licensee acknowledges that the scientific and technical
state-of-the-art when the Software was distributed did not enable all
possible uses to be tested and verified, nor for the presence of
possible defects to be detected. In this respect, the Licensee's
attention has been drawn to the risks associated with loading, using,
modifying and/or developing and reproducing the Software which are
reserved for experienced users.
The Licensee shall be responsible for verifying, by any or all means,
the suitability of the product for its requirements, its good working
order, and for ensuring that it shall not cause damage to either persons
or properties.
9.2 The Licensor hereby represents, in good faith, that it is entitled
to grant all the rights over the Software (including in particular the
rights set forth in Article 5).
9.3 The Licensee acknowledges that the Software is supplied "as is" by
the Licensor without any other express or tacit warranty, other than
that provided for in Article 9.2 and, in particular, without any warranty
as to its commercial value, its secured, safe, innovative or relevant
nature.
Specifically, the Licensor does not warrant that the Software is free
from any error, that it will operate without interruption, that it will
be compatible with the Licensee's own equipment and software
configuration, nor that it will meet the Licensee's requirements.
9.4 The Licensor does not either expressly or tacitly warrant that the
Software does not infringe any third party intellectual property right
relating to a patent, software or any other property right. Therefore,
the Licensor disclaims any and all liability towards the Licensee
arising out of any or all proceedings for infringement that may be
instituted in respect of the use, modification and redistribution of the
Software. Nevertheless, should such proceedings be instituted against
the Licensee, the Licensor shall provide it with technical and legal
assistance for its defense. Such technical and legal assistance shall be
decided on a case-by-case basis between the relevant Licensor and the
Licensee pursuant to a memorandum of understanding. The Licensor
disclaims any and all liability as regards the Licensee's use of the
name of the Software. No warranty is given as regards the existence of
prior rights over the name of the Software or as regards the existence
of a trademark.
Article 10 - TERMINATION
10.1 In the event of a breach by the Licensee of its obligations
hereunder, the Licensor may automatically terminate this Agreement
thirty (30) days after notice has been sent to the Licensee and has
remained ineffective.
10.2 A Licensee whose Agreement is terminated shall no longer be
authorized to use, modify or distribute the Software. However, any
licenses that it may have granted prior to termination of the Agreement
shall remain valid subject to their having been granted in compliance
with the terms and conditions hereof.
Article 11 - MISCELLANEOUS
11.1 EXCUSABLE EVENTS
Neither Party shall be liable for any or all delay, or failure to
perform the Agreement, that may be attributable to an event of force
majeure, an act of God or an outside cause, such as defective
functioning or interruptions of the electricity or telecommunications
networks, network paralysis following a virus attack, intervention by
government authorities, natural disasters, water damage, earthquakes,
fire, explosions, strikes and labor unrest, war, etc.
11.2 Any failure by either Party, on one or more occasions, to invoke
one or more of the provisions hereof, shall under no circumstances be
interpreted as being a waiver by the interested Party of its right to
invoke said provision(s) subsequently.
11.3 The Agreement cancels and replaces any or all previous agreements,
whether written or oral, between the Parties and having the same
purpose, and constitutes the entirety of the agreement between said
Parties concerning said purpose. No supplement or modification to the
terms and conditions hereof shall be effective as between the Parties
unless it is made in writing and signed by their duly authorized
representatives.
11.4 In the event that one or more of the provisions hereof were to
conflict with a current or future applicable act or legislative text,
said act or legislative text shall prevail, and the Parties shall make
the necessary amendments so as to comply with said act or legislative
text. All other provisions shall remain effective. Similarly, invalidity
of a provision of the Agreement, for any reason whatsoever, shall not
cause the Agreement as a whole to be invalid.
11.5 LANGUAGE
The Agreement is drafted in both French and English and both versions
are deemed authentic.
Article 12 - NEW VERSIONS OF THE AGREEMENT
12.1 Any person is authorized to duplicate and distribute copies of this
Agreement.
12.2 So as to ensure coherence, the wording of this Agreement is
protected and may only be modified by the authors of the License, who
reserve the right to periodically publish updates or new versions of the
Agreement, each with a separate number. These subsequent versions may
address new issues encountered by Free Software.
12.3 Any Software distributed under a given version of the Agreement may
only be subsequently distributed under the same version of the Agreement
or a subsequent version.
Article 13 - GOVERNING LAW AND JURISDICTION
13.1 The Agreement is governed by French law. The Parties agree to
endeavor to seek an amicable solution to any disagreements or disputes
that may arise during the performance of the Agreement.
13.2 Failing an amicable solution within two (2) months as from their
occurrence, and unless emergency proceedings are necessary, the
disagreements or disputes shall be referred to the Paris Courts having
jurisdiction, by the more diligent Party.
Version 1.0 dated 2006-09-05.

56
ompi/mca/topo/treematch/treematch/tgt_map.c Обычный файл
Просмотреть файл

@ -0,0 +1,56 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
//#include "tm_hwloc.h"
#include "tm_tree.h"
#include "tm_mapping.h"
#include "tm_timings.h"
int main(int argc, char**argv){
tree_t *comm_tree=NULL;
double **comm,**arch;
tm_topology_t *topology;
int nb_processes,nb_cores;
int *sol,*k;
if(argc<3){
fprintf(stderr,"Usage: %s <Architecture tgt> <communication partern file>\n",argv[0]);
return -1;
}
topology=tgt_to_tm(argv[1],&arch);
optimize_topology(&topology);
nb_processes=build_comm(argv[2],&comm);
sol=(int*)MALLOC(sizeof(int)*nb_processes);
nb_cores=nb_processing_units(topology);
k=(int*)MALLOC(sizeof(int)*nb_cores);
// TreeMatchMapping(nb_processes,nb_cores,comm,sol);
if(nb_processes>nb_cores){
fprintf(stderr,"Error: to many processes (%d) for this topology (%d nodes)\n",nb_processes,nb_cores);
exit(-1);
}
TIC;
comm_tree=build_tree_from_topology(topology,comm,nb_processes,NULL,NULL);
map_topology_simple(topology,comm_tree,sol,k);
double duration=TOC;
printf("mapping duration: %f\n",duration);
printf("TreeMatch: ");
print_sol_inv(nb_processes,sol,comm,arch);
//print_1D_tab(k,nb_cores);
// display_other_heuristics(topology,nb_processes,comm,arch);
//display_tab(arch,nb_cores);
FREE_topology(topology);
//FREE_tree(comm_tree);
FREE(sol);
FREE(comm);
FREE(arch);
return 0;
}

Просмотреть файл

@ -0,0 +1,31 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "tm_hwloc.h"
#include "tm_tree.h"
#include "tm_mapping.h"
#include "tm_timings.h"
int main(int argc, char**argv){
tm_topology_t *topology;
int nb_cores;
double **arch;
if(argc<2){
fprintf(stderr,"Usage: %s <Architecture tgt>\n",argv[0]);
return -1;
}
topology=tgt_to_tm(argv[1],&arch);
nb_cores=nb_nodes(topology);
display_tab(arch,nb_cores);
FREE_topology(topology);
FREE(arch);
return 0;
}

669
ompi/mca/topo/treematch/treematch/tm_bucket.c Обычный файл
Просмотреть файл

@ -0,0 +1,669 @@
#include <stdio.h>
#include <float.h>
#include <math.h>
#include <assert.h>
#include "tm_tree.h"
#include "tm_bucket.h"
#include "tm_timings.h"
#include "tm_verbose.h"
#include "tm_thread_pool.h"
#include "tm_mt.h"
#ifdef _WIN32
#include <windows.h>
#include <winbase.h>
#endif
#ifndef __CHARMC__
#define __CHARMC__ 0
#endif
#if __CHARMC__
#include "converse.h"
#else
static int ilog2(int val)
{
int i = 0;
for( ; val != 0; val >>= 1, i++ );
return i;
}
#define CmiLog2(VAL) ilog2((int)(VAL))
#endif
static int verbose_level = ERROR;
bucket_list_t global_bl;
int tab_cmp(const void*,const void*);
int old_bucket_id(int,int,bucket_list_t);
int bucket_id(int,int,bucket_list_t);
void display_bucket(bucket_t *);
void check_bucket(bucket_t *,double **,double, double);
void display_pivots(bucket_list_t);
void display_bucket_list(bucket_list_t);
void add_to_bucket(int,int,int,bucket_list_t);
void dfs(int,int,int,double *,double *,int,int);
void built_pivot_tree(bucket_list_t);
void fill_buckets(bucket_list_t);
int is_power_of_2(int);
void partial_sort(bucket_list_t *,double **,int);
void next_bucket_elem(bucket_list_t,int *,int *);
int add_edge_3(tree_t *,tree_t *,int,int,int *);
void FREE_bucket(bucket_t *);
void FREE_tab_bucket(bucket_t **,int);
void FREE_bucket_list(bucket_list_t);
void partial_update_val (int nb_args, void **args);
int tab_cmp(const void* x1,const void* x2)
{
int *e1 = NULL,*e2 = NULL,i1,i2,j1,j2;
double **tab = NULL;
bucket_list_t bl;
bl = global_bl;
e1 = ((int *)x1);
e2 = ((int *)x2);
tab = bl->tab;
i1 = e1[0];
j1 = e1[1];
i2 = e2[0];
j2 = e2[1];
if(tab[i1][j1]==tab[i2][j2]){
if(i1==i2){
return (j1 > j2) ? -1 : 1;
}else{
return (i1 > i2) ? -1 : 1;
}
}
return (tab[i1][j1] > tab[i2][j2]) ? -1 : 1;
}
int old_bucket_id(int i,int j,bucket_list_t bucket_list)
{
double *pivot = NULL,val;
int n,sup,inf,p;
pivot = bucket_list->pivot;
n = bucket_list->nb_buckets;
val = bucket_list->tab[i][j];
inf = -1;
sup = n;
while( (sup - inf) > 1){
p = (sup + inf)/2;
/* printf("%f [%d,%d,%d]=%f\n",val,inf,p,sup,pivot[p]); */
if( val < pivot[p] ){
inf = p;
if( inf == sup )
inf--;
} else {
sup = p;
if( sup == inf )
sup++;
}
}
/*exit(-1);*/
return sup;
}
int bucket_id(int i,int j,bucket_list_t bucket_list)
{
double *pivot_tree = NULL,val;
int p,k;
pivot_tree = bucket_list->pivot_tree;
val = bucket_list->tab[i][j];
p = 1;
for( k = 0 ; k < bucket_list->max_depth ; k++){
if( val > pivot_tree[p] )
p = p*2;
else
p = p*2 + 1;
}
return (int)pivot_tree[p];
}
void display_bucket(bucket_t *b)
{
printf("\tb.bucket=%p\n",(void *)b->bucket);
printf("\tb.bucket_len=%d\n",(int)b->bucket_len);
printf("\tb.nb_elem=%d\n",(int)b->nb_elem);
}
void check_bucket(bucket_t *b,double **tab,double inf, double sup)
{
int i,j,k;
for( k = 0 ; k < b->nb_elem ; k++ ){
i = b->bucket[k].i;
j = b->bucket[k].j;
if((tab[i][j] < inf) || (tab[i][j] > sup)){
if(verbose_level >= CRITICAL)
printf("[%d] (%d,%d):%f not in [%f,%f]\n",k,i,j,tab[i][j],inf,sup);
exit(-1);
}
}
}
void display_pivots(bucket_list_t bucket_list)
{
int i;
for( i = 0 ; i < bucket_list->nb_buckets-1 ; i++)
printf("pivot[%d]=%f\n",i,bucket_list->pivot[i]);
printf("\n");
}
void display_bucket_list(bucket_list_t bucket_list)
{
int i;
double inf,sup;
/*display_pivots(bucket_list);*/
for(i = 0 ; i < bucket_list->nb_buckets ; i++){
inf = bucket_list->pivot[i];
sup = bucket_list->pivot[i-1];
if( i == 0 )
sup=DBL_MAX;
if( i == bucket_list->nb_buckets - 1 )
inf = 0;
if(verbose_level >= DEBUG){
printf("Bucket %d:\n",i);
display_bucket(bucket_list->bucket_tab[i]);
printf("\n");
}
check_bucket(bucket_list->bucket_tab[i],bucket_list->tab,inf,sup);
}
}
void add_to_bucket(int id,int i,int j,bucket_list_t bucket_list)
{
bucket_t *bucket = NULL;
int N,n,size;
bucket = bucket_list->bucket_tab[id];
/* display_bucket(bucket);*/
if( bucket->bucket_len == bucket->nb_elem ){
N = bucket_list->N;
n = bucket_list->nb_buckets;
size = N*N/n;
/* display_bucket(bucket);*/
bucket->bucket = (coord*)realloc(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len));
bucket->bucket_len += size;
if(verbose_level >= DEBUG){
printf("MALLOC/realloc: %d\n",id);
printf("(%d,%d)\n",i,j);
display_bucket(bucket);
printf("\n");
}
}
bucket->bucket[bucket->nb_elem].i=i;
bucket->bucket[bucket->nb_elem].j=j;
bucket->nb_elem++;
/* printf("\n"); */
/* exit(-1); */
}
void dfs(int i,int inf,int sup,double *pivot,double *pivot_tree,int depth,int max_depth)
{
int p;
if( depth == max_depth )
return;
p = (inf + sup)/2;
pivot_tree[i] = pivot[p-1];
dfs(2*i,inf,p-1,pivot,pivot_tree,depth+1,max_depth);
dfs(2*i+1,p+1,sup,pivot,pivot_tree,depth+1,max_depth);
}
void built_pivot_tree(bucket_list_t bucket_list)
{
double *pivot_tree = NULL,*pivot = NULL;
int n,i,k;
pivot = bucket_list->pivot;
n = bucket_list->nb_buckets;
pivot_tree = (double*)MALLOC(sizeof(double)*2*n);
bucket_list->max_depth = (int)CmiLog2(n) - 1;
dfs(1,1,n-1,pivot,pivot_tree,0,bucket_list->max_depth);
k = 0;
pivot_tree[0] = -1;
for( i = n ; i < 2*n ; i++)
pivot_tree[i] = k++;
bucket_list->pivot_tree = pivot_tree;
if(verbose_level >= DEBUG){
for(i=0;i<2*n;i++)
printf("%d:%f\t",i,pivot_tree[i]);
printf("\n");
}
}
void fill_buckets(bucket_list_t bucket_list)
{
int N,i,j,id;
N = bucket_list->N;
for( i = 0 ; i < N ; i++ )
for( j = i+1 ; j < N ; j++ ){
id = bucket_id(i,j,bucket_list);
add_to_bucket(id,i,j,bucket_list);
}
}
int is_power_of_2(int val)
{
int n = 1;
do{
if( n == val)
return 1;
n <<= 1;
}while( n > 0);
return 0;
}
void partial_sort(bucket_list_t *bl,double **tab,int N)
{
double *pivot = NULL;
int *sample = NULL;
int i,j,k,n,id;
bucket_list_t bucket_list;
int nb_buckets, nb_bits;
/* after these operations, nb_buckets is a power of 2 integer close to log2(N)*/
nb_buckets = (int)floor(CmiLog2(N));
nb_bits = (int)ceil(CmiLog2(nb_buckets));
nb_buckets = nb_buckets >> (nb_bits-1);
nb_buckets = nb_buckets << (nb_bits-1);
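/* worked example (illustrative, assuming the local ilog2() above):
   N = 1000 -> CmiLog2(N) = 10, so nb_buckets starts at 10;
   nb_bits = CmiLog2(10) = 4; then 10 >> 3 == 1 and 1 << 3 == 8,
   hence nb_buckets = 8, a power of 2 close to log2(1000) */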
/* check the result*/
if(!is_power_of_2(nb_buckets)){
if(verbose_level >= ERROR)
fprintf(stderr,"Error! Paramater nb_buckets is: %d and should be a power of 2\n",nb_buckets);
exit(-1);
}
bucket_list = (bucket_list_t)MALLOC(sizeof(_bucket_list_t));
bucket_list->tab = tab;
bucket_list->N = N;
n = pow(nb_buckets,2);
if(verbose_level >= INFO)
printf("N=%d, n=%d\n",N,n);
sample = (int*)MALLOC(2*sizeof(int)*n);
for( k = 0 ; k < n ; k++ ){
i = genrand_int32()%(N-2)+1;
if( i == N-2 )
j = N-1;
else
j = genrand_int32()%(N-i-2)+i+1;
if(verbose_level >= DEBUG)
printf("i=%d, j=%d\n",i,j);
assert( i != j );
assert( i < j );
assert( i < N );
assert( j < N );
sample[2*k] = i;
sample[2*k+1] = j;
}
/* printf("k=%d\n",k); */
global_bl = bucket_list;
qsort(sample,n,2*sizeof(int),tab_cmp);
if(verbose_level >= DEBUG)
for(k=0;k<n;k++){
i=sample[2*k];
j=sample[2*k+1];
printf("%f\n",tab[i][j]);
}
pivot = (double*)MALLOC(sizeof(double)*nb_buckets-1);
id = 1;
for( k = 1 ; k < nb_buckets ; k++ ){
/* fprintf(stderr,"k=%d, id=%d\n",k,id); */
i = sample[2*(id-1)];
j = sample[2*(id-1)+1];
id *= 2;
/* i=sample[k*N/nb_buckets]/N;
j=sample[k*N/nb_buckets]%N;*/
pivot[k-1] = tab[i][j];
/* printf("pivot[%d]=%f\n",k-1,tab[i][j]); */
}
bucket_list->pivot = pivot;
bucket_list->nb_buckets = nb_buckets;
built_pivot_tree(bucket_list);
bucket_list->bucket_tab = (bucket_t**)MALLOC(nb_buckets*sizeof(bucket_t*));
for( i = 0 ; i < nb_buckets ; i++ )
bucket_list->bucket_tab[i] = (bucket_t*)CALLOC(1,sizeof(bucket_t));
fill_buckets(bucket_list);
/* display_bucket_list(bucket_list); */
bucket_list->cur_bucket = 0;
bucket_list->bucket_indice = 0;
FREE(sample);
*bl = bucket_list;
}
void next_bucket_elem(bucket_list_t bucket_list,int *i,int *j)
{
bucket_t *bucket = bucket_list->bucket_tab[bucket_list->cur_bucket];
/* display_bucket_list(bucket_list);
printf("nb_elem: %d, indice: %d, bucket_id: %d\n",(int)bucket->nb_elem,bucket_list->bucket_indice,bucket_list->cur_bucket);
*/
while( bucket->nb_elem <= bucket_list->bucket_indice ){
bucket_list->bucket_indice = 0;
bucket_list->cur_bucket++;
bucket = bucket_list->bucket_tab[bucket_list->cur_bucket];
if(verbose_level >= DEBUG){
printf("### From bucket %d to bucket %d\n",bucket_list->cur_bucket-1,bucket_list->cur_bucket);
printf("nb_elem: %d, indice: %d, bucket_id: %d\n",(int)bucket->nb_elem,bucket_list->bucket_indice,bucket_list->cur_bucket);
}
}
if(!bucket->sorted){
global_bl = bucket_list;
qsort(bucket->bucket,bucket->nb_elem,2*sizeof(int),tab_cmp);
bucket->sorted = 1;
}
*i = bucket->bucket[bucket_list->bucket_indice].i;
*j = bucket->bucket[bucket_list->bucket_indice].j;
bucket_list->bucket_indice++;
}
int add_edge_3(tree_t *tab_node, tree_t *parent,int i,int j,int *nb_groups)
{
/* printf("%d <-> %d ?\n",tab_node[i].id,tab_node[j].id); */
if((!tab_node[i].parent) && (!tab_node[j].parent)){
if(parent){
parent->child[0] = &tab_node[i];
parent->child[1] = &tab_node[j];
tab_node[i].parent = parent;
tab_node[j].parent = parent;
if(verbose_level >= DEBUG)
printf("%d: %d-%d\n",*nb_groups,parent->child[0]->id,parent->child[1]->id);
return 1;
}
return 0;
}
if( tab_node[i].parent && (!tab_node[j].parent) ){
parent = tab_node[i].parent;
if(!parent->child[2]){
parent->child[2] = &tab_node[j];
tab_node[j].parent = parent;
if(verbose_level >= DEBUG)
printf("%d: %d-%d-%d\n",*nb_groups,parent->child[0]->id,parent->child[1]->id,parent->child[2]->id);
(*nb_groups)++;
}
return 0;
}
if(tab_node[j].parent && (!tab_node[i].parent)){
parent = tab_node[j].parent;
if(!parent->child[2]){
parent->child[2] = &tab_node[i];
tab_node[i].parent = parent;
if(verbose_level >= DEBUG)
printf("%d: %d-%d-%d\n",*nb_groups,parent->child[0]->id,parent->child[1]->id,parent->child[2]->id);
(*nb_groups)++;
}
return 0;
}
return 0;
}
int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_groups)
{
assert( i != j );
switch(arity){
case 2:
if(tab_node[i].parent)
return 0;
if(tab_node[j].parent)
return 0;
parent->child[0] = &tab_node[i];
parent->child[1] = &tab_node[j];
tab_node[i].parent = parent;
tab_node[j].parent = parent;
(*nb_groups)++;
return 1;
case 3:
return add_edge_3(tab_node,parent,i,j,nb_groups);
default:
if(verbose_level >= ERROR)
fprintf(stderr,"Cannot handle arity %d\n",parent->arity);
exit(-1);
}
}
void FREE_bucket(bucket_t *bucket)
{
FREE(bucket->bucket);
FREE(bucket);
}
void FREE_tab_bucket(bucket_t **bucket_tab,int N)
{
int i;
for( i = 0 ; i < N ; i++ )
FREE_bucket(bucket_tab[i]);
FREE(bucket_tab);
}
void FREE_bucket_list(bucket_list_t bucket_list)
{
/* Do not FREE the tab field it is used elsewhere */
FREE_tab_bucket(bucket_list->bucket_tab,bucket_list->nb_buckets);
FREE(bucket_list->pivot);
FREE(bucket_list->pivot_tree);
FREE(bucket_list);
}
void partial_update_val (int nb_args, void **args){
int inf = *(int*)args[0];
int sup = *(int*)args[1];
affinity_mat_t *aff_mat = (affinity_mat_t*)args[2];
tree_t *new_tab_node = (tree_t*)args[3];
double *res=(double*)args[4];
int l;
if(nb_args != 6){
if(verbose_level >= ERROR)
fprintf(stderr,"Wrong number of args in %s: %d\n",__FUNCTION__, nb_args);
exit(-1);
}
for( l = inf ; l < sup ; l++ ){
update_val(aff_mat,&new_tab_node[l]);
*res += new_tab_node[l].val;
}
}
void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node,
int arity,int M)
{
bucket_list_t bucket_list;
double duration,val = 0;
int l,i,j,nb_groups;
double gr1_1=0;
double gr1_2=0;
double gr1, gr2, gr3;
int N = aff_mat->order;
double **mat = aff_mat->mat;
verbose_level = get_verbose_level();
if(verbose_level >= INFO )
printf("starting sort of N=%d elements\n",N);
TIC;
partial_sort(&bucket_list,mat,N);
duration = TOC;
if(verbose_level >= INFO)
printf("Partial sorting=%fs\n",duration);
if(verbose_level >= DEBUG)
display_pivots(bucket_list);
TIC;
l = 0;
i = 0;
nb_groups = 0;
TIC;
if(verbose_level >= INFO){
while( l < M ){
TIC;
next_bucket_elem(bucket_list,&i,&j);
if(verbose_level >= DEBUG)
printf("elem[%d][%d]=%f ",i,j,mat[i][j]);
gr1_1 += TOC;
TIC;
if(try_add_edge(tab_node,&new_tab_node[l],arity,i,j,&nb_groups)){
l++;
}
gr1_2 += TOC;
}
}else{
while( l < M ){
next_bucket_elem(bucket_list,&i,&j);
if(try_add_edge(tab_node,&new_tab_node[l],arity,i,j,&nb_groups)){
l++;
}
}
}
gr1=TOC;
if(verbose_level >= INFO)
printf("Grouping phase 1=%fs (%fs+%fs) \n",gr1, gr1_1, gr1_2);
if(verbose_level >= DEBUG)
printf("l=%d,nb_groups=%d\n",l,nb_groups);
TIC;
while( nb_groups < M ){
next_bucket_elem(bucket_list,&i,&j);
try_add_edge(tab_node,NULL,arity,i,j,&nb_groups);
}
gr2=TOC;
if(verbose_level >= INFO)
printf("Grouping phase 2=%fs\n",gr2);
if(verbose_level >= DEBUG)
printf("l=%d,nb_groups=%d\n",l,nb_groups);
TIC;
if(M>512){ /* perform this part in parallel*/
int id;
int nb_threads;
work_t **works;
int *inf;
int *sup;
double *tab_val;
nb_threads = get_nb_threads();
works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
inf = (int*)MALLOC(sizeof(int)*nb_threads);
sup = (int*)MALLOC(sizeof(int)*nb_threads);
tab_val = (double*)CALLOC(nb_threads,sizeof(double));
for(id=0;id<nb_threads;id++){
void **args=(void**)MALLOC(sizeof(void*)*5);
inf[id]=id*M/nb_threads;
sup[id]=(id+1)*M/nb_threads;
if(id == nb_threads-1) sup[id]=M;
args[0]=(void*)(inf+id);
args[1]=(void*)(sup+id);
args[2]=(void*)aff_mat;
args[3]=(void*)new_tab_node;
args[4]=(void*)(tab_val+id);
works[id]= create_work(5,args,partial_update_val);
if(verbose_level >= DEBUG)
printf("Executing %p\n",(void *)works[id]);
submit_work( works[id], id);
}
for(id=0;id<nb_threads;id++){
wait_work_completion(works[id]);
val+=tab_val[id];
FREE(works[id]->args);
}
FREE(inf);
FREE(sup);
FREE(tab_val);
FREE(works);
}else{
for( l = 0 ; l < M ; l++ ){
update_val(aff_mat,&new_tab_node[l]);
val += new_tab_node[l].val;
}
}
gr3=TOC;
if(verbose_level >= INFO)
printf("Grouping phase 3=%fs\n",gr3);
/* printf("val=%f\n",val);exit(-1); */
duration = TOC;
if(verbose_level >= INFO)
printf("Grouping =%fs\n",duration);
if(verbose_level >= DEBUG){
printf("Bucket: %d, indice:%d\n",bucket_list->cur_bucket,bucket_list->bucket_indice);
printf("val=%f\n",val);
}
FREE_bucket_list(bucket_list);
/* exit(-1); */
/* display_grouping(new_tab_node,M,arity,val); */
}

Просмотреть файл

@ -0,0 +1,34 @@
#ifndef __BUCKET_H__
#define __BUCKET_H__
typedef struct{
int i;
int j;
}coord;
typedef struct{
coord * bucket; /* store i,j */
int bucket_len; /* allocated size in the heap */
int nb_elem; /* number of useful elements (nb_elem should be lower than bucket_len) */
int sorted;
}bucket_t;
typedef struct{
bucket_t **bucket_tab;
int nb_buckets;
double **tab;
int N;/* length of tab */
/* For iterating over the buckets */
int cur_bucket;
int bucket_indice;
double *pivot;
double *pivot_tree;
int max_depth;
}_bucket_list_t;
typedef _bucket_list_t *bucket_list_t;
void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node,
int arity,int M);
int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_groups);
#endif

280
ompi/mca/topo/treematch/treematch/tm_hwloc.c Обычный файл
Просмотреть файл

@ -0,0 +1,280 @@
#include <hwloc.h>
#include <hwloc/helper.h>
#include "tm_tree.h"
#include "tm_mapping.h"
#include <ctype.h>
#include "tm_verbose.h"
double ** tm_topology_to_arch(tm_topology_t *topology,double *cost);
tm_topology_t * tgt_to_tm(char *filename,double **pcost);
int topo_nb_proc(hwloc_topology_t topology,int N);
double ** topology_to_arch(hwloc_topology_t topology);
int symetric(hwloc_topology_t topology);
tm_topology_t* hwloc_to_tm(char *filename,double **pcost);
tm_topology_t* get_local_topo_with_hwloc(void);
/* transform a tgt scotch file into a topology file*/
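/* illustrative input (hypothetical values): a line such as
      tleaf 3 2 100 4 50 8 10
   is parsed below as a 4-level topology (2*4*8 = 64 processing units)
   whose internal levels have arities 2, 4 and 8 and per-level costs
   100, 50 and 10; the bottom-up aggregation below then turns the
   costs into 160, 60 and 10 */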
tm_topology_t * tgt_to_tm(char *filename, double **pcost)
{
tm_topology_t *topology = NULL;
FILE *pf = NULL;
char line[1024];
char *s = NULL;
double *cost = NULL;
int i;
pf = fopen(filename,"r");
if(!pf){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Cannot open %s\n",filename);
exit(-1);
}
if(get_verbose_level() >= INFO)
printf("Reading TGT file: %s\n",filename);
fgets(line,1024,pf);
s = strstr(line,"tleaf");
if(!s){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Syntax error! %s is not a tleaf file\n",filename);
exit(-1);
}
s += 5;
while(isspace(*s))
s++;
topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
topology->nb_levels = atoi(strtok(s," "))+1;
topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
for( i = 0 ; i < topology->nb_levels-1 ; i++ ){
topology->arity[i] = atoi(strtok(NULL," "));
cost[i] = atoi(strtok(NULL," "));
}
topology->arity[topology->nb_levels-1] = 0;
/* cost[topology->nb_levels-1]=0; */
/*aggregate costs*/
for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
cost[i] += cost[i+1];
build_synthetic_proc_id(topology);
*pcost = cost;
/* FREE(cost); */
/*
topology->arity[0]=nb_proc;
topology->nb_levels=decompose((int)ceil((1.0*nb_obj)/nb_proc),1,topology->arity);
printf("levels=%d\n",topology->nb_levels);
*/
if(get_verbose_level() >= INFO)
printf("Topology built from %s!\n",filename);
return topology;
}
int topo_nb_proc(hwloc_topology_t topology,int N)
{
hwloc_obj_t *objs = NULL;
int nb_proc;
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*N);
objs[0] = hwloc_get_next_obj_by_type(topology,HWLOC_OBJ_PU,NULL);
nb_proc = 1 + hwloc_get_closest_objs(topology,objs[0],objs+1,N-1);
FREE(objs);
return nb_proc;
}
double ** topology_to_arch(hwloc_topology_t topology)
{
int nb_proc,i,j;
hwloc_obj_t obj_proc1,obj_proc2,obj_res;
double **arch = NULL;
nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
arch = (double**)MALLOC(sizeof(double*)*nb_proc);
for( i = 0 ; i < nb_proc ; i++ ){
obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc);
for( j = 0 ; j < nb_proc ; j++ ){
obj_proc2 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,j);
obj_res = hwloc_get_common_ancestor_obj(topology,obj_proc1,obj_proc2);
/* printf("arch[%d][%d] <- %ld\n",obj_proc1->os_index,obj_proc2->os_index,*((long int*)(obj_res->userdatab))); */
arch[obj_proc1->os_index][obj_proc2->os_index]=speed(obj_res->depth+1);
}
}
return arch;
}
int symetric(hwloc_topology_t topology)
{
int depth,i,topodepth = hwloc_topology_get_depth(topology);
unsigned int arity;
hwloc_obj_t obj;
for ( depth = 0; depth < topodepth-1 ; depth++ ) {
int N = hwloc_get_nbobjs_by_depth(topology, depth);
obj = hwloc_get_next_obj_by_depth (topology,depth,NULL);
arity = obj->arity;
/* printf("Depth=%d, N=%d, Arity:%d\n",depth,N,arity); */
for (i = 1; i < N; i++ ){
obj = hwloc_get_next_obj_by_depth (topology,depth,obj);
if( obj->arity != arity){
/* printf("[%d]: obj->arity=%d, arity=%d\n",i,obj->arity,arity); */
return 0;
}
}
}
return 1;
}
tm_topology_t* hwloc_to_tm(char *filename,double **pcost)
{
hwloc_topology_t topology;
tm_topology_t *res = NULL;
hwloc_obj_t *objs = NULL;
unsigned topodepth,depth;
int nb_nodes,i;
double *cost;
int err;
/* Build the topology */
hwloc_topology_init(&topology);
err = hwloc_topology_set_xml(topology,filename);
if(err == -1){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename);
exit(-1);
}
hwloc_topology_ignore_all_keep_structure(topology);
hwloc_topology_load(topology);
/* Test if symetric */
if(!symetric(topology)){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"%s not symetric!\n",filename);
exit(-1);
}
/* work on depth */
topodepth = hwloc_topology_get_depth(topology);
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
res->nb_levels = topodepth;
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels);
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
if(get_verbose_level() >= INFO)
printf("topodepth = %d\n",topodepth);
/* Build TreeMatch topology */
for( depth = 0 ; depth < topodepth ; depth++ ){
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
res->nb_nodes[depth] = nb_nodes;
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
res->arity[depth] = objs[0]->arity;
if(get_verbose_level() >= INFO)
printf("%d(%d):",res->arity[depth],nb_nodes);
/* Build process id tab */
for (i = 0; i < nb_nodes; i++){
res->node_id[depth][i] = objs[i]->os_index;
/* if(depth==topodepth-1) */
}
FREE(objs);
}
cost = (double*)CALLOC(res->nb_levels,sizeof(double));
for(i=0; i<res->nb_levels; i++){
cost[i] = speed(i);
}
*pcost = cost;
/* Destroy topology object. */
hwloc_topology_destroy(topology);
if(get_verbose_level() >= INFO)
printf("\n");
return res;
}
tm_topology_t* get_local_topo_with_hwloc(void)
{
hwloc_topology_t topology;
tm_topology_t *res = NULL;
hwloc_obj_t *objs = NULL;
unsigned topodepth,depth;
int nb_nodes,i;
/* Build the topology */
hwloc_topology_init(&topology);
hwloc_topology_ignore_all_keep_structure(topology);
hwloc_topology_load(topology);
/* Test if symetric */
if(!symetric(topology)){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Local toplogy not symetric!\n");
exit(-1);
}
/* work on depth */
topodepth = hwloc_topology_get_depth(topology);
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
res->nb_levels = topodepth;
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels);
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
/* Build TreeMatch topology */
for( depth = 0 ; depth < topodepth ; depth++ ){
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
res->nb_nodes[depth] = nb_nodes;
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
res->arity[depth] = objs[0]->arity;
/* printf("%d:",res->arity[depth]); */
/* Build process id tab */
for (i = 0; i < nb_nodes; i++){
res->node_id[depth][i] = objs[i]->os_index;
/* if(depth==topodepth-1) */
}
FREE(objs);
}
/* Destroy HWLOC topology object. */
hwloc_topology_destroy(topology);
/* printf("\n"); */
return res;
}

Просмотреть файл

@ -0,0 +1,7 @@
#include <hwloc.h>
#include "tm_tree.h"
void hwloc_topology_tag(hwloc_topology_t topology);
tm_topology_t* hwloc_to_tm(char *filename,double **pcost);
tm_topology_t * tgt_to_tm(char *filename,double **pcost);
tm_topology_t* get_local_topo_with_hwloc(void);

Просмотреть файл

@ -0,0 +1,505 @@
#include "tm_mapping.h"
#include "tm_mt.h"
#include "tm_kpartitioning.h"
#include <stdlib.h>
#include <stdio.h>
#define USE_KL_KPART 0
#if USE_KL_KPART
#include "k-partitioning.h"
#endif /* USE_KL_KPART */
#define KL_KPART_GREEDY_TRIALS 0
static int verbose_level = ERROR;
#define MAX_TRIALS 10
#define USE_KL_STRATEGY 1
#define MIN(a,b) ((a)<(b)?(a):(b))
int fill_tab(int **,int *,int,int,int,int);
void complete_com_mat(double ***,int,int);
void complete_obj_weight(double **,int,int);
void allocate_vertex(int,int *,com_mat_t *,int,int *,int);
double eval_cost(int *, com_mat_t *);
int *kpartition_greedy(int, com_mat_t *,int,int *,int);
constraint_t *split_constraints (int *,int,int,tm_topology_t *,int);
com_mat_t **split_com_mat(com_mat_t *,int,int,int *);
int **split_vertices(int *,int,int,int *);
void FREE_tab_com_mat(com_mat_t **,int);
void FREE_tab_local_vertices(int **,int);
void FREE_const_tab(constraint_t *,int);
void kpartition_build_level_topology(tree_t *,com_mat_t *,int,int,tm_topology_t *,
int *,int *,int,double *,double *);
void allocate_vertex(int u, int *res, com_mat_t *com_mat, int n, int *size, int max_size)
{
int i,best_part=0;
double cost, best_cost = -1;
/*printf("\n");
print_1D_tab(res,n);*/
if(u>=com_mat->n){
for( i = 0 ; i < n ; i++)
if (( res[i] != -1 ) && ( size[res[i]] < max_size )){
best_part = res[i];
break;
}
}else{
for( i = 0 ; i < n ; i++){
if (( res[i] != -1 ) && ( size[res[i]] < max_size )){
cost = (((i)<com_mat->n)) ?com_mat->comm[u][i]:0;
if (( cost > best_cost)){
best_cost = cost;
best_part = res[i];
}
}
}
}
/* printf("size[%d]: %d\n",best_part, size[best_part]);*/
/* printf("putting(%.2f): %d -> %d\n",best_cost, u, best_part); */
res[u] = best_part;
size[best_part]++;
}
double eval_cost(int *partition, com_mat_t *com_mat)
{
double cost = 0;
int i,j;
for( i = 0 ; i < com_mat->n ; i++ )
for( j = i+1 ; j < com_mat->n ; j++ )
if(partition[i] != partition[j])
cost += com_mat->comm[i][j];
return cost;
}
int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints)
{
int *res = NULL, *best_res=NULL, *size = NULL;
int i,j,nb_trials;
int max_size, max_val;
double cost, best_cost = -1;
int start, end;
int dumb_id, nb_dumb;
for( nb_trials = 0 ; nb_trials < MAX_TRIALS ; nb_trials++ ){
res = (int *)MALLOC(sizeof(int)*n);
for ( i = 0 ; i < n ; i ++ )
res[i] = -1;
size = (int *)CALLOC(k,sizeof(int));
max_size = n/k;
/*printf("Constraints: ");print_1D_tab(constraints,nb_constraints);*/
/* put "dumb" vertices in the correct partition if there are any*/
if (nb_constraints){
start = 0;
dumb_id = n-1;
for( i = 0 ; i < k ; i ++){
max_val = (i+1)* (n/k);
end = start;
while( end < nb_constraints){
if(constraints[end] >= max_val)
break;
end++;
}
/* now end - start is the number of constraints for the ith subtree,
hence the number of dumb vertices is the difference between the
number of leaves of the subtree (n/k) and the number of constraints
*/
nb_dumb = n/k - (end-start);
/*printf("max_val: %d, nb_dumb=%d, start=%d, end=%d, size=%d\n",max_val, nb_dumb, start, end, n/k);*/
/* dumb vertices are the ones with the highest indices:
put them in the ith partition*/
for( j = 0; j < nb_dumb; j ++ ){
res[dumb_id] = i;
dumb_id--;
}
/* increase the size of the ith partition accordingly*/
size[i] += nb_dumb;
start=end;
}
}
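/* illustrative trace (hypothetical values): n = 8, k = 2 and
   constraints = {0,1,2,5}; subtree 0 covers leaves [0,4) and holds
   3 constraints, so nb_dumb = 1 and vertex 7 is pinned to partition 0;
   subtree 1 covers [4,8) and holds 1 constraint, so nb_dumb = 3 and
   vertices 6, 5 and 4 are pinned to partition 1 */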
/*printf("After dumb vertices mapping: ");print_1D_tab(res,n);*/
/* choose k initial "true" vertices at random and put them in a different partition */
for ( i = 0 ; i < k ; i ++ ){
/* if the partition is full of dumb vertices go to next partition*/
if(size[i] >= max_size)
continue;
/* find a vertex not already partitioned*/
do{
/* call the mersenne twister PRNG of tm_mt.c*/
j = genrand_int32() % n;
} while ( res[j] != -1 );
/* allocate and update size of partition*/
res[j] = i;
/* printf("random: %d -> %d\n",j,i); */
size[i]++;
}
/* allocate each unallocated vertex to the partition that maximizes the communication*/
for( i = 0 ; i < n ; i ++)
if( res[i] == -1)
allocate_vertex(i, res, com_mat, n, size, max_size);
cost = eval_cost(res,com_mat);
/*print_1D_tab(res,n);
printf("cost=%.2f\n",cost);*/
if((cost<best_cost) || (best_cost == -1)){
best_cost=cost;
FREE(best_res);
best_res=res;
}else
FREE(res);
FREE(size);
}
/*print_1D_tab(best_res,n);
printf("best_cost=%.2f\n",best_cost);
*/
return best_res;
}
int *kpartition(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints)
{
int *res= NULL;
if( n%k != 0){
if(verbose_level >= ERROR)
fprintf(stderr,"Error: Cannot partition %d elements in %d parts\n",n,k);
return NULL;
}
/* if(USE_KL_KPART) */
/* res = kPartitioning(comm, n, k, constraints, nb_constraints, KL_KPART_GREEDY_TRIALS); */
/* else */
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
return res;
}
constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm_topology_t *topology, int depth)
{
constraint_t *const_tab = NULL;
int nb_leaves, start, end;
int i;
const_tab = (constraint_t *)CALLOC(k,sizeof(constraint_t));
/* nb_leaves is the number of leaves of the current subtree
this will help to determine where to split constraints and how to shift values
*/
nb_leaves = compute_nb_leaves_from_level( depth + 1, topology );
/* split the constraints into k sub-constraints
each sub-constraint 'i' contains constraints with values in [i*nb_leaves,(i+1)*nb_leaves[
*/
start = 0;
for( i = 0; i < k; i++ ){
/*returns the index in constraints of the smallest value not yet copied;
end is used to compute the number of copied elements (end-start) and is used as the next starting index*/
end = fill_tab(&(const_tab[i].constraints), constraints, nb_constraints,start, (i+1) * nb_leaves, i * nb_leaves);
const_tab[i].length = end-start;
const_tab[i].id = i;
start = end;
}
return const_tab;
}
com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
{
com_mat_t **res = NULL, *sub_com_mat;
double **sub_mat = NULL;
int *perm = NULL;
int cur_part, i, ii, j, jj, m = n/k, s;
res = (com_mat_t**)MALLOC(k*sizeof(com_mat_t *));
if(verbose_level >= DEBUG){
printf("Partition: "); print_1D_tab(partition,n);
display_tab(com_mat->comm,com_mat->n);
}
perm = (int*)MALLOC(sizeof(int)*m);
for( cur_part = 0 ; cur_part < k ; cur_part ++ ){
/* build perm such that submat[i][j] corresponds to com_mat[perm[i]][perm[j]] according to the partition*/
s = 0;
for( j = 0; j < com_mat->n; j ++) /* check only non zero elements of com_mat*/
if ( partition[j] == cur_part )
perm[s++] = j;
/* s is now the size of the non zero sub matrix for this partition*/
/* build a sub-matrix for partition cur_part*/
sub_mat = (double **) MALLOC(sizeof(double *) * s);
for( i = 0 ; i < s ; i++)
sub_mat[i] = (double *) MALLOC(sizeof(double ) * s);
/* build the sub_mat corresponding to the partition cur_part*/
for ( i = 0 ; i < s ; i ++){
ii = perm[i];
for( j = i ; j < s ; j ++){
jj = perm[j];
sub_mat[i][j] = com_mat->comm[ii][jj];
sub_mat[j][i] = sub_mat[i][j];
}
}
sub_com_mat = (com_mat_t *)malloc(sizeof(com_mat_t));
sub_com_mat -> n = s;
sub_com_mat -> comm = sub_mat;
/* printf("\n\npartition:%d\n",cur_part);display_tab(sub_mat,m);*/
/* assign the sub_mat to the result*/
res[cur_part] = sub_com_mat;
}
FREE(perm);
return res;
}
int **split_vertices( int *vertices, int n, int k, int *partition)
{
int **res = NULL, *sub_vertices = NULL;
int m = n/k;
int i, j, cur_part;
/*allocate results*/
res = (int**) MALLOC(sizeof(int*) * k);
if(verbose_level >= DEBUG){
printf("Partition: ");print_1D_tab(partition,n);
printf("Vertices id: ");print_1D_tab(vertices,n);
}
/*split the vertices tab of the partition cur_part to the sub_vertices tab*/
for( cur_part = 0; cur_part < k ; cur_part ++){
sub_vertices = (int*) MALLOC(sizeof(int) * m);
i = 0;
for( j = 0; j < n; j ++)
if ( partition[j] == cur_part )
sub_vertices[i++] = vertices[j];
res[cur_part] = sub_vertices;
if(verbose_level >= DEBUG){
printf("partition %d: ",cur_part);print_1D_tab(sub_vertices,m);
}
}
/*exit(-1);*/
return res;
}
void FREE_tab_com_mat(com_mat_t **mat,int k)
{
int i,j;
if( !mat )
return;
for ( i = 0 ; i < k ; i ++){
for ( j = 0 ; j < mat[i]->n ; j ++)
FREE( mat[i]->comm[j] );
FREE( mat[i]->comm );
}
FREE(mat);
}
void FREE_tab_local_vertices(int **mat, int k)
{
int i; /* m=n/k; */
if( !mat )
return;
for ( i = 0 ; i < k ; i ++){
FREE( mat[i] );
}
FREE(mat);
}
void FREE_const_tab(constraint_t *const_tab, int k)
{
int i;
if( !const_tab )
return;
for(i = 0; i < k; i++){
if(const_tab[i].length)
FREE(const_tab[i].constraints);
}
FREE(const_tab);
}
void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N, int depth,
tm_topology_t *topology, int *local_vertices,
int *constraints, int nb_constraints,
double *obj_weight, double *comm_speed)
{
com_mat_t **tab_com_mat = NULL; /* table of communication matrices: we will have k such matrices, one for each subtree */
int k = topology->arity[depth];
tree_t **tab_child = NULL;
int *partition = NULL;
int **tab_local_vertices = NULL;
constraint_t *const_tab = NULL;
int i;
verbose_level = get_verbose_level();
/* if we are at the bottom of the tree set cur_node
and return*/
if ( depth == topology->nb_levels - 1 ){
if(verbose_level>=DEBUG)
printf("id : %d, com_mat= %p\n",local_vertices[0], (void *)com_mat->comm);
set_node(cur_node,NULL, 0, NULL, local_vertices[0], 0, NULL, depth);
return;
}
/* partition the com_matrix in k partitions*/
partition = kpartition(topology->arity[depth], com_mat, N, constraints, nb_constraints);
/* split the communication matrix in k parts according to the partition just found above */
tab_com_mat = split_com_mat( com_mat, N, k, partition);
/* split the local vertices in k parts according to the partition just found above */
tab_local_vertices = split_vertices( local_vertices, N, k, partition);
/* construct a tab of constraints of size k: one for each partition*/
const_tab = split_constraints (constraints, nb_constraints, k, topology, depth);
/* create the table of k nodes of the resulting sub-tree */
tab_child = (tree_t **) CALLOC (k,sizeof(tree_t));
for( i = 0 ; i < k ; i++){
tab_child[i] = (tree_t *) MALLOC(sizeof(tree_t));
}
/* for each child, proceed recursively*/
for( i = 0 ; i < k ; i++){
tab_child[i]->id = i;
kpartition_build_level_topology ( tab_child[i], tab_com_mat[i], N/k, depth + 1,
topology, tab_local_vertices[i],
const_tab[i].constraints, const_tab[i].length,
obj_weight, comm_speed);
tab_child[i]->parent = cur_node;
}
/* link the node with its child */
set_node( cur_node, tab_child, k, NULL, cur_node->id, 0, NULL, depth);
/* FREE local data*/
FREE(partition);
FREE_tab_com_mat(tab_com_mat,k);
FREE_tab_local_vertices(tab_local_vertices,k);
FREE_const_tab(const_tab,k);
}
tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **comm,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed)
{
int depth,i, K;
tree_t *root = NULL;
int *local_vertices = NULL;
int nb_cores;
com_mat_t com_mat;
verbose_level = get_verbose_level();
if(verbose_level>=INFO)
printf("Number of constraints: %d, N=%d\n", nb_constraints, N);
nb_cores=nb_processing_units(topology);
if((constraints == NULL) && (nb_constraints != 0)){
if(verbose_level>=ERROR)
fprintf(stderr,"size of constraint table not zero while constraint tab is NULL\n");
return NULL;
}
if((constraints != NULL) && (nb_constraints > nb_cores)){
if(verbose_level>=ERROR)
fprintf(stderr,"size of constraint table (%d) is greater than the number of cores (%d)\n", nb_constraints, nb_cores);
return NULL;
}
depth = 0;
/* if we have more cores than processes add new dumb processes to the com matrix*/
if((K=nb_cores - N)>0){
/* add K elements to the object weight*/
complete_obj_weight(&obj_weight,N,K);
/* display_tab(tab,N+K);*/
} else if( K < 0){
if(verbose_level>=ERROR)
fprintf(stderr,"Not enough cores!\n");
return NULL;
}
com_mat.comm = comm;
com_mat.n = N;
/*
local_vertices is the array of vertices that can be used.
The first min(N,nb_constraints) elements are numbered from 0 upward,
the last ones have value -1.
The values of this array will be used to number the leaves of the tree_t tree
that starts at "root".
min(N,nb_constraints) is used to tackle the case where there are fewer processes than constraints.
*/
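/* illustration (hypothetical sizes): with N = 3 processes, nb_cores = 4
   (hence K = 1) and nb_constraints = 3, the loops below produce
   local_vertices = {0, 1, 2, -1} */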
local_vertices = (int*) MALLOC (sizeof(int) * (K+N));
for( i = 0 ; i < MIN(N,nb_constraints) ; i++)
local_vertices[i] = i;
for( i = MIN(N,nb_constraints) ;i < N + K ; i++)
local_vertices[i] = -1;
/* we assume all objects have the same arity*/
/* assign the root of the tree*/
root = (tree_t*) MALLOC (sizeof(tree_t));
/*build the tree downward from the root*/
kpartition_build_level_topology(root, &com_mat, N+K, depth, topology, local_vertices,
constraints, nb_constraints, obj_weight, com_speed);
/*print_1D_tab(local_vertices,K+N);*/
if(verbose_level>=INFO)
printf("Build (bottom-up) tree done!\n");
FREE(local_vertices);
/* tell the system it is a constraint tree, this is useful for freeing pointers*/
root->constraint = 1;
return root;
}

Просмотреть файл

@ -0,0 +1,9 @@
typedef struct _com_mat_t{
double **comm;
int n; /*comm is of size n by n; the other elements are zeroes*/
} com_mat_t;
int *kpartition(int, com_mat_t*, int, int *, int);
tree_t * kpartition_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed);

157
ompi/mca/topo/treematch/treematch/tm_malloc.c Обычный файл
Просмотреть файл

@ -0,0 +1,157 @@
#include "uthash.h"
#include <stdio.h>
#include "tm_verbose.h"
#include "tm_malloc.h"
#define EXTRA_BYTE 100
typedef signed char byte;
/* static int verbose_level = ERROR;*/
typedef struct _hash_t {
void *key; /* we'll use this field as the key */
size_t size;
UT_hash_handle hh; /* makes this structure hashable */
}hash_t;
static hash_t *size_hash = NULL;
static char extra_data[EXTRA_BYTE];
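/* overview (comment added for clarity, not part of the original sources):
   my_malloc and my_calloc pad every allocation with EXTRA_BYTE guard bytes
   on each side, fill both guards with the fixed pseudo-random pattern kept
   in extra_data, and record the real block size in size_hash; my_free later
   compares both guard zones against extra_data to detect buffer under- and
   overflows, and my_mem_check reports any block that was never freed */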
static void save_size(void *ptr, size_t size);
static size_t retreive_size(void *someaddr);
static void init_extra_data(void);
void save_size(void *ptr, size_t size) {
hash_t *elem;
elem = (hash_t*) malloc(sizeof(hash_t));
elem -> key = ptr;
elem -> size = size;
if(get_verbose_level() >= DEBUG)
printf("Storing (%p,%ld)\n",ptr,size);
HASH_ADD_PTR( size_hash, key, elem );
}
size_t retreive_size(void *someaddr){
size_t res;
hash_t *elem = NULL;
HASH_FIND_PTR(size_hash, &someaddr, elem);
if(!elem){
fprintf(stderr,"cannot find ptr %p to free!\n",someaddr);
return 0;
}
res = elem->size;
if(get_verbose_level()>=DEBUG)
printf("Retreiving (%p,%ld)\n",someaddr, res);
HASH_DEL( size_hash, elem);
return res;
}
void my_mem_check(void){
hash_t *s;
int nb_errors = 0;
for(s=size_hash; s != NULL; s=s->hh.next) {
if(get_verbose_level()>=ERROR)
printf("pointer %p of size %ld has not been freed!\n", s->key, s->size);
nb_errors ++;
}
if(get_verbose_level() >= INFO)
printf ("Number of errors in managing memory: %d\n",nb_errors);
}
void init_extra_data(void){
static int done = 0;
int i;
if(done)
return;
srandom(0);
for( i = 0 ; i < EXTRA_BYTE; i++)
extra_data[i] = (char) random() % 256;
done = 1;
}
void *my_malloc(size_t size, char *file, int line){
byte *ptr;
init_extra_data();
size+=2*EXTRA_BYTE;
ptr = malloc(size);
if(get_verbose_level()>=DEBUG)
printf("my_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE,ptr,file,line);
save_size(ptr,size);
memcpy(ptr, extra_data, EXTRA_BYTE);
memcpy(ptr + size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
if(get_verbose_level()>=DEBUG)
printf("my_malloc returning: %p\n",ptr+EXTRA_BYTE);
return (void *)(ptr + EXTRA_BYTE);
}
void *my_calloc(size_t count, size_t size, char *file, int line){
byte *ptr;
size_t full_size;
init_extra_data();
full_size = count * size + 2 * EXTRA_BYTE;
ptr = malloc(full_size);
bzero(ptr,full_size);
save_size(ptr, full_size);
if(get_verbose_level()>=DEBUG)
printf("my_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,ptr, file, line);
memcpy(ptr, extra_data, EXTRA_BYTE);
memcpy(ptr + full_size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
if(get_verbose_level()>=DEBUG)
printf("my_calloc returning: %p\n",ptr+EXTRA_BYTE);
return (void *)(ptr+EXTRA_BYTE);
}
void my_free(void *ptr){
byte *original_ptr = ((byte *)ptr) - EXTRA_BYTE;
size_t size;
if(!ptr)
return;
size = retreive_size(original_ptr);
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((get_verbose_level()>=ERROR))){
fprintf(stderr,"cannot find special string ***before*** %p!\n",ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if((bcmp(original_ptr + size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((get_verbose_level()>=ERROR))){
fprintf(stderr,"cannot find special string ***after*** %p!\n",ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if(get_verbose_level()>=DEBUG)
printf("my_free freeing: %p\n",original_ptr);
free(original_ptr);
}

Просмотреть файл

@ -0,0 +1,5 @@
#include <stdlib.h>
void *my_malloc(size_t size, char *, int);
void *my_calloc(size_t count, size_t size, char *, int);
void my_free(void *ptr);
void my_mem_check(void);

1368
ompi/mca/topo/treematch/treematch/tm_mapping.c Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,43 @@
#include "tm_tree.h"
#include "tm_hwloc.h"
#include "tm_timings.h"
#include "tm_verbose.h"
int build_comm(char *filename,double ***pcomm);
void TreeMatchMapping(int nb_obj, int nb_proc,double **comm_mat, double * obj_weigth, double *com_speed, int d, int *sol);
/*Map topology to cores:
sigma_i is such that process i is mapped on core sigma_i
k_i is such that core i executes process k_i
size of sigma is the number of processes (nb_objs)
size of k is the number of cores/nodes (nb_proc)
We must have number of processes <= number of cores
k_i =-1 if no process is mapped on core i
*/
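/* small example (hypothetical values): with nb_objs = 3 processes and
   nb_proc = 4 cores, sigma = {2, 0, 3} means process 0 runs on core 2,
   process 1 on core 0 and process 2 on core 3; the corresponding k is
   {1, -1, 0, 2} since core 1 executes no process */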
void map_topology_simple(tm_topology_t *topology,tree_t *comm_tree, int *sigma, int nb_processes, int *k);
int nb_processing_units(tm_topology_t *topology);
void free_topology(tm_topology_t *topology);
void display_other_heuristics(tm_topology_t *topology,int N,double **comm,int TGT_flag, int *constraints, double *cost);
void print_1D_tab(int *tab,int N);
void build_synthetic_proc_id(tm_topology_t *topology);
void display_topology(tm_topology_t *topology);
tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_node);
tm_topology_t *optimize_topology(tm_topology_t *topology);
double print_sol_inv(int N,int *Value,double **comm, double *cost, tm_topology_t *topology);
double print_sol(int N,int *Value,double **comm, double *cost, tm_topology_t *topology);
int build_binding_constraints(char *filename, int **ptab);
void canonize_constraints(tm_topology_t *topology, int *constraints, int **canonical, int n, int **perm, int *m);
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology);
void FREE_topology(tm_topology_t *);
/* used to split a constraint into sub-constraints according to the tree*/
typedef struct _constraint{
int *constraints; /* the subconstraints*/
int length; /*length of *constraints*/
int id; /* id of the corresponding subtree*/
}constraint_t;

198
ompi/mca/topo/treematch/treematch/tm_mt.c Обычный файл
Просмотреть файл

@ -0,0 +1,198 @@
/*
A C-program for MT19937, with improved initialization 2002/1/26.
This is an optimized version that amortizes the shift/reload cost,
by Eric Landry 2004-03-15.
Before using, initialize the state by using init_genrand(seed) or
init_by_array(init_key, key_length).
Copyright (C) 1997--2004, Makoto Matsumoto, Takuji Nishimura, and
Eric Landry; All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
3. The names of its contributors may not be used to endorse or
promote products derived from this software without specific
prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Any feedback is very welcome.
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
Reference: M. Matsumoto and T. Nishimura, "Mersenne Twister:
A 623-Dimensionally Equidistributed Uniform Pseudo-Random Number
Generator", ACM Transactions on Modeling and Computer Simulation,
Vol. 8, No. 1, January 1998, pp 3--30.
*/
#include "tm_mt.h"
/* Period parameters */
#define N 624
#define M 397
#define MATRIX_A 0x9908b0dfUL /* constant vector a */
#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
#define LOWER_MASK 0x7fffffffUL /* least significant r bits */
static unsigned long x[N]; /* the array for the state vector */
static unsigned long *p0, *p1, *pm;
/*
initialize with a seed
See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier.
In the previous versions, MSBs of the seed affect only MSBs of
the state.
2002-01-09 modified by Makoto Matsumoto
*/
void
init_genrand(unsigned long s)
{
int i;
x[0] = s & 0xffffffffUL;
for (i = 1; i < N; ++i) {
x[i] = (1812433253UL * (x[i - 1] ^ (x[i - 1] >> 30)) + i)
& 0xffffffffUL; /* for >32 bit machines */
}
p0 = x;
p1 = x + 1;
pm = x + M;
}
/*
initialize by an array with array-length
init_key is the array for initializing keys
key_length is its length
2004-02-26 slight change for C++
*/
void
init_by_array(unsigned long init_key[], int key_length)
{
int i, j, k;
init_genrand(19650218UL);
i = 1;
j = 0;
for (k = (N > key_length ? N : key_length); k; --k) {
/* non linear */
x[i] = ((x[i] ^ ((x[i - 1] ^ (x[i - 1] >> 30)) * 1664525UL))
+ init_key[j] + j) & 0xffffffffUL; /* for WORDSIZE > 32 machines */
if (++i >= N) {
x[0] = x[N - 1];
i = 1;
}
if (++j >= key_length) {
j = 0;
}
}
for (k = N - 1; k; --k) {
/* non linear */
x[i] = ((x[i] ^ ((x[i - 1] ^ (x[i - 1] >> 30)) * 1566083941UL)) - i)
& 0xffffffffUL; /* for WORDSIZE > 32 machines */
if (++i >= N) {
x[0] = x[N - 1];
i = 1;
}
}
x[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */
}
/* generates a random number on the interval [0,0xffffffff] */
unsigned long
genrand_int32(void)
{
unsigned long y;
if (!p0) {
/* Default seed */
init_genrand(5489UL);
}
/* Twisted feedback */
y = *p0 = *pm++ ^ (((*p0 & UPPER_MASK) | (*p1 & LOWER_MASK)) >> 1)
^ (-(*p1 & 1) & MATRIX_A);
p0 = p1++;
if (pm == x + N) {
pm = x;
}
if (p1 == x + N) {
p1 = x;
}
/* Temper */
y ^= y >> 11;
y ^= y << 7 & 0x9d2c5680UL;
y ^= y << 15 & 0xefc60000UL;
y ^= y >> 18;
return y;
}
/* generates a random number on the interval [0,0x7fffffff] */
long
genrand_int31(void)
{
return (long) (genrand_int32() >> 1);
}
/* generates a random number on the real interval [0,1] */
double
genrand_real1(void)
{
return genrand_int32() * (1.0 / 4294967295.0);
/* divided by 2^32-1 */
}
/* generates a random number on the real interval [0,1) */
double
genrand_real2(void)
{
return genrand_int32() * (1.0 / 4294967296.0);
/* divided by 2^32 */
}
/* generates a random number on the real interval (0,1) */
double
genrand_real3(void)
{
return (((double) genrand_int32()) + 0.5) * (1.0 / 4294967296.0);
/* divided by 2^32 */
}
/* generates a 53-bit random number on the real interval [0,1) */
double
genrand_res53(void)
{
unsigned long a = genrand_int32() >> 5, b = genrand_int32() >> 6;
return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
}
/* 2002-01-09 These real versions are due to Isaku Wada */

11
ompi/mca/topo/treematch/treematch/tm_mt.h Обычный файл
Просмотреть файл

@ -0,0 +1,11 @@
void init_genrand(unsigned long s);
void init_by_array(unsigned long init_key[], int key_length);
/* generates a random number on the interval [0,0xffffffff] */
unsigned long genrand_int32(void);
/* generates a random number on the interval [0,0x7fffffff] */
long genrand_int31(void);
double genrand_real1(void);
double genrand_real2(void);
double genrand_real3(void);
double genrand_res53(void);
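A minimal usage sketch (not part of the original sources; the helper names
random_index and example are hypothetical) showing how the rest of TreeMatch
drives this PRNG, e.g. to pick random indices in tm_bucket.c and
tm_kpartitioning.c:

#include "tm_mt.h"

/* draw a pseudo-random index in [0, n-1]; assumes n > 0 */
static int random_index(int n)
{
  return (int)(genrand_int32() % (unsigned long)n);
}

int example(void)
{
  init_genrand(5489UL); /* optional: genrand_int32() seeds itself otherwise */
  return random_index(10);
}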

Просмотреть файл

@ -0,0 +1,349 @@
#include <pthread.h>
#include "tm_thread_pool.h"
#include "tm_verbose.h"
#include <hwloc.h>
#include "tm_verbose.h"
#include "tm_tree.h"
#include <errno.h>
static int verbose_level = ERROR;
static thread_pool_t *pool = NULL;
static thread_pool_t *get_thread_pool(void);
static void execute_work(work_t *work);
static int bind_myself_to_core(hwloc_topology_t topology, int id);
static void *thread_loop(void *arg);
static void add_work(pthread_mutex_t *list_lock, pthread_cond_t *cond_var, work_t *working_list, work_t *work);
static thread_pool_t *create_threads(void);
static void f1 (int nb_args, void **args);
static void f2 (int nb_args, void **args);
static void destroy_work(work_t *work);
void f1 (int nb_args, void **args){
int a, b;
a = *(int*)args[0];
b = *(int*)args[1];
printf("nb_args=%d, a=%d, b=%d\n",nb_args,a,b);
}
void f2 (int nb_args, void **args){
int n, *tab;
int *res;
int i,j;
n = *(int*)args[0];
tab = (int*)args[1];
res=(int*)args[2];
for(j=0;j<1000000;j++){
*res=0;
for (i=0;i<n;i++)
*res+=tab[i];
}
printf("done: %d!\n",nb_args);
}
void execute_work(work_t *work){
work->task(work->nb_args, work->args);
}
int bind_myself_to_core(hwloc_topology_t topology, int id){
hwloc_cpuset_t cpuset;
hwloc_obj_t obj;
char *str;
int binding_res;
int depth = hwloc_topology_get_depth(topology);
/* printf("depth=%d\n",depth); */
/* Get my core. */
obj = hwloc_get_obj_by_depth(topology, depth-1, id);
if (obj) {
/* Get a copy of its cpuset that we may modify. */
cpuset = hwloc_bitmap_dup(obj->cpuset);
/* Get only one logical processor (in case the core is
SMT/hyperthreaded). */
hwloc_bitmap_singlify(cpuset);
/*hwloc_bitmap_asprintf(&str, cpuset);
printf("Binding thread %d to cpuset %s\n", id,str);
FREE(str);
*/
/* And try to bind ourself there. */
binding_res = hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_THREAD);
if (binding_res == -1){
int error = errno;
hwloc_bitmap_asprintf(&str, obj->cpuset);
if(verbose_level>=WARNING)
fprintf(stderr,"%d Couldn't bind to cpuset %s: %s\n", id, str, strerror(error));
FREE(str);
return 0;
}
/* FREE our cpuset copy */
hwloc_bitmap_free(cpuset);
return 1;
}else{
if(verbose_level>=WARNING)
fprintf(stderr,"No valid object for core id %d!\n",id);
return 0;
}
}
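/* Worker loop: each thread binds itself to core 'id', then waits on its
condition variable until a work item is linked after the head of its working
list. A work item whose task is NULL is the termination request; any other
item is executed, marked done, and its work_done condition is signaled. */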
void *thread_loop(void *arg){
local_thread_t *local=(local_thread_t*)arg;
int id = local->id;
hwloc_topology_t topology= local->topology;
work_t *start_working_list = local ->working_list;
pthread_cond_t *cond_var = local->cond_var;
pthread_mutex_t *list_lock = local->list_lock;
work_t *work;
int *ret = (int *)MALLOC(sizeof(int));
bind_myself_to_core(topology,id);
while(1){
pthread_mutex_lock(list_lock);
while(start_working_list->next == NULL) {
pthread_cond_wait(cond_var, list_lock);
}
work = start_working_list->next;
start_working_list->next = work-> next;
pthread_mutex_unlock(list_lock);
if(!work->task){
*ret = 0;
pthread_exit(ret);
}
execute_work(work);
pthread_mutex_lock(&work->mutex);
work->done=1;
pthread_mutex_unlock(&work->mutex);
pthread_cond_signal(&work->work_done);
}
}
void add_work(pthread_mutex_t *list_lock, pthread_cond_t *cond_var, work_t *working_list, work_t *work){
work_t *elem = working_list;
pthread_mutex_lock(list_lock);
while(elem->next!=NULL){
elem=elem->next;
}
elem->next=work;
work -> next = NULL;
work -> done = 0;
pthread_cond_signal(cond_var);
pthread_mutex_unlock(list_lock);
}
void wait_work_completion(work_t *work){
pthread_mutex_lock(&work->mutex);
while(!work->done)
pthread_cond_wait(&work->work_done, &work->mutex);
/* release the mutex so destroy_work() can later destroy it safely */
pthread_mutex_unlock(&work->mutex);
}
int submit_work(work_t *work, int thread_id){
if( (thread_id>=0) && (thread_id< pool->nb_threads)){
add_work(&pool->list_lock[thread_id], &pool->cond_var[thread_id], &pool->working_list[thread_id], work);
return 1;
}
return 0;
}
thread_pool_t *create_threads(){
hwloc_topology_t topology;
int i;
local_thread_t *local;
int nb_cores;
int depth;
verbose_level = get_verbose_level();
/*Get number of cores: set 1 thread per core*/
/* Allocate and initialize topology object. */
hwloc_topology_init(&topology);
/* Only keep relevant levels
hwloc_topology_ignore_all_keep_structure(topology);*/
/* Perform the topology detection. */
hwloc_topology_load(topology);
depth = hwloc_topology_get_depth(topology);
if (depth == -1 ) {
if(verbose_level>=CRITICAL)
fprintf(stderr,"Error: topology with unknown depth\n");
exit(-1);
}
/* at the deepest level (depth-1) the objects are PUs/cores on which we can execute things */
nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1);
pool = (thread_pool_t*) MALLOC(sizeof(thread_pool_t));
pool -> topology = topology;
pool -> nb_threads = nb_cores;
pool -> thread_list = (pthread_t*)MALLOC(sizeof(pthread_t)*nb_cores);
pool -> working_list = (work_t*)CALLOC(nb_cores,sizeof(work_t));
pool -> cond_var = (pthread_cond_t*)MALLOC(sizeof(pthread_cond_t)*nb_cores);
pool -> list_lock = (pthread_mutex_t*)MALLOC(sizeof(pthread_mutex_t)*nb_cores);
local=(local_thread_t*)MALLOC(sizeof(local_thread_t)*nb_cores);
pool->local = local;
for (i=0;i<nb_cores;i++){
local[i].topology = topology;
local[i].id = i;
local[i].working_list = &pool->working_list[i];
pthread_cond_init(pool->cond_var +i, NULL);
local[i].cond_var = pool->cond_var +i;
pthread_mutex_init(pool->list_lock +i, NULL);
local[i].list_lock = pool->list_lock+i;
if (pthread_create (pool->thread_list+i, NULL, thread_loop, local+i) < 0) {
if(verbose_level>=CRITICAL)
fprintf(stderr, "pthread_create error for exec thread %d\n",i);
return NULL;
}
}
return pool;
}
thread_pool_t *get_thread_pool(void){
if (pool == NULL)
return create_threads();
return pool;
}
void terminate_thread_pool(){
int id;
int *ret=NULL;
work_t work;
if(pool){
work.task=NULL;
for (id=0;id<pool->nb_threads;id++){
submit_work(&work,id);
}
for (id=0;id<pool->nb_threads;id++){
pthread_join(pool->thread_list[id],(void **) &ret);
pthread_cond_destroy(pool->cond_var +id);
pthread_mutex_destroy(pool->list_lock +id);
if (pool->working_list[id].next != NULL)
if(verbose_level >= WARNING)
fprintf(stderr,"Working list of thread %d not empty!\n",id);
}
hwloc_topology_destroy(pool->topology);
FREE(pool -> thread_list);
FREE(pool -> working_list);
FREE(pool -> cond_var);
FREE(pool -> list_lock);
FREE(pool -> local);
FREE(pool);
pool = NULL;
}
}
int get_nb_threads(){
pool = get_thread_pool();
return pool -> nb_threads;
}
work_t *create_work(int nb_args, void **args, void (*task) (int, void **)){
work_t *work;
work = MALLOC(sizeof(work_t));
work -> nb_args = nb_args;
work -> args = args;
work -> task = task;
work -> done = 0;
pthread_cond_init (&work->work_done, NULL);
pthread_mutex_init(&work->mutex, NULL);
if( verbose_level >= DEBUG)
printf("work %p created\n",(void *)work);
return work;
}
void destroy_work(work_t *work){
pthread_cond_destroy(&work->work_done);
pthread_mutex_destroy(&work->mutex);
FREE(work);
}
int test_main(void){
int a=3, c;
int b=-5;
void *args1[3];
void *args2[3];
int tab[100];
int i,res;
work_t *work1,*work2,*work3,*work4;
int nb_threads = get_nb_threads();
printf("nb_threads= %d\n", nb_threads);
args1[0] = &a;
args1[1] = &b;
work1 = create_work(2,args1,f1);
for (i=0;i<100;i++)
tab[i]=i;
c=100;
args2[0] = &c;
args2[1] = tab;
args2[2] = &res;
work2 = create_work(3, args2, f2);
work3 = create_work(4, args2, f2);
work4 = create_work(5, args2, f2);
submit_work(work1,0);
submit_work(work2,1);
submit_work(work3,1);
submit_work(work4,1);
terminate_thread_pool();
wait_work_completion(work1);
wait_work_completion(work2);
wait_work_completion(work3);
wait_work_completion(work4);
printf("res=%d\n",res);
destroy_work(work1);
destroy_work(work2);
destroy_work(work3);
destroy_work(work4);
return 0;
}

45
ompi/mca/topo/treematch/treematch/tm_thread_pool.h Normal file
View File

@ -0,0 +1,45 @@
#ifndef THREAD_POOL_H
#define THREAD_POOL_H
#include <pthread.h>
#include <hwloc.h>
typedef struct _work_t{
int nb_args;
void (*task)(int nb_args, void **args);
void **args;
struct _work_t *next;
pthread_cond_t work_done;
pthread_mutex_t mutex;
int done;
}work_t;
typedef struct {
int id;
hwloc_topology_t topology;
work_t *working_list;
pthread_cond_t *cond_var;
pthread_mutex_t *list_lock;
}local_thread_t;
typedef struct _thread_pool_t{
int nb_threads;
pthread_t *thread_list;
work_t *working_list;
pthread_cond_t *cond_var;
pthread_mutex_t *list_lock;
local_thread_t *local;
hwloc_topology_t topology;
}thread_pool_t;
int get_nb_threads(void);
int submit_work(work_t *work, int thread_id);
void wait_work_completion(work_t *work);
void terminate_thread_pool(void);
work_t *create_work(int nb_args, void **args, void (*task)(int, void **));
int test_main(void);
#endif /* THREAD_POOL_H */

32
ompi/mca/topo/treematch/treematch/tm_timings.c Normal file
View File

@ -0,0 +1,32 @@
#include "tm_timings.h"
static CLOCK_T time_tab[MAX_CLOCK];
static int clock_num = -1;
void get_time(void)
{
clock_num++;
if(clock_num>MAX_CLOCK-1)
return;
CLOCK(time_tab[clock_num]);
}
double time_diff(void)
{
CLOCK_T t2,t1;
if(clock_num>MAX_CLOCK-1){
clock_num--;
return -1.0;
}
if(clock_num < 0){
return -1.0;
}
CLOCK(t2);
t1=time_tab[clock_num--];
return CLOCK_DIFF(t2,t1);
}

47
ompi/mca/topo/treematch/treematch/tm_timings.h Normal file
View File

@ -0,0 +1,47 @@
#ifndef TIMINGS_H
#define TIMINGS_H
#include <stdio.h>
#ifndef _WIN32
#include <sys/time.h>
#else
#include <sys/timeb.h>
#endif
#include <stdlib.h>
#include <unistd.h>
#define MAX_CLOCK 1000
#ifndef _WIN32
typedef struct timeval CLOCK_T;
#define CLOCK(c) gettimeofday(&c,(struct timezone *)NULL)
#define CLOCK_DIFF(c1,c2) \
((double)(c1.tv_sec-c2.tv_sec)+(double)(c1.tv_usec-c2.tv_usec)/1e+6)
#define CLOCK_DISPLAY(c) fprintf(stderr,"%d.%d",(int)c.tv_sec,(int)c.tv_usec)
#else /* for windows */
#ifdef __CYGWIN__
typedef struct timeb CLOCK_T;
#else
typedef struct _timeb CLOCK_T;
#endif
#define CLOCK(c) _ftime(&c)
#define CLOCK_DIFF(c1,c2) \
((double)(c1.time-c2.time)+(double)(c1.millitm-c2.millitm)/1e+3)
#define CLOCK_DISPLAY(c) fprintf(stderr,"%d.%d",(int)c.time,(int)c.millitm*1e+3)
#endif
double time_diff(void);
void get_time(void);
#define TIC get_time()
#define TOC time_diff()
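/* Timestamps are kept on a small internal stack (see tm_timings.c), so TIC/TOC
pairs may be nested up to MAX_CLOCK levels. Typical use (do_work() standing for
any code to time):
TIC;
do_work();
printf("%f seconds\n", TOC);
*/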
#endif /*TIMINGS_H*/

1648
ompi/mca/topo/treematch/treematch/tm_tree.c Normal file

File diff suppressed because it is too large. Load Diff

94
ompi/mca/topo/treematch/treematch/tm_tree.h Normal file
View File

@ -0,0 +1,94 @@
#ifndef __TREE_H__
#define __TREE_H__
#include <stdlib.h>
typedef struct _node_info_t{
int submit_date;
int job_id;
int finish_date;
} job_info_t;
typedef struct _tree_t{
int constraint; /* tells if the tree has been constructed with constraints on the nodes or not. Useful for freeing it. Needs to be set on the root only */
struct _tree_t **child;
struct _tree_t *parent;
struct _tree_t *tab_child; /*the pointer to be freed*/
double val;
int arity;
int depth;
int id;
int uniq;
int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately*/
job_info_t *job_info;
}tree_t;
/* Maximum number of levels in the tree*/
#define MAX_LEVELS 100
typedef struct {
int *arity; /* arity of the nodes of each level*/
int nb_levels; /*number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
int *nb_nodes; /*nb of nodes of each level*/
int *nb_free_nodes; /*nb of available nodes of each level*/
int **node_id; /*ID of the nodes of the tree for each level*/
int **free_nodes; /*IDs of the free (available) nodes of the tree for each level*/
}tm_topology_t;
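/* Illustrative example: a machine made of 2 sockets with 4 cores each could be
described with nb_levels = 3 and nb_nodes = {1, 2, 8} (the root at level 0, the
sockets at level 1, the cores at level 2), with arity[0] = 2 and arity[1] = 4. */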
typedef struct {
double ** mat;
double * sum_row;
int order;
} affinity_mat_t;
tree_t * build_tree(double **tab,int N);
tree_t * build_tree_from_topology(tm_topology_t *topology,double **tab,int N, double *obj_weight, double *comm_speed);
void map_tree(tree_t *,tree_t*);
void display_tab(double **tab,int N);
double speed(int depth);
void set_node(tree_t *node,tree_t ** child, int arity,tree_t *parent,int id,double val,tree_t *deb_tab_child, int depth);
void free_constraint_tree(tree_t *tree);
void free_tree(tree_t *tree);
void free_tab_double(double**tab,int N);
void free_tab_int(int**tab,int N);
void update_val(affinity_mat_t *aff_mat,tree_t *parent);
void FREE_tree(tree_t *tree);
void FREE_tab_double(double**,int);
typedef struct _group_list_t{
struct _group_list_t *next;
tree_t **tab;
double val;
double sum_neighbour;
double wg;
}group_list_t;
typedef struct{
int i;
int j;
double val;
}adjacency_t;
/* for debugging malloc */
/* #define __DEBUG_MY_MALLOC__ */
#undef __DEBUG_MY_MALLOC__
#ifdef __DEBUG_MY_MALLOC__
#include "tm_malloc.h"
#define MALLOC(x) my_malloc(x,__FILE__,__LINE__)
#define CALLOC(x,y) my_calloc(x,y,__FILE__,__LINE__)
#define FREE my_free
#define MEM_CHECK my_mem_check
#else
#define MALLOC malloc
#define CALLOC calloc
#define FREE free
#define MEM_CHECK my_mem_check
#endif
#endif

11
ompi/mca/topo/treematch/treematch/tm_verbose.c Normal file
View File

@ -0,0 +1,11 @@
#include "tm_verbose.h"
static unsigned int verbose_level = ERROR;
void set_verbose_level(unsigned int level){
verbose_level = level;
}
unsigned int get_verbose_level(){
return verbose_level;
}

11
ompi/mca/topo/treematch/treematch/tm_verbose.h Normal file
View File

@ -0,0 +1,11 @@
#define NONE 0
#define CRITICAL 1
#define ERROR 2
#define WARNING 3
#define INFO 4
#define DEBUG 5
void set_verbose_level(unsigned int level);
unsigned int get_verbose_level(void);
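/* A message of severity S is emitted when get_verbose_level() >= S; e.g. after
set_verbose_level(INFO), CRITICAL, ERROR, WARNING and INFO messages are printed
but DEBUG ones are not. */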

905
ompi/mca/topo/treematch/treematch/uthash.h Normal file
View File

@ -0,0 +1,905 @@
/*
Copyright (c) 2003-2011, Troy D. Hanson http://uthash.sourceforge.net
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UTHASH_H
#define UTHASH_H
#include <string.h> /* memcmp,strlen */
#include <stddef.h> /* ptrdiff_t */
#include <stdlib.h> /* exit() */
/* These macros use decltype or the earlier __typeof GNU extension.
As decltype is only available in newer compilers (VS2010 or gcc 4.3+
when compiling c++ source) this code uses whatever method is needed
or, for VS2008 where neither is available, uses casting workarounds. */
#ifdef _MSC_VER /* MS compiler */
#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
#define DECLTYPE(x) (decltype(x))
#else /* VS2008 or older (or VS2010 in C mode) */
#define NO_DECLTYPE
#define DECLTYPE(x)
#endif
#else /* GNU, Sun and other compilers */
#define DECLTYPE(x) (__typeof(x))
#endif
#ifdef NO_DECLTYPE
#define DECLTYPE_ASSIGN(dst,src) \
do { \
char **_da_dst = (char**)(&(dst)); \
*_da_dst = (char*)(src); \
} while(0)
#else
#define DECLTYPE_ASSIGN(dst,src) \
do { \
(dst) = DECLTYPE(dst)(src); \
} while(0)
#endif
/* a number of the hash functions use uint32_t, which isn't defined on win32 */
#ifdef _MSC_VER
typedef unsigned int uint32_t;
typedef unsigned char uint8_t;
#else
#include <inttypes.h> /* uint32_t */
#endif
#define UTHASH_VERSION 1.9.4
#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */
#define uthash_malloc(sz) malloc(sz) /* malloc fcn */
#define uthash_free(ptr,sz) free(ptr) /* free fcn */
#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */
#define uthash_expand_fyi(tbl) /* can be defined to log expands */
/* initial number of buckets */
#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */
#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */
#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */
/* calculate the element whose hash handle address is hhp */
#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
#define HASH_FIND(hh,head,keyptr,keylen,out) \
do { \
unsigned _hf_bkt,_hf_hashv; \
out=NULL; \
if (head) { \
HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
keyptr,keylen,out); \
} \
} \
} while (0)
#ifdef HASH_BLOOM
#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0)
#define HASH_BLOOM_MAKE(tbl) \
do { \
(tbl)->bloom_nbits = HASH_BLOOM; \
(tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
(tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
} while (0);
#define HASH_BLOOM_FREE(tbl) \
do { \
uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
} while (0);
#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
#define HASH_BLOOM_ADD(tbl,hashv) \
HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
#define HASH_BLOOM_TEST(tbl,hashv) \
HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
#else
#define HASH_BLOOM_MAKE(tbl)
#define HASH_BLOOM_FREE(tbl)
#define HASH_BLOOM_ADD(tbl,hashv)
#define HASH_BLOOM_TEST(tbl,hashv) (1)
#endif
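/* For example, compiling with -DHASH_BLOOM=16 attaches a 2^16-bit (8 KB) bloom
 * filter to each hash table; HASH_FIND can then skip the bucket search whenever
 * the filter proves a key is absent. */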
#define HASH_MAKE_TABLE(hh,head) \
do { \
(head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
sizeof(UT_hash_table)); \
if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
(head)->hh.tbl->tail = &((head)->hh); \
(head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
(head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
(head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
(head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
memset((head)->hh.tbl->buckets, 0, \
HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
HASH_BLOOM_MAKE((head)->hh.tbl); \
(head)->hh.tbl->signature = HASH_SIGNATURE; \
} while(0)
#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
do { \
unsigned _ha_bkt; \
(add)->hh.next = NULL; \
(add)->hh.key = (char*)keyptr; \
(add)->hh.keylen = keylen_in; \
if (!(head)) { \
head = (add); \
(head)->hh.prev = NULL; \
HASH_MAKE_TABLE(hh,head); \
} else { \
(head)->hh.tbl->tail->next = (add); \
(add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
(head)->hh.tbl->tail = &((add)->hh); \
} \
(head)->hh.tbl->num_items++; \
(add)->hh.tbl = (head)->hh.tbl; \
HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
(add)->hh.hashv, _ha_bkt); \
HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
HASH_FSCK(hh,head); \
} while(0)
#define HASH_TO_BKT( hashv, num_bkts, bkt ) \
do { \
bkt = ((hashv) & ((num_bkts) - 1)); \
} while(0)
/* delete "delptr" from the hash table.
* "the usual" patch-up process for the app-order doubly-linked-list.
* The use of _hd_hh_del below deserves special explanation.
* These used to be expressed using (delptr) but that led to a bug
* if someone used the same symbol for the head and deletee, like
* HASH_DELETE(hh,users,users);
* We want that to work, but by changing the head (users) below
* we were forfeiting our ability to further refer to the deletee (users)
* in the patch-up process. Solution: use scratch space to
* copy the deletee pointer, then the latter references are via that
* scratch pointer rather than through the repointed (users) symbol.
*/
#define HASH_DELETE(hh,head,delptr) \
do { \
unsigned _hd_bkt; \
struct UT_hash_handle *_hd_hh_del; \
if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
uthash_free((head)->hh.tbl->buckets, \
(head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
HASH_BLOOM_FREE((head)->hh.tbl); \
uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
head = NULL; \
} else { \
_hd_hh_del = &((delptr)->hh); \
if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
(head)->hh.tbl->tail = \
(UT_hash_handle*)((char*)((delptr)->hh.prev) + \
(head)->hh.tbl->hho); \
} \
if ((delptr)->hh.prev) { \
((UT_hash_handle*)((char*)((delptr)->hh.prev) + \
(head)->hh.tbl->hho))->next = (delptr)->hh.next; \
} else { \
DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
} \
if (_hd_hh_del->next) { \
((UT_hash_handle*)((char*)_hd_hh_del->next + \
(head)->hh.tbl->hho))->prev = \
_hd_hh_del->prev; \
} \
HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
(head)->hh.tbl->num_items--; \
} \
HASH_FSCK(hh,head); \
} while (0)
/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
#define HASH_FIND_STR(head,findstr,out) \
HASH_FIND(hh,head,findstr,strlen(findstr),out)
#define HASH_ADD_STR(head,strfield,add) \
HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
#define HASH_FIND_INT(head,findint,out) \
HASH_FIND(hh,head,findint,sizeof(int),out)
#define HASH_ADD_INT(head,intfield,add) \
HASH_ADD(hh,head,intfield,sizeof(int),add)
#define HASH_FIND_PTR(head,findptr,out) \
HASH_FIND(hh,head,findptr,sizeof(void *),out)
#define HASH_ADD_PTR(head,ptrfield,add) \
HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
#define HASH_DEL(head,delptr) \
HASH_DELETE(hh,head,delptr)
/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
* This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
*/
#ifdef HASH_DEBUG
#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
#define HASH_FSCK(hh,head) \
do { \
unsigned _bkt_i; \
unsigned _count, _bkt_count; \
char *_prev; \
struct UT_hash_handle *_thh; \
if (head) { \
_count = 0; \
for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
_bkt_count = 0; \
_thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
_prev = NULL; \
while (_thh) { \
if (_prev != (char*)(_thh->hh_prev)) { \
HASH_OOPS("invalid hh_prev %p, actual %p\n", \
_thh->hh_prev, _prev ); \
} \
_bkt_count++; \
_prev = (char*)(_thh); \
_thh = _thh->hh_next; \
} \
_count += _bkt_count; \
if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
HASH_OOPS("invalid bucket count %d, actual %d\n", \
(head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
} \
} \
if (_count != (head)->hh.tbl->num_items) { \
HASH_OOPS("invalid hh item count %d, actual %d\n", \
(head)->hh.tbl->num_items, _count ); \
} \
/* traverse hh in app order; check next/prev integrity, count */ \
_count = 0; \
_prev = NULL; \
_thh = &(head)->hh; \
while (_thh) { \
_count++; \
if (_prev !=(char*)(_thh->prev)) { \
HASH_OOPS("invalid prev %p, actual %p\n", \
_thh->prev, _prev ); \
} \
_prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
_thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
(head)->hh.tbl->hho) : NULL ); \
} \
if (_count != (head)->hh.tbl->num_items) { \
HASH_OOPS("invalid app item count %d, actual %d\n", \
(head)->hh.tbl->num_items, _count ); \
} \
} \
} while (0)
#else
#define HASH_FSCK(hh,head)
#endif
/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
* the descriptor to which this macro is defined for tuning the hash function.
* The app can #include <unistd.h> to get the prototype for write(2). */
#ifdef HASH_EMIT_KEYS
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
do { \
unsigned _klen = fieldlen; \
write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
write(HASH_EMIT_KEYS, keyptr, fieldlen); \
} while (0)
#else
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#endif
/* default to Jenkins's hash unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */
#ifdef HASH_FUNCTION
#define HASH_FCN HASH_FUNCTION
#else
#define HASH_FCN HASH_JEN
#endif
/* The Bernstein hash function, used in Perl prior to v5.6 */
#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
do { \
unsigned _hb_keylen=keylen; \
char *_hb_key=(char*)(key); \
(hashv) = 0; \
while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \
bkt = (hashv) & (num_bkts-1); \
} while (0)
/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
* http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
do { \
unsigned _sx_i; \
char *_hs_key=(char*)(key); \
hashv = 0; \
for(_sx_i=0; _sx_i < keylen; _sx_i++) \
hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
bkt = hashv & (num_bkts-1); \
} while (0)
#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
do { \
unsigned _fn_i; \
char *_hf_key=(char*)(key); \
hashv = 2166136261UL; \
for(_fn_i=0; _fn_i < keylen; _fn_i++) \
hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
bkt = hashv & (num_bkts-1); \
} while(0);
#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
do { \
unsigned _ho_i; \
char *_ho_key=(char*)(key); \
hashv = 0; \
for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
hashv += _ho_key[_ho_i]; \
hashv += (hashv << 10); \
hashv ^= (hashv >> 6); \
} \
hashv += (hashv << 3); \
hashv ^= (hashv >> 11); \
hashv += (hashv << 15); \
bkt = hashv & (num_bkts-1); \
} while(0)
#define HASH_JEN_MIX(a,b,c) \
do { \
a -= b; a -= c; a ^= ( c >> 13 ); \
b -= c; b -= a; b ^= ( a << 8 ); \
c -= a; c -= b; c ^= ( b >> 13 ); \
a -= b; a -= c; a ^= ( c >> 12 ); \
b -= c; b -= a; b ^= ( a << 16 ); \
c -= a; c -= b; c ^= ( b >> 5 ); \
a -= b; a -= c; a ^= ( c >> 3 ); \
b -= c; b -= a; b ^= ( a << 10 ); \
c -= a; c -= b; c ^= ( b >> 15 ); \
} while (0)
#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
do { \
unsigned _hj_i,_hj_j,_hj_k; \
char *_hj_key=(char*)(key); \
hashv = 0xfeedbeef; \
_hj_i = _hj_j = 0x9e3779b9; \
_hj_k = keylen; \
while (_hj_k >= 12) { \
_hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
+ ( (unsigned)_hj_key[2] << 16 ) \
+ ( (unsigned)_hj_key[3] << 24 ) ); \
_hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
+ ( (unsigned)_hj_key[6] << 16 ) \
+ ( (unsigned)_hj_key[7] << 24 ) ); \
hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
+ ( (unsigned)_hj_key[10] << 16 ) \
+ ( (unsigned)_hj_key[11] << 24 ) ); \
\
HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
\
_hj_key += 12; \
_hj_k -= 12; \
} \
hashv += keylen; \
switch ( _hj_k ) { \
case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \
case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \
case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \
case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
case 5: _hj_j += _hj_key[4]; \
case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
case 1: _hj_i += _hj_key[0]; \
} \
HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
bkt = hashv & (num_bkts-1); \
} while(0)
/* The Paul Hsieh hash function */
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
|| defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif
#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
+(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
do { \
char *_sfh_key=(char*)(key); \
uint32_t _sfh_tmp, _sfh_len = keylen; \
\
int _sfh_rem = _sfh_len & 3; \
_sfh_len >>= 2; \
hashv = 0xcafebabe; \
\
/* Main loop */ \
for (;_sfh_len > 0; _sfh_len--) { \
hashv += get16bits (_sfh_key); \
_sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \
hashv = (hashv << 16) ^ _sfh_tmp; \
_sfh_key += 2*sizeof (uint16_t); \
hashv += hashv >> 11; \
} \
\
/* Handle end cases */ \
switch (_sfh_rem) { \
case 3: hashv += get16bits (_sfh_key); \
hashv ^= hashv << 16; \
hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \
hashv += hashv >> 11; \
break; \
case 2: hashv += get16bits (_sfh_key); \
hashv ^= hashv << 11; \
hashv += hashv >> 17; \
break; \
case 1: hashv += *_sfh_key; \
hashv ^= hashv << 10; \
hashv += hashv >> 1; \
} \
\
/* Force "avalanching" of final 127 bits */ \
hashv ^= hashv << 3; \
hashv += hashv >> 5; \
hashv ^= hashv << 4; \
hashv += hashv >> 17; \
hashv ^= hashv << 25; \
hashv += hashv >> 6; \
bkt = hashv & (num_bkts-1); \
} while(0);
#ifdef HASH_USING_NO_STRICT_ALIASING
/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
* For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
* MurmurHash uses the faster approach only on CPU's where we know it's safe.
*
* Note the preprocessor built-in defines can be emitted using:
*
* gcc -m64 -dM -E - < /dev/null (on gcc)
* cc -## a.c (where a.c is a simple test file) (Sun Studio)
*/
#if (defined(__i386__) || defined(__x86_64__))
#define MUR_GETBLOCK(p,i) p[i]
#else /* non intel */
#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1)
#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2)
#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3)
#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8))
#else /* assume little endian non-intel */
#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8))
#endif
#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
(MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
(MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
MUR_ONE_THREE(p))))
#endif
#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#define MUR_FMIX(_h) \
do { \
_h ^= _h >> 16; \
_h *= 0x85ebca6b; \
_h ^= _h >> 13; \
_h *= 0xc2b2ae35l; \
_h ^= _h >> 16; \
} while(0)
#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
do { \
const uint8_t *_mur_data = (const uint8_t*)(key); \
const int _mur_nblocks = (keylen) / 4; \
uint32_t _mur_h1 = 0xf88D5353; \
uint32_t _mur_c1 = 0xcc9e2d51; \
uint32_t _mur_c2 = 0x1b873593; \
const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \
int _mur_i; \
for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \
uint32_t _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
_mur_k1 *= _mur_c1; \
_mur_k1 = MUR_ROTL32(_mur_k1,15); \
_mur_k1 *= _mur_c2; \
\
_mur_h1 ^= _mur_k1; \
_mur_h1 = MUR_ROTL32(_mur_h1,13); \
_mur_h1 = _mur_h1*5+0xe6546b64; \
} \
const uint8_t *_mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \
uint32_t _mur_k1=0; \
switch((keylen) & 3) { \
case 3: _mur_k1 ^= _mur_tail[2] << 16; \
case 2: _mur_k1 ^= _mur_tail[1] << 8; \
case 1: _mur_k1 ^= _mur_tail[0]; \
_mur_k1 *= _mur_c1; \
_mur_k1 = MUR_ROTL32(_mur_k1,15); \
_mur_k1 *= _mur_c2; \
_mur_h1 ^= _mur_k1; \
} \
_mur_h1 ^= (keylen); \
MUR_FMIX(_mur_h1); \
hashv = _mur_h1; \
bkt = hashv & (num_bkts-1); \
} while(0)
#endif /* HASH_USING_NO_STRICT_ALIASING */
/* key comparison function; return 0 if keys equal */
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
/* iterate over items in a known bucket to find desired item */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
do { \
if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
else out=NULL; \
while (out) { \
if (out->hh.keylen == keylen_in) { \
if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break; \
} \
if (out->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,out->hh.hh_next)); \
else out = NULL; \
} \
} while(0)
/* add an item to a bucket */
#define HASH_ADD_TO_BKT(head,addhh) \
do { \
head.count++; \
(addhh)->hh_next = head.hh_head; \
(addhh)->hh_prev = NULL; \
if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
(head).hh_head=addhh; \
if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
&& (addhh)->tbl->noexpand != 1) { \
HASH_EXPAND_BUCKETS((addhh)->tbl); \
} \
} while(0)
/* remove an item from a given bucket */
#define HASH_DEL_IN_BKT(hh,head,hh_del) \
(head).count--; \
if ((head).hh_head == hh_del) { \
(head).hh_head = hh_del->hh_next; \
} \
if (hh_del->hh_prev) { \
hh_del->hh_prev->hh_next = hh_del->hh_next; \
} \
if (hh_del->hh_next) { \
hh_del->hh_next->hh_prev = hh_del->hh_prev; \
}
/* Bucket expansion has the effect of doubling the number of buckets
* and redistributing the items into the new buckets. Ideally the
* items will distribute more or less evenly into the new buckets
* (the extent to which this is true is a measure of the quality of
* the hash function as it applies to the key domain).
*
* With the items distributed into more buckets, the chain length
* (item count) in each bucket is reduced. Thus by expanding buckets
* the hash keeps a bound on the chain length. This bounded chain
* length is the essence of how a hash provides constant time lookup.
*
* The calculation of tbl->ideal_chain_maxlen below deserves some
* explanation. First, keep in mind that we're calculating the ideal
* maximum chain length based on the *new* (doubled) bucket count.
* In fractions this is just n/b (n=number of items,b=new num buckets).
* Since the ideal chain length is an integer, we want to calculate
* ceil(n/b). We don't depend on floating point arithmetic in this
* hash, so to calculate ceil(n/b) with integers we could write
*
* ceil(n/b) = (n/b) + ((n%b)?1:0)
*
* and in fact a previous version of this hash did just that.
* But now we have improved things a bit by recognizing that b is
* always a power of two. We keep its base 2 log handy (call it lb),
* so now we can write this with a bit shift and logical AND:
*
* ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
*
*/
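/* Worked example: with tbl->num_items = 37 and 16 buckets after doubling
 * (so log2_num_buckets+1 = 4), ideal_chain_maxlen = (37>>4) + ((37 & 15) ? 1 : 0)
 * = 2 + 1 = 3, which is indeed ceil(37/16). */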
#define HASH_EXPAND_BUCKETS(tbl) \
do { \
unsigned _he_bkt; \
unsigned _he_bkt_i; \
struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
_he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
memset(_he_new_buckets, 0, \
2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
tbl->ideal_chain_maxlen = \
(tbl->num_items >> (tbl->log2_num_buckets+1)) + \
((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
tbl->nonideal_items = 0; \
for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
{ \
_he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
while (_he_thh) { \
_he_hh_nxt = _he_thh->hh_next; \
HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
_he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
tbl->nonideal_items++; \
_he_newbkt->expand_mult = _he_newbkt->count / \
tbl->ideal_chain_maxlen; \
} \
_he_thh->hh_prev = NULL; \
_he_thh->hh_next = _he_newbkt->hh_head; \
if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
_he_thh; \
_he_newbkt->hh_head = _he_thh; \
_he_thh = _he_hh_nxt; \
} \
} \
uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
tbl->num_buckets *= 2; \
tbl->log2_num_buckets++; \
tbl->buckets = _he_new_buckets; \
tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
(tbl->ineff_expands+1) : 0; \
if (tbl->ineff_expands > 1) { \
tbl->noexpand=1; \
uthash_noexpand_fyi(tbl); \
} \
uthash_expand_fyi(tbl); \
} while(0)
/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
/* Note that HASH_SORT assumes the hash handle name to be hh.
* HASH_SRT was added to allow the hash handle name to be passed in. */
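/* Illustrative use (user_t and by_id are hypothetical):
 *   int by_id(user_t *a, user_t *b) { return (a->id > b->id) - (a->id < b->id); }
 *   HASH_SORT(users, by_id);
 */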
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
#define HASH_SRT(hh,head,cmpfcn) \
do { \
unsigned _hs_i; \
unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
if (head) { \
_hs_insize = 1; \
_hs_looping = 1; \
_hs_list = &((head)->hh); \
while (_hs_looping) { \
_hs_p = _hs_list; \
_hs_list = NULL; \
_hs_tail = NULL; \
_hs_nmerges = 0; \
while (_hs_p) { \
_hs_nmerges++; \
_hs_q = _hs_p; \
_hs_psize = 0; \
for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
_hs_psize++; \
_hs_q = (UT_hash_handle*)((_hs_q->next) ? \
((void*)((char*)(_hs_q->next) + \
(head)->hh.tbl->hho)) : NULL); \
if (! (_hs_q) ) break; \
} \
_hs_qsize = _hs_insize; \
while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
if (_hs_psize == 0) { \
_hs_e = _hs_q; \
_hs_q = (UT_hash_handle*)((_hs_q->next) ? \
((void*)((char*)(_hs_q->next) + \
(head)->hh.tbl->hho)) : NULL); \
_hs_qsize--; \
} else if ( (_hs_qsize == 0) || !(_hs_q) ) { \
_hs_e = _hs_p; \
_hs_p = (UT_hash_handle*)((_hs_p->next) ? \
((void*)((char*)(_hs_p->next) + \
(head)->hh.tbl->hho)) : NULL); \
_hs_psize--; \
} else if (( \
cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
) <= 0) { \
_hs_e = _hs_p; \
_hs_p = (UT_hash_handle*)((_hs_p->next) ? \
((void*)((char*)(_hs_p->next) + \
(head)->hh.tbl->hho)) : NULL); \
_hs_psize--; \
} else { \
_hs_e = _hs_q; \
_hs_q = (UT_hash_handle*)((_hs_q->next) ? \
((void*)((char*)(_hs_q->next) + \
(head)->hh.tbl->hho)) : NULL); \
_hs_qsize--; \
} \
if ( _hs_tail ) { \
_hs_tail->next = ((_hs_e) ? \
ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
} else { \
_hs_list = _hs_e; \
} \
_hs_e->prev = ((_hs_tail) ? \
ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
_hs_tail = _hs_e; \
} \
_hs_p = _hs_q; \
} \
_hs_tail->next = NULL; \
if ( _hs_nmerges <= 1 ) { \
_hs_looping=0; \
(head)->hh.tbl->tail = _hs_tail; \
DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
} \
_hs_insize *= 2; \
} \
HASH_FSCK(hh,head); \
} \
} while (0)
/* This function selects items from one hash into another hash.
* The end result is that the selected items have dual presence
* in both hashes. There is no copy of the items made; rather
 * they are added into the new hash through a secondary hash
 * handle that must be present in the structure. */
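/* Illustrative use (item_t, items, selected and is_even are hypothetical): an
 * item_t carrying two handles, hh and hh2, can sit in both hashes at once:
 *   HASH_SELECT(hh2, selected, hh, items, is_even);
 * where int is_even(void *elt) returns non-zero for the items to select. */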
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
do { \
unsigned _src_bkt, _dst_bkt; \
void *_last_elt=NULL, *_elt; \
UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
if (src) { \
for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
_src_hh; \
_src_hh = _src_hh->hh_next) { \
_elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
if (cond(_elt)) { \
_dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
_dst_hh->key = _src_hh->key; \
_dst_hh->keylen = _src_hh->keylen; \
_dst_hh->hashv = _src_hh->hashv; \
_dst_hh->prev = _last_elt; \
_dst_hh->next = NULL; \
if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
if (!dst) { \
DECLTYPE_ASSIGN(dst,_elt); \
HASH_MAKE_TABLE(hh_dst,dst); \
} else { \
_dst_hh->tbl = (dst)->hh_dst.tbl; \
} \
HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
(dst)->hh_dst.tbl->num_items++; \
_last_elt = _elt; \
_last_elt_hh = _dst_hh; \
} \
} \
} \
} \
HASH_FSCK(hh_dst,dst); \
} while (0)
#define HASH_CLEAR(hh,head) \
do { \
if (head) { \
uthash_free((head)->hh.tbl->buckets, \
(head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
HASH_BLOOM_FREE((head)->hh.tbl); \
uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
(head)=NULL; \
} \
} while(0)
#ifdef NO_DECLTYPE
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
#else
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
#endif
/* obtain a count of items in the hash */
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
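/* Illustrative deletion-safe traversal (item_t, head, cur and tmp are hypothetical):
 *   item_t *cur, *tmp;
 *   HASH_ITER(hh, head, cur, tmp) { HASH_DEL(head, cur); free(cur); }
 * HASH_ITER keeps a lookahead pointer (tmp), so the current item may be deleted. */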
typedef struct UT_hash_bucket {
struct UT_hash_handle *hh_head;
unsigned count;
/* expand_mult is normally set to 0. In this situation, the max chain length
* threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
* the bucket's chain exceeds this length, bucket expansion is triggered).
* However, setting expand_mult to a non-zero value delays bucket expansion
* (that would be triggered by additions to this particular bucket)
* until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
* (The multiplier is simply expand_mult+1). The whole idea of this
* multiplier is to reduce bucket expansions, since they are expensive, in
* situations where we know that a particular bucket tends to be overused.
* It is better to let its chain length grow to a longer yet-still-bounded
* value, than to do an O(n) bucket expansion too often.
*/
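/* e.g. with expand_mult == 2 an expansion for this bucket is not triggered
 * until its chain reaches 3*HASH_BKT_CAPACITY_THRESH (30) entries. */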
unsigned expand_mult;
} UT_hash_bucket;
/* random signature used only to find hash tables in external analysis */
#define HASH_SIGNATURE 0xa0111fe1
#define HASH_BLOOM_SIGNATURE 0xb12220f2
typedef struct UT_hash_table {
UT_hash_bucket *buckets;
unsigned num_buckets, log2_num_buckets;
unsigned num_items;
struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */
/* in an ideal situation (all buckets used equally), no bucket would have
* more than ceil(#items/#buckets) items. that's the ideal chain length. */
unsigned ideal_chain_maxlen;
/* nonideal_items is the number of items in the hash whose chain position
* exceeds the ideal chain maxlen. these items pay the penalty for an uneven
* hash distribution; reaching them in a chain traversal takes >ideal steps */
unsigned nonideal_items;
/* ineffective expands occur when a bucket doubling was performed, but
* afterward, more than half the items in the hash had nonideal chain
* positions. If this happens on two consecutive expansions we inhibit any
* further expansion, as it's not helping; this happens when the hash
* function isn't a good fit for the key domain. When expansion is inhibited
* the hash will still work, albeit no longer in constant time. */
unsigned ineff_expands, noexpand;
uint32_t signature; /* used only to find hash tables in external analysis */
#ifdef HASH_BLOOM
uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
uint8_t *bloom_bv;
char bloom_nbits;
#endif
} UT_hash_table;
typedef struct UT_hash_handle {
struct UT_hash_table *tbl;
void *prev; /* prev element in app order */
void *next; /* next element in app order */
struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
struct UT_hash_handle *hh_next; /* next hh in bucket order */
void *key; /* ptr to enclosing struct's key */
unsigned keylen; /* enclosing struct's key len */
unsigned hashv; /* result of hash-fcn(key) */
} UT_hash_handle;
#endif /* UTHASH_H */