
Merge pull request #1495 from hjelmn/new_hooks

Add new patcher memory hooks
Nathan Hjelm 2016-04-13 18:19:23 -06:00
commit 1e6b4f2f55
74 changed files with 2356 additions and 11349 deletions


@ -283,7 +283,8 @@ else
OPAL_ENABLE_DLOPEN_SUPPORT=1
AC_MSG_RESULT([yes])
fi
AC_DEFINE_UNQUOTED(OPAL_ENABLE_DLOPEN_SUPPORT, $OPAL_ENABLE_DLOPEN_SUPPORT,
[Whether we want to enable dlopen support])
#
# Heterogeneous support


@ -1,4 +1,3 @@
enable_dlopen=no
enable_mem_profile=no
enable_binaries=yes
enable_heterogeneous=no
@ -33,8 +32,5 @@ enable_mca_direct=pml-ob1
# enable development headers
with_devel_headers=yes
# enable ptmalloc (enables lazy deregistration)
with_memory_manager=linux
# disable valgrind
with_valgrind=no


@ -1,5 +1,3 @@
enable_dlopen=no
enable_mem_profile=no
enable_binaries=yes
@ -40,8 +38,5 @@ enable_mca_direct=pml-ob1
# enable development headers
with_devel_headers=yes
# enable ptmalloc (enables lazy deregistration)
with_memory_manager=linux
# disable valgrind
with_valgrind=no


@ -1,7 +1,6 @@
# (c) 2013 Los Alamos National Security, LLC. All rights reserved.
# Open MPI common configuration for TOSS/TOSS2 v1.7.x/1.8.x
enable_dlopen=no
enable_binaries=yes
enable_heterogeneous=no
enable_shared=yes


@ -98,15 +98,13 @@
#endif
#include "ompi/runtime/ompi_cr.h"
#if defined(MEMORY_LINUX_PTMALLOC2) && MEMORY_LINUX_PTMALLOC2
#include "opal/mca/memory/linux/memory_linux.h"
#include "opal/mca/memory/base/base.h"
/* So this sucks, but with OPAL in its own library that is brought in
implicitly from libmpi, there are times when the malloc initialize
hook in the memory component doesn't work. So we have to do it
from here, since any MPI code is going to call MPI_Init... */
OPAL_DECLSPEC void (*__malloc_initialize_hook) (void) =
opal_memory_linux_malloc_init_hook;
#endif /* defined(MEMORY_LINUX_PTMALLOC2) && MEMORY_LINUX_PTMALLOC2 */
opal_memory_base_malloc_init_hook;
/* This is required for the boundaries of the hash tables used to store
* the F90 types returned by the MPI_Type_create_f90_XXX functions.


@ -4,3 +4,4 @@ opal_show_help_yyleng
opal_show_help_yytext
opal_util_keyval_yyleng
opal_util_keyval_yytext
__curbrk


@ -2562,39 +2562,6 @@ btl_openib_component_init(int *num_btl_modules,
goto no_btls;
}
/* If we are using ptmalloc2 and there are no posix threads
available, this will cause memory corruption. Refuse to run.
Right now, ptmalloc2 is the only memory manager that we have on
OS's that support OpenFabrics that provide both FREE and MUNMAP
support, so the following test is [currently] good enough... */
value = opal_mem_hooks_support_level();
/* If we have a memory manager available and
opal_leave_pinned==-1, then unless the user explicitly set
opal_leave_pinned_pipeline==0, set opal_leave_pinned to 1.
We have a memory manager if we have both FREE and MUNMAP
support */
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
if (0 == opal_leave_pinned_pipeline &&
-1 == opal_leave_pinned) {
opal_leave_pinned = 1;
}
} else {
opal_leave_pinned = 0;
opal_leave_pinned_pipeline = 0;
}
#if OPAL_CUDA_SUPPORT
if (mca_btl_openib_component.cuda_want_gdr && (0 == opal_leave_pinned)) {
opal_show_help("help-mpi-btl-openib.txt",
"CUDA_gdr_and_nopinned", true,
opal_process_info.nodename);
goto no_btls;
}
#endif /* OPAL_CUDA_SUPPORT */
index = mca_base_var_find("ompi", "btl", "openib", "max_inline_data");
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_var_get_value(index, NULL, &source, NULL)) {
@ -2931,6 +2898,22 @@ btl_openib_component_init(int *num_btl_modules,
mca_btl_openib_component.if_exclude_list = NULL;
}
/* If we are using ptmalloc2 and there are no posix threads
available, this will cause memory corruption. Refuse to run.
Right now, ptmalloc2 is the only memory manager that we have on
OS's that support OpenFabrics that provide both FREE and MUNMAP
support, so the following test is [currently] good enough... */
value = opal_mem_hooks_support_level();
#if OPAL_CUDA_SUPPORT
if (mca_btl_openib_component.cuda_want_gdr && (0 == opal_leave_pinned)) {
opal_show_help("help-mpi-btl-openib.txt",
"CUDA_gdr_and_nopinned", true,
opal_process_info.nodename);
goto no_btls;
}
#endif /* OPAL_CUDA_SUPPORT */
mca_btl_openib_component.memory_registration_verbose = opal_output_open(NULL);
opal_output_set_verbosity (mca_btl_openib_component.memory_registration_verbose,
mca_btl_openib_component.memory_registration_verbose_level);


@ -331,26 +331,6 @@ btl_ugni_component_close(void)
return OPAL_SUCCESS;
}
static void mca_btl_ugni_autoset_leave_pinned (void) {
if (MCA_BTL_UGNI_RCACHE_UDREG != mca_btl_ugni_component.rcache_type) {
int value = opal_mem_hooks_support_level();
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
/* Set leave pinned to 1 if leave pinned pipeline is not set */
if (-1 == opal_leave_pinned) {
opal_leave_pinned = !opal_leave_pinned_pipeline;
}
} else {
opal_leave_pinned = 0;
opal_leave_pinned_pipeline = 0;
}
} else if (-1 == opal_leave_pinned) {
/* if udreg is in use we can set leave pinned without checking for the
* memory hooks. */
opal_leave_pinned = !opal_leave_pinned_pipeline;
}
}
static mca_btl_base_module_t **
mca_btl_ugni_component_init (int *num_btl_modules,
bool enable_progress_threads,
@ -409,8 +389,6 @@ mca_btl_ugni_component_init (int *num_btl_modules,
}
}
mca_btl_ugni_autoset_leave_pinned ();
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit;
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {


@ -32,5 +32,7 @@ BEGIN_C_DECLS
*/
OPAL_DECLSPEC extern mca_base_framework_t opal_memory_base_framework;
OPAL_DECLSPEC void opal_memory_base_malloc_init_hook (void);
END_C_DECLS
#endif /* OPAL_BASE_MEMORY_H */


@ -13,6 +13,8 @@
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -43,20 +45,22 @@ static int empty_process(void)
return OPAL_SUCCESS;
}
static int empty_query (int *priority)
{
*priority = 0;
return OPAL_SUCCESS;
}
/*
* Local variables
*/
static opal_memory_base_component_2_0_0_t empty_component = {
/* Don't care about the version info */
{ 0, },
/* Don't care about the data */
{ 0, },
/* Empty / safe functions to call if no memory component is selected */
empty_process,
opal_memory_base_component_register_empty,
opal_memory_base_component_deregister_empty,
opal_memory_base_component_set_alignment_empty,
.memoryc_query = empty_query,
.memoryc_process = empty_process,
.memoryc_register = opal_memory_base_component_register_empty,
.memoryc_deregister = opal_memory_base_component_deregister_empty,
.memoryc_set_alignment = opal_memory_base_component_set_alignment_empty,
};
@ -66,6 +70,12 @@ static opal_memory_base_component_2_0_0_t empty_component = {
opal_memory_base_component_2_0_0_t *opal_memory = &empty_component;
void opal_memory_base_malloc_init_hook (void)
{
if (opal_memory->memoryc_init_hook) {
opal_memory->memoryc_init_hook ();
}
}
/*
* Function for finding and opening either all MCA components, or the one
@ -73,23 +83,36 @@ opal_memory_base_component_2_0_0_t *opal_memory = &empty_component;
*/
static int opal_memory_base_open(mca_base_open_flag_t flags)
{
mca_base_component_list_item_t *item, *next;
opal_memory_base_component_2_0_0_t *tmp;
int priority, highest_priority = 0;
int ret;
/* Open up all available components */
/* can only be zero or one */
OPAL_LIST_FOREACH(item, &opal_memory_base_framework.framework_components, mca_base_component_list_item_t) {
tmp = (opal_memory_base_component_2_0_0_t *) item->cli_component;
ret = tmp->memoryc_query (&priority);
if (OPAL_SUCCESS != ret || priority < highest_priority) {
continue;
}
highest_priority = priority;
opal_memory = tmp;
}
OPAL_LIST_FOREACH_SAFE(item, next, &opal_memory_base_framework.framework_components, mca_base_component_list_item_t) {
if ((void *) opal_memory != (void *) item->cli_component) {
mca_base_component_unload (item->cli_component, opal_memory_base_framework.framework_output);
opal_list_remove_item (&opal_memory_base_framework.framework_components, &item->super);
}
}
/* open remaining component */
ret = mca_base_framework_components_open (&opal_memory_base_framework, flags);
if (ret != OPAL_SUCCESS) {
return ret;
}
/* can only be zero or one */
if (opal_list_get_size(&opal_memory_base_framework.framework_components) == 1) {
mca_base_component_list_item_t *item;
item = (mca_base_component_list_item_t*)
opal_list_get_first(&opal_memory_base_framework.framework_components);
opal_memory = (opal_memory_base_component_2_0_0_t*)
item->cli_component;
}
/* All done */
return OPAL_SUCCESS;
}
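
The new open logic above replaces the old "whichever single component built" rule with a max-priority scan: each component's memoryc_query() reports a priority, the highest wins (ties go to the later component, since only strictly lower priorities are skipped), and the losers are unloaded before the survivor is opened. Here is a standalone sketch of that selection pattern; the types, names, and priority values are illustrative stand-ins, not the real OPAL MCA structures:

#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-in for a memory component: query() reports a
   priority, or returns nonzero to opt out of selection entirely. */
typedef struct component {
    const char *name;
    int (*query)(int *priority);
} component_t;

static int q_empty(int *p)   { *p = 0;  return 0; }   /* safe fallback      */
static int q_patcher(int *p) { *p = 80; return 0; }   /* hypothetical value */
static int q_broken(int *p)  { (void) p; return -1; } /* opts out           */

int main(void)
{
    component_t comps[] = {
        { "empty",   q_empty   },
        { "patcher", q_patcher },
        { "broken",  q_broken  },
    };
    component_t *selected = &comps[0];  /* like opal_memory = &empty_component */
    int highest_priority = 0;

    /* One pass: keep the best-priority component that queries OK.  Ties
       go to the later component; only strictly lower priorities skip. */
    for (size_t i = 0; i < sizeof (comps) / sizeof (comps[0]); ++i) {
        int priority;
        if (0 != comps[i].query (&priority) || priority < highest_priority) {
            continue;
        }
        highest_priority = priority;
        selected = &comps[i];
    }

    /* At this point the real code unloads every non-selected component. */
    printf ("selected: %s (priority %d)\n", selected->name, highest_priority);
    return 0;
}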


@ -1,19 +0,0 @@
Copyright (c) 2001-2004 Wolfram Gloger
Permission to use, copy, modify, distribute, and sell this software
and its documentation for any purpose is hereby granted without fee,
provided that (i) the above copyright notices and this permission
notice appear in all copies of the software and related documentation,
and (ii) the name of Wolfram Gloger may not be used in any advertising
or publicity relating to the software.
THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL,
INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY
DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY
OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.


@ -1,181 +0,0 @@
2004-11-05 Wolfram Gloger <wg@malloc.de>
* malloc/hooks.c (malloc_starter, memalign_starter): Call
ptmalloc_init_minimal().
2004-11-04 Wolfram Gloger <wg@malloc.de>
* malloc/malloc.c (USE_STARTER): New macro.
* malloc/hooks.c: Use USE_STARTER.
* malloc/arena.c: Use USE_STARTER.
2004-08-13 Ulrich Drepper <drepper@redhat.com>
* malloc/malloc.c: Use strong_alias instead of weak_alias wherever
possible.
2002-12-06 Roland McGrath <roland@redhat.com>
* malloc/arena.c (ptmalloc_init_minimal): New function, broken out
of ptmalloc_init.
2002-08-23 Roland McGrath <roland@redhat.com>
* malloc/hooks.c (__malloc_initialize_hook, __free_hook,
__malloc_hook, __realloc_hook, __memalign_hook,
__after_morecore_hook): Variable definitions moved to ...
* malloc/malloc.c: ... here, so as to be before all references.
2004-10-19 Wolfram Gloger <wg@malloc.de>
* malloc/hooks.c (mem2chunk_check, top_check): Handle
non-contiguous arena. Reported by Michael Dalton
<mwdalton@stanford.edu> [BZ #457]. Add further checks for top
chunk.
2004-08-08 Wolfram Gloger <wg@malloc.de>
* include/malloc.h (mstate): Move type declaration from here...
* malloc/malloc.h: ...to here.
(struct malloc_arena_info, struct malloc_global_info): New types.
(_int_get_arena, _int_get_arena_info, _int_get_global_info): New
functions.
* malloc/malloc.c (mSTATS, public_mSTATs, mALLINFo): Remove.
(_int_get_arena_info, _int_get_global_info): New functions.
* malloc/arena.c (_int_get_arena): New function.
* malloc/malloc-stats.c: New file.
* malloc/tst-mstats.c: New file.
* malloc/Makefile (tests): Add tst-mstats.
(distribute): Remove no-longer existing thread-m.h.
(dist-routines): Add malloc-stats.
* malloc/Versions: Add _int_get_arena, _int_get_arena_info,
_int_get_global_info.
2004-07-25 Wolfram Gloger <wg@malloc.de>
* sysdeps/generic/thread-st.h: New file.
* sysdeps/pthread/thread-st.h: New file.
* sysdeps/sproc/thread-st.h: New file.
* sysdeps/solaris/thread-st.h: New file.
* thread-st.h: Removed.
2004-03-18 Ulrich Drepper <drepper@redhat.com>
* malloc/malloc.c (__posix_memalign): Correct alignment check.
Reported by Don Heller <dheller@cse.psu.edu>.
2003-12-17 Jakub Jelinek <jakub@redhat.com>
* malloc/malloc.c (__posix_memalign): If __memalign_hook != NULL,
call it directly instead of memalign_internal.
2003-09-27 Wolfram Gloger <wg@malloc.de>
* malloc/malloc.c: Include <malloc-machine.h> earlier instead of
"thread-m.h", so that default parameters can be overridden in a
system-specific malloc-machine.h. Remove extra ; from extern "C"
closing brace.
* sysdeps/generic/malloc-machine.h: New file.
* malloc/thread-m.h: Removed.
2003-09-08 Wolfram Gloger <wg@malloc.de>
* malloc/malloc.c (sYSMALLOc): Move foreign sbrk accounting into
contiguous case. Bug report from Prem Gopalan
<prem@mazunetworks.com>.
2003-08-18 Art Haas <ahaas@airmail.net>
* malloc/malloc.h: Remove unneeded ';' where closing the C++
extern block.
2003-06-18 Ulrich Drepper <drepper@redhat.com>
* malloc/malloc.c (public_mALLINFo): Initialize malloc if it
hasn't happened yet.
2003-05-28 Roland McGrath <roland@redhat.com>
* malloc/malloc.h [! __GNUC__] (__const): Define if undefined.
2003-05-04 H.J. Lu <hongjiu.lu@intel.com>
* malloc/arena.c (arena_get2): Add atomic_write_barrier.
* malloc/thread-m.h: Include <atomic.h>.
(atomic_full_barrier): Provide default.
(atomic_read_barrier): Likewise.
(atomic_write_barrier): Likewise.
2003-05-01 Ulrich Drepper <drepper@redhat.com>
* malloc/malloc.c (mSTATs): Call ptmalloc_init if necessary.
2003-01-27 Wolfram Gloger <wg@malloc.de>
* malloc/hooks.c (mem2chunk_check): Check alignment of mem
pointer, not of the computed chunk. Bug report from Carlos
O'Donell <carlos@baldric.uwo.ca>.
2002-12-27 Jakub Jelinek <jakub@redhat.com>
* malloc/arena.c (ptmalloc_init): Don't call next_env_entry if
_environ is NULL.
2002-12-17 Ulrich Drepper <drepper@redhat.com>
* malloc/malloc.c (mALLOPt): Make sure malloc is initialized.
2002-12-06 Roland McGrath <roland@redhat.com>
* malloc/hooks.c [_LIBC && (USE___THREAD || (USE_TLS && !SHARED))]
(malloc_starter, memalign_starter, free_starter): Don't define these.
* malloc/hooks.c (memalign_starter): New function.
* malloc/malloc.c: Declare it.
* malloc/arena.c (save_memalign_hook): New variable.
(ptmalloc_init): Set __memalign_hook to memalign_starter.
2002-11-18 Wolfram Gloger <wg@malloc.de>
* malloc/arena.c
(ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2): Do
nothing if not initialized. Bug report from Marcus Brinkmann
<Marcus.Brinkmann@ruhr-uni-bochum.de>.
2002-10-07 Wolfram Gloger <wg@malloc.de>
* malloc/malloc.c (sYSMALLOc): Only check for breakage due
to foreign sbrk()'s if arena is contiguous. Bug report from
Bruno Haible <bruno@clisp.org>.
2002-07-11 Wolfram Gloger <wmglo@dent.med.uni-muenchen.de>
* malloc/hooks.c: typo fix in NO_THREADS case, realloc_check
fix in HAVE_MREMAP case.
2002-06-11 Wolfram Gloger <wg@malloc.de>
* malloc/malloc.c: Fix error path when new_heap() returns NULL.
Reported by Michael Meissner <meissner@redhat.com>.
2002-03-29 Wolfram Gloger <wg@malloc.de>
* malloc/malloc.c: Add short description and prototypes for
malloc_get_state, malloc_set_state and posix_memalign, for
consistency and to avoid warnings with -Wstrict-prototypes.
Reported by Andreas Jaeger <aj@suse.de>.
2002-03-13 Wolfram Gloger <wg@malloc.de>
* malloc/malloc.c (sYSMALLOc): Don't change brk if mmap
failed.
2002-01-18 Wolfram Gloger <wg@malloc.de>
* malloc/malloc.c: Rewrite, adapted from Doug Lea's malloc-2.7.0.c.
* malloc/malloc.h: Likewise.
* malloc/arena.c: New file.
* malloc/hooks.c: New file.
* malloc/tst-mallocstate.c: New file.
* malloc/Makefile: Add new testcase tst-mallocstate.
Add arena.c and hooks.c to distribute. Fix commented CPPFLAGS.


@ -1,92 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2015 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_CPPFLAGS = -DMALLOC_DEBUG=0
AM_CPPFLAGS += \
-D_GNU_SOURCE=1 \
-DMALLOC_HOOKS=1 \
-I$(srcdir)/sysdeps/pthread
# this must come *after* the threads -Is
AM_CPPFLAGS += -I$(srcdir)/sysdeps/generic
# ptmalloc requires us to include the copyright notice in the
# software. So install it in the same place that we install ROMIO's
# copyright notices.
docdir = $(opaldatadir)/doc
doc_DATA = COPYRIGHT-ptmalloc2.txt
# Help file
dist_opaldata_DATA = help-opal-memory-linux.txt
# This component is only ever built statically (i.e., slurped into
# libopen-pal) -- it is never built as a DSO.
noinst_LTLIBRARIES = libmca_memory_linux.la
libmca_memory_linux_la_SOURCES = \
memory_linux.h \
memory_linux_component.c
libmca_memory_linux_la_LDFLAGS = \
-module -avoid-version $(memory_linux_LDFLAGS)
libmca_memory_linux_la_LIBADD = $(memory_linux_LIBS)
# Do we have ptmalloc2 support?
if MEMORY_LINUX_PTMALLOC2
libmca_memory_linux_la_SOURCES += \
memory_linux_ptmalloc2.c \
memory_linux_munmap.c \
rename.h \
malloc.c \
malloc-stats.c \
malloc.h
endif
# Do we have ummunotify support?
if MEMORY_LINUX_UMMUNOTIFY
libmca_memory_linux_la_SOURCES += memory_linux_ummunotify.c public.h
endif
# these are included directly and shouldn't be built solo
EXTRA_libmca_memory_linux_la_SOURCES = \
arena.c \
hooks.c
EXTRA_DIST = \
README-open-mpi.txt \
README-ptmalloc2.txt \
ChangeLog-ptmalloc2.txt \
COPYRIGHT-ptmalloc2.txt \
lran2.h \
t-test.h \
t-test1.c \
t-test2.c \
tst-mallocstate.c \
tst-mstats.c \
sysdeps/sproc/malloc-machine.h \
sysdeps/sproc/thread-st.h \
sysdeps/pthread/malloc-machine.h \
sysdeps/pthread/thread-st.h \
sysdeps/solaris/malloc-machine.h \
sysdeps/solaris/thread-st.h \
sysdeps/generic/malloc-machine.h \
sysdeps/generic/thread-st.h \
sysdeps/generic/atomic.h \
$(doc_DATA)


@ -1,161 +0,0 @@
30 March 2009
This file documents Open MPI's usage of ptmalloc2. This is perhaps
our 7,208,499th iteration of ptmalloc2 support, so let's document it
here so that some future developer might spend *slightly* less time
understanding what the heck is going on.
See glibc documentation about malloc hooks before continuing. This is
pretty much required reading before reading the rest of this file /
having a hope of understanding what's going on here:
http://www.gnu.org/software/libc/manual/html_mono/libc.html#Hooks-for-Malloc
The overall goal is that we're using the Linux glibc hooks to wholly
replace the underlying allocator. We *used* to use horrid linker
tricks to interpose OMPI's ptmalloc2 symbols with the glibc ones --
meaning that user apps would call our symbols and not the glibc ones.
But that scheme is fraught with problems, not the least of which is
that *all* MPI applications will be forced to use our overridden
allocator (not just the ones that need it, such as the ones running on
OpenFabrics-based networks). Instead, what we do here is, frankly,
quite similar to what is done in MX: we use the 4 glibc hooks to
assert our own malloc, realloc, free, and memalign functions. This
allows the decision as to whether to use this internal ptmalloc2
allocator to be made at run time. This is quite important; using
this internal allocator has both benefits (enabling
mpi_leave_pinned=1 behavior) and drawbacks (breaking some debuggers,
being unnecessary for non-OpenFabrics-based networks, etc.).
Here's how it works...
This component *must* be linked statically as part of libopen-pal; it
*cannot* be a DSO. Specifically, this library must be present during
pre-main() initialization phases so that its __malloc_initialize_hook
can be found and executed. Loading it as a DSO during MPI_INIT is far
too late. In configure.m4, we define the M4 macro
MCA_memory_ptmalloc2_COMPILE_MODE to always compile this component in
static mode. Yay flexible build system.
This component provides an munmap() function that will intercept calls
to munmap() and do the Right Thing. That is fairly straightforward to
do. Intercepting the malloc/free/etc. allocator is much more
complicated.
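
A minimal sketch of such an intercept is below. Reaching the real
munmap() via dlsym(RTLD_NEXT) is just one portable way to forward the
call (the actual component wires this up differently through the build
system), and the OPAL notification callback is shown only as a comment:

#define _GNU_SOURCE
#include <dlfcn.h>
#include <stddef.h>
#include <sys/mman.h>

/* Interposed munmap: note the range going away, then forward the call.
   Link with -ldl on older glibc for dlsym(). */
static int (*real_munmap) (void *, size_t);

int munmap (void *addr, size_t len)
{
    if (NULL == real_munmap) {
        real_munmap = (int (*)(void *, size_t)) dlsym (RTLD_NEXT, "munmap");
    }

    /* Let listeners (e.g. an RDMA registration cache) evict this range
       before it is unmapped; in OPAL this is roughly:
       opal_mem_hooks_release_hook (addr, len, false);              */

    return real_munmap (addr, len);
}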
All the ptmalloc2 public symbols in this component have been name
shifted via the rename.h file. Hence, what used to be "malloc" is now
opal_memory_ptmalloc2_malloc. Since all the public symbols are
name-shifted, we can safely link this component in all MPI
applications. Specifically: just because this ptmalloc2 allocator is
present in all OMPI executables and user-level applications, it won't
necessarily be used -- it's a separate/run-time decision as to whether
it will be used.
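
The name shifting itself is plain preprocessor work; rename.h amounts
to a long list of defines in this spirit (illustrative only, the exact
macro names in the real file may differ):

/* Prefix every public allocator symbol so that linking this allocator
   into every application is harmless. */
#define malloc    opal_memory_ptmalloc2_malloc
#define free      opal_memory_ptmalloc2_free
#define realloc   opal_memory_ptmalloc2_realloc
#define memalign  opal_memory_ptmalloc2_memalign
#define munmap    opal_memory_ptmalloc2_munmap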
We set the __malloc_initialize_hook variable to point to
opal_memory_ptmalloc2_malloc_init_hook (in hooks.c). This function is
called by the underlying glibc allocator before any allocations occur
and before the memory allocation subsystem is set up. As such, this
function is *extremely* restricted in what it can do. It cannot call
any form of malloc, for example (which seems fairly obvious, but it's
worth mentioning :-) ). This function is one of the determining
steps as to whether we'll use the internal ptmalloc2 allocator or
not. Several checks are performed:
- Was either of the MCA params mpi_leave_pinned or
mpi_leave_pinned_pipeline set?
- Was a driver found to be active, indicating that an OS-bypass network
is in effect (OpenFabrics, MX, Open-MX, etc.)?
- Was an environment variable set indicating that we want to disable
this component?
If the $OMPI_MCA_memory_ptmalloc2_disable or the $FAKEROOTKEY env
variables are set, we don't enable the memory hooks.
We then use the following matrix to determine whether to enable the
memory hooks or not (explanation of the matrix is below):
             lpp=yes   lpp=no   lpp=runtime   lpp=not found
lp=yes       yes       yes      yes           yes
lp=no        yes       no       no            no
lp=runtime   yes       no       runtime       runtime
lp=not found yes       no       runtime       runtime

lp        = leave_pinned (the rows), lpp = leave_pinned_pipeline (the columns)
yes       = found that variable to be set to "yes" (i.e., 1)
no        = found that variable to be set to "no" (i.e., 0)
runtime   = found that variable to be set to "determine at runtime" (i.e., -1)
not found = that variable was not set at all
Hence, if we end up on a "yes" block in the matrix, we enable the
hooks. If we end up in a "no" block in the matrix, we disable the
hooks. If we end up in a "runtime" block in the matrix, then we
enable the hooks *if* we can find indications that an OS bypass
network is present and available for use (e.g., OpenFabrics, MX,
Open-MX, etc.).
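
Put another way, the matrix reduces to: any explicit "yes" enables the
hooks, otherwise any explicit "no" disables them, otherwise defer to
runtime detection ("not found" behaves exactly like "runtime"). A
compact restatement as a hypothetical helper, using the 1/0/-1
encodings from the legend above:

#include <stdio.h>

enum hook_decision { DISABLE = 0, ENABLE = 1, RUNTIME = -1 };

/* lp / lpp: 1 = yes, 0 = no, -1 = runtime or not found (the matrix
   treats those two identically).  Hypothetical helper, not OMPI code. */
static enum hook_decision decide_hooks (int lp, int lpp)
{
    if (1 == lp || 1 == lpp) return ENABLE;   /* any explicit "yes" wins */
    if (0 == lp || 0 == lpp) return DISABLE;  /* otherwise any "no" wins */
    return RUNTIME;     /* both unset/runtime: probe for an OS-bypass net */
}

int main (void)
{
    /* Spot-check two cells of the matrix above. */
    printf ("%d %d\n", decide_hooks (0, 1),    /* lp=no, lpp=yes -> 1 (yes) */
                       decide_hooks (-1, -1)); /* both runtime   -> -1      */
    return 0;
}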
To be clear: sometime during process startup, this function will
definitely be called. It will either set the 4 hook functions to
point to our name-shifted ptmalloc2 functions, or it won't. If the 4
hook functions are set, then the underlying glibc allocator will
always call our 4 functions in all the relevant places instead of
calling its own functions. Specifically: the process is calling the
underlying glibc allocator, but that underlying glibc allocator will
make function pointer callbacks to our name-shifted ptmalloc2
functions to actually do the work.
Note that because we know our ptmalloc will not be providing all 5
hook variables (because we want to use the underlying glibc hook
variables), they are #if 0'ed out in our malloc.c. This has the
direct consequence that the *_hook_ini() in hooks.c are never used.
So to avoid compiler/linker warnings, I #if 0'ed those out as well.
All the public functions in malloc.c that call hook functions were
modified to #if 0 the hook function invocations. After all, that's
something that we want the *underlying* glibc allocator to do -- but
we are putting these functions as the hooks, so we don't want to
invoke ourselves in an infinite loop!
The next thing that happens in the startup sequence is that the
ptmalloc2 memory component's "open" function is called during
MPI_INIT. But we need to test to see if the glibc memory hooks have
been overridden before MPI_INIT was invoked. If so, we need to signal
that our allocator support may not be complete.
Patrick Geoffray/MX suggests a simple test: malloc() 4MB and then free
it. Watch to see if our name-shifted ptmalloc2 free() function was
invoked. If it was, then all of our hooks are probably in place and
we can proceed. If not, then set flags indicating that this memory
allocator only supports MUNMAP (not FREE/CHUNK).
We actually perform this test for malloc, realloc, and memalign. If
they all pass, then we say that the memory allocator supports
everything. If any of them fail, then we say that the memory
allocator does not support FREE/CHUNK.
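
The shape of that probe, stripped to its essence (the names and the
hook plumbing here are illustrative; the real test drives the actual
glibc hook variables that were installed at startup):

#include <stdio.h>
#include <stdlib.h>

/* Did our (name-shifted) allocator actually see the free()?  In the
   real component this flag would be set inside the ptmalloc2 free path. */
static volatile int our_free_ran = 0;

static void probe_free (void *p)
{
    our_free_ran = 1;   /* proof that the hook chain reached us */
    free (p);
}

/* Stand-in for glibc's __free_hook dispatch. */
static void (*free_hook) (void *) = probe_free;

int main (void)
{
    void *p = malloc (4 * 1024 * 1024);   /* the 4MB probe allocation */
    our_free_ran = 0;
    free_hook (p);   /* with real hooks installed, plain free() lands here */
    puts (our_free_ran ? "hooks intact: advertise FREE/CHUNK support"
                       : "hooks overridden: advertise MUNMAP support only");
    return 0;
}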
NOTE: we *used* to simply set the FREE/CHUNK support flags during our
ptmalloc2's internal ptmalloc_init() function. This is not a good
idea because even after our ptmalloc_init() function has been invoked,
someone may come in and override our memory hooks. Doing tests during
the ptmalloc2 memory component's open function seems to be the safest
way to test whether we *actually* support FREE/CHUNK (this is what MX
does, too).
As stated above, we always intercept munmap() -- this is acceptable in
all environments. But we test that, too, just to be sure that the
munmap intercept is working. If we verify that it is working
properly, then we set that we have MUNMAP support.
Much later in the init sequence during MPI_INIT, components indicate
whether they want to use mpi_leave_pinned[_pipeline] support or not.
For example, the openib BTL queries the opal_mem_hooks_support_level()
function to see if FREE and MUNMAP are supported. If they are, then
the openib BTL sets mpi_leave_pinned = 1.
Finally, the mpool base does a final check. If
mpi_leave_pinned[_pipeline] is set to 1 and/or use_mem_hooks is set,
but FREE/MUNMAP are not set in the supported flags, then a warning is
printed. Otherwise, life continues (presumably using
mpi_leave_pinned[_pipeline] support).
Simple, right?


@ -1,192 +0,0 @@
ptmalloc2 - a multi-thread malloc implementation
================================================
Wolfram Gloger (wg@malloc.de)
Nov 2004
Introduction
============
This package is a modified version of Doug Lea's malloc-2.7.1pre
implementation (available separately from ftp://g.oswego.edu/pub/misc)
that I adapted for multiple threads, while trying to avoid lock
contention as much as possible. Many thanks should go to Doug Lea
(dl@cs.oswego.edu) for the great original malloc implementation.
As part of the GNU C library, the source files are available under the
GNU Library General Public License (see the comments in the files).
But as part of this stand-alone package, the code is also available
under the (probably less restrictive) conditions described in the file
'COPYRIGHT'. In any case, there is no warranty whatsoever for this
package.
The current distribution should be available from:
http://www.malloc.de/malloc/ptmalloc2.tar.gz
Compilation
===========
It should be possible to build ptmalloc2 on any UN*X-like system that
implements the sbrk(), mmap(), munmap() and mprotect() calls. If
mmap() is not available, it is only possible to produce a
non-threadsafe implementation. Since there are now several source
files, a library (libmalloc.a) is generated. See the Makefile for
examples of the compile-time options.
Note that support for non-ANSI compilers is no longer a significant
goal.
Several example targets are provided in the Makefile:
o Posix threads (pthreads), compile with "make posix"
o Posix threads with explicit initialization, compile with
"make posix-explicit" (known to be required on HPUX)
o Posix threads without "tsd data hack" (see below), compile with
"make posix-with-tsd"
o Solaris threads, compile with "make solaris"
o SGI sproc() threads, compile with "make sproc"
o no threads, compile with "make nothreads"
For Linux:
o make "linux-pthread" (almost the same as "make posix")
Note that some compilers need special flags for multi-threaded code,
e.g. with Solaris cc with Posix threads, one should use:
% make posix SYS_FLAGS='-mt'
Some additional targets, ending in `-libc', are also provided in the
Makefile, to compare performance of the test programs to the case when
linking with the standard malloc implementation in libc.
A potential problem remains: If any of the system-specific functions
for getting/setting thread-specific data or for locking a mutex call
one of the malloc-related functions internally, the implementation
cannot work at all due to infinite recursion. One example seems to be
Solaris 2.4. I would like to hear if this problem occurs on other
systems, and whether similar workarounds could be applied.
For Posix threads, too, an optional hack like that has been integrated
(activated when defining USE_TSD_DATA_HACK) which depends on
`pthread_t' being convertible to an integral type (which is of course
not generally guaranteed). USE_TSD_DATA_HACK is now the default
because I haven't yet found a non-glibc pthreads system where this
hack is _not_ needed.
*NEW* and _important_: In (currently) one place in the ptmalloc2
source, a write memory barrier is needed, named
atomic_write_barrier(). This macro needs to be defined at the end of
malloc-machine.h. For gcc, a fallback in the form of a full memory
barrier is already defined, but you may need to add another definition
if you don't use gcc.
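
For example, a definition at the end of malloc-machine.h might look
like the following; this particular fallback is an assumption on my
part, using a gcc builtin that issues a full barrier, which
over-satisfies the write-barrier requirement:

/* Possible fallback at the end of malloc-machine.h: a full memory
   barrier is stronger than a write barrier, hence always safe. */
#ifndef atomic_write_barrier
# define atomic_write_barrier() __sync_synchronize ()
#endif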
Usage
=====
Just link libmalloc.a into your application.
Some wicked systems (e.g. HPUX apparently) won't let malloc call _any_
thread-related functions before main(). On these systems,
USE_STARTER=2 must be defined during compilation (see "make
posix-explicit" above) and the global initialization function
ptmalloc_init() must be called explicitly, preferably at the start of
main().
Otherwise, when using ptmalloc2, no special precautions are necessary.
Link order is important
=======================
On some systems, when overriding malloc and linking against shared
libraries, the link order becomes very important. E.g., when linking
C++ programs on Solaris, don't rely on libC being included by default,
but instead put `-lthread' behind `-lC' on the command line:
CC ... libmalloc.a -lC -lthread
This is because there are global constructors in libC that need
malloc/ptmalloc, which in turn needs the thread library to be
already initialized.
Debugging hooks
===============
All calls to malloc(), realloc(), free() and memalign() are routed
through the global function pointers __malloc_hook, __realloc_hook,
__free_hook and __memalign_hook if they are not NULL (see the malloc.h
header file for declarations of these pointers). Therefore the malloc
implementation can be changed at runtime, if care is taken not to call
free() or realloc() on pointers obtained with a different
implementation than the one currently in effect. (The easiest way to
guarantee this is to set up the hooks before any malloc call, e.g.
with a function pointed to by the global variable
__malloc_initialize_hook).
A useful application of the hooks is built into ptmalloc2: The
implementation is usually very unforgiving with respect to misuse,
such as free()ing a pointer twice or free()ing a pointer not obtained
with malloc() (these will typically crash the application
immediately). To debug in such situations, you can set the
environment variable `MALLOC_CHECK_' (note the trailing underscore).
Performance will suffer somewhat, but you will get more controlled
behaviour in the case of misuse. If MALLOC_CHECK_=0, wrong free()s
will be silently ignored, if MALLOC_CHECK_=1, diagnostics will be
printed on stderr, and if MALLOC_CHECK_=2, abort() will be called on
any error.
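
This is the hook protocol documented in the glibc manual (and since
deprecated there; glibc 2.34 removed the hook variables entirely). The
canonical usage pattern looks like this; note the dance of restoring
the old hook before re-entering malloc() to avoid infinite recursion:

#include <malloc.h>
#include <stdio.h>

static void *(*old_malloc_hook) (size_t, const void *);

static void *my_malloc_hook (size_t size, const void *caller)
{
    void *result;
    __malloc_hook = old_malloc_hook;    /* uninstall to avoid recursion */
    result = malloc (size);
    fprintf (stderr, "malloc(%zu) called from %p returns %p\n",
             size, caller, result);
    old_malloc_hook = __malloc_hook;    /* the allocator may reset hooks */
    __malloc_hook = my_malloc_hook;     /* reinstall ourselves */
    return result;
}

static void my_init (void)
{
    old_malloc_hook = __malloc_hook;
    __malloc_hook = my_malloc_hook;
}

/* Runs before main(), before the first allocation is ever made. */
void (*__malloc_initialize_hook) (void) = my_init;

int main (void)
{
    free (malloc (64));   /* the malloc() is traced via the hook */
    return 0;
}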
You can now also tune other malloc parameters (normally adjusted via
mallopt() calls from the application) with environment variables:
MALLOC_TRIM_THRESHOLD_   for deciding to shrink the heap (in bytes)
MALLOC_TOP_PAD_          how much extra memory to allocate on
                         each system call (in bytes)
MALLOC_MMAP_THRESHOLD_   min. size for chunks allocated via
                         mmap() (in bytes)
MALLOC_MMAP_MAX_         max. number of mmapped regions to use
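
These environment variables are pre-main() equivalents of mallopt()
calls; tuning the same parameters programmatically looks like:

#include <malloc.h>

int main (void)
{
    mallopt (M_TRIM_THRESHOLD, 128 * 1024);   /* MALLOC_TRIM_THRESHOLD_ */
    mallopt (M_TOP_PAD,        64 * 1024);    /* MALLOC_TOP_PAD_        */
    mallopt (M_MMAP_THRESHOLD, 256 * 1024);   /* MALLOC_MMAP_THRESHOLD_ */
    mallopt (M_MMAP_MAX,       1024);         /* MALLOC_MMAP_MAX_       */
    return 0;
}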
Tests
=====
Two testing applications, t-test1 and t-test2, are included in this
source distribution. Both perform pseudo-random sequences of
allocations/frees, and can be given numeric arguments (all arguments
are optional):
% t-test[12] <n-total> <n-parallel> <n-allocs> <size-max> <bins>
n-total = total number of threads executed (default 10)
n-parallel = number of threads running in parallel (2)
n-allocs = number of malloc()'s / free()'s per thread (10000)
size-max = max. size requested with malloc() in bytes (10000)
bins = number of bins to maintain
The first test `t-test1' maintains a completely separate pool of
allocated bins for each thread, and should therefore show full
parallelism. On the other hand, `t-test2' creates only a single pool
of bins, and each thread randomly allocates/frees any bin. Some lock
contention is to be expected in this case, as the threads frequently
cross each other's arenas.
Performance results from t-test1 should be quite repeatable, while the
behaviour of t-test2 depends on scheduling variations.
Conclusion
==========
I'm always interested in performance data and feedback, just send mail
to ptmalloc@malloc.de.
Good luck!


@ -1,805 +0,0 @@
/* Malloc implementation for multiple threads without lock contention.
Copyright (C) 2001 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Wolfram Gloger <wg@malloc.de>, 2001.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* $Id: arena.c,v 1.9 2004/11/05 14:42:23 wg Exp $ */
/* Compile-time constants. */
#define HEAP_MIN_SIZE (32*1024)
#ifndef HEAP_MAX_SIZE
#define HEAP_MAX_SIZE (1024*1024) /* must be a power of two */
#endif
/* HEAP_MIN_SIZE and HEAP_MAX_SIZE limit the size of mmap()ed heaps
that are dynamically created for multi-threaded programs. The
maximum size must be a power of two, for fast determination of
which heap belongs to a chunk. It should be much larger than the
mmap threshold, so that requests with a size just below that
threshold can be fulfilled without creating too many heaps. */
#ifndef THREAD_STATS
#define THREAD_STATS 0
#endif
/* If THREAD_STATS is non-zero, some statistics on mutex locking are
computed. */
/***************************************************************************/
#define top(ar_ptr) ((ar_ptr)->top)
/* A heap is a single contiguous memory region holding (coalesceable)
malloc_chunks. It is allocated with mmap() and always starts at an
address aligned to HEAP_MAX_SIZE. Not used unless compiling with
USE_ARENAS. */
typedef struct _heap_info {
mstate ar_ptr; /* Arena for this heap. */
struct _heap_info *prev; /* Previous heap. */
size_t size; /* Current size in bytes. */
size_t pad; /* Make sure the following data is properly aligned. */
} heap_info;
/* Thread specific data */
static tsd_key_t arena_key;
static mutex_t list_lock;
#if THREAD_STATS
static int stat_n_heaps;
#define THREAD_STAT(x) x
#else
#define THREAD_STAT(x) do ; while(0)
#endif
/* Mapped memory in non-main arenas (reliable only for NO_THREADS). */
static unsigned long arena_mem;
/* Already initialized? */
int __malloc_initialized = -1;
/**************************************************************************/
#if USE_ARENAS
/* arena_get() acquires an arena and locks the corresponding mutex.
First, try the one last locked successfully by this thread. (This
is the common case and handled with a macro for speed.) Then, loop
once over the circularly linked list of arenas. If no arena is
readily available, create a new one. In this latter case, `size'
is just a hint as to how much memory will be required immediately
in the new arena. */
#define arena_get(ptr, size) do { \
Void_t *vptr = NULL; \
ptr = (mstate)tsd_getspecific(arena_key, vptr); \
if(ptr && !mutex_trylock(&ptr->mutex)) { \
THREAD_STAT(++(ptr->stat_lock_direct)); \
} else \
ptr = arena_get2(ptr, (size)); \
} while(0)
/* find the heap and corresponding arena for a given ptr */
#define heap_for_ptr(ptr) \
((heap_info *)((unsigned long)(ptr) & ~(HEAP_MAX_SIZE-1)))
#define arena_for_chunk(ptr) \
(chunk_non_main_arena(ptr) ? heap_for_ptr(ptr)->ar_ptr : &main_arena)
#else /* !USE_ARENAS */
/* There is only one arena, main_arena. */
#if THREAD_STATS
#define arena_get(ar_ptr, sz) do { \
ar_ptr = &main_arena; \
if(!mutex_trylock(&ar_ptr->mutex)) \
++(ar_ptr->stat_lock_direct); \
else { \
(void)mutex_lock(&ar_ptr->mutex); \
++(ar_ptr->stat_lock_wait); \
} \
} while(0)
#else
#define arena_get(ar_ptr, sz) do { \
ar_ptr = &main_arena; \
(void)mutex_lock(&ar_ptr->mutex); \
} while(0)
#endif
#define arena_for_chunk(ptr) (&main_arena)
#endif /* USE_ARENAS */
/**************************************************************************/
#ifndef NO_THREADS
/* atfork support. */
static __malloc_ptr_t (*save_malloc_hook) __MALLOC_P ((size_t __size,
__const __malloc_ptr_t));
# if !defined _LIBC || !defined USE_TLS || (defined SHARED && !USE___THREAD)
static __malloc_ptr_t (*save_memalign_hook) __MALLOC_P ((size_t align,
size_t __size,
__const __malloc_ptr_t));
# endif
static void (*save_free_hook) __MALLOC_P ((__malloc_ptr_t __ptr,
__const __malloc_ptr_t));
static Void_t* save_arena;
/* Magic value for the thread-specific arena pointer when
malloc_atfork() is in use. */
#define ATFORK_ARENA_PTR ((Void_t*)-1)
/* The following hooks are used while the `atfork' handling mechanism
is active. */
static Void_t*
malloc_atfork(size_t sz, const Void_t *caller)
{
Void_t *vptr = NULL;
Void_t *victim;
tsd_getspecific(arena_key, vptr);
if(vptr == ATFORK_ARENA_PTR) {
/* We are the only thread that may allocate at all. */
if(save_malloc_hook != malloc_check) {
return _int_malloc(&main_arena, sz);
} else {
if(top_check()<0)
return 0;
victim = _int_malloc(&main_arena, sz+1);
return mem2mem_check(victim, sz);
}
} else {
/* Suspend the thread until the `atfork' handlers have completed.
By that time, the hooks will have been reset as well, so that
mALLOc() can be used again. */
(void)mutex_lock(&list_lock);
(void)mutex_unlock(&list_lock);
return public_mALLOc(sz);
}
}
static void
free_atfork(Void_t* mem, const Void_t *caller)
{
Void_t *vptr = NULL;
mstate ar_ptr;
mchunkptr p; /* chunk corresponding to mem */
if (mem == 0) /* free(0) has no effect */
return;
p = mem2chunk(mem); /* do not bother to replicate free_check here */
#if HAVE_MMAP
if (chunk_is_mmapped(p)) /* release mmapped memory. */
{
munmap_chunk(p);
return;
}
#endif
ar_ptr = arena_for_chunk(p);
tsd_getspecific(arena_key, vptr);
if(vptr != ATFORK_ARENA_PTR)
(void)mutex_lock(&ar_ptr->mutex);
_int_free(ar_ptr, mem);
if(vptr != ATFORK_ARENA_PTR)
(void)mutex_unlock(&ar_ptr->mutex);
}
/* The following two functions are registered via thread_atfork() to
make sure that the mutexes remain in a consistent state in the
fork()ed version of a thread. Also adapt the malloc and free hooks
temporarily, because the `atfork' handler mechanism may use
malloc/free internally (e.g. in LinuxThreads). */
static void
ptmalloc_lock_all __MALLOC_P((void))
{
mstate ar_ptr;
if(__malloc_initialized < 1)
return;
(void)mutex_lock(&list_lock);
for(ar_ptr = &main_arena;;) {
(void)mutex_lock(&ar_ptr->mutex);
ar_ptr = ar_ptr->next;
if(ar_ptr == &main_arena) break;
}
save_malloc_hook = __malloc_hook;
save_free_hook = __free_hook;
__malloc_hook = malloc_atfork;
__free_hook = free_atfork;
/* Only the current thread may perform malloc/free calls now. */
tsd_getspecific(arena_key, save_arena);
tsd_setspecific(arena_key, ATFORK_ARENA_PTR);
}
static void
ptmalloc_unlock_all __MALLOC_P((void))
{
mstate ar_ptr;
if(__malloc_initialized < 1)
return;
tsd_setspecific(arena_key, save_arena);
__malloc_hook = save_malloc_hook;
__free_hook = save_free_hook;
for(ar_ptr = &main_arena;;) {
(void)mutex_unlock(&ar_ptr->mutex);
ar_ptr = ar_ptr->next;
if(ar_ptr == &main_arena) break;
}
(void)mutex_unlock(&list_lock);
}
#ifdef __linux__
/* In LinuxThreads, unlocking a mutex in the child process after a
fork() is currently unsafe, whereas re-initializing it is safe and
does not leak resources. Therefore, a special atfork handler is
installed for the child. */
static void
ptmalloc_unlock_all2 __MALLOC_P((void))
{
mstate ar_ptr;
if(__malloc_initialized < 1)
return;
#if defined _LIBC || defined MALLOC_HOOKS
tsd_setspecific(arena_key, save_arena);
__malloc_hook = save_malloc_hook;
__free_hook = save_free_hook;
#endif
for(ar_ptr = &main_arena;;) {
(void)mutex_init(&ar_ptr->mutex);
ar_ptr = ar_ptr->next;
if(ar_ptr == &main_arena) break;
}
(void)mutex_init(&list_lock);
}
#else
#define ptmalloc_unlock_all2 ptmalloc_unlock_all
#endif
#endif /* !defined NO_THREADS */
/* Initialization routine. */
#ifdef _LIBC
#include <string.h>
extern char **_environ;
static char *
internal_function
next_env_entry (char ***position)
{
char **current = *position;
char *result = NULL;
while (*current != NULL)
{
if (__builtin_expect ((*current)[0] == 'M', 0)
&& (*current)[1] == 'A'
&& (*current)[2] == 'L'
&& (*current)[3] == 'L'
&& (*current)[4] == 'O'
&& (*current)[5] == 'C'
&& (*current)[6] == '_')
{
result = &(*current)[7];
/* Save current position for next visit. */
*position = ++current;
break;
}
++current;
}
return result;
}
#endif /* _LIBC */
/* Set up basic state so that _int_malloc et al can work. */
static void
ptmalloc_init_minimal __MALLOC_P((void))
{
#if DEFAULT_TOP_PAD != 0
mp_.top_pad = DEFAULT_TOP_PAD;
#endif
mp_.n_mmaps_max = DEFAULT_MMAP_MAX;
mp_.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
mp_.trim_threshold = DEFAULT_TRIM_THRESHOLD;
mp_.pagesize = malloc_getpagesize;
}
#if !(USE_STARTER & 2)
static
#endif
void
ptmalloc_init __MALLOC_P((void))
{
#if __STD_C
const char* s;
#else
char* s;
#endif
int secure = 0;
if(__malloc_initialized >= 0) return;
__malloc_initialized = 0;
if (mp_.pagesize == 0)
ptmalloc_init_minimal();
#ifndef NO_THREADS
# if USE_STARTER & 1
/* With some threads implementations, creating thread-specific data
or initializing a mutex may call malloc() itself. Provide a
simple starter version (realloc() won't work). */
save_malloc_hook = __malloc_hook;
save_memalign_hook = __memalign_hook;
save_free_hook = __free_hook;
__malloc_hook = malloc_starter;
__memalign_hook = memalign_starter;
__free_hook = free_starter;
# ifdef _LIBC
/* Initialize the pthreads interface. */
if (__pthread_initialize != NULL)
__pthread_initialize();
# endif /* _LIBC */
# endif /* USE_STARTER & 1 */
#endif /* !defined NO_THREADS */
mutex_init(&main_arena.mutex);
main_arena.next = &main_arena;
mutex_init(&list_lock);
tsd_key_create(&arena_key, NULL);
tsd_setspecific(arena_key, (Void_t *)&main_arena);
thread_atfork(ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2);
#ifndef NO_THREADS
# if USE_STARTER & 1
__malloc_hook = save_malloc_hook;
__memalign_hook = save_memalign_hook;
__free_hook = save_free_hook;
# endif
# if USE_STARTER & 2
__malloc_hook = 0;
__memalign_hook = 0;
__free_hook = 0;
# endif
#endif
#ifdef _LIBC
secure = __libc_enable_secure;
s = NULL;
if (__builtin_expect (_environ != NULL, 1))
{
char **runp = _environ;
char *envline;
while (__builtin_expect ((envline = next_env_entry (&runp)) != NULL,
0))
{
size_t len = strcspn (envline, "=");
if (envline[len] != '=')
/* This is a "MALLOC_" variable at the end of the string
without a '=' character. Ignore it since otherwise we
will access invalid memory below. */
continue;
switch (len)
{
case 6:
if (memcmp (envline, "CHECK_", 6) == 0)
s = &envline[7];
break;
case 8:
if (! secure && memcmp (envline, "TOP_PAD_", 8) == 0)
mALLOPt(M_TOP_PAD, atoi(&envline[9]));
break;
case 9:
if (! secure && memcmp (envline, "MMAP_MAX_", 9) == 0)
mALLOPt(M_MMAP_MAX, atoi(&envline[10]));
break;
case 15:
if (! secure)
{
if (memcmp (envline, "TRIM_THRESHOLD_", 15) == 0)
mALLOPt(M_TRIM_THRESHOLD, atoi(&envline[16]));
else if (memcmp (envline, "MMAP_THRESHOLD_", 15) == 0)
mALLOPt(M_MMAP_THRESHOLD, atoi(&envline[16]));
}
break;
default:
break;
}
}
}
#else
if (! secure)
{
if((s = getenv("MALLOC_TRIM_THRESHOLD_")))
mALLOPt(M_TRIM_THRESHOLD, atoi(s));
if((s = getenv("MALLOC_TOP_PAD_")))
mALLOPt(M_TOP_PAD, atoi(s));
if((s = getenv("MALLOC_MMAP_THRESHOLD_")))
mALLOPt(M_MMAP_THRESHOLD, atoi(s));
if((s = getenv("MALLOC_MMAP_MAX_")))
mALLOPt(M_MMAP_MAX, atoi(s));
}
s = getenv("MALLOC_CHECK_");
#endif
if(s) {
if(s[0]) mALLOPt(M_CHECK_ACTION, (int)(s[0] - '0'));
__malloc_check_init();
}
#if 0
/* OMPI Change: Don't call the initialize hook; it was us. */
if(__malloc_initialize_hook != NULL)
(*__malloc_initialize_hook)();
#endif
__malloc_initialized = 1;
}
/* There are platforms (e.g. Hurd) with a link-time hook mechanism. */
#ifdef thread_atfork_static
thread_atfork_static(ptmalloc_lock_all, ptmalloc_unlock_all, \
ptmalloc_unlock_all2)
#endif
/* Managing heaps and arenas (for concurrent threads) */
#if USE_ARENAS
#if MALLOC_DEBUG > 1
/* Print the complete contents of a single heap to stderr. */
static void
#if __STD_C
dump_heap(heap_info *heap)
#else
dump_heap(heap) heap_info *heap;
#endif
{
char *ptr;
mchunkptr p;
fprintf(stderr, "Heap %p, size %10lx:\n", heap, (long)heap->size);
ptr = (heap->ar_ptr != (mstate)(heap+1)) ?
(char*)(heap + 1) : (char*)(heap + 1) + sizeof(struct malloc_state);
p = (mchunkptr)(((unsigned long)ptr + MALLOC_ALIGN_MASK) &
~MALLOC_ALIGN_MASK);
for(;;) {
fprintf(stderr, "chunk %p size %10lx", p, (long)p->size);
if(p == top(heap->ar_ptr)) {
fprintf(stderr, " (top)\n");
break;
} else if(p->size == (0|PREV_INUSE)) {
fprintf(stderr, " (fence)\n");
break;
}
fprintf(stderr, "\n");
p = next_chunk(p);
}
}
#endif /* MALLOC_DEBUG > 1 */
/* Create a new heap. size is automatically rounded up to a multiple
of the page size. */
static heap_info *
internal_function
#if __STD_C
new_heap(size_t size, size_t top_pad)
#else
new_heap(size, top_pad) size_t size, top_pad;
#endif
{
size_t page_mask = malloc_getpagesize - 1;
char *p1, *p2;
unsigned long ul;
heap_info *h;
if(size+top_pad < HEAP_MIN_SIZE)
size = HEAP_MIN_SIZE;
else if(size+top_pad <= HEAP_MAX_SIZE)
size += top_pad;
else if(size > HEAP_MAX_SIZE)
return 0;
else
size = HEAP_MAX_SIZE;
size = (size + page_mask) & ~page_mask;
/* A memory region aligned to a multiple of HEAP_MAX_SIZE is needed.
No swap space needs to be reserved for the following large
mapping (on Linux, this is the case for all non-writable mappings
anyway). */
p1 = (char *)MMAP(0, HEAP_MAX_SIZE<<1, PROT_NONE, MAP_PRIVATE|MAP_NORESERVE);
if(p1 != MAP_FAILED) {
p2 = (char *)(((unsigned long)p1 + (HEAP_MAX_SIZE-1)) & ~(HEAP_MAX_SIZE-1));
ul = p2 - p1;
munmap(p1, ul);
munmap(p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul);
} else {
/* Try to take the chance that an allocation of only HEAP_MAX_SIZE
is already aligned. */
p2 = (char *)MMAP(0, HEAP_MAX_SIZE, PROT_NONE, MAP_PRIVATE|MAP_NORESERVE);
if(p2 == MAP_FAILED)
return 0;
if((unsigned long)p2 & (HEAP_MAX_SIZE-1)) {
munmap(p2, HEAP_MAX_SIZE);
return 0;