Merge pull request #1495 from hjelmn/new_hooks
Add new patcher memory hooks
This commit is contained in:
commit
1e6b4f2f55
@ -283,7 +283,8 @@ else
|
||||
OPAL_ENABLE_DLOPEN_SUPPORT=1
|
||||
AC_MSG_RESULT([yes])
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED(OPAL_ENABLE_DLOPEN_SUPPORT, $OPAL_ENABLE_DLOPEN_SUPPORT,
|
||||
[Whether we want to enable dlopen support])
|
||||
|
||||
#
|
||||
# Heterogeneous support
|
||||
|
@ -1,4 +1,3 @@
|
||||
enable_dlopen=no
|
||||
enable_mem_profile=no
|
||||
enable_binaries=yes
|
||||
enable_heterogeneous=no
|
||||
@ -33,8 +32,5 @@ enable_mca_direct=pml-ob1
|
||||
# enable development headers
|
||||
with_devel_headers=yes
|
||||
|
||||
# enable ptmalloc (enables lazy deregistration)
|
||||
with_memory_manager=linux
|
||||
|
||||
# disable valgrind
|
||||
with_valgrind=no
|
||||
|
@ -1,5 +1,3 @@
|
||||
enable_dlopen=no
|
||||
|
||||
enable_mem_profile=no
|
||||
enable_binaries=yes
|
||||
|
||||
@ -40,8 +38,5 @@ enable_mca_direct=pml-ob1
|
||||
# enable development headers
|
||||
with_devel_headers=yes
|
||||
|
||||
# enable ptmalloc (enables lazy deregistration)
|
||||
with_memory_manager=linux
|
||||
|
||||
# disable valgrind
|
||||
with_valgrind=no
|
||||
|
@ -1,7 +1,6 @@
|
||||
# (c) 2013 Los Alamos National Security, LLC. All rights reserved.
|
||||
# Open MPI common configuration for TOSS/TOSS2 v1.7.x/1.8.x
|
||||
|
||||
enable_dlopen=no
|
||||
enable_binaries=yes
|
||||
enable_heterogeneous=no
|
||||
enable_shared=yes
|
||||
|
@ -98,15 +98,13 @@
|
||||
#endif
|
||||
#include "ompi/runtime/ompi_cr.h"
|
||||
|
||||
#if defined(MEMORY_LINUX_PTMALLOC2) && MEMORY_LINUX_PTMALLOC2
|
||||
#include "opal/mca/memory/linux/memory_linux.h"
|
||||
#include "opal/mca/memory/base/base.h"
|
||||
/* So this sucks, but with OPAL in its own library that is brought in
|
||||
implicity from libmpi, there are times when the malloc initialize
|
||||
hook in the memory component doesn't work. So we have to do it
|
||||
from here, since any MPI code is going to call MPI_Init... */
|
||||
OPAL_DECLSPEC void (*__malloc_initialize_hook) (void) =
|
||||
opal_memory_linux_malloc_init_hook;
|
||||
#endif /* defined(MEMORY_LINUX_PTMALLOC2) && MEMORY_LINUX_PTMALLOC2 */
|
||||
opal_memory_base_malloc_init_hook;
|
||||
|
||||
/* This is required for the boundaries of the hash tables used to store
|
||||
* the F90 types returned by the MPI_Type_create_f90_XXX functions.
|
||||
|
@ -4,3 +4,4 @@ opal_show_help_yyleng
|
||||
opal_show_help_yytext
|
||||
opal_util_keyval_yyleng
|
||||
opal_util_keyval_yytext
|
||||
__curbrk
|
||||
|
@ -2562,39 +2562,6 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
goto no_btls;
|
||||
}
|
||||
|
||||
/* If we are using ptmalloc2 and there are no posix threads
|
||||
available, this will cause memory corruption. Refuse to run.
|
||||
Right now, ptmalloc2 is the only memory manager that we have on
|
||||
OS's that support OpenFabrics that provide both FREE and MUNMAP
|
||||
support, so the following test is [currently] good enough... */
|
||||
value = opal_mem_hooks_support_level();
|
||||
|
||||
/* If we have a memory manager available, and
|
||||
opal_leave_pinned==-1, then unless the user explicitly set
|
||||
opal_leave_pinned_pipeline==0, then set opal_leave_pinned to 1.
|
||||
|
||||
We have a memory manager if we have both FREE and MUNMAP
|
||||
support */
|
||||
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
|
||||
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
|
||||
if (0 == opal_leave_pinned_pipeline &&
|
||||
-1 == opal_leave_pinned) {
|
||||
opal_leave_pinned = 1;
|
||||
}
|
||||
} else {
|
||||
opal_leave_pinned = 0;
|
||||
opal_leave_pinned_pipeline = 0;
|
||||
}
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (mca_btl_openib_component.cuda_want_gdr && (0 == opal_leave_pinned)) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"CUDA_gdr_and_nopinned", true,
|
||||
opal_process_info.nodename);
|
||||
goto no_btls;
|
||||
}
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
index = mca_base_var_find("ompi", "btl", "openib", "max_inline_data");
|
||||
if (index >= 0) {
|
||||
if (OPAL_SUCCESS == mca_base_var_get_value(index, NULL, &source, NULL)) {
|
||||
@ -2931,6 +2898,22 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
mca_btl_openib_component.if_exclude_list = NULL;
|
||||
}
|
||||
|
||||
/* If we are using ptmalloc2 and there are no posix threads
|
||||
available, this will cause memory corruption. Refuse to run.
|
||||
Right now, ptmalloc2 is the only memory manager that we have on
|
||||
OS's that support OpenFabrics that provide both FREE and MUNMAP
|
||||
support, so the following test is [currently] good enough... */
|
||||
value = opal_mem_hooks_support_level();
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (mca_btl_openib_component.cuda_want_gdr && (0 == opal_leave_pinned)) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"CUDA_gdr_and_nopinned", true,
|
||||
opal_process_info.nodename);
|
||||
goto no_btls;
|
||||
}
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
mca_btl_openib_component.memory_registration_verbose = opal_output_open(NULL);
|
||||
opal_output_set_verbosity (mca_btl_openib_component.memory_registration_verbose,
|
||||
mca_btl_openib_component.memory_registration_verbose_level);
|
||||
|
@ -331,26 +331,6 @@ btl_ugni_component_close(void)
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static void mca_btl_ugni_autoset_leave_pinned (void) {
|
||||
if (MCA_BTL_UGNI_RCACHE_UDREG != mca_btl_ugni_component.rcache_type) {
|
||||
int value = opal_mem_hooks_support_level();
|
||||
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
|
||||
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
|
||||
/* Set leave pinned to 1 if leave pinned pipeline is not set */
|
||||
if (-1 == opal_leave_pinned) {
|
||||
opal_leave_pinned = !opal_leave_pinned_pipeline;
|
||||
}
|
||||
} else {
|
||||
opal_leave_pinned = 0;
|
||||
opal_leave_pinned_pipeline = 0;
|
||||
}
|
||||
} else if (-1 == opal_leave_pinned) {
|
||||
/* if udreg is in use we can set leave pinned without checking for the
|
||||
* memory hooks. */
|
||||
opal_leave_pinned = !opal_leave_pinned_pipeline;
|
||||
}
|
||||
}
|
||||
|
||||
static mca_btl_base_module_t **
|
||||
mca_btl_ugni_component_init (int *num_btl_modules,
|
||||
bool enable_progress_threads,
|
||||
@ -409,8 +389,6 @@ mca_btl_ugni_component_init (int *num_btl_modules,
|
||||
}
|
||||
}
|
||||
|
||||
mca_btl_ugni_autoset_leave_pinned ();
|
||||
|
||||
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit;
|
||||
|
||||
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
|
||||
|
@ -32,5 +32,7 @@ BEGIN_C_DECLS
|
||||
*/
|
||||
OPAL_DECLSPEC extern mca_base_framework_t opal_memory_base_framework;
|
||||
|
||||
OPAL_DECLSPEC void opal_memory_base_malloc_init_hook (void);
|
||||
|
||||
END_C_DECLS
|
||||
#endif /* OPAL_BASE_MEMORY_H */
|
||||
|
@ -13,6 +13,8 @@
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -43,20 +45,22 @@ static int empty_process(void)
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static int empty_query (int *priority)
|
||||
{
|
||||
*priority = 0;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Local variables
|
||||
*/
|
||||
static opal_memory_base_component_2_0_0_t empty_component = {
|
||||
/* Don't care about the version info */
|
||||
{ 0, },
|
||||
/* Don't care about the data */
|
||||
{ 0, },
|
||||
/* Empty / safe functions to call if no memory componet is selected */
|
||||
empty_process,
|
||||
opal_memory_base_component_register_empty,
|
||||
opal_memory_base_component_deregister_empty,
|
||||
opal_memory_base_component_set_alignment_empty,
|
||||
.memoryc_query = empty_query,
|
||||
.memoryc_process = empty_process,
|
||||
.memoryc_register = opal_memory_base_component_register_empty,
|
||||
.memoryc_deregister = opal_memory_base_component_deregister_empty,
|
||||
.memoryc_set_alignment = opal_memory_base_component_set_alignment_empty,
|
||||
};
|
||||
|
||||
|
||||
@ -66,6 +70,12 @@ static opal_memory_base_component_2_0_0_t empty_component = {
|
||||
opal_memory_base_component_2_0_0_t *opal_memory = &empty_component;
|
||||
|
||||
|
||||
void opal_memory_base_malloc_init_hook (void)
|
||||
{
|
||||
if (opal_memory->memoryc_init_hook) {
|
||||
opal_memory->memoryc_init_hook ();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Function for finding and opening either all MCA components, or the one
|
||||
@ -73,23 +83,36 @@ opal_memory_base_component_2_0_0_t *opal_memory = &empty_component;
|
||||
*/
|
||||
static int opal_memory_base_open(mca_base_open_flag_t flags)
|
||||
{
|
||||
mca_base_component_list_item_t *item, *next;
|
||||
opal_memory_base_component_2_0_0_t *tmp;
|
||||
int priority, highest_priority = 0;
|
||||
int ret;
|
||||
|
||||
/* Open up all available components */
|
||||
/* can only be zero or one */
|
||||
OPAL_LIST_FOREACH(item, &opal_memory_base_framework.framework_components, mca_base_component_list_item_t) {
|
||||
tmp = (opal_memory_base_component_2_0_0_t *) item->cli_component;
|
||||
ret = tmp->memoryc_query (&priority);
|
||||
if (OPAL_SUCCESS != ret || priority < highest_priority) {
|
||||
continue;
|
||||
}
|
||||
|
||||
highest_priority = priority;
|
||||
opal_memory = tmp;
|
||||
}
|
||||
|
||||
OPAL_LIST_FOREACH_SAFE(item, next, &opal_memory_base_framework.framework_components, mca_base_component_list_item_t) {
|
||||
if ((void *) opal_memory != (void *) item->cli_component) {
|
||||
mca_base_component_unload (item->cli_component, opal_memory_base_framework.framework_output);
|
||||
opal_list_remove_item (&opal_memory_base_framework.framework_components, &item->super);
|
||||
}
|
||||
}
|
||||
|
||||
/* open remaining component */
|
||||
ret = mca_base_framework_components_open (&opal_memory_base_framework, flags);
|
||||
if (ret != OPAL_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* can only be zero or one */
|
||||
if (opal_list_get_size(&opal_memory_base_framework.framework_components) == 1) {
|
||||
mca_base_component_list_item_t *item;
|
||||
item = (mca_base_component_list_item_t*)
|
||||
opal_list_get_first(&opal_memory_base_framework.framework_components);
|
||||
opal_memory = (opal_memory_base_component_2_0_0_t*)
|
||||
item->cli_component;
|
||||
}
|
||||
|
||||
/* All done */
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -1,19 +0,0 @@
|
||||
Copyright (c) 2001-2004 Wolfram Gloger
|
||||
|
||||
Permission to use, copy, modify, distribute, and sell this software
|
||||
and its documentation for any purpose is hereby granted without fee,
|
||||
provided that (i) the above copyright notices and this permission
|
||||
notice appear in all copies of the software and related documentation,
|
||||
and (ii) the name of Wolfram Gloger may not be used in any advertising
|
||||
or publicity relating to the software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
|
||||
WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL,
|
||||
INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY
|
||||
DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY
|
||||
OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THIS SOFTWARE.
|
@ -1,181 +0,0 @@
|
||||
2004-11-05 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/hooks.c (malloc_starter, memalign_starter): Call
|
||||
ptmalloc_init_minimal().
|
||||
|
||||
2004-11-04 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/malloc.c (USE_STARTER): New macro.
|
||||
* malloc/hooks.c: Use USE_STARTER.
|
||||
* malloc/arena.c: Use USE_STARTER.
|
||||
|
||||
2004-08-13 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* malloc/malloc.c: Use strong_alias instead of weak_alias wherever
|
||||
possible.
|
||||
|
||||
2002-12-06 Roland McGrath <roland@redhat.com>
|
||||
|
||||
* malloc/arena.c (ptmalloc_init_minimal): New function, broken out
|
||||
of ptmalloc_init.
|
||||
|
||||
2002-08-23 Roland McGrath <roland@redhat.com>
|
||||
|
||||
* malloc/hooks.c (__malloc_initialize_hook, __free_hook,
|
||||
__malloc_hook, __realloc_hook, __memalign_hook,
|
||||
__after_morecore_hook): Variable definitions moved to ...
|
||||
* malloc/malloc.c: ... here, so as to be before all references.
|
||||
|
||||
2004-10-19 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/hooks.c (mem2chunk_check, top_check): Handle
|
||||
non-contiguous arena. Reported by Michael Dalton
|
||||
<mwdalton@stanford.edu> [BZ #457]. Add further checks for top
|
||||
chunk.
|
||||
|
||||
2004-08-08 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* include/malloc.h (mstate): Move type declaration from here...
|
||||
* malloc/malloc.h: ...to here.
|
||||
(struct malloc_arena_info, struct malloc_global_info): New types.
|
||||
(_int_get_arena, _int_get_arena_info, _int_get_global_info): New
|
||||
functions.
|
||||
* malloc/malloc.c (mSTATS, public_mSTATs, mALLINFo): Remove.
|
||||
(_int_get_arena_info, _int_get_global_info): New functions.
|
||||
* malloc/arena.c (_int_get_arena): New function.
|
||||
* malloc/malloc-stats.c: New file.
|
||||
* malloc/tst-mstats.c: New file.
|
||||
* malloc/Makefile (tests): Add tst-mstats.
|
||||
(distribute): Remove no-longer existing thread-m.h.
|
||||
(dist-routines): Add malloc-stats.
|
||||
* malloc/Versions: Add _int_get_arena, _int_get_arena_info,
|
||||
_int_get_global_info.
|
||||
|
||||
2004-07-25 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* sysdeps/generic/thread-st.h: New file.
|
||||
* sysdeps/pthread/thread-st.h: New file.
|
||||
* sysdeps/sproc/thread-st.h: New file.
|
||||
* sysdeps/solaris/thread-st.h: New file.
|
||||
* thread-st.h: Removed.
|
||||
|
||||
2004-03-18 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* malloc/malloc.c (__posix_memalign): Correct alignment check.
|
||||
Reported by Don Heller <dheller@cse.psu.edu>.
|
||||
|
||||
2003-12-17 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* malloc/malloc.c (__posix_memalign): If __memalign_hook != NULL,
|
||||
call it directly instead of memalign_internal.
|
||||
|
||||
2003-09-27 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/malloc.c: Include <malloc-machine.h> earlier instead of
|
||||
"thread-m.h", so that default parameters can be overridden in a
|
||||
system-specific malloc-machine.h. Remove extra ; from extern "C"
|
||||
closing brace.
|
||||
* sysdeps/generic/malloc-machine.h: New file.
|
||||
* malloc/thread-m.h: Removed.
|
||||
|
||||
2003-09-08 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/malloc.c (sYSMALLOc): Move foreign sbrk accounting into
|
||||
contiguous case. Bug report from Prem Gopalan
|
||||
<prem@mazunetworks.com>.
|
||||
|
||||
2003-08-18 Art Haas <ahaas@airmail.net>
|
||||
|
||||
* malloc/malloc.h: Remove unneeded ';' where closing the C++
|
||||
extern block.
|
||||
|
||||
2003-06-18 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* malloc/malloc.c (public_mALLINFo): Initialize malloc if it
|
||||
hasn't happened yet.
|
||||
|
||||
2003-05-28 Roland McGrath <roland@redhat.com>
|
||||
|
||||
* malloc/malloc.h [! __GNUC__] (__const): Define if undefined.
|
||||
|
||||
2003-05-04 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* malloc/arena.c (arena_get2): Add atomic_write_barrier.
|
||||
* malloc/thread-m.h: Include <atomic.h>.
|
||||
(atomic_full_barrier): Provide default.
|
||||
(atomic_read_barrier): Likewise.
|
||||
(atomic_write_barrier): Likewise.
|
||||
|
||||
2003-05-01 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* malloc/malloc.c (mSTATs): Call ptmalloc_init if necessary.
|
||||
|
||||
2003-01-27 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/hooks.c (mem2chunk_check): Check alignment of mem
|
||||
pointer, not of the computed chunk. Bug report from Carlos
|
||||
O'Donell <carlos@baldric.uwo.ca>.
|
||||
|
||||
2002-12-27 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* malloc/arena.c (ptmalloc_init): Don't call next_env_entry if
|
||||
_environ is NULL.
|
||||
|
||||
2002-12-17 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* malloc/malloc.c (mALLOPt): Make sure malloc is initialized.
|
||||
|
||||
2002-12-06 Roland McGrath <roland@redhat.com>
|
||||
|
||||
* malloc/hooks.c [_LIBC && (USE___THREAD || (USE_TLS && !SHARED))]
|
||||
(malloc_starter, memalign_starter, free_starter): Don't define these.
|
||||
|
||||
* malloc/hooks.c (memalign_starter): New function.
|
||||
* malloc/malloc.c: Declare it.
|
||||
* malloc/arena.c (save_memalign_hook): New variable.
|
||||
(ptmalloc_init): Set __memalign_hook to memalign_starter.
|
||||
|
||||
2002-11-18 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/arena.c
|
||||
(ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2): Do
|
||||
nothing if not initialized. Bug report from Marcus Brinkmann
|
||||
<Marcus.Brinkmann@ruhr-uni-bochum.de>.
|
||||
|
||||
2002-10-07 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/malloc.c (sYSMALLOc): Only check for breakage due
|
||||
to foreign sbrk()'s if arena is contiguous. Bug report from
|
||||
Bruno Haible <bruno@clisp.org>.
|
||||
|
||||
2002-07-11 Wolfram Gloger <wmglo@dent.med.uni-muenchen.de>
|
||||
|
||||
* malloc/hooks.c: typo fix in NO_THREADS case, realloc_check
|
||||
fix in HAVE_MREMAP case.
|
||||
|
||||
2002-06-11 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/malloc.c: Fix error path when new_heap() returns NULL.
|
||||
Reported by Michael Meissner <meissner@redhat.com>.
|
||||
|
||||
2002-03-29 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/malloc.c: Add short description and prototypes for
|
||||
malloc_get_state, malloc_set_state and posix_memalign, for
|
||||
consistency and to avoid warnings with -Wstrict-prototypes.
|
||||
Reported by Andreas Jaeger <aj@suse.de>.
|
||||
|
||||
2002-03-13 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/malloc.c (sYSMALLOc): Don't change brk if mmap
|
||||
failed.
|
||||
|
||||
2002-01-18 Wolfram Gloger <wg@malloc.de>
|
||||
|
||||
* malloc/malloc.c: Rewrite, adapted from Doug Lea's malloc-2.7.0.c.
|
||||
* malloc/malloc.h: Likewise.
|
||||
* malloc/arena.c: New file.
|
||||
* malloc/hooks.c: New file.
|
||||
* malloc/tst-mallocstate.c: New file.
|
||||
* malloc/Makefile: Add new testcase tst-mallocstate.
|
||||
Add arena.c and hooks.c to distribute. Fix commented CPPFLAGS.
|
@ -1,92 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2015 Research Organization for Information Science
|
||||
# and Technology (RIST). All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CPPFLAGS = -DMALLOC_DEBUG=0
|
||||
|
||||
AM_CPPFLAGS += \
|
||||
-D_GNU_SOURCE=1 \
|
||||
-DMALLOC_HOOKS=1 \
|
||||
-I$(srcdir)/sysdeps/pthread
|
||||
# this must come *after* the threads -Is
|
||||
AM_CPPFLAGS += -I$(srcdir)/sysdeps/generic
|
||||
|
||||
# ptmalloc requires us to include the copyright notice in the
|
||||
# software. So install it in the same place that we install ROMIO's
|
||||
# copyright notices.
|
||||
|
||||
docdir = $(opaldatadir)/doc
|
||||
doc_DATA = COPYRIGHT-ptmalloc2.txt
|
||||
|
||||
# Help file
|
||||
dist_opaldata_DATA = help-opal-memory-linux.txt
|
||||
|
||||
# This component is only ever built statically (i.e., slurped into
|
||||
# libopen-pal) -- it is never built as a DSO.
|
||||
noinst_LTLIBRARIES = libmca_memory_linux.la
|
||||
libmca_memory_linux_la_SOURCES = \
|
||||
memory_linux.h \
|
||||
memory_linux_component.c
|
||||
libmca_memory_linux_la_LDFLAGS = \
|
||||
-module -avoid-version $(memory_linux_LDFLAGS)
|
||||
libmca_memory_linux_la_LIBADD = $(memory_linux_LIBS)
|
||||
|
||||
# Do we have ptmalloc2 support?
|
||||
if MEMORY_LINUX_PTMALLOC2
|
||||
libmca_memory_linux_la_SOURCES += \
|
||||
memory_linux_ptmalloc2.c \
|
||||
memory_linux_munmap.c \
|
||||
rename.h \
|
||||
malloc.c \
|
||||
malloc-stats.c \
|
||||
malloc.h
|
||||
endif
|
||||
|
||||
# Do we have ummunotify support?
|
||||
if MEMORY_LINUX_UMMUNOTIFY
|
||||
libmca_memory_linux_la_SOURCES += memory_linux_ummunotify.c public.h
|
||||
endif
|
||||
|
||||
# these are included directly and shouldn't be built solo
|
||||
EXTRA_libmca_memory_linux_la_SOURCES = \
|
||||
arena.c \
|
||||
hooks.c
|
||||
|
||||
EXTRA_DIST = \
|
||||
README-open-mpi.txt \
|
||||
README-ptmalloc2.txt \
|
||||
ChangeLog-ptmalloc2.txt \
|
||||
COPYRIGHT-ptmalloc2.txt \
|
||||
lran2.h \
|
||||
t-test.h \
|
||||
t-test1.c \
|
||||
t-test2.c \
|
||||
tst-mallocstate.c \
|
||||
tst-mstats.c \
|
||||
sysdeps/sproc/malloc-machine.h \
|
||||
sysdeps/sproc/thread-st.h \
|
||||
sysdeps/pthread/malloc-machine.h \
|
||||
sysdeps/pthread/thread-st.h \
|
||||
sysdeps/solaris/malloc-machine.h \
|
||||
sysdeps/solaris/thread-st.h \
|
||||
sysdeps/generic/malloc-machine.h \
|
||||
sysdeps/generic/thread-st.h \
|
||||
sysdeps/generic/atomic.h \
|
||||
$(doc_DATA)
|
@ -1,161 +0,0 @@
|
||||
30 March 2009
|
||||
|
||||
This file documents Open MPI's usage of ptmalloc2. This is perhaps
|
||||
our 7,208,499th iteration of ptmalloc2 support, so let's document it
|
||||
here so that some future developer might spend *slightly* less time
|
||||
understanding what the heck is going on.
|
||||
|
||||
See glibc documentation about malloc hooks before continuing. This is
|
||||
pretty much required reading before reading the rest of this file /
|
||||
having a hope of understanding what's going on here:
|
||||
|
||||
http://www.gnu.org/software/libc/manual/html_mono/libc.html#Hooks-for-Malloc
|
||||
|
||||
The overall goal is that we're using the Linux glibc hooks to wholly
|
||||
replace the underlying allocator. We *used* to use horrid linker
|
||||
tricks to interpose OMPI's ptmalloc2 symbols with the glibc ones --
|
||||
meaning that user apps would call our symbols and not the glibc ones.
|
||||
But that scheme is fraught with problems, not the least of which is
|
||||
that *all* MPI applications will be forced to use our overridden
|
||||
allocator (not just the ones that need it, such as the ones running on
|
||||
OpenFabrics-based networks). Instead, what we do here is, frankly,
|
||||
quite similar to what is done in MX: we use the 4 glibc hooks to
|
||||
assert our own malloc, realloc, free, and memalign functions. This
|
||||
allows the decision as to whether to use this internal ptmalloc2
|
||||
allocate to be a run-time decision. This is quite important; using
|
||||
this internal allocator has both benefits (allowing using
|
||||
mpi_leave_pinned=1 behavior) and drawbacks (breaking some debuggers,
|
||||
being unnecessary for non-OpenFabrics-based networks, etc.).
|
||||
|
||||
Here's how it works...
|
||||
|
||||
This component *must* be linked statically as part of libopen-pal; it
|
||||
*cannot* be a DSO. Specifically, this library must be present during
|
||||
pre-main() initialization phases so that its __malloc_initialize_hook
|
||||
can be found and executed. Loading it as a DSO during MPI_INIT is far
|
||||
too late. In configure.m4, we define the M4 macro
|
||||
MCA_memory_ptmalloc2_COMPILE_MODE to always compile this component in
|
||||
static mode. Yay flexible build system.
|
||||
|
||||
This component provides an munmap() function that will intercept calls
|
||||
to munmap() and do the Right Thing. That is fairly straightforward to
|
||||
do. Intercepting the malloc/free/etc. allocator is much more
|
||||
complicated.
|
||||
|
||||
All the ptmalloc2 public symbols in this component have been name
|
||||
shifted via the rename.h file. Hence, what used to be "malloc" is now
|
||||
opal_memory_ptmalloc2_malloc. Since all the public symbols are
|
||||
name-shifted, we can safely link this component in all MPI
|
||||
applications. Specifically: just because this ptmalloc2 allocator is
|
||||
present in all OMPI executables and user-level applications, it won't
|
||||
necessarily be used -- it's a separate/run-time decision as to whether
|
||||
it will be used.
|
||||
|
||||
We set the __malloc_initialize_hook variable to point to
|
||||
opal_memory_ptmalloc2_malloc_init_hook (in hooks.c). This function is
|
||||
called by the underlying glibc allocator before any allocations occur
|
||||
and before the memory allocation subsystem is setup. As such, this
|
||||
function is *extremely* restricted in what it can do. It cannot call
|
||||
any form of malloc, for example (which seems fairly obvious, but it's
|
||||
worth mentioning :-) ). This function is one of the determining
|
||||
steps as to whether we'll use the internal ptmalloc2 allocator or
|
||||
not. Several checks are performed:
|
||||
|
||||
- Was either the MCA params mpi_leave_pinned or
|
||||
mpi_leave_pinned_pipeline set?
|
||||
- Is a driver found to be active indicating that an OS-bypass network
|
||||
is in effect (OpenFabrics, MX, Open-MX, ...etc.)
|
||||
- Was an environment variable set indicating that we want to disable
|
||||
this component?
|
||||
|
||||
If the $OMPI_MCA_memory_ptmalloc2_disable or the $FAKEROOTKEY env
|
||||
variables are set, we don't enable the memory hooks.
|
||||
|
||||
We then use the following matrix to determine whether to enable the
|
||||
memory hooks or not (explanation of the matrix is below):
|
||||
|
||||
lp / lpp yes no runtime not found
|
||||
yes yes yes yes yes
|
||||
no yes no no no
|
||||
runtime yes no runtime runtime
|
||||
not found yes no runtime runtime
|
||||
|
||||
lp = leave_pinned (the rows), lpp = leave_pinned_pipeline (the columns)
|
||||
yes = found that variable to be set to "yes" (i.e., 1)
|
||||
no = found that variable to be set to "no" (i.e., 0)
|
||||
runtime = found that variable to be set to "determine at runtime" (i.e., -1)
|
||||
not found = that variable was not set at all
|
||||
|
||||
Hence, if we end up on a "yes" block in the matrix, we enable the
|
||||
hooks. If we end up in a "no" block in the matrix, we disable the
|
||||
hooks. If we end up in a "runtime" block in the matrix, then we
|
||||
enable the hooks *if* we can find indications that an OS bypass
|
||||
network is present and available for use (e.g., OpenFabrics, MX,
|
||||
Open-MX, ...etc.).
|
||||
|
||||
To be clear: sometime during process startup, this function will
|
||||
definitely be called. It will either set the 4 hook functions to
|
||||
point to our name-shifted ptmalloc2 functions, or it won't. If the 4
|
||||
hook functions are set, then the underlying glibc allocator will
|
||||
always call our 4 functions in all the relevant places instead of
|
||||
calling its own functions. Specifically: the process is calling the
|
||||
underlying glibc allocator, but that underlying glibc allocator will
|
||||
make function pointer callbacks to our name-shifted ptmalloc2
|
||||
functions to actually do the work.
|
||||
|
||||
Note that because we know our ptmalloc will not be providing all 5
|
||||
hook variables (because we want to use the underlying glibc hook
|
||||
variables), they are #if 0'ed out in our malloc.c. This has the
|
||||
direct consequence that the *_hook_ini() in hooks.c are never used.
|
||||
So to avoid compiler/linker warnings, I #if 0'ed those out as well.
|
||||
|
||||
All the public functions in malloc.c that call hook functions were
|
||||
modified to #if 0 the hook function invocations. After all, that's
|
||||
something that we want the *underlying* glibc allocator to do -- but
|
||||
we are putting these functions as the hooks, so we don't want to
|
||||
invoke ourselves in an infinite loop!
|
||||
|
||||
The next thing that happens in the startup sequence is that the
|
||||
ptmalloc2 memory component's "open" function is called during
|
||||
MPI_INIT. But we need to test to see if the glibc memory hooks have
|
||||
been overridden before MPI_INIT was invoked. If so, we need to signal
|
||||
that our allocator support may not be complete.
|
||||
|
||||
Patrick Geoffray/MX suggests a simple test: malloc() 4MB and then free
|
||||
it. Watch to see if our name-shifted ptmalloc2 free() function was
|
||||
invoked. If it was, then all of our hooks are probably in place and
|
||||
we can proceed. If not, then set flags indicating that this memory
|
||||
allocator only supports MUNMAP (not FREE/CHUNK).
|
||||
|
||||
We actually perform this test for malloc, realloc, and memalign. If
|
||||
they all pass, then we say that the memory allocator supports
|
||||
everything. If any of them fail, then we say that the memory
|
||||
allocator does not support FREE/CHUNK.
|
||||
|
||||
NOTE: we *used* to simply set the FREE/CHUNK support flags during our
|
||||
ptmalloc2's internal ptmalloc_init() function. This is not a good
|
||||
idea becaus even after our ptmalloc_init() function has been invoked,
|
||||
someone may come in an override our memory hooks. Doing tests during
|
||||
the ptmalloc2 memory component's open function seems to be the safest
|
||||
way to test whether we *actually* support FREE/CHUNK (this is what MX
|
||||
does, too).
|
||||
|
||||
As stated above, we always intercept munmap() -- this is acceptable in
|
||||
all environments. But we test that, too, just to be sure that the
|
||||
munmap intercept is working. If we verify that it is working
|
||||
properly, then we set that we have MUNMAP support.
|
||||
|
||||
Much later in the init sequence during MPI_INIT, components indicate
|
||||
whether they want to use mpi_leave_pinned[_pipeline] support or not.
|
||||
For example, the openib BTL queries the opal_mem_hooks_support_level()
|
||||
function to see if FREE and MUNMAP are supported. If they are, then
|
||||
the openib BTL sets mpi_leave_pinned = 1.
|
||||
|
||||
Finally, the mpool base does a final check. If
|
||||
mpi_leave_pinned[_pipeline] is set to 1 and/or use_mem_hooks is set,
|
||||
if FREE/MUNMAP are not set in the supported flags, then a warning is
|
||||
printed. Otherwise, life continues (assumedly using
|
||||
mpi_leave_pinned[_pipeline] support).
|
||||
|
||||
Simple, right?
|
||||
|
@ -1,192 +0,0 @@
|
||||
ptmalloc2 - a multi-thread malloc implementation
|
||||
================================================
|
||||
|
||||
Wolfram Gloger (wg@malloc.de)
|
||||
|
||||
Nov 2004
|
||||
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This package is a modified version of Doug Lea's malloc-2.7.1pre
|
||||
implementation (available separately from ftp://g.oswego.edu/pub/misc)
|
||||
that I adapted for multiple threads, while trying to avoid lock
|
||||
contention as much as possible. Many thanks should go to Doug Lea
|
||||
(dl@cs.oswego.edu) for the great original malloc implementation.
|
||||
|
||||
As part of the GNU C library, the source files are available under the
|
||||
GNU Library General Public License (see the comments in the files).
|
||||
But as part of this stand-alone package, the code is also available
|
||||
under the (probably less restrictive) conditions described in the file
|
||||
'COPYRIGHT'. In any case, there is no warranty whatsoever for this
|
||||
package.
|
||||
|
||||
The current distribution should be available from:
|
||||
|
||||
http://www.malloc.de/malloc/ptmalloc2.tar.gz
|
||||
|
||||
|
||||
Compilation
|
||||
===========
|
||||
|
||||
It should be possible to build ptmalloc2 on any UN*X-like system that
|
||||
implements the sbrk(), mmap(), munmap() and mprotect() calls. If
|
||||
mmap() is not available, it is only possible to produce a
|
||||
non-threadsafe implementation. Since there are now several source
|
||||
files, a library (libmalloc.a) is generated. See the Makefile for
|
||||
examples of the compile-time options.
|
||||
|
||||
Note that support for non-ANSI compilers is no longer a significant
|
||||
goal.
|
||||
|
||||
Several example targets are provided in the Makefile:
|
||||
|
||||
o Posix threads (pthreads), compile with "make posix"
|
||||
|
||||
o Posix threads with explicit initialization, compile with
|
||||
"make posix-explicit" (known to be required on HPUX)
|
||||
|
||||
o Posix threads without "tsd data hack" (see below), compile with
|
||||
"make posix-with-tsd"
|
||||
|
||||
o Solaris threads, compile with "make solaris"
|
||||
|
||||
o SGI sproc() threads, compile with "make sproc"
|
||||
|
||||
o no threads, compile with "make nothreads"
|
||||
|
||||
For Linux:
|
||||
|
||||
o make "linux-pthread" (almost the same as "make posix")
|
||||
|
||||
Note that some compilers need special flags for multi-threaded code,
|
||||
e.g. with Solaris cc with Posix threads, one should use:
|
||||
|
||||
% make posix SYS_FLAGS='-mt'
|
||||
|
||||
Some additional targets, ending in `-libc', are also provided in the
|
||||
Makefile, to compare performance of the test programs to the case when
|
||||
linking with the standard malloc implementation in libc.
|
||||
|
||||
A potential problem remains: If any of the system-specific functions
|
||||
for getting/setting thread-specific data or for locking a mutex call
|
||||
one of the malloc-related functions internally, the implementation
|
||||
cannot work at all due to infinite recursion. One example seems to be
|
||||
Solaris 2.4. I would like to hear if this problem occurs on other
|
||||
systems, and whether similar workarounds could be applied.
|
||||
|
||||
For Posix threads, too, an optional hack like that has been integrated
|
||||
(activated when defining USE_TSD_DATA_HACK) which depends on
|
||||
`pthread_t' being convertible to an integral type (which is of course
|
||||
not generally guaranteed). USE_TSD_DATA_HACK is now the default
|
||||
because I haven't yet found a non-glibc pthreads system where this
|
||||
hack is _not_ needed.
|
||||
|
||||
*NEW* and _important_: In (currently) one place in the ptmalloc2
|
||||
source, a write memory barrier is needed, named
|
||||
atomic_write_barrier(). This macro needs to be defined at the end of
|
||||
malloc-machine.h. For gcc, a fallback in the form of a full memory
|
||||
barrier is already defined, but you may need to add another definition
|
||||
if you don't use gcc.
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
Just link libmalloc.a into your application.
|
||||
|
||||
Some wicked systems (e.g. HPUX apparently) won't let malloc call _any_
|
||||
thread-related functions before main(). On these systems,
|
||||
USE_STARTER=2 must be defined during compilation (see "make
|
||||
posix-explicit" above) and the global initialization function
|
||||
ptmalloc_init() must be called explicitly, preferably at the start of
|
||||
main().
|
||||
|
||||
Otherwise, when using ptmalloc2, no special precautions are necessary.
|
||||
|
||||
Link order is important
|
||||
=======================
|
||||
|
||||
On some systems, when overriding malloc and linking against shared
|
||||
libraries, the link order becomes very important. E.g., when linking
|
||||
C++ programs on Solaris, don't rely on libC being included by default,
|
||||
but instead put `-lthread' behind `-lC' on the command line:
|
||||
|
||||
CC ... libmalloc.a -lC -lthread
|
||||
|
||||
This is because there are global constructors in libC that need
|
||||
malloc/ptmalloc, which in turn needs to have the thread library to be
|
||||
already initialized.
|
||||
|
||||
Debugging hooks
|
||||
===============
|
||||
|
||||
All calls to malloc(), realloc(), free() and memalign() are routed
|
||||
through the global function pointers __malloc_hook, __realloc_hook,
|
||||
__free_hook and __memalign_hook if they are not NULL (see the malloc.h
|
||||
header file for declarations of these pointers). Therefore the malloc
|
||||
implementation can be changed at runtime, if care is taken not to call
|
||||
free() or realloc() on pointers obtained with a different
|
||||
implementation than the one currently in effect. (The easiest way to
|
||||
guarantee this is to set up the hooks before any malloc call, e.g.
|
||||
with a function pointed to by the global variable
|
||||
__malloc_initialize_hook).
|
||||
|
||||
A useful application of the hooks is built-in into ptmalloc2: The
|
||||
implementation is usually very unforgiving with respect to misuse,
|
||||
such as free()ing a pointer twice or free()ing a pointer not obtained
|
||||
with malloc() (these will typically crash the application
|
||||
immediately). To debug in such situations, you can set the
|
||||
environment variable `MALLOC_CHECK_' (note the trailing underscore).
|
||||
Performance will suffer somewhat, but you will get more controlled
|
||||
behaviour in the case of misuse. If MALLOC_CHECK_=0, wrong free()s
|
||||
will be silently ignored, if MALLOC_CHECK_=1, diagnostics will be
|
||||
printed on stderr, and if MALLOC_CHECK_=2, abort() will be called on
|
||||
any error.
|
||||
|
||||
You can now also tune other malloc parameters (normally adjusted via
|
||||
mallopt() calls from the application) with environment variables:
|
||||
|
||||
MALLOC_TRIM_THRESHOLD_ for deciding to shrink the heap (in bytes)
|
||||
|
||||
MALLOC_TOP_PAD_ how much extra memory to allocate on
|
||||
each system call (in bytes)
|
||||
|
||||
MALLOC_MMAP_THRESHOLD_ min. size for chunks allocated via
|
||||
mmap() (in bytes)
|
||||
|
||||
MALLOC_MMAP_MAX_ max. number of mmapped regions to use
|
||||
|
||||
Tests
|
||||
=====
|
||||
|
||||
Two testing applications, t-test1 and t-test2, are included in this
|
||||
source distribution. Both perform pseudo-random sequences of
|
||||
allocations/frees, and can be given numeric arguments (all arguments
|
||||
are optional):
|
||||
|
||||
% t-test[12] <n-total> <n-parallel> <n-allocs> <size-max> <bins>
|
||||
|
||||
n-total = total number of threads executed (default 10)
|
||||
n-parallel = number of threads running in parallel (2)
|
||||
n-allocs = number of malloc()'s / free()'s per thread (10000)
|
||||
size-max = max. size requested with malloc() in bytes (10000)
|
||||
bins = number of bins to maintain
|
||||
|
||||
The first test `t-test1' maintains a completely separate pool of
|
||||
allocated bins for each thread, and should therefore show full
|
||||
parallelism. On the other hand, `t-test2' creates only a single pool
|
||||
of bins, and each thread randomly allocates/frees any bin. Some lock
|
||||
contention is to be expected in this case, as the threads frequently
|
||||
cross each other's arenas.
|
||||
|
||||
Performance results from t-test1 should be quite repeatable, while the
|
||||
behaviour of t-test2 depends on scheduling variations.
|
||||
|
||||
Conclusion
|
||||
==========
|
||||
|
||||
I'm always interested in performance data and feedback, just send mail
|
||||
to ptmalloc@malloc.de.
|
||||
|
||||
Good luck!
|
@ -1,805 +0,0 @@
|
||||
/* Malloc implementation for multiple threads without lock contention.
|
||||
Copyright (C) 2001 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Wolfram Gloger <wg@malloc.de>, 2001.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If not,
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* $Id: arena.c,v 1.9 2004/11/05 14:42:23 wg Exp $ */
|
||||
|
||||
/* Compile-time constants. */
|
||||
|
||||
#define HEAP_MIN_SIZE (32*1024)
|
||||
#ifndef HEAP_MAX_SIZE
|
||||
#define HEAP_MAX_SIZE (1024*1024) /* must be a power of two */
|
||||
#endif
|
||||
|
||||
/* HEAP_MIN_SIZE and HEAP_MAX_SIZE limit the size of mmap()ed heaps
|
||||
that are dynamically created for multi-threaded programs. The
|
||||
maximum size must be a power of two, for fast determination of
|
||||
which heap belongs to a chunk. It should be much larger than the
|
||||
mmap threshold, so that requests with a size just below that
|
||||
threshold can be fulfilled without creating too many heaps. */
|
||||
|
||||
|
||||
#ifndef THREAD_STATS
|
||||
#define THREAD_STATS 0
|
||||
#endif
|
||||
|
||||
/* If THREAD_STATS is non-zero, some statistics on mutex locking are
|
||||
computed. */
|
||||
|
||||
/***************************************************************************/
|
||||
|
||||
#define top(ar_ptr) ((ar_ptr)->top)
|
||||
|
||||
/* A heap is a single contiguous memory region holding (coalesceable)
|
||||
malloc_chunks. It is allocated with mmap() and always starts at an
|
||||
address aligned to HEAP_MAX_SIZE. Not used unless compiling with
|
||||
USE_ARENAS. */
|
||||
|
||||
typedef struct _heap_info {
|
||||
mstate ar_ptr; /* Arena for this heap. */
|
||||
struct _heap_info *prev; /* Previous heap. */
|
||||
size_t size; /* Current size in bytes. */
|
||||
size_t pad; /* Make sure the following data is properly aligned. */
|
||||
} heap_info;
|
||||
|
||||
/* Thread specific data */
|
||||
|
||||
static tsd_key_t arena_key;
|
||||
static mutex_t list_lock;
|
||||
|
||||
#if THREAD_STATS
|
||||
static int stat_n_heaps;
|
||||
#define THREAD_STAT(x) x
|
||||
#else
|
||||
#define THREAD_STAT(x) do ; while(0)
|
||||
#endif
|
||||
|
||||
/* Mapped memory in non-main arenas (reliable only for NO_THREADS). */
|
||||
static unsigned long arena_mem;
|
||||
|
||||
/* Already initialized? */
|
||||
int __malloc_initialized = -1;
|
||||
|
||||
/**************************************************************************/
|
||||
|
||||
#if USE_ARENAS
|
||||
|
||||
/* arena_get() acquires an arena and locks the corresponding mutex.
|
||||
First, try the one last locked successfully by this thread. (This
|
||||
is the common case and handled with a macro for speed.) Then, loop
|
||||
once over the circularly linked list of arenas. If no arena is
|
||||
readily available, create a new one. In this latter case, `size'
|
||||
is just a hint as to how much memory will be required immediately
|
||||
in the new arena. */
|
||||
|
||||
#define arena_get(ptr, size) do { \
|
||||
Void_t *vptr = NULL; \
|
||||
ptr = (mstate)tsd_getspecific(arena_key, vptr); \
|
||||
if(ptr && !mutex_trylock(&ptr->mutex)) { \
|
||||
THREAD_STAT(++(ptr->stat_lock_direct)); \
|
||||
} else \
|
||||
ptr = arena_get2(ptr, (size)); \
|
||||
} while(0)
|
||||
|
||||
/* find the heap and corresponding arena for a given ptr */
|
||||
|
||||
#define heap_for_ptr(ptr) \
|
||||
((heap_info *)((unsigned long)(ptr) & ~(HEAP_MAX_SIZE-1)))
|
||||
#define arena_for_chunk(ptr) \
|
||||
(chunk_non_main_arena(ptr) ? heap_for_ptr(ptr)->ar_ptr : &main_arena)
|
||||
|
||||
#else /* !USE_ARENAS */
|
||||
|
||||
/* There is only one arena, main_arena. */
|
||||
|
||||
#if THREAD_STATS
|
||||
#define arena_get(ar_ptr, sz) do { \
|
||||
ar_ptr = &main_arena; \
|
||||
if(!mutex_trylock(&ar_ptr->mutex)) \
|
||||
++(ar_ptr->stat_lock_direct); \
|
||||
else { \
|
||||
(void)mutex_lock(&ar_ptr->mutex); \
|
||||
++(ar_ptr->stat_lock_wait); \
|
||||
} \
|
||||
} while(0)
|
||||
#else
|
||||
#define arena_get(ar_ptr, sz) do { \
|
||||
ar_ptr = &main_arena; \
|
||||
(void)mutex_lock(&ar_ptr->mutex); \
|
||||
} while(0)
|
||||
#endif
|
||||
#define arena_for_chunk(ptr) (&main_arena)
|
||||
|
||||
#endif /* USE_ARENAS */
|
||||
|
||||
/**************************************************************************/
|
||||
|
||||
#ifndef NO_THREADS
|
||||
|
||||
/* atfork support. */
|
||||
|
||||
static __malloc_ptr_t (*save_malloc_hook) __MALLOC_P ((size_t __size,
|
||||
__const __malloc_ptr_t));
|
||||
# if !defined _LIBC || !defined USE_TLS || (defined SHARED && !USE___THREAD)
|
||||
static __malloc_ptr_t (*save_memalign_hook) __MALLOC_P ((size_t align,
|
||||
size_t __size,
|
||||
__const __malloc_ptr_t));
|
||||
# endif
|
||||
static void (*save_free_hook) __MALLOC_P ((__malloc_ptr_t __ptr,
|
||||
__const __malloc_ptr_t));
|
||||
static Void_t* save_arena;
|
||||
|
||||
/* Magic value for the thread-specific arena pointer when
|
||||
malloc_atfork() is in use. */
|
||||
|
||||
#define ATFORK_ARENA_PTR ((Void_t*)-1)
|
||||
|
||||
/* The following hooks are used while the `atfork' handling mechanism
|
||||
is active. */
|
||||
|
||||
static Void_t*
|
||||
malloc_atfork(size_t sz, const Void_t *caller)
|
||||
{
|
||||
Void_t *vptr = NULL;
|
||||
Void_t *victim;
|
||||
|
||||
tsd_getspecific(arena_key, vptr);
|
||||
if(vptr == ATFORK_ARENA_PTR) {
|
||||
/* We are the only thread that may allocate at all. */
|
||||
if(save_malloc_hook != malloc_check) {
|
||||
return _int_malloc(&main_arena, sz);
|
||||
} else {
|
||||
if(top_check()<0)
|
||||
return 0;
|
||||
victim = _int_malloc(&main_arena, sz+1);
|
||||
return mem2mem_check(victim, sz);
|
||||
}
|
||||
} else {
|
||||
/* Suspend the thread until the `atfork' handlers have completed.
|
||||
By that time, the hooks will have been reset as well, so that
|
||||
mALLOc() can be used again. */
|
||||
(void)mutex_lock(&list_lock);
|
||||
(void)mutex_unlock(&list_lock);
|
||||
return public_mALLOc(sz);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
free_atfork(Void_t* mem, const Void_t *caller)
|
||||
{
|
||||
Void_t *vptr = NULL;
|
||||
mstate ar_ptr;
|
||||
mchunkptr p; /* chunk corresponding to mem */
|
||||
|
||||
if (mem == 0) /* free(0) has no effect */
|
||||
return;
|
||||
|
||||
p = mem2chunk(mem); /* do not bother to replicate free_check here */
|
||||
|
||||
#if HAVE_MMAP
|
||||
if (chunk_is_mmapped(p)) /* release mmapped memory. */
|
||||
{
|
||||
munmap_chunk(p);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
ar_ptr = arena_for_chunk(p);
|
||||
tsd_getspecific(arena_key, vptr);
|
||||
if(vptr != ATFORK_ARENA_PTR)
|
||||
(void)mutex_lock(&ar_ptr->mutex);
|
||||
_int_free(ar_ptr, mem);
|
||||
if(vptr != ATFORK_ARENA_PTR)
|
||||
(void)mutex_unlock(&ar_ptr->mutex);
|
||||
}
|
||||
|
||||
/* The following two functions are registered via thread_atfork() to
|
||||
make sure that the mutexes remain in a consistent state in the
|
||||
fork()ed version of a thread. Also adapt the malloc and free hooks
|
||||
temporarily, because the `atfork' handler mechanism may use
|
||||
malloc/free internally (e.g. in LinuxThreads). */
|
||||
|
||||
static void
|
||||
ptmalloc_lock_all __MALLOC_P((void))
|
||||
{
|
||||
mstate ar_ptr;
|
||||
|
||||
if(__malloc_initialized < 1)
|
||||
return;
|
||||
(void)mutex_lock(&list_lock);
|
||||
for(ar_ptr = &main_arena;;) {
|
||||
(void)mutex_lock(&ar_ptr->mutex);
|
||||
ar_ptr = ar_ptr->next;
|
||||
if(ar_ptr == &main_arena) break;
|
||||
}
|
||||
save_malloc_hook = __malloc_hook;
|
||||
save_free_hook = __free_hook;
|
||||
__malloc_hook = malloc_atfork;
|
||||
__free_hook = free_atfork;
|
||||
/* Only the current thread may perform malloc/free calls now. */
|
||||
tsd_getspecific(arena_key, save_arena);
|
||||
tsd_setspecific(arena_key, ATFORK_ARENA_PTR);
|
||||
}
|
||||
|
||||
static void
|
||||
ptmalloc_unlock_all __MALLOC_P((void))
|
||||
{
|
||||
mstate ar_ptr;
|
||||
|
||||
if(__malloc_initialized < 1)
|
||||
return;
|
||||
tsd_setspecific(arena_key, save_arena);
|
||||
__malloc_hook = save_malloc_hook;
|
||||
__free_hook = save_free_hook;
|
||||
for(ar_ptr = &main_arena;;) {
|
||||
(void)mutex_unlock(&ar_ptr->mutex);
|
||||
ar_ptr = ar_ptr->next;
|
||||
if(ar_ptr == &main_arena) break;
|
||||
}
|
||||
(void)mutex_unlock(&list_lock);
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
/* In LinuxThreads, unlocking a mutex in the child process after a
|
||||
fork() is currently unsafe, whereas re-initializing it is safe and
|
||||
does not leak resources. Therefore, a special atfork handler is
|
||||
installed for the child. */
|
||||
|
||||
static void
|
||||
ptmalloc_unlock_all2 __MALLOC_P((void))
|
||||
{
|
||||
mstate ar_ptr;
|
||||
|
||||
if(__malloc_initialized < 1)
|
||||
return;
|
||||
#if defined _LIBC || defined MALLOC_HOOKS
|
||||
tsd_setspecific(arena_key, save_arena);
|
||||
__malloc_hook = save_malloc_hook;
|
||||
__free_hook = save_free_hook;
|
||||
#endif
|
||||
for(ar_ptr = &main_arena;;) {
|
||||
(void)mutex_init(&ar_ptr->mutex);
|
||||
ar_ptr = ar_ptr->next;
|
||||
if(ar_ptr == &main_arena) break;
|
||||
}
|
||||
(void)mutex_init(&list_lock);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define ptmalloc_unlock_all2 ptmalloc_unlock_all
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* !defined NO_THREADS */
|
||||
|
||||
/* Initialization routine. */
|
||||
#ifdef _LIBC
|
||||
#include <string.h>
|
||||
extern char **_environ;
|
||||
|
||||
static char *
|
||||
internal_function
|
||||
next_env_entry (char ***position)
|
||||
{
|
||||
char **current = *position;
|
||||
char *result = NULL;
|
||||
|
||||
while (*current != NULL)
|
||||
{
|
||||
if (__builtin_expect ((*current)[0] == 'M', 0)
|
||||
&& (*current)[1] == 'A'
|
||||
&& (*current)[2] == 'L'
|
||||
&& (*current)[3] == 'L'
|
||||
&& (*current)[4] == 'O'
|
||||
&& (*current)[5] == 'C'
|
||||
&& (*current)[6] == '_')
|
||||
{
|
||||
result = &(*current)[7];
|
||||
|
||||
/* Save current position for next visit. */
|
||||
*position = ++current;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
++current;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif /* _LIBC */
|
||||
|
||||
/* Set up basic state so that _int_malloc et al can work. */
|
||||
static void
|
||||
ptmalloc_init_minimal __MALLOC_P((void))
|
||||
{
|
||||
#if DEFAULT_TOP_PAD != 0
|
||||
mp_.top_pad = DEFAULT_TOP_PAD;
|
||||
#endif
|
||||
mp_.n_mmaps_max = DEFAULT_MMAP_MAX;
|
||||
mp_.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
|
||||
mp_.trim_threshold = DEFAULT_TRIM_THRESHOLD;
|
||||
mp_.pagesize = malloc_getpagesize;
|
||||
}
|
||||
|
||||
|
||||
#if !(USE_STARTER & 2)
|
||||
static
|
||||
#endif
|
||||
void
|
||||
ptmalloc_init __MALLOC_P((void))
|
||||
{
|
||||
#if __STD_C
|
||||
const char* s;
|
||||
#else
|
||||
char* s;
|
||||
#endif
|
||||
int secure = 0;
|
||||
|
||||
if(__malloc_initialized >= 0) return;
|
||||
__malloc_initialized = 0;
|
||||
|
||||
if (mp_.pagesize == 0)
|
||||
ptmalloc_init_minimal();
|
||||
|
||||
#ifndef NO_THREADS
|
||||
# if USE_STARTER & 1
|
||||
/* With some threads implementations, creating thread-specific data
|
||||
or initializing a mutex may call malloc() itself. Provide a
|
||||
simple starter version (realloc() won't work). */
|
||||
save_malloc_hook = __malloc_hook;
|
||||
save_memalign_hook = __memalign_hook;
|
||||
save_free_hook = __free_hook;
|
||||
__malloc_hook = malloc_starter;
|
||||
__memalign_hook = memalign_starter;
|
||||
__free_hook = free_starter;
|
||||
# ifdef _LIBC
|
||||
/* Initialize the pthreads interface. */
|
||||
if (__pthread_initialize != NULL)
|
||||
__pthread_initialize();
|
||||
# endif /* !defined _LIBC */
|
||||
# endif /* USE_STARTER & 1 */
|
||||
#endif /* !defined NO_THREADS */
|
||||
mutex_init(&main_arena.mutex);
|
||||
main_arena.next = &main_arena;
|
||||
|
||||
mutex_init(&list_lock);
|
||||
tsd_key_create(&arena_key, NULL);
|
||||
tsd_setspecific(arena_key, (Void_t *)&main_arena);
|
||||
thread_atfork(ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2);
|
||||
#ifndef NO_THREADS
|
||||
# if USE_STARTER & 1
|
||||
__malloc_hook = save_malloc_hook;
|
||||
__memalign_hook = save_memalign_hook;
|
||||
__free_hook = save_free_hook;
|
||||
# endif
|
||||
# if USE_STARTER & 2
|
||||
__malloc_hook = 0;
|
||||
__memalign_hook = 0;
|
||||
__free_hook = 0;
|
||||
# endif
|
||||
#endif
|
||||
#ifdef _LIBC
|
||||
secure = __libc_enable_secure;
|
||||
s = NULL;
|
||||
if (__builtin_expect (_environ != NULL, 1))
|
||||
{
|
||||
char **runp = _environ;
|
||||
char *envline;
|
||||
|
||||
while (__builtin_expect ((envline = next_env_entry (&runp)) != NULL,
|
||||
0))
|
||||
{
|
||||
size_t len = strcspn (envline, "=");
|
||||
|
||||
if (envline[len] != '=')
|
||||
/* This is a "MALLOC_" variable at the end of the string
|
||||
without a '=' character. Ignore it since otherwise we
|
||||
will access invalid memory below. */
|
||||
continue;
|
||||
|
||||
switch (len)
|
||||
{
|
||||
case 6:
|
||||
if (memcmp (envline, "CHECK_", 6) == 0)
|
||||
s = &envline[7];
|
||||
break;
|
||||
case 8:
|
||||
if (! secure && memcmp (envline, "TOP_PAD_", 8) == 0)
|
||||
mALLOPt(M_TOP_PAD, atoi(&envline[9]));
|
||||
break;
|
||||
case 9:
|
||||
if (! secure && memcmp (envline, "MMAP_MAX_", 9) == 0)
|
||||
mALLOPt(M_MMAP_MAX, atoi(&envline[10]));
|
||||
break;
|
||||
case 15:
|
||||
if (! secure)
|
||||
{
|
||||
if (memcmp (envline, "TRIM_THRESHOLD_", 15) == 0)
|
||||
mALLOPt(M_TRIM_THRESHOLD, atoi(&envline[16]));
|
||||
else if (memcmp (envline, "MMAP_THRESHOLD_", 15) == 0)
|
||||
mALLOPt(M_MMAP_THRESHOLD, atoi(&envline[16]));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (! secure)
|
||||
{
|
||||
if((s = getenv("MALLOC_TRIM_THRESHOLD_")))
|
||||
mALLOPt(M_TRIM_THRESHOLD, atoi(s));
|
||||
if((s = getenv("MALLOC_TOP_PAD_")))
|
||||
mALLOPt(M_TOP_PAD, atoi(s));
|
||||
if((s = getenv("MALLOC_MMAP_THRESHOLD_")))
|
||||
mALLOPt(M_MMAP_THRESHOLD, atoi(s));
|
||||
if((s = getenv("MALLOC_MMAP_MAX_")))
|
||||
mALLOPt(M_MMAP_MAX, atoi(s));
|
||||
}
|
||||
s = getenv("MALLOC_CHECK_");
|
||||
#endif
|
||||
if(s) {
|
||||
if(s[0]) mALLOPt(M_CHECK_ACTION, (int)(s[0] - '0'));
|
||||
__malloc_check_init();
|
||||
}
|
||||
#if 0
|
||||
/* OMPI Change: Don't call the initialize hook; it was us. */
|
||||
if(__malloc_initialize_hook != NULL)
|
||||
(*__malloc_initialize_hook)();
|
||||
#endif
|
||||
|
||||
__malloc_initialized = 1;
|
||||
}
|
||||
|
||||
/* There are platforms (e.g. Hurd) with a link-time hook mechanism. */
|
||||
#ifdef thread_atfork_static
|
||||
thread_atfork_static(ptmalloc_lock_all, ptmalloc_unlock_all, \
|
||||
ptmalloc_unlock_all2)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* Managing heaps and arenas (for concurrent threads) */
|
||||
|
||||
#if USE_ARENAS
|
||||
|
||||
#if MALLOC_DEBUG > 1
|
||||
|
||||
/* Print the complete contents of a single heap to stderr. */
|
||||
|
||||
static void
|
||||
#if __STD_C
|
||||
dump_heap(heap_info *heap)
|
||||
#else
|
||||
dump_heap(heap) heap_info *heap;
|
||||
#endif
|
||||
{
|
||||
char *ptr;
|
||||
mchunkptr p;
|
||||
|
||||
fprintf(stderr, "Heap %p, size %10lx:\n", heap, (long)heap->size);
|
||||
ptr = (heap->ar_ptr != (mstate)(heap+1)) ?
|
||||
(char*)(heap + 1) : (char*)(heap + 1) + sizeof(struct malloc_state);
|
||||
p = (mchunkptr)(((unsigned long)ptr + MALLOC_ALIGN_MASK) &
|
||||
~MALLOC_ALIGN_MASK);
|
||||
for(;;) {
|
||||
fprintf(stderr, "chunk %p size %10lx", p, (long)p->size);
|
||||
if(p == top(heap->ar_ptr)) {
|
||||
fprintf(stderr, " (top)\n");
|
||||
break;
|
||||
} else if(p->size == (0|PREV_INUSE)) {
|
||||
fprintf(stderr, " (fence)\n");
|
||||
break;
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
p = next_chunk(p);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* MALLOC_DEBUG > 1 */
|
||||
|
||||
/* Create a new heap. size is automatically rounded up to a multiple
|
||||
of the page size. */
|
||||
|
||||
static heap_info *
|
||||
internal_function
|
||||
#if __STD_C
|
||||
new_heap(size_t size, size_t top_pad)
|
||||
#else
|
||||
new_heap(size, top_pad) size_t size, top_pad;
|
||||
#endif
|
||||
{
|
||||
size_t page_mask = malloc_getpagesize - 1;
|
||||
char *p1, *p2;
|
||||
unsigned long ul;
|
||||
heap_info *h;
|
||||
|
||||
if(size+top_pad < HEAP_MIN_SIZE)
|
||||
size = HEAP_MIN_SIZE;
|
||||
else if(size+top_pad <= HEAP_MAX_SIZE)
|
||||
size += top_pad;
|
||||
else if(size > HEAP_MAX_SIZE)
|
||||
return 0;
|
||||
else
|
||||
size = HEAP_MAX_SIZE;
|
||||
size = (size + page_mask) & ~page_mask;
|
||||
|
||||
/* A memory region aligned to a multiple of HEAP_MAX_SIZE is needed.
|
||||
No swap space needs to be reserved for the following large
|
||||
mapping (on Linux, this is the case for all non-writable mappings
|
||||
anyway). */
|
||||
p1 = (char *)MMAP(0, HEAP_MAX_SIZE<<1, PROT_NONE, MAP_PRIVATE|MAP_NORESERVE);
|
||||
if(p1 != MAP_FAILED) {
|
||||
p2 = (char *)(((unsigned long)p1 + (HEAP_MAX_SIZE-1)) & ~(HEAP_MAX_SIZE-1));
|
||||
ul = p2 - p1;
|
||||
munmap(p1, ul);
|
||||
munmap(p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul);
|
||||
} else {
|
||||
/* Try to take the chance that an allocation of only HEAP_MAX_SIZE
|
||||
is already aligned. */
|
||||
p2 = (char *)MMAP(0, HEAP_MAX_SIZE, PROT_NONE, MAP_PRIVATE|MAP_NORESERVE);
|
||||
if(p2 == MAP_FAILED)
|
||||
return 0;
|
||||
if((unsigned long)p2 & (HEAP_MAX_SIZE-1)) {
|
||||
munmap(p2, HEAP_MAX_SIZE);
|
||||
return 0;
|
||||