diff --git a/opal/mca/memory/linux/COPYRIGHT-ptmalloc2.txt b/opal/mca/memory/linux/COPYRIGHT-ptmalloc2.txt deleted file mode 100644 index 4615c2637c..0000000000 --- a/opal/mca/memory/linux/COPYRIGHT-ptmalloc2.txt +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2001-2004 Wolfram Gloger - -Permission to use, copy, modify, distribute, and sell this software -and its documentation for any purpose is hereby granted without fee, -provided that (i) the above copyright notices and this permission -notice appear in all copies of the software and related documentation, -and (ii) the name of Wolfram Gloger may not be used in any advertising -or publicity relating to the software. - -THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, -EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY -WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, -INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY -DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY -OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. diff --git a/opal/mca/memory/linux/ChangeLog-ptmalloc2.txt b/opal/mca/memory/linux/ChangeLog-ptmalloc2.txt deleted file mode 100644 index e985550c9f..0000000000 --- a/opal/mca/memory/linux/ChangeLog-ptmalloc2.txt +++ /dev/null @@ -1,181 +0,0 @@ -2004-11-05 Wolfram Gloger - - * malloc/hooks.c (malloc_starter, memalign_starter): Call - ptmalloc_init_minimal(). - -2004-11-04 Wolfram Gloger - - * malloc/malloc.c (USE_STARTER): New macro. - * malloc/hooks.c: Use USE_STARTER. - * malloc/arena.c: Use USE_STARTER. - -2004-08-13 Ulrich Drepper - - * malloc/malloc.c: Use strong_alias instead of weak_alias wherever - possible. - -2002-12-06 Roland McGrath - - * malloc/arena.c (ptmalloc_init_minimal): New function, broken out - of ptmalloc_init. - -2002-08-23 Roland McGrath - - * malloc/hooks.c (__malloc_initialize_hook, __free_hook, - __malloc_hook, __realloc_hook, __memalign_hook, - __after_morecore_hook): Variable definitions moved to ... - * malloc/malloc.c: ... here, so as to be before all references. - -2004-10-19 Wolfram Gloger - - * malloc/hooks.c (mem2chunk_check, top_check): Handle - non-contiguous arena. Reported by Michael Dalton - [BZ #457]. Add further checks for top - chunk. - -2004-08-08 Wolfram Gloger - - * include/malloc.h (mstate): Move type declaration from here... - * malloc/malloc.h: ...to here. - (struct malloc_arena_info, struct malloc_global_info): New types. - (_int_get_arena, _int_get_arena_info, _int_get_global_info): New - functions. - * malloc/malloc.c (mSTATS, public_mSTATs, mALLINFo): Remove. - (_int_get_arena_info, _int_get_global_info): New functions. - * malloc/arena.c (_int_get_arena): New function. - * malloc/malloc-stats.c: New file. - * malloc/tst-mstats.c: New file. - * malloc/Makefile (tests): Add tst-mstats. - (distribute): Remove no-longer existing thread-m.h. - (dist-routines): Add malloc-stats. - * malloc/Versions: Add _int_get_arena, _int_get_arena_info, - _int_get_global_info. - -2004-07-25 Wolfram Gloger - - * sysdeps/generic/thread-st.h: New file. - * sysdeps/pthread/thread-st.h: New file. - * sysdeps/sproc/thread-st.h: New file. - * sysdeps/solaris/thread-st.h: New file. - * thread-st.h: Removed. - -2004-03-18 Ulrich Drepper - - * malloc/malloc.c (__posix_memalign): Correct alignment check. - Reported by Don Heller . 
- -2003-12-17 Jakub Jelinek - - * malloc/malloc.c (__posix_memalign): If __memalign_hook != NULL, - call it directly instead of memalign_internal. - -2003-09-27 Wolfram Gloger - - * malloc/malloc.c: Include earlier instead of - "thread-m.h", so that default parameters can be overridden in a - system-specific malloc-machine.h. Remove extra ; from extern "C" - closing brace. - * sysdeps/generic/malloc-machine.h: New file. - * malloc/thread-m.h: Removed. - -2003-09-08 Wolfram Gloger - - * malloc/malloc.c (sYSMALLOc): Move foreign sbrk accounting into - contiguous case. Bug report from Prem Gopalan - . - -2003-08-18 Art Haas - - * malloc/malloc.h: Remove unneeded ';' where closing the C++ - extern block. - -2003-06-18 Ulrich Drepper - - * malloc/malloc.c (public_mALLINFo): Initialize malloc if it - hasn't happened yet. - -2003-05-28 Roland McGrath - - * malloc/malloc.h [! __GNUC__] (__const): Define if undefined. - -2003-05-04 H.J. Lu - - * malloc/arena.c (arena_get2): Add atomic_write_barrier. - * malloc/thread-m.h: Include . - (atomic_full_barrier): Provide default. - (atomic_read_barrier): Likewise. - (atomic_write_barrier): Likewise. - -2003-05-01 Ulrich Drepper - - * malloc/malloc.c (mSTATs): Call ptmalloc_init if necessary. - -2003-01-27 Wolfram Gloger - - * malloc/hooks.c (mem2chunk_check): Check alignment of mem - pointer, not of the computed chunk. Bug report from Carlos - O'Donell . - -2002-12-27 Jakub Jelinek - - * malloc/arena.c (ptmalloc_init): Don't call next_env_entry if - _environ is NULL. - -2002-12-17 Ulrich Drepper - - * malloc/malloc.c (mALLOPt): Make sure malloc is initialized. - -2002-12-06 Roland McGrath - - * malloc/hooks.c [_LIBC && (USE___THREAD || (USE_TLS && !SHARED))] - (malloc_starter, memalign_starter, free_starter): Don't define these. - - * malloc/hooks.c (memalign_starter): New function. - * malloc/malloc.c: Declare it. - * malloc/arena.c (save_memalign_hook): New variable. - (ptmalloc_init): Set __memalign_hook to memalign_starter. - -2002-11-18 Wolfram Gloger - - * malloc/arena.c - (ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2): Do - nothing if not initialized. Bug report from Marcus Brinkmann - . - -2002-10-07 Wolfram Gloger - - * malloc/malloc.c (sYSMALLOc): Only check for breakage due - to foreign sbrk()'s if arena is contiguous. Bug report from - Bruno Haible . - -2002-07-11 Wolfram Gloger - - * malloc/hooks.c: typo fix in NO_THREADS case, realloc_check - fix in HAVE_MREMAP case. - -2002-06-11 Wolfram Gloger - - * malloc/malloc.c: Fix error path when new_heap() returns NULL. - Reported by Michael Meissner . - -2002-03-29 Wolfram Gloger - - * malloc/malloc.c: Add short description and prototypes for - malloc_get_state, malloc_set_state and posix_memalign, for - consistency and to avoid warnings with -Wstrict-prototypes. - Reported by Andreas Jaeger . - -2002-03-13 Wolfram Gloger - - * malloc/malloc.c (sYSMALLOc): Don't change brk if mmap - failed. - -2002-01-18 Wolfram Gloger - - * malloc/malloc.c: Rewrite, adapted from Doug Lea's malloc-2.7.0.c. - * malloc/malloc.h: Likewise. - * malloc/arena.c: New file. - * malloc/hooks.c: New file. - * malloc/tst-mallocstate.c: New file. - * malloc/Makefile: Add new testcase tst-mallocstate. - Add arena.c and hooks.c to distribute. Fix commented CPPFLAGS. 
diff --git a/opal/mca/memory/linux/Makefile.am b/opal/mca/memory/linux/Makefile.am deleted file mode 100644 index c29fc5d87f..0000000000 --- a/opal/mca/memory/linux/Makefile.am +++ /dev/null @@ -1,92 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = -DMALLOC_DEBUG=0 - -AM_CPPFLAGS += \ - -D_GNU_SOURCE=1 \ - -DMALLOC_HOOKS=1 \ - -I$(srcdir)/sysdeps/pthread -# this must come *after* the threads -Is -AM_CPPFLAGS += -I$(srcdir)/sysdeps/generic - -# ptmalloc requires us to include the copyright notice in the -# software. So install it in the same place that we install ROMIO's -# copyright notices. - -docdir = $(opaldatadir)/doc -doc_DATA = COPYRIGHT-ptmalloc2.txt - -# Help file -dist_opaldata_DATA = help-opal-memory-linux.txt - -# This component is only ever built statically (i.e., slurped into -# libopen-pal) -- it is never built as a DSO. -noinst_LTLIBRARIES = libmca_memory_linux.la -libmca_memory_linux_la_SOURCES = \ - memory_linux.h \ - memory_linux_component.c -libmca_memory_linux_la_LDFLAGS = \ - -module -avoid-version $(memory_linux_LDFLAGS) -libmca_memory_linux_la_LIBADD = $(memory_linux_LIBS) - -# Do we have ptmalloc2 support? -if MEMORY_LINUX_PTMALLOC2 -libmca_memory_linux_la_SOURCES += \ - memory_linux_ptmalloc2.c \ - memory_linux_munmap.c \ - rename.h \ - malloc.c \ - malloc-stats.c \ - malloc.h -endif - -# Do we have ummunotify support? -if MEMORY_LINUX_UMMUNOTIFY -libmca_memory_linux_la_SOURCES += memory_linux_ummunotify.c public.h -endif - -# these are included directly and shouldn't be built solo -EXTRA_libmca_memory_linux_la_SOURCES = \ - arena.c \ - hooks.c - -EXTRA_DIST = \ - README-open-mpi.txt \ - README-ptmalloc2.txt \ - ChangeLog-ptmalloc2.txt \ - COPYRIGHT-ptmalloc2.txt \ - lran2.h \ - t-test.h \ - t-test1.c \ - t-test2.c \ - tst-mallocstate.c \ - tst-mstats.c \ - sysdeps/sproc/malloc-machine.h \ - sysdeps/sproc/thread-st.h \ - sysdeps/pthread/malloc-machine.h \ - sysdeps/pthread/thread-st.h \ - sysdeps/solaris/malloc-machine.h \ - sysdeps/solaris/thread-st.h \ - sysdeps/generic/malloc-machine.h \ - sysdeps/generic/thread-st.h \ - sysdeps/generic/atomic.h \ - $(doc_DATA) diff --git a/opal/mca/memory/linux/README-open-mpi.txt b/opal/mca/memory/linux/README-open-mpi.txt deleted file mode 100644 index 43e88eba8a..0000000000 --- a/opal/mca/memory/linux/README-open-mpi.txt +++ /dev/null @@ -1,161 +0,0 @@ -30 March 2009 - -This file documents Open MPI's usage of ptmalloc2. This is perhaps -our 7,208,499th iteration of ptmalloc2 support, so let's document it -here so that some future developer might spend *slightly* less time -understanding what the heck is going on. - -See glibc documentation about malloc hooks before continuing. 
This is -pretty much required reading before reading the rest of this file / -having a hope of understanding what's going on here: - - http://www.gnu.org/software/libc/manual/html_mono/libc.html#Hooks-for-Malloc - -The overall goal is that we're using the Linux glibc hooks to wholly -replace the underlying allocator. We *used* to use horrid linker -tricks to interpose OMPI's ptmalloc2 symbols with the glibc ones -- -meaning that user apps would call our symbols and not the glibc ones. -But that scheme is fraught with problems, not the least of which is -that *all* MPI applications will be forced to use our overridden -allocator (not just the ones that need it, such as the ones running on -OpenFabrics-based networks). Instead, what we do here is, frankly, -quite similar to what is done in MX: we use the 4 glibc hooks to -assert our own malloc, realloc, free, and memalign functions. This -allows the decision as to whether to use this internal ptmalloc2 -allocator to be a run-time decision. This is quite important; using -this internal allocator has both benefits (allowing use of the -mpi_leave_pinned=1 behavior) and drawbacks (breaking some debuggers, -being unnecessary for non-OpenFabrics-based networks, etc.). - -Here's how it works... - -This component *must* be linked statically as part of libopen-pal; it -*cannot* be a DSO. Specifically, this library must be present during -pre-main() initialization phases so that its __malloc_initialize_hook -can be found and executed. Loading it as a DSO during MPI_INIT is far -too late. In configure.m4, we define the M4 macro -MCA_memory_ptmalloc2_COMPILE_MODE to always compile this component in -static mode. Yay flexible build system. - -This component provides an munmap() function that will intercept calls -to munmap() and do the Right Thing. That is fairly straightforward to -do. Intercepting the malloc/free/etc. allocator is much more -complicated. - -All the ptmalloc2 public symbols in this component have been name -shifted via the rename.h file. Hence, what used to be "malloc" is now -opal_memory_ptmalloc2_malloc. Since all the public symbols are -name-shifted, we can safely link this component in all MPI -applications. Specifically: just because this ptmalloc2 allocator is -present in all OMPI executables and user-level applications, it won't -necessarily be used -- it's a separate/run-time decision as to whether -it will be used. - -We set the __malloc_initialize_hook variable to point to -opal_memory_ptmalloc2_malloc_init_hook (in hooks.c). This function is -called by the underlying glibc allocator before any allocations occur -and before the memory allocation subsystem is set up. As such, this -function is *extremely* restricted in what it can do. It cannot call -any form of malloc, for example (which seems fairly obvious, but it's -worth mentioning :-) ). This function is one of the determining -steps as to whether we'll use the internal ptmalloc2 allocator or -not. Several checks are performed: - -- Was either the MCA params mpi_leave_pinned or - mpi_leave_pinned_pipeline set? -- Is a driver found to be active indicating that an OS-bypass network - is in effect (OpenFabrics, MX, Open-MX, ...etc.)? -- Was an environment variable set indicating that we want to disable - this component? - -If the $OMPI_MCA_memory_ptmalloc2_disable or the $FAKEROOTKEY env -variables are set, we don't enable the memory hooks.
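As a point of reference, here is a minimal, self-contained sketch of the hook installation described above, in the style of the glibc manual's example. The my_* names are placeholders for illustration only -- the real component installs its name-shifted ptmalloc2 functions -- and these hook variables are deprecated (and later removed) in modern glibc, so the sketch only builds against the older glibc that this component targets:

    #include <malloc.h>
    #include <stdlib.h>

    /* Saved copies of the previous hooks so the wrappers can chain to the
       real allocator without recursing into themselves. */
    static void *(*old_malloc_hook)(size_t, const void *);
    static void (*old_free_hook)(void *, const void *);

    static void *my_malloc(size_t sz, const void *caller)
    {
        void *p;
        (void) caller;
        __malloc_hook = old_malloc_hook;    /* step aside */
        p = malloc(sz);
        old_malloc_hook = __malloc_hook;
        __malloc_hook = my_malloc;          /* re-install ourselves */
        return p;
    }

    static void my_free(void *ptr, const void *caller)
    {
        (void) caller;
        __free_hook = old_free_hook;
        free(ptr);
        old_free_hook = __free_hook;
        __free_hook = my_free;
    }

    static void my_init_hook(void)
    {
        /* Called by glibc before main() and before any allocation; it must
           not allocate memory itself. */
        old_malloc_hook = __malloc_hook;
        old_free_hook = __free_hook;
        __malloc_hook = my_malloc;
        __free_hook = my_free;
    }

    /* __realloc_hook and __memalign_hook follow the same pattern. */
    void (*__malloc_initialize_hook)(void) = my_init_hook;

Whether the component installs its hooks at all is decided as follows.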
- -We then use the following matrix to determine whether to enable the -memory hooks or not (explanation of the matrix is below):
-
-   lp / lpp     yes   no    runtime   not found
-   yes          yes   yes   yes       yes
-   no           yes   no    no        no
-   runtime      yes   no    runtime   runtime
-   not found    yes   no    runtime   runtime
-
-lp = leave_pinned (the rows), lpp = leave_pinned_pipeline (the columns)
-yes = found that variable to be set to "yes" (i.e., 1)
-no = found that variable to be set to "no" (i.e., 0)
-runtime = found that variable to be set to "determine at runtime" (i.e., -1)
-not found = that variable was not set at all
- -Hence, if we end up in a "yes" block in the matrix, we enable the -hooks. If we end up in a "no" block in the matrix, we disable the -hooks. If we end up in a "runtime" block in the matrix, then we -enable the hooks *if* we can find indications that an OS bypass -network is present and available for use (e.g., OpenFabrics, MX, -Open-MX, ...etc.). - -To be clear: sometime during process startup, this function will -definitely be called. It will either set the 4 hook functions to -point to our name-shifted ptmalloc2 functions, or it won't. If the 4 -hook functions are set, then the underlying glibc allocator will -always call our 4 functions in all the relevant places instead of -calling its own functions. Specifically: the process is calling the -underlying glibc allocator, but that underlying glibc allocator will -make function pointer callbacks to our name-shifted ptmalloc2 -functions to actually do the work. - -Note that because we know our ptmalloc will not be providing all 5 -hook variables (because we want to use the underlying glibc hook -variables), they are #if 0'ed out in our malloc.c. This has the -direct consequence that the *_hook_ini() in hooks.c are never used. -So to avoid compiler/linker warnings, I #if 0'ed those out as well. - -All the public functions in malloc.c that call hook functions were -modified to #if 0 the hook function invocations. After all, that's -something that we want the *underlying* glibc allocator to do -- but -we are putting these functions as the hooks, so we don't want to -invoke ourselves in an infinite loop! - -The next thing that happens in the startup sequence is that the -ptmalloc2 memory component's "open" function is called during -MPI_INIT. But we need to test to see if the glibc memory hooks have -been overridden before MPI_INIT was invoked. If so, we need to signal -that our allocator support may not be complete. - -Patrick Geoffray/MX suggests a simple test: malloc() 4MB and then free -it. Watch to see if our name-shifted ptmalloc2 free() function was -invoked. If it was, then all of our hooks are probably in place and -we can proceed. If not, then set flags indicating that this memory -allocator only supports MUNMAP (not FREE/CHUNK). - -We actually perform this test for malloc, realloc, and memalign. If -they all pass, then we say that the memory allocator supports -everything. If any of them fail, then we say that the memory -allocator does not support FREE/CHUNK. - -NOTE: we *used* to simply set the FREE/CHUNK support flags during our -ptmalloc2's internal ptmalloc_init() function. This is not a good -idea because even after our ptmalloc_init() function has been invoked, -someone may come in and override our memory hooks. Doing tests during -the ptmalloc2 memory component's open function seems to be the safest -way to test whether we *actually* support FREE/CHUNK (this is what MX -does, too).
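The open-time probe itself is tiny. Roughly, and with illustrative names only (not the component's real flags or functions), it looks like this:

    #include <stdlib.h>

    /* Set to 1 by our (name-shifted) free() whenever it runs; cleared
       before the probe.  Illustrative name only. */
    static volatile int free_was_ours = 0;

    static int probe_free_support(void)
    {
        void *p;

        free_was_ours = 0;
        p = malloc(4 * 1024 * 1024);
        free(p);

        /* If our hook was still installed, our free() ran and set the
           flag, so FREE/CHUNK support can be advertised; otherwise we
           fall back to MUNMAP-only support. */
        return free_was_ours;
    }

The same style of probe is repeated for realloc and memalign, as noted above.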
- -As stated above, we always intercept munmap() -- this is acceptable in -all environments. But we test that, too, just to be sure that the -munmap intercept is working. If we verify that it is working -properly, then we set that we have MUNMAP support. - -Much later in the init sequence during MPI_INIT, components indicate -whether they want to use mpi_leave_pinned[_pipeline] support or not. -For example, the openib BTL queries the opal_mem_hooks_support_level() -function to see if FREE and MUNMAP are supported. If they are, then -the openib BTL sets mpi_leave_pinned = 1. - -Finally, the mpool base does a final check. If -mpi_leave_pinned[_pipeline] is set to 1 and/or use_mem_hooks is set, -if FREE/MUNMAP are not set in the supported flags, then a warning is -printed. Otherwise, life continues (assumedly using -mpi_leave_pinned[_pipeline] support). - -Simple, right? - diff --git a/opal/mca/memory/linux/README-ptmalloc2.txt b/opal/mca/memory/linux/README-ptmalloc2.txt deleted file mode 100644 index fed6995134..0000000000 --- a/opal/mca/memory/linux/README-ptmalloc2.txt +++ /dev/null @@ -1,192 +0,0 @@ -ptmalloc2 - a multi-thread malloc implementation -================================================ - -Wolfram Gloger (wg@malloc.de) - -Nov 2004 - - -Introduction -============ - -This package is a modified version of Doug Lea's malloc-2.7.1pre -implementation (available seperately from ftp://g.oswego.edu/pub/misc) -that I adapted for multiple threads, while trying to avoid lock -contention as much as possible. Many thanks should go to Doug Lea -(dl@cs.oswego.edu) for the great original malloc implementation. - -As part of the GNU C library, the source files are available under the -GNU Library General Public License (see the comments in the files). -But as part of this stand-alone package, the code is also available -under the (probably less restrictive) conditions described in the file -'COPYRIGHT'. In any case, there is no warranty whatsoever for this -package. - -The current distribution should be available from: - -http://www.malloc.de/malloc/ptmalloc2.tar.gz - - -Compilation -=========== - -It should be possible to build ptmalloc2 on any UN*X-like system that -implements the sbrk(), mmap(), munmap() and mprotect() calls. If -mmap() is not available, it is only possible to produce a -non-threadsafe implementation. Since there are now several source -files, a library (libmalloc.a) is generated. See the Makefile for -examples of the compile-time options. - -Note that support for non-ANSI compilers is no longer a significant -goal. - -Several example targets are provided in the Makefile: - - o Posix threads (pthreads), compile with "make posix" - - o Posix threads with explicit initialization, compile with - "make posix-explicit" (known to be required on HPUX) - - o Posix threads without "tsd data hack" (see below), compile with - "make posix-with-tsd" - - o Solaris threads, compile with "make solaris" - - o SGI sproc() threads, compile with "make sproc" - - o no threads, compile with "make nothreads" - -For Linux: - - o make "linux-pthread" (almost the same as "make posix") - -Note that some compilers need special flags for multi-threaded code, -e.g. with Solaris cc with Posix threads, one should use: - -% make posix SYS_FLAGS='-mt' - -Some additional targets, ending in `-libc', are also provided in the -Makefile, to compare performance of the test programs to the case when -linking with the standard malloc implementation in libc. 
- -A potential problem remains: If any of the system-specific functions -for getting/setting thread-specific data or for locking a mutex call -one of the malloc-related functions internally, the implementation -cannot work at all due to infinite recursion. One example seems to be -Solaris 2.4. I would like to hear if this problem occurs on other -systems, and whether similar workarounds could be applied. - -For Posix threads, too, an optional hack like that has been integrated -(activated when defining USE_TSD_DATA_HACK) which depends on -`pthread_t' being convertible to an integral type (which is of course -not generally guaranteed). USE_TSD_DATA_HACK is now the default -because I haven't yet found a non-glibc pthreads system where this -hack is _not_ needed. - -*NEW* and _important_: In (currently) one place in the ptmalloc2 -source, a write memory barrier is needed, named -atomic_write_barrier(). This macro needs to be defined at the end of -malloc-machine.h. For gcc, a fallback in the form of a full memory -barrier is already defined, but you may need to add another definition -if you don't use gcc. - -Usage -===== - -Just link libmalloc.a into your application. - -Some wicked systems (e.g. HPUX apparently) won't let malloc call _any_ -thread-related functions before main(). On these systems, -USE_STARTER=2 must be defined during compilation (see "make -posix-explicit" above) and the global initialization function -ptmalloc_init() must be called explitly, preferably at the start of -main(). - -Otherwise, when using ptmalloc2, no special precautions are necessary. - -Link order is important -======================= - -On some systems, when overriding malloc and linking against shared -libraries, the link order becomes very important. E.g., when linking -C++ programs on Solaris, don't rely on libC being included by default, -but instead put `-lthread' behind `-lC' on the command line: - - CC ... libmalloc.a -lC -lthread - -This is because there are global constructors in libC that need -malloc/ptmalloc, which in turn needs to have the thread library to be -already initialized. - -Debugging hooks -=============== - -All calls to malloc(), realloc(), free() and memalign() are routed -through the global function pointers __malloc_hook, __realloc_hook, -__free_hook and __memalign_hook if they are not NULL (see the malloc.h -header file for declarations of these pointers). Therefore the malloc -implementation can be changed at runtime, if care is taken not to call -free() or realloc() on pointers obtained with a different -implementation than the one currently in effect. (The easiest way to -guarantee this is to set up the hooks before any malloc call, e.g. -with a function pointed to by the global variable -__malloc_initialize_hook). - -A useful application of the hooks is built-in into ptmalloc2: The -implementation is usually very unforgiving with respect to misuse, -such as free()ing a pointer twice or free()ing a pointer not obtained -with malloc() (these will typically crash the application -immediately). To debug in such situations, you can set the -environment variable `MALLOC_CHECK_' (note the trailing underscore). -Performance will suffer somewhat, but you will get more controlled -behaviour in the case of misuse. If MALLOC_CHECK_=0, wrong free()s -will be silently ignored, if MALLOC_CHECK_=1, diagnostics will be -printed on stderr, and if MALLOC_CHECK_=2, abort() will be called on -any error. 
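As an illustration, consider the (hypothetical) toy program below. A deliberate double free like this would typically crash the application or silently corrupt the heap; with MALLOC_CHECK_=1 set in the environment, the debugging hooks instead print a diagnostic on stderr at the second free(), and with MALLOC_CHECK_=2 they call abort() there:

    #include <stdlib.h>

    int main(void)
    {
        char *p = malloc(32);
        free(p);
        free(p);   /* misuse: ignored, reported, or aborted, depending on
                      the value of MALLOC_CHECK_ */
        return 0;
    }

For example, running the program as `MALLOC_CHECK_=2 ./a.out' makes the second free() abort immediately.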
- -You can now also tune other malloc parameters (normally adjused via -mallopt() calls from the application) with environment variables: - - MALLOC_TRIM_THRESHOLD_ for deciding to shrink the heap (in bytes) - - MALLOC_TOP_PAD_ how much extra memory to allocate on - each system call (in bytes) - - MALLOC_MMAP_THRESHOLD_ min. size for chunks allocated via - mmap() (in bytes) - - MALLOC_MMAP_MAX_ max. number of mmapped regions to use - -Tests -===== - -Two testing applications, t-test1 and t-test2, are included in this -source distribution. Both perform pseudo-random sequences of -allocations/frees, and can be given numeric arguments (all arguments -are optional): - -% t-test[12] - - n-total = total number of threads executed (default 10) - n-parallel = number of threads running in parallel (2) - n-allocs = number of malloc()'s / free()'s per thread (10000) - size-max = max. size requested with malloc() in bytes (10000) - bins = number of bins to maintain - -The first test `t-test1' maintains a completely seperate pool of -allocated bins for each thread, and should therefore show full -parallelism. On the other hand, `t-test2' creates only a single pool -of bins, and each thread randomly allocates/frees any bin. Some lock -contention is to be expected in this case, as the threads frequently -cross each others arena. - -Performance results from t-test1 should be quite repeatable, while the -behaviour of t-test2 depends on scheduling variations. - -Conclusion -========== - -I'm always interested in performance data and feedback, just send mail -to ptmalloc@malloc.de. - -Good luck! diff --git a/opal/mca/memory/linux/arena.c b/opal/mca/memory/linux/arena.c deleted file mode 100644 index 3f75a1c3af..0000000000 --- a/opal/mca/memory/linux/arena.c +++ /dev/null @@ -1,805 +0,0 @@ -/* Malloc implementation for multiple threads without lock contention. - Copyright (C) 2001 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Wolfram Gloger , 2001. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* $Id: arena.c,v 1.9 2004/11/05 14:42:23 wg Exp $ */ - -/* Compile-time constants. */ - -#define HEAP_MIN_SIZE (32*1024) -#ifndef HEAP_MAX_SIZE -#define HEAP_MAX_SIZE (1024*1024) /* must be a power of two */ -#endif - -/* HEAP_MIN_SIZE and HEAP_MAX_SIZE limit the size of mmap()ed heaps - that are dynamically created for multi-threaded programs. The - maximum size must be a power of two, for fast determination of - which heap belongs to a chunk. It should be much larger than the - mmap threshold, so that requests with a size just below that - threshold can be fulfilled without creating too many heaps. */ - - -#ifndef THREAD_STATS -#define THREAD_STATS 0 -#endif - -/* If THREAD_STATS is non-zero, some statistics on mutex locking are - computed. 
*/ - -/***************************************************************************/ - -#define top(ar_ptr) ((ar_ptr)->top) - -/* A heap is a single contiguous memory region holding (coalesceable) - malloc_chunks. It is allocated with mmap() and always starts at an - address aligned to HEAP_MAX_SIZE. Not used unless compiling with - USE_ARENAS. */ - -typedef struct _heap_info { - mstate ar_ptr; /* Arena for this heap. */ - struct _heap_info *prev; /* Previous heap. */ - size_t size; /* Current size in bytes. */ - size_t pad; /* Make sure the following data is properly aligned. */ -} heap_info; - -/* Thread specific data */ - -static tsd_key_t arena_key; -static mutex_t list_lock; - -#if THREAD_STATS -static int stat_n_heaps; -#define THREAD_STAT(x) x -#else -#define THREAD_STAT(x) do ; while(0) -#endif - -/* Mapped memory in non-main arenas (reliable only for NO_THREADS). */ -static unsigned long arena_mem; - -/* Already initialized? */ -int __malloc_initialized = -1; - -/**************************************************************************/ - -#if USE_ARENAS - -/* arena_get() acquires an arena and locks the corresponding mutex. - First, try the one last locked successfully by this thread. (This - is the common case and handled with a macro for speed.) Then, loop - once over the circularly linked list of arenas. If no arena is - readily available, create a new one. In this latter case, `size' - is just a hint as to how much memory will be required immediately - in the new arena. */ - -#define arena_get(ptr, size) do { \ - Void_t *vptr = NULL; \ - ptr = (mstate)tsd_getspecific(arena_key, vptr); \ - if(ptr && !mutex_trylock(&ptr->mutex)) { \ - THREAD_STAT(++(ptr->stat_lock_direct)); \ - } else \ - ptr = arena_get2(ptr, (size)); \ -} while(0) - -/* find the heap and corresponding arena for a given ptr */ - -#define heap_for_ptr(ptr) \ - ((heap_info *)((unsigned long)(ptr) & ~(HEAP_MAX_SIZE-1))) -#define arena_for_chunk(ptr) \ - (chunk_non_main_arena(ptr) ? heap_for_ptr(ptr)->ar_ptr : &main_arena) - -#else /* !USE_ARENAS */ - -/* There is only one arena, main_arena. */ - -#if THREAD_STATS -#define arena_get(ar_ptr, sz) do { \ - ar_ptr = &main_arena; \ - if(!mutex_trylock(&ar_ptr->mutex)) \ - ++(ar_ptr->stat_lock_direct); \ - else { \ - (void)mutex_lock(&ar_ptr->mutex); \ - ++(ar_ptr->stat_lock_wait); \ - } \ -} while(0) -#else -#define arena_get(ar_ptr, sz) do { \ - ar_ptr = &main_arena; \ - (void)mutex_lock(&ar_ptr->mutex); \ -} while(0) -#endif -#define arena_for_chunk(ptr) (&main_arena) - -#endif /* USE_ARENAS */ - -/**************************************************************************/ - -#ifndef NO_THREADS - -/* atfork support. */ - -static __malloc_ptr_t (*save_malloc_hook) __MALLOC_P ((size_t __size, - __const __malloc_ptr_t)); -# if !defined _LIBC || !defined USE_TLS || (defined SHARED && !USE___THREAD) -static __malloc_ptr_t (*save_memalign_hook) __MALLOC_P ((size_t align, - size_t __size, - __const __malloc_ptr_t)); -# endif -static void (*save_free_hook) __MALLOC_P ((__malloc_ptr_t __ptr, - __const __malloc_ptr_t)); -static Void_t* save_arena; - -/* Magic value for the thread-specific arena pointer when - malloc_atfork() is in use. */ - -#define ATFORK_ARENA_PTR ((Void_t*)-1) - -/* The following hooks are used while the `atfork' handling mechanism - is active. 
*/ - -static Void_t* -malloc_atfork(size_t sz, const Void_t *caller) -{ - Void_t *vptr = NULL; - Void_t *victim; - - tsd_getspecific(arena_key, vptr); - if(vptr == ATFORK_ARENA_PTR) { - /* We are the only thread that may allocate at all. */ - if(save_malloc_hook != malloc_check) { - return _int_malloc(&main_arena, sz); - } else { - if(top_check()<0) - return 0; - victim = _int_malloc(&main_arena, sz+1); - return mem2mem_check(victim, sz); - } - } else { - /* Suspend the thread until the `atfork' handlers have completed. - By that time, the hooks will have been reset as well, so that - mALLOc() can be used again. */ - (void)mutex_lock(&list_lock); - (void)mutex_unlock(&list_lock); - return public_mALLOc(sz); - } -} - -static void -free_atfork(Void_t* mem, const Void_t *caller) -{ - Void_t *vptr = NULL; - mstate ar_ptr; - mchunkptr p; /* chunk corresponding to mem */ - - if (mem == 0) /* free(0) has no effect */ - return; - - p = mem2chunk(mem); /* do not bother to replicate free_check here */ - -#if HAVE_MMAP - if (chunk_is_mmapped(p)) /* release mmapped memory. */ - { - munmap_chunk(p); - return; - } -#endif - - ar_ptr = arena_for_chunk(p); - tsd_getspecific(arena_key, vptr); - if(vptr != ATFORK_ARENA_PTR) - (void)mutex_lock(&ar_ptr->mutex); - _int_free(ar_ptr, mem); - if(vptr != ATFORK_ARENA_PTR) - (void)mutex_unlock(&ar_ptr->mutex); -} - -/* The following two functions are registered via thread_atfork() to - make sure that the mutexes remain in a consistent state in the - fork()ed version of a thread. Also adapt the malloc and free hooks - temporarily, because the `atfork' handler mechanism may use - malloc/free internally (e.g. in LinuxThreads). */ - -static void -ptmalloc_lock_all __MALLOC_P((void)) -{ - mstate ar_ptr; - - if(__malloc_initialized < 1) - return; - (void)mutex_lock(&list_lock); - for(ar_ptr = &main_arena;;) { - (void)mutex_lock(&ar_ptr->mutex); - ar_ptr = ar_ptr->next; - if(ar_ptr == &main_arena) break; - } - save_malloc_hook = __malloc_hook; - save_free_hook = __free_hook; - __malloc_hook = malloc_atfork; - __free_hook = free_atfork; - /* Only the current thread may perform malloc/free calls now. */ - tsd_getspecific(arena_key, save_arena); - tsd_setspecific(arena_key, ATFORK_ARENA_PTR); -} - -static void -ptmalloc_unlock_all __MALLOC_P((void)) -{ - mstate ar_ptr; - - if(__malloc_initialized < 1) - return; - tsd_setspecific(arena_key, save_arena); - __malloc_hook = save_malloc_hook; - __free_hook = save_free_hook; - for(ar_ptr = &main_arena;;) { - (void)mutex_unlock(&ar_ptr->mutex); - ar_ptr = ar_ptr->next; - if(ar_ptr == &main_arena) break; - } - (void)mutex_unlock(&list_lock); -} - -#ifdef __linux__ - -/* In LinuxThreads, unlocking a mutex in the child process after a - fork() is currently unsafe, whereas re-initializing it is safe and - does not leak resources. Therefore, a special atfork handler is - installed for the child. */ - -static void -ptmalloc_unlock_all2 __MALLOC_P((void)) -{ - mstate ar_ptr; - - if(__malloc_initialized < 1) - return; -#if defined _LIBC || defined MALLOC_HOOKS - tsd_setspecific(arena_key, save_arena); - __malloc_hook = save_malloc_hook; - __free_hook = save_free_hook; -#endif - for(ar_ptr = &main_arena;;) { - (void)mutex_init(&ar_ptr->mutex); - ar_ptr = ar_ptr->next; - if(ar_ptr == &main_arena) break; - } - (void)mutex_init(&list_lock); -} - -#else - -#define ptmalloc_unlock_all2 ptmalloc_unlock_all - -#endif - -#endif /* !defined NO_THREADS */ - -/* Initialization routine. 
*/ -#ifdef _LIBC -#include -extern char **_environ; - -static char * -internal_function -next_env_entry (char ***position) -{ - char **current = *position; - char *result = NULL; - - while (*current != NULL) - { - if (__builtin_expect ((*current)[0] == 'M', 0) - && (*current)[1] == 'A' - && (*current)[2] == 'L' - && (*current)[3] == 'L' - && (*current)[4] == 'O' - && (*current)[5] == 'C' - && (*current)[6] == '_') - { - result = &(*current)[7]; - - /* Save current position for next visit. */ - *position = ++current; - - break; - } - - ++current; - } - - return result; -} -#endif /* _LIBC */ - -/* Set up basic state so that _int_malloc et al can work. */ -static void -ptmalloc_init_minimal __MALLOC_P((void)) -{ -#if DEFAULT_TOP_PAD != 0 - mp_.top_pad = DEFAULT_TOP_PAD; -#endif - mp_.n_mmaps_max = DEFAULT_MMAP_MAX; - mp_.mmap_threshold = DEFAULT_MMAP_THRESHOLD; - mp_.trim_threshold = DEFAULT_TRIM_THRESHOLD; - mp_.pagesize = malloc_getpagesize; -} - - -#if !(USE_STARTER & 2) -static -#endif -void -ptmalloc_init __MALLOC_P((void)) -{ -#if __STD_C - const char* s; -#else - char* s; -#endif - int secure = 0; - - if(__malloc_initialized >= 0) return; - __malloc_initialized = 0; - - if (mp_.pagesize == 0) - ptmalloc_init_minimal(); - -#ifndef NO_THREADS -# if USE_STARTER & 1 - /* With some threads implementations, creating thread-specific data - or initializing a mutex may call malloc() itself. Provide a - simple starter version (realloc() won't work). */ - save_malloc_hook = __malloc_hook; - save_memalign_hook = __memalign_hook; - save_free_hook = __free_hook; - __malloc_hook = malloc_starter; - __memalign_hook = memalign_starter; - __free_hook = free_starter; -# ifdef _LIBC - /* Initialize the pthreads interface. */ - if (__pthread_initialize != NULL) - __pthread_initialize(); -# endif /* !defined _LIBC */ -# endif /* USE_STARTER & 1 */ -#endif /* !defined NO_THREADS */ - mutex_init(&main_arena.mutex); - main_arena.next = &main_arena; - - mutex_init(&list_lock); - tsd_key_create(&arena_key, NULL); - tsd_setspecific(arena_key, (Void_t *)&main_arena); - thread_atfork(ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2); -#ifndef NO_THREADS -# if USE_STARTER & 1 - __malloc_hook = save_malloc_hook; - __memalign_hook = save_memalign_hook; - __free_hook = save_free_hook; -# endif -# if USE_STARTER & 2 - __malloc_hook = 0; - __memalign_hook = 0; - __free_hook = 0; -# endif -#endif -#ifdef _LIBC - secure = __libc_enable_secure; - s = NULL; - if (__builtin_expect (_environ != NULL, 1)) - { - char **runp = _environ; - char *envline; - - while (__builtin_expect ((envline = next_env_entry (&runp)) != NULL, - 0)) - { - size_t len = strcspn (envline, "="); - - if (envline[len] != '=') - /* This is a "MALLOC_" variable at the end of the string - without a '=' character. Ignore it since otherwise we - will access invalid memory below. */ - continue; - - switch (len) - { - case 6: - if (memcmp (envline, "CHECK_", 6) == 0) - s = &envline[7]; - break; - case 8: - if (! secure && memcmp (envline, "TOP_PAD_", 8) == 0) - mALLOPt(M_TOP_PAD, atoi(&envline[9])); - break; - case 9: - if (! secure && memcmp (envline, "MMAP_MAX_", 9) == 0) - mALLOPt(M_MMAP_MAX, atoi(&envline[10])); - break; - case 15: - if (! secure) - { - if (memcmp (envline, "TRIM_THRESHOLD_", 15) == 0) - mALLOPt(M_TRIM_THRESHOLD, atoi(&envline[16])); - else if (memcmp (envline, "MMAP_THRESHOLD_", 15) == 0) - mALLOPt(M_MMAP_THRESHOLD, atoi(&envline[16])); - } - break; - default: - break; - } - } - } -#else - if (! 
secure) - { - if((s = getenv("MALLOC_TRIM_THRESHOLD_"))) - mALLOPt(M_TRIM_THRESHOLD, atoi(s)); - if((s = getenv("MALLOC_TOP_PAD_"))) - mALLOPt(M_TOP_PAD, atoi(s)); - if((s = getenv("MALLOC_MMAP_THRESHOLD_"))) - mALLOPt(M_MMAP_THRESHOLD, atoi(s)); - if((s = getenv("MALLOC_MMAP_MAX_"))) - mALLOPt(M_MMAP_MAX, atoi(s)); - } - s = getenv("MALLOC_CHECK_"); -#endif - if(s) { - if(s[0]) mALLOPt(M_CHECK_ACTION, (int)(s[0] - '0')); - __malloc_check_init(); - } -#if 0 - /* OMPI Change: Don't call the initialize hook; it was us. */ - if(__malloc_initialize_hook != NULL) - (*__malloc_initialize_hook)(); -#endif - - __malloc_initialized = 1; -} - -/* There are platforms (e.g. Hurd) with a link-time hook mechanism. */ -#ifdef thread_atfork_static -thread_atfork_static(ptmalloc_lock_all, ptmalloc_unlock_all, \ - ptmalloc_unlock_all2) -#endif - - - -/* Managing heaps and arenas (for concurrent threads) */ - -#if USE_ARENAS - -#if MALLOC_DEBUG > 1 - -/* Print the complete contents of a single heap to stderr. */ - -static void -#if __STD_C -dump_heap(heap_info *heap) -#else -dump_heap(heap) heap_info *heap; -#endif -{ - char *ptr; - mchunkptr p; - - fprintf(stderr, "Heap %p, size %10lx:\n", heap, (long)heap->size); - ptr = (heap->ar_ptr != (mstate)(heap+1)) ? - (char*)(heap + 1) : (char*)(heap + 1) + sizeof(struct malloc_state); - p = (mchunkptr)(((unsigned long)ptr + MALLOC_ALIGN_MASK) & - ~MALLOC_ALIGN_MASK); - for(;;) { - fprintf(stderr, "chunk %p size %10lx", p, (long)p->size); - if(p == top(heap->ar_ptr)) { - fprintf(stderr, " (top)\n"); - break; - } else if(p->size == (0|PREV_INUSE)) { - fprintf(stderr, " (fence)\n"); - break; - } - fprintf(stderr, "\n"); - p = next_chunk(p); - } -} - -#endif /* MALLOC_DEBUG > 1 */ - -/* Create a new heap. size is automatically rounded up to a multiple - of the page size. */ - -static heap_info * -internal_function -#if __STD_C -new_heap(size_t size, size_t top_pad) -#else -new_heap(size, top_pad) size_t size, top_pad; -#endif -{ - size_t page_mask = malloc_getpagesize - 1; - char *p1, *p2; - unsigned long ul; - heap_info *h; - - if(size+top_pad < HEAP_MIN_SIZE) - size = HEAP_MIN_SIZE; - else if(size+top_pad <= HEAP_MAX_SIZE) - size += top_pad; - else if(size > HEAP_MAX_SIZE) - return 0; - else - size = HEAP_MAX_SIZE; - size = (size + page_mask) & ~page_mask; - - /* A memory region aligned to a multiple of HEAP_MAX_SIZE is needed. - No swap space needs to be reserved for the following large - mapping (on Linux, this is the case for all non-writable mappings - anyway). */ - p1 = (char *)MMAP(0, HEAP_MAX_SIZE<<1, PROT_NONE, MAP_PRIVATE|MAP_NORESERVE); - if(p1 != MAP_FAILED) { - p2 = (char *)(((unsigned long)p1 + (HEAP_MAX_SIZE-1)) & ~(HEAP_MAX_SIZE-1)); - ul = p2 - p1; - munmap(p1, ul); - munmap(p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul); - } else { - /* Try to take the chance that an allocation of only HEAP_MAX_SIZE - is already aligned. */ - p2 = (char *)MMAP(0, HEAP_MAX_SIZE, PROT_NONE, MAP_PRIVATE|MAP_NORESERVE); - if(p2 == MAP_FAILED) - return 0; - if((unsigned long)p2 & (HEAP_MAX_SIZE-1)) { - munmap(p2, HEAP_MAX_SIZE); - return 0; - } - } - if(mprotect(p2, size, PROT_READ|PROT_WRITE) != 0) { - munmap(p2, HEAP_MAX_SIZE); - return 0; - } - h = (heap_info *)p2; - h->size = size; - THREAD_STAT(stat_n_heaps++); - return h; -} - -/* Grow or shrink a heap. size is automatically rounded up to a - multiple of the page size if it is positive. 
*/ - -static int -#if __STD_C -grow_heap(heap_info *h, long diff) -#else -grow_heap(h, diff) heap_info *h; long diff; -#endif -{ - size_t page_mask = malloc_getpagesize - 1; - long new_size; - - if(diff >= 0) { - diff = (diff + page_mask) & ~page_mask; - new_size = (long)h->size + diff; - if(new_size > HEAP_MAX_SIZE) - return -1; - if(mprotect((char *)h + h->size, diff, PROT_READ|PROT_WRITE) != 0) - return -2; - } else { - new_size = (long)h->size + diff; - if(new_size < (long)sizeof(*h)) - return -1; - - if(mprotect((char *)h + new_size, -diff, PROT_NONE) != 0) - return -2; - /*fprintf(stderr, "shrink %p %08lx\n", h, new_size);*/ - } - h->size = new_size; - return 0; -} - -/* Delete a heap. */ - -#define delete_heap(heap) munmap((char*)(heap), HEAP_MAX_SIZE) - -static int -internal_function -#if __STD_C -heap_trim(heap_info *heap, size_t pad) -#else -heap_trim(heap, pad) heap_info *heap; size_t pad; -#endif -{ - mstate ar_ptr = heap->ar_ptr; - unsigned long pagesz = mp_.pagesize; - mchunkptr top_chunk = top(ar_ptr), p, bck, fwd; - heap_info *prev_heap; - long new_size, top_size, extra; - - /* Can this heap go away completely? */ - while(top_chunk == chunk_at_offset(heap, sizeof(*heap))) { - prev_heap = heap->prev; - p = chunk_at_offset(prev_heap, prev_heap->size - (MINSIZE-2*SIZE_SZ)); - assert(p->size == (0|PREV_INUSE)); /* must be fencepost */ - p = prev_chunk(p); - new_size = chunksize(p) + (MINSIZE-2*SIZE_SZ); - assert(new_size>0 && new_size<(long)(2*MINSIZE)); - if(!prev_inuse(p)) - new_size += p->prev_size; - assert(new_size>0 && new_sizesize) < pad + MINSIZE + pagesz) - break; - ar_ptr->system_mem -= heap->size; - arena_mem -= heap->size; - delete_heap(heap); - heap = prev_heap; - if(!prev_inuse(p)) { /* consolidate backward */ - p = prev_chunk(p); - unlink(p, bck, fwd); - } - assert(((unsigned long)((char*)p + new_size) & (pagesz-1)) == 0); - assert( ((char*)p + new_size) == ((char*)heap + heap->size) ); - top(ar_ptr) = top_chunk = p; - set_head(top_chunk, new_size | PREV_INUSE); - /*check_chunk(ar_ptr, top_chunk);*/ - } - top_size = chunksize(top_chunk); - extra = ((top_size - pad - MINSIZE + (pagesz-1))/pagesz - 1) * pagesz; - if(extra < (long)pagesz) - return 0; - /* Try to shrink. */ - if(grow_heap(heap, -extra) != 0) - return 0; - ar_ptr->system_mem -= extra; - arena_mem -= extra; - - /* Success. Adjust top accordingly. */ - set_head(top_chunk, (top_size - extra) | PREV_INUSE); - /*check_chunk(ar_ptr, top_chunk);*/ - return 1; -} - -static mstate -internal_function -#if __STD_C -arena_get2(mstate a_tsd, size_t size) -#else -arena_get2(a_tsd, size) mstate a_tsd; size_t size; -#endif -{ - mstate a; - int err; - - if(!a_tsd) - a = a_tsd = &main_arena; - else { - a = a_tsd->next; - if(!a) { - /* This can only happen while initializing the new arena. */ - (void)mutex_lock(&main_arena.mutex); - THREAD_STAT(++(main_arena.stat_lock_wait)); - return &main_arena; - } - } - - /* Check the global, circularly linked list for available arenas. */ - repeat: - do { - if(!mutex_trylock(&a->mutex)) { - THREAD_STAT(++(a->stat_lock_loop)); - tsd_setspecific(arena_key, (Void_t *)a); - return a; - } - a = a->next; - } while(a != a_tsd); - - /* If not even the list_lock can be obtained, try again. This can - happen during `atfork', or for example on systems where thread - creation makes it temporarily impossible to obtain _any_ - locks. 
*/ - if(mutex_trylock(&list_lock)) { - a = a_tsd; - goto repeat; - } - (void)mutex_unlock(&list_lock); - - /* Nothing immediately available, so generate a new arena. */ - a = _int_new_arena(size); - if(!a) - return 0; - - tsd_setspecific(arena_key, (Void_t *)a); - mutex_init(&a->mutex); - err = mutex_lock(&a->mutex); /* remember result */ - - /* Add the new arena to the global list. */ - (void)mutex_lock(&list_lock); - a->next = main_arena.next; -/* OMPI: use our barriers - atomic_write_barrier (); -*/ - opal_atomic_wmb(); - main_arena.next = a; - (void)mutex_unlock(&list_lock); - - if(err) /* locking failed; keep arena for further attempts later */ - return 0; - - THREAD_STAT(++(a->stat_lock_loop)); - return a; -} - -/* Create a new arena with initial size "size". */ - -mstate -_int_new_arena(size_t size) -{ - mstate a; - heap_info *h; - char *ptr; - unsigned long misalign; - - h = new_heap(size + (sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT), - mp_.top_pad); - if(!h) { - /* Maybe size is too large to fit in a single heap. So, just try - to create a minimally-sized arena and let _int_malloc() attempt - to deal with the large request via mmap_chunk(). */ - h = new_heap(sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT, mp_.top_pad); - if(!h) - return 0; - } - a = h->ar_ptr = (mstate)(h+1); - malloc_init_state(a); - /*a->next = NULL;*/ - a->system_mem = a->max_system_mem = h->size; - arena_mem += h->size; -#ifdef NO_THREADS - if((unsigned long)(mp_.mmapped_mem + arena_mem + main_arena.system_mem) > - mp_.max_total_mem) - mp_.max_total_mem = mp_.mmapped_mem + arena_mem + main_arena.system_mem; -#endif - - /* Set up the top chunk, with proper alignment. */ - ptr = (char *)(a + 1); - misalign = (unsigned long)chunk2mem(ptr) & MALLOC_ALIGN_MASK; - if (misalign > 0) - ptr += MALLOC_ALIGNMENT - misalign; - top(a) = (mchunkptr)ptr; - set_head(top(a), (((char*)h + h->size) - ptr) | PREV_INUSE); - - return a; -} - -/* Obtain the arena number n. Needed in malloc_stats. */ - -mstate -_int_get_arena (int n) -{ - mstate a = &main_arena; - - while (n-- != 0) { - a = a->next; - if (a == &main_arena) - return 0; - } - return a; -} - -#endif /* USE_ARENAS */ - -/* - * Local variables: - * c-basic-offset: 2 - * End: - */ diff --git a/opal/mca/memory/linux/configure.m4 b/opal/mca/memory/linux/configure.m4 deleted file mode 100644 index e44786e1eb..0000000000 --- a/opal/mca/memory/linux/configure.m4 +++ /dev/null @@ -1,236 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -AC_DEFUN([MCA_opal_memory_linux_PRIORITY], [40]) - -AC_DEFUN([MCA_opal_memory_linux_COMPILE_MODE], [ - AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) - $4="static" - AC_MSG_RESULT([$$4]) -]) - - -# MCA_memory_linux_CONFIG(action-if-can-compile, -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_opal_memory_linux_CONFIG],[ - AC_CONFIG_FILES([opal/mca/memory/linux/Makefile]) - - OPAL_VAR_SCOPE_PUSH([memory_linux_ptmalloc2_happy memory_linux_ummu_happy memory_linux_requested icc_major_ver icc_minor_ver memory_linux_mmap memory_linux_munmap memory_linux_LIBS_SAVE]) - - # Only allow this component to build on Linux-based systems - - AC_MSG_CHECKING([operating system]) - case $host in - *linux*) - AC_MSG_RESULT([$host -- supported]) - memory_linux_ptmalloc2_happy=yes - memory_linux_ummu_happy=yes - ;; - *) - AC_MSG_RESULT([$host -- unsupported]) - memory_linux_ptmalloc2_happy=no - memory_linux_ummu_happy=no - ;; - esac - - AS_IF([test "$with_memory_manager" = "linux"], - [memory_linux_ptmalloc2_happy=yes - memory_linux_ummu_happy=yes - memory_linux_requested=1], - [memory_linux_requested=0 - AS_IF([test -z "$with_memory_manager" || test "$with_memory_manager" = "yes"], - [memory_linux_ptmalloc2_happy=yes - memory_linux_ummu_happy=yes], - [memory_linux_ptmalloc2_happy=no - memory_linux_ummu_happy=no])]) - - - ###################################################################### - # if memory hook available - ###################################################################### - memory_hook_found=1 - AS_IF([test "$memory_hook_found" -eq 1], - [memory_hook_found=0 AC_CHECK_HEADER([malloc.h], - [AC_CHECK_FUNC([__malloc_initialize_hook], - [AC_CHECK_FUNC([__malloc_hook], - [AC_CHECK_FUNC([__realloc_hook], - [AC_CHECK_FUNC([__free_hook], - [memory_hook_found=1])])])])])]) - AC_MSG_CHECKING([whether the system can use malloc hooks]) - AS_IF([test "$memory_hook_found" = "0"], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([yes])]) - AC_DEFINE_UNQUOTED([MEMORY_LINUX_HAVE_MALLOC_HOOK_SUPPORT], [$memory_hook_found], - [Whether the system has Memory Allocation Hooks]) - - AC_ARG_ENABLE(memory-linux-malloc-alignment, - AC_HELP_STRING([--enable-memory-linux-malloc-alignment], [Enable support for allocated memory alignment. Default: enabled if supported, disabled otherwise.])) - - malloc_align_enabled=0 - AS_IF([test "$enable_memory_linux_malloc_alignment" != "no"], - [malloc_align_enabled=$memory_hook_found]) - - AS_IF([test "$enable_memory_linux_malloc_alignment" = "yes" && test "$malloc_align_enabled" = "0"], - [AC_MSG_ERROR([memory linux malloc alignment is requested but __malloc_hook is not available])]) - AC_MSG_CHECKING([whether the memory linux will use malloc alignment]) - AS_IF([test "$malloc_align_enabled" = "0"], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([yes])]) - - AC_DEFINE_UNQUOTED(MEMORY_LINUX_MALLOC_ALIGN_ENABLED, [$malloc_align_enabled], - [Whether the memory linux malloc alignment is enabled]) - - ###################################################################### - # ptmalloc2 - ###################################################################### - - # Per ticket #227, Intel 9.0 v20051201 on ia64 with optimization - # of -O2 or higher will bork linux in strange in mysterious ways. - # Doh! So if the compiler vendor is intel and we're on an ia64 - # box, run "icc --version" and snarf the version string. 
If it's - # 9.0 and the version is <= 20051201, then disable ptmalloc2. - # Executive decision: ignore optimization levels (even though -O1 - # and -O0 seem to work). The upgrade to 9.1 is free, so that's a - # better path than trying to make a much more complicated test - # here. - - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [case $host in - ia64-*) - AS_IF([test "$opal_c_vendor" = "intel"], - [# check for v9.0 <= 20051201 - icc_major_ver="`$CC --version | head -n 1 | awk '{ print [$]3 }'`" - icc_minor_ver="`$CC --version | head -n 1 | awk '{ print [$]4 }'`" - AS_IF([test "$icc_major_ver" = "9.0" && test "`expr $icc_minor_ver \<= 20051201`" = "1"], - [memory_linux_ptmalloc2_happy=no - AC_MSG_WARN([*** Detected Intel C compiler v9.0 <= 20051201 on ia64]) - AC_MSG_WARN([*** This compiler/platform combination has known problems with ptmalloc2]) - AC_MSG_WARN([*** Disabling ptmalloc2])])]) - ;; - esac]) - - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [# check for malloc.h - AC_CHECK_HEADER([malloc.h], - [memory_linux_ptmalloc2_happy=yes], - [memory_linux_ptmalloc2_happy=no])]) - - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [# check for init hook symbol - AC_CHECK_DECL([__malloc_initialize_hook], - [memory_linux_ptmalloc2_happy=yes], - [memory_linux_ptmalloc2_happy=no], - [AC_INCLUDES_DEFAULT - #include ])]) - - # - # See if we have sbrk prototyped - # - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [AC_CHECK_DECLS([sbrk])]) - - # - # Figure out how we're going to call mmap/munmap for real - # - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [memory_linux_mmap=0 - memory_linux_munmap=1 - - # it's nearly impossible to call mmap from syscall(), so - # only go this route if we can't get at munmap any other - # way. 
- AC_CHECK_HEADER([syscall.h], - [AC_CHECK_FUNCS([syscall], [], [memory_linux_munmap=0])]) - - # Always look for __munmap and __mmap - AC_CHECK_FUNCS([__munmap], [memory_linux_mmap=1]) - AC_CHECK_FUNCS([__mmap]) - - # only allow dlsym (and therefore add -ldl) if we - # really need to - AS_IF([test "$memory_linux_mmap" = "0"], - [memory_linux_LIBS_SAVE="$LIBS" - AC_CHECK_LIB([dl], - [dlsym], - [LIBS="$LIBS -ldl" - memory_linux_LIBS="-ldl" - memory_linux_mmap=1]) - AC_CHECK_FUNCS([dlsym]) - LIBS="$memory_linux_LIBS_SAVE"]) - - AS_IF([test "$memory_linux_mmap" = "0" && test "$memory_linux_munmap" = "0"], - [memory_linux_ptmalloc2_happy=no])]) - - # If all is good, save the extra libs for the wrapper - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes], - [value=1], - [value=0]) - AC_DEFINE_UNQUOTED([MEMORY_LINUX_PTMALLOC2], [$value], - [Whether ptmalloc2 is supported on this system or not]) - AM_CONDITIONAL([MEMORY_LINUX_PTMALLOC2], - [test "$memory_linux_ptmalloc2_happy" = yes]) - - ###################################################################### - # ummunotify - ###################################################################### - - # Check for the relevant header - AS_IF([test "$memory_linux_ummu_happy" = yes], - [# check for linux/ummunotify.h - AC_CHECK_HEADER([linux/ummunotify.h], - [memory_linux_ummu_happy=yes], - [memory_linux_ummu_happy=no])]) - - # has the Linux declaration for ioctl - AC_CHECK_HEADERS([stropts.h]) - - # If all is good, set the header file that we want the rest of the - # code base to use - AS_IF([test "$memory_linux_ummu_happy" = yes], - [memory_base_include="linux/public.h" - value=1], - [value=0]) - AC_DEFINE_UNQUOTED([MEMORY_LINUX_UMMUNOTIFY], [$value], - [Whether ummunotify is supported on this system or not]) - AM_CONDITIONAL([MEMORY_LINUX_UMMUNOTIFY], - [test "$memory_linux_ummu_happy" = yes]) - - ###################################################################### - # post processing - ###################################################################### - - AS_IF([test "$memory_malloc_hooks_requested" = 1 && \ - test "$memory_linux_ptmalloc2_happy" = no && \ - test "$memory_linux_ummu_happy" = no], - [AC_MSG_ERROR([linux memory management requested but neither ptmalloc2 nor ummunotify are available. Aborting.])]) - AC_SUBST([memory_linux_LIBS]) - - AS_IF([test "$memory_linux_ptmalloc2_happy" = yes || \ - test "$memory_linux_ummu_happy" = yes], - [memory_base_found=1 - $1], - [memory_base_found=0 - memory_base_include= - $2]) - - OPAL_VAR_SCOPE_POP -]) diff --git a/opal/mca/memory/linux/help-opal-memory-linux.txt b/opal/mca/memory/linux/help-opal-memory-linux.txt deleted file mode 100644 index b3327c7356..0000000000 --- a/opal/mca/memory/linux/help-opal-memory-linux.txt +++ /dev/null @@ -1,29 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI's memory/linux component. -# -[ummunotify eaccess] -Open MPI was unable to open the UMMU notification device. This is -likely a permissions problem on the device itself. UMMU notification -support is therefore disabled in this process; an alternate memory -hook manager *may* be used instead (if available). - - Local host: %s - UMMU device: %s -# -[ummunotify open error] -Open MPI was unable to open the UMMU notification device. 
UMMU -notification support is therefore disabled in this process; an -alternate memory hook manager *may* be used instead (if available). - - Local host: %s - UMMU device: %s - Error: %s (%d) diff --git a/opal/mca/memory/linux/hooks.c b/opal/mca/memory/linux/hooks.c deleted file mode 100644 index db608869b2..0000000000 --- a/opal/mca/memory/linux/hooks.c +++ /dev/null @@ -1,899 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. - * - * Additional copyrights may follow. - */ -/* Malloc implementation for multiple threads without lock contention. - Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Wolfram Gloger , 2001. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* $Id: hooks.c,v 1.12 2004/11/05 14:42:32 wg Exp $ */ - -#include "opal_config.h" - -#include "opal/mca/mca.h" -#include "opal/mca/memory/memory.h" -#include "opal/constants.h" -#include "opal/memoryhooks/memory.h" - -#include "opal/mca/memory/linux/memory_linux.h" - -#ifndef DEFAULT_CHECK_ACTION -#define DEFAULT_CHECK_ACTION 1 -#endif - -#ifdef HAVE_SYS_STAT_H -#include /* for stat */ -#endif /* HAVE_SYS_STAT_H */ - -/* Defined in memory_linux_component.c */ -extern bool opal_memory_linux_disable; - -/* What to do if the standard debugging hooks are in place and a - corrupt pointer is detected: do nothing (0), print an error message - (1), or call abort() (2). */ - -/* Hooks for debugging versions. The initial hooks just call the - initialization routine, then do the normal work. 
*/ - -#if !(USE_STARTER & 2) - -/* OMPI change: these aren't used (avoid a compiler warning by if - 0'ing them out */ -#if 0 -static Void_t* -#if __STD_C -malloc_hook_ini(size_t sz, const __malloc_ptr_t caller) -#else -malloc_hook_ini(sz, caller) - size_t sz; const __malloc_ptr_t caller; -#endif -{ - __malloc_hook = NULL; - ptmalloc_init(); - return public_mALLOc(sz); -} -#endif - -/* OMPI change: these aren't used (avoid a compiler warning by if - 0'ing them out */ -#if 0 -static Void_t* -#if __STD_C -realloc_hook_ini(Void_t* ptr, size_t sz, const __malloc_ptr_t caller) -#else -realloc_hook_ini(ptr, sz, caller) - Void_t* ptr; size_t sz; const __malloc_ptr_t caller; -#endif -{ - __malloc_hook = NULL; - __realloc_hook = NULL; - ptmalloc_init(); - return public_rEALLOc(ptr, sz); -} -#endif - -/* OMPI change: these aren't used (avoid a compiler warning by if - 0'ing them out */ -#if 0 -static Void_t* -#if __STD_C -memalign_hook_ini(size_t alignment, size_t sz, const __malloc_ptr_t caller) -#else -memalign_hook_ini(alignment, sz, caller) - size_t alignment; size_t sz; const __malloc_ptr_t caller; -#endif -{ - __memalign_hook = NULL; - ptmalloc_init(); - return public_mEMALIGn(alignment, sz); -} -#endif - -#endif /* !(USE_STARTER & 2) */ - -static int check_action = DEFAULT_CHECK_ACTION; - -/* Whether we are using malloc checking. */ -static int using_malloc_checking; - -/* A flag that is set by malloc_set_state, to signal that malloc checking - must not be enabled on the request from the user (via the MALLOC_CHECK_ - environment variable). It is reset by __malloc_check_init to tell - malloc_set_state that the user has requested malloc checking. - - The purpose of this flag is to make sure that malloc checking is not - enabled when the heap to be restored was constructed without malloc - checking, and thus does not contain the required magic bytes. - Otherwise the heap would be corrupted by calls to free and realloc. If - it turns out that the heap was created with malloc checking and the - user has requested it malloc_set_state just calls __malloc_check_init - again to enable it. On the other hand, reusing such a heap without - further malloc checking is safe. */ -static int disallow_malloc_check; - -/* Activate a standard set of debugging hooks. */ -void -__malloc_check_init() -{ - if (disallow_malloc_check) { - disallow_malloc_check = 0; - return; - } - using_malloc_checking = 1; - __malloc_hook = malloc_check; - __free_hook = free_check; - __realloc_hook = realloc_check; - __memalign_hook = memalign_check; - if(check_action & 1) - fprintf(stderr, "malloc: using debugging hooks\n"); -} - -/* A simple, standard set of debugging hooks. Overhead is `only' one - byte per chunk; still this will catch most cases of double frees or - overruns. The goal here is to avoid obscure crashes due to invalid - usage, unlike in the MALLOC_DEBUG code. */ - -#define MAGICBYTE(p) ( ( ((size_t)p >> 3) ^ ((size_t)p >> 11)) & 0xFF ) - -/* Instrument a chunk with overrun detector byte(s) and convert it - into a user pointer with requested size sz. */ - -static Void_t* -internal_function -#if __STD_C -mem2mem_check(Void_t *ptr, size_t sz) -#else -mem2mem_check(ptr, sz) Void_t *ptr; size_t sz; -#endif -{ - mchunkptr p; - unsigned char* m_ptr = (unsigned char*)BOUNDED_N(ptr, sz); - size_t i; - - if (!ptr) - return ptr; - p = mem2chunk(ptr); - for(i = chunksize(p) - (chunk_is_mmapped(p) ? 
2*SIZE_SZ+1 : SIZE_SZ+1); - i > sz; - i -= 0xFF) { - if(i-sz < 0x100) { - m_ptr[i] = (unsigned char)(i-sz); - break; - } - m_ptr[i] = 0xFF; - } - m_ptr[sz] = MAGICBYTE(p); - return (Void_t*)m_ptr; -} - -/* Convert a pointer to be free()d or realloc()ed to a valid chunk - pointer. If the provided pointer is not valid, return NULL. */ - -static mchunkptr -internal_function -#if __STD_C -mem2chunk_check(Void_t* mem) -#else -mem2chunk_check(mem) Void_t* mem; -#endif -{ - mchunkptr p; - INTERNAL_SIZE_T sz, c; - unsigned char magic; - - if(!aligned_OK(mem)) return NULL; - p = mem2chunk(mem); - if (!chunk_is_mmapped(p)) { - /* Must be a chunk in conventional heap memory. */ - int contig = contiguous(&main_arena); - sz = chunksize(p); - if((contig && - ((char*)p=(mp_.sbrk_base+main_arena.system_mem) )) || - szprev_size&MALLOC_ALIGN_MASK || - (contig && (char*)prev_chunk(p)size & PREV_INUSE) || - ( (((unsigned long)p - p->prev_size) & page_mask) != 0 ) || - ( (sz = chunksize(p)), ((p->prev_size + sz) & page_mask) != 0 ) ) - return NULL; - magic = MAGICBYTE(p); - for(sz -= 1; (c = ((unsigned char*)p)[sz]) != magic; sz -= c) { - if(c<=0 || sz<(c+2*SIZE_SZ)) return NULL; - } - ((unsigned char*)p)[sz] ^= 0xFF; - } - return p; -} - -/* Check for corruption of the top chunk, and try to recover if - necessary. */ - -static int -internal_function -#if __STD_C -top_check(void) -#else -top_check() -#endif -{ - mchunkptr t = top(&main_arena); - char* brk, * new_brk; - INTERNAL_SIZE_T front_misalign, sbrk_size; - unsigned long pagesz = malloc_getpagesize; - - if (t == initial_top(&main_arena) || - (!chunk_is_mmapped(t) && - chunksize(t)>=MINSIZE && - prev_inuse(t) && - (!contiguous(&main_arena) || - (char*)t + chunksize(t) == mp_.sbrk_base + main_arena.system_mem))) - return 0; - - if(check_action & 1) - fprintf(stderr, "malloc: top chunk is corrupt\n"); - if(check_action & 2) - abort(); - - /* Try to set up a new top chunk. */ - brk = MORECORE(0); - front_misalign = (unsigned long)chunk2mem(brk) & MALLOC_ALIGN_MASK; - if (front_misalign > 0) - front_misalign = MALLOC_ALIGNMENT - front_misalign; - sbrk_size = front_misalign + mp_.top_pad + MINSIZE; - sbrk_size += pagesz - ((unsigned long)(brk + sbrk_size) & (pagesz - 1)); - new_brk = (char*)(MORECORE (sbrk_size)); - if (new_brk == (char*)(MORECORE_FAILURE)) return -1; - /* Call the `morecore' hook if necessary. */ - if (__after_morecore_hook) - (*__after_morecore_hook) (); - main_arena.system_mem = (new_brk - mp_.sbrk_base) + sbrk_size; - - top(&main_arena) = (mchunkptr)(brk + front_misalign); - set_head(top(&main_arena), (sbrk_size - front_misalign) | PREV_INUSE); - - return 0; -} - -static Void_t* -#if __STD_C -malloc_check(size_t sz, const Void_t *caller) -#else -malloc_check(sz, caller) size_t sz; const Void_t *caller; -#endif -{ - Void_t *victim; - - (void)mutex_lock(&main_arena.mutex); - victim = (top_check() >= 0) ? 
_int_malloc(&main_arena, sz+1) : NULL; - (void)mutex_unlock(&main_arena.mutex); - return mem2mem_check(victim, sz); -} - -static void -#if __STD_C -free_check(Void_t* mem, const Void_t *caller) -#else -free_check(mem, caller) Void_t* mem; const Void_t *caller; -#endif -{ - mchunkptr p; - - if(!mem) return; - (void)mutex_lock(&main_arena.mutex); - p = mem2chunk_check(mem); - if(!p) { - (void)mutex_unlock(&main_arena.mutex); - if(check_action & 1) - fprintf(stderr, "free(): invalid pointer %p!\n", mem); - if(check_action & 2) - abort(); - return; - } -#if HAVE_MMAP - if (chunk_is_mmapped(p)) { - (void)mutex_unlock(&main_arena.mutex); - munmap_chunk(p); - return; - } -#endif -#if 0 /* Erase freed memory. */ - memset(mem, 0, chunksize(p) - (SIZE_SZ+1)); -#endif - _int_free(&main_arena, mem); - (void)mutex_unlock(&main_arena.mutex); -} - -static Void_t* -#if __STD_C -realloc_check(Void_t* oldmem, size_t bytes, const Void_t *caller) -#else -realloc_check(oldmem, bytes, caller) - Void_t* oldmem; size_t bytes; const Void_t *caller; -#endif -{ - mchunkptr oldp; - INTERNAL_SIZE_T nb, oldsize; - Void_t* newmem = 0; - - if (oldmem == 0) return malloc_check(bytes, NULL); - (void)mutex_lock(&main_arena.mutex); - oldp = mem2chunk_check(oldmem); - (void)mutex_unlock(&main_arena.mutex); - if(!oldp) { - if(check_action & 1) - fprintf(stderr, "realloc(): invalid pointer %p!\n", oldmem); - if(check_action & 2) - abort(); - return malloc_check(bytes, NULL); - } - oldsize = chunksize(oldp); - - checked_request2size(bytes+1, nb); - (void)mutex_lock(&main_arena.mutex); - -#if HAVE_MMAP - if (chunk_is_mmapped(oldp)) { -#if HAVE_MREMAP - mchunkptr newp = mremap_chunk(oldp, nb); - if(newp) - newmem = chunk2mem(newp); - else -#endif - { - /* Note the extra SIZE_SZ overhead. */ - if(oldsize - SIZE_SZ >= nb) - newmem = oldmem; /* do nothing */ - else { - /* Must alloc, copy, free. */ - if (top_check() >= 0) - newmem = _int_malloc(&main_arena, bytes+1); - if (newmem) { - MALLOC_COPY(BOUNDED_N(newmem, bytes+1), oldmem, oldsize - 2*SIZE_SZ); - munmap_chunk(oldp); - } - } - } - } else { -#endif /* HAVE_MMAP */ - if (top_check() >= 0) - newmem = _int_realloc(&main_arena, oldmem, bytes+1); -#if 0 /* Erase freed memory. */ - if(newmem) - newp = mem2chunk(newmem); - nb = chunksize(newp); - if(oldp=chunk_at_offset(newp, nb)) { - memset((char*)oldmem + 2*sizeof(mbinptr), 0, - oldsize - (2*sizeof(mbinptr)+2*SIZE_SZ+1)); - } else if(nb > oldsize+SIZE_SZ) { - memset((char*)BOUNDED_N(chunk2mem(newp), bytes) + oldsize, - 0, nb - (oldsize+SIZE_SZ)); - } -#endif -#if HAVE_MMAP - } -#endif - (void)mutex_unlock(&main_arena.mutex); - - return mem2mem_check(newmem, bytes); -} - -static Void_t* -#if __STD_C -memalign_check(size_t alignment, size_t bytes, const Void_t *caller) -#else -memalign_check(alignment, bytes, caller) - size_t alignment; size_t bytes; const Void_t *caller; -#endif -{ - INTERNAL_SIZE_T nb; - Void_t* mem; - - if (alignment <= MALLOC_ALIGNMENT) return malloc_check(bytes, NULL); - if (alignment < MINSIZE) alignment = MINSIZE; - - checked_request2size(bytes+1, nb); - (void)mutex_lock(&main_arena.mutex); - mem = (top_check() >= 0) ? _int_memalign(&main_arena, alignment, bytes+1) : - NULL; - (void)mutex_unlock(&main_arena.mutex); - return mem2mem_check(mem, bytes); -} - -#if !defined NO_THREADS && USE_STARTER - -/* The following hooks are used when the global initialization in - ptmalloc_init() hasn't completed yet. 
*/ - -static Void_t* -#if __STD_C -malloc_starter(size_t sz, const Void_t *caller) -#else -malloc_starter(sz, caller) size_t sz; const Void_t *caller; -#endif -{ - Void_t* victim; - - ptmalloc_init_minimal(); - victim = _int_malloc(&main_arena, sz); - - return victim ? BOUNDED_N(victim, sz) : 0; -} - -static Void_t* -#if __STD_C -memalign_starter(size_t align, size_t sz, const Void_t *caller) -#else -memalign_starter(align, sz, caller) size_t align, sz; const Void_t *caller; -#endif -{ - Void_t* victim; - - ptmalloc_init_minimal(); - victim = _int_memalign(&main_arena, align, sz); - - return victim ? BOUNDED_N(victim, sz) : 0; -} - -static void -#if __STD_C -free_starter(Void_t* mem, const Void_t *caller) -#else -free_starter(mem, caller) Void_t* mem; const Void_t *caller; -#endif -{ - mchunkptr p; - - if(!mem) return; - p = mem2chunk(mem); -#if HAVE_MMAP - if (chunk_is_mmapped(p)) { - munmap_chunk(p); - return; - } -#endif - _int_free(&main_arena, mem); -} - -#endif /* !defined NO_THREADS && USE_STARTER */ - - -/* Get/set state: malloc_get_state() records the current state of all - malloc variables (_except_ for the actual heap contents and `hook' - function pointers) in a system dependent, opaque data structure. - This data structure is dynamically allocated and can be free()d - after use. malloc_set_state() restores the state of all malloc - variables to the previously obtained state. This is especially - useful when using this malloc as part of a shared library, and when - the heap contents are saved/restored via some other method. The - primary example for this is GNU Emacs with its `dumping' procedure. - `Hook' function pointers are never saved or restored by these - functions, with two exceptions: If malloc checking was in use when - malloc_get_state() was called, then malloc_set_state() calls - __malloc_check_init() if possible; if malloc checking was not in - use in the recorded state but the user requested malloc checking, - then the hooks are reset to 0. 
*/ - -#define MALLOC_STATE_MAGIC 0x444c4541l -#define MALLOC_STATE_VERSION (0*0x100l + 2l) /* major*0x100 + minor */ - -struct malloc_save_state { - long magic; - long version; - mbinptr av[NBINS * 2 + 2]; - char* sbrk_base; - int sbrked_mem_bytes; - unsigned long trim_threshold; - unsigned long top_pad; - unsigned int n_mmaps_max; - unsigned long mmap_threshold; - int check_action; - unsigned long max_sbrked_mem; - unsigned long max_total_mem; - unsigned int n_mmaps; - unsigned int max_n_mmaps; - unsigned long mmapped_mem; - unsigned long max_mmapped_mem; - int using_malloc_checking; -}; - -Void_t* -public_gET_STATe(void) -{ - struct malloc_save_state* ms; - int i; - mbinptr b; - - ms = (struct malloc_save_state*)public_mALLOc(sizeof(*ms)); - if (!ms) - return 0; - (void)mutex_lock(&main_arena.mutex); - malloc_consolidate(&main_arena); - ms->magic = MALLOC_STATE_MAGIC; - ms->version = MALLOC_STATE_VERSION; - ms->av[0] = 0; - ms->av[1] = 0; /* used to be binblocks, now no longer used */ - ms->av[2] = top(&main_arena); - ms->av[3] = 0; /* used to be undefined */ - for(i=1; iav[2*i+2] = ms->av[2*i+3] = 0; /* empty bin */ - else { - ms->av[2*i+2] = first(b); - ms->av[2*i+3] = last(b); - } - } - ms->sbrk_base = mp_.sbrk_base; - ms->sbrked_mem_bytes = main_arena.system_mem; - ms->trim_threshold = mp_.trim_threshold; - ms->top_pad = mp_.top_pad; - ms->n_mmaps_max = mp_.n_mmaps_max; - ms->mmap_threshold = mp_.mmap_threshold; - ms->check_action = check_action; - ms->max_sbrked_mem = main_arena.max_system_mem; -#ifdef NO_THREADS - ms->max_total_mem = mp_.max_total_mem; -#else - ms->max_total_mem = 0; -#endif - ms->n_mmaps = mp_.n_mmaps; - ms->max_n_mmaps = mp_.max_n_mmaps; - ms->mmapped_mem = mp_.mmapped_mem; - ms->max_mmapped_mem = mp_.max_mmapped_mem; - ms->using_malloc_checking = using_malloc_checking; - (void)mutex_unlock(&main_arena.mutex); - return (Void_t*)ms; -} - -int -public_sET_STATe(Void_t* msptr) -{ - struct malloc_save_state* ms = (struct malloc_save_state*)msptr; - int i; - mbinptr b; - - disallow_malloc_check = 1; - ptmalloc_init(); - if(ms->magic != MALLOC_STATE_MAGIC) return -1; - /* Must fail if the major version is too high. */ - if((ms->version & ~0xffl) > (MALLOC_STATE_VERSION & ~0xffl)) return -2; - (void)mutex_lock(&main_arena.mutex); - /* There are no fastchunks. */ - clear_fastchunks(&main_arena); - set_max_fast(&main_arena, DEFAULT_MXFAST); - for (i=0; i<(int)NFASTBINS; ++i) - main_arena.fastbins[i] = 0; - for (i=0; i<(int)BINMAPSIZE; ++i) - main_arena.binmap[i] = 0; - top(&main_arena) = ms->av[2]; - main_arena.last_remainder = 0; - for(i=1; iav[2*i+2] == 0) { - assert(ms->av[2*i+3] == 0); - first(b) = last(b) = b; - } else { - if(i<(int)NSMALLBINS || ((int)largebin_index(chunksize(ms->av[2*i+2]))==i && - (int)largebin_index(chunksize(ms->av[2*i+3]))==i)) { - first(b) = ms->av[2*i+2]; - last(b) = ms->av[2*i+3]; - /* Make sure the links to the bins within the heap are correct. */ - first(b)->bk = b; - last(b)->fd = b; - /* Set bit in binblocks. */ - mark_bin(&main_arena, i); - } else { - /* Oops, index computation from chunksize must have changed. - Link the whole list into unsorted_chunks. 
*/ - first(b) = last(b) = b; - b = unsorted_chunks(&main_arena); - ms->av[2*i+2]->bk = b; - ms->av[2*i+3]->fd = b->fd; - b->fd->bk = ms->av[2*i+3]; - b->fd = ms->av[2*i+2]; - } - } - } - mp_.sbrk_base = ms->sbrk_base; - main_arena.system_mem = ms->sbrked_mem_bytes; - mp_.trim_threshold = ms->trim_threshold; - mp_.top_pad = ms->top_pad; - mp_.n_mmaps_max = ms->n_mmaps_max; - mp_.mmap_threshold = ms->mmap_threshold; - check_action = ms->check_action; - main_arena.max_system_mem = ms->max_sbrked_mem; -#ifdef NO_THREADS - mp_.max_total_mem = ms->max_total_mem; -#endif - mp_.n_mmaps = ms->n_mmaps; - mp_.max_n_mmaps = ms->max_n_mmaps; - mp_.mmapped_mem = ms->mmapped_mem; - mp_.max_mmapped_mem = ms->max_mmapped_mem; - /* add version-dependent code here */ - if (ms->version >= 1) { - /* Check whether it is safe to enable malloc checking, or whether - it is necessary to disable it. */ - if (ms->using_malloc_checking && !using_malloc_checking && - !disallow_malloc_check) - __malloc_check_init (); - else if (!ms->using_malloc_checking && using_malloc_checking) { - __malloc_hook = 0; - __free_hook = 0; - __realloc_hook = 0; - __memalign_hook = 0; - using_malloc_checking = 0; - } - } - check_malloc_state(&main_arena); - - (void)mutex_unlock(&main_arena.mutex); - return 0; -} - - -/*------------------------------------------------------------------------- - OMPI change: Per - http://www.gnu.org/software/libc/manual/html_mono/libc.html#Hooks-for-Malloc, - we can define the __malloc_initialize_hook variable to be a - function that is invoked before the first allocation is ever - performed. We use this hook to wholly replace the underlying - allocator to our own allocator if a few conditions are met. - - Remember that this hook is called probably at the very very very - beginning of the process. MCA parameters haven't been setup yet -- - darn near nothing has been setup yet. Indeed, we're effectively in - signal context because we can't call anything that calls malloc. - So we can basically have some hard-coded tests for things to see if - we want to setup to use our internal ptmalloc2 or not. */ - -static void *opal_memory_linux_malloc_hook(size_t sz, - const __malloc_ptr_t caller) -{ - return public_mALLOc(sz); -} - -static void *opal_memory_linux_realloc_hook(Void_t* ptr, size_t sz, - const __malloc_ptr_t caller) -{ - return public_rEALLOc(ptr, sz); -} - -static void *opal_memory_linux_memalign_hook(size_t alignment, size_t sz, - const __malloc_ptr_t caller) -{ - return public_mEMALIGn(alignment, sz); -} - -static void opal_memory_linux_free_hook(__malloc_ptr_t __ptr, - const __malloc_ptr_t caller) -{ - public_fREe(__ptr); -} - -typedef enum { - RESULT_NO, - RESULT_YES, - RESULT_RUNTIME, - RESULT_NOT_FOUND -} check_result_t; - -static check_result_t check(const char *name) -{ - char *s = getenv(name); - if (NULL == s) { - return RESULT_NOT_FOUND; - } - - if ('0' == s[0] && '\0' == s[1]) { - /* A value of 0 means "don't use!" */ - return RESULT_NO; - } else if ('-' == s[0] && '1' == s[1] && '\0' == s[2]) { - /* A value of -1 means "use it if it would be advantageous */ - return RESULT_RUNTIME; - } else { - /* Any other value means "use the hooks, Luke!" 
*/ - return RESULT_YES; - } -} - - -/* This function is called on loading libmpi in case system has Memory Allocation Hooks - * (see ompi/runtime/ompi_mpi_init.c for details) - */ -void opal_memory_linux_malloc_init_hook(void) -{ - check_result_t r1, lp, lpp; - bool want_rcache = false, found_driver = false; - - if (!opal_memory_linux_opened) { - return; - } - - /* First, check for a FAKEROOT environment. If we're in a - fakeroot, then access() (and likely others) have been replaced - and are not safe to call here in this pre-main environment. So - check for the environment markers that we're in a FAKEROOT. - And if so, return immediately. - - Note that this check was inspired by a problem with Debian's - "fakeroot" build environment that allocates memory during - stat() (see http://bugs.debian.org/531522). It may not be - necessary any more since we're using access(), not stat(). But - we'll leave the check, anyway. - - This is also an issue when using Gentoo's version of - 'fakeroot', sandbox v2.5. Sandbox environments can also be - detected fairly easily by looking for SANDBOX_ON. */ - if (getenv("FAKEROOTKEY") != NULL || - getenv("FAKED_MODE") != NULL || - getenv("SANDBOX_ON") != NULL ) { - return; - } - -#if MEMORY_LINUX_UMMUNOTIFY - /* Next, check if ummunotify is present on the system. If it is, - and if we were compile with ummunotify support, then we don't - need to do the following ptmalloc2 hacks. open/mmap on the - device may fail during init, but if /dev/ummunotify exists, we - assume that the user/administrator *wants* to use - ummunotify. */ - if (access("/dev/ummunotify", F_OK) == 0) { - return; - } -#endif - - /* Yes, checking for an MPI MCA parameter here is an abstraction - violation. Cope. Yes, even checking for *any* MCA parameter - here (without going through the MCA param API) is an - abstraction violation. Fricken' cope, will ya? - (unfortunately, there's really no good way to do this other - than this abstraction violation :-( ) */ - lp = check("OPAL_MCA_leave_pinned"); - if( RESULT_NOT_FOUND == lp ) lp = check(OPAL_MCA_PREFIX"mpi_leave_pinned"); - lpp = check("OPAL_MCA_leave_pinned_pipeline"); - if( RESULT_NOT_FOUND == lpp ) lpp = check(OPAL_MCA_PREFIX"mpi_leave_pinned_pipeline"); - - /* See if we want to disable this component. */ - r1 = check(OPAL_MCA_PREFIX"memory_linux_disable"); - if (RESULT_NOT_FOUND != r1 && RESULT_NO != r1) { - return; - } - - /* Look for sentinel files (directories) to see if various network - drivers are loaded (yes, I know, further abstraction - violations...). - - * All OpenFabrics devices will have files in - /sys/class/infiniband (even iWARP) - * Open-MX doesn't currently use a reg cache, but it might - someday. So be conservative and check for /dev/open-mx. - * MX will have one or more of /dev/myri[0-9]. Yuck. 
- */ - if (0 == access("/sys/class/infiniband", F_OK) || - 0 == access("/dev/open-mx", F_OK) || - 0 == access("/dev/myri0", F_OK) || - 0 == access("/dev/myri1", F_OK) || - 0 == access("/dev/myri2", F_OK) || - 0 == access("/dev/myri3", F_OK) || - 0 == access("/dev/myri4", F_OK) || - 0 == access("/dev/myri5", F_OK) || - 0 == access("/dev/myri6", F_OK) || - 0 == access("/dev/myri7", F_OK) || - 0 == access("/dev/myri8", F_OK) || - 0 == access("/dev/myri9", F_OK) || - 0 == access("/dev/ipath", F_OK) || - 0 == access("/dev/kgni0", F_OK) || - 0 == access("/dev/mic/scif", F_OK) || - 0 == access("/dev/scif", F_OK)) { - found_driver = true; - } - - /* Simple combination of the results of these two environment - variables (if both "yes" and "no" are specified, then be - conservative and assume "yes"): - - lp / lpp yes no runtime not found - yes yes yes yes yes - no yes no no no - runtime yes no runtime runtime - not found yes no runtime runtime - */ - if (RESULT_YES == lp || RESULT_YES == lpp) { - want_rcache = true; - } else if (RESULT_NO == lp || RESULT_NO == lpp) { - want_rcache = false; - } else { - want_rcache = found_driver; - } - - if (want_rcache) { - /* Initialize ptmalloc */ - ptmalloc_init(); - - /* Now set the hooks to point to our functions */ - __free_hook = opal_memory_linux_free_hook; - __malloc_hook = opal_memory_linux_malloc_hook; - __memalign_hook = opal_memory_linux_memalign_hook; - __realloc_hook = opal_memory_linux_realloc_hook; - } -} - - -/* OMPI change: prototype the function below, otherwise we'll get - warnings about it not being declared (at least in developer/debug - builds). This function is not DECLSPEC'ed because we don't want it - visible outside of this component (i.e., libopen-pal, since this - component is never built as a DSO; it's always slurped into - libopen-pal). This declaration is not in malloc.h because this - function only exists as a horrid workaround to force linkers to - pull in this .o file (see explanation below). */ -void opal_memory_linux_hook_pull(bool *want_hooks); - -/* OMPI change: add a dummy function here that will be called by the - linux component open() function. This dummy function is - necessary for when OMPI is built as --disable-shared - --enable-static --disable-dlopen, because we won't use - -Wl,--export-dynamic when building OMPI. So we need to ensure that - not only that all the symbols in this file end up in libopen-pal.a, - but they also end up in the final exectuable (so that - __malloc_initialize_hook is there, overrides the weak symbol in - glibc, ....etc.). */ -void opal_memory_linux_hook_pull(bool *want_hooks) -{ - /* Make this slightly less than a dummy function -- register the - MCA parameter here (that way we keep the name of this MCA - parameter here within this one, single file). Register solely - so that it shows up in ompi_info -- by the time we register it, - the _malloc_init_hook() has almost certainly already fired, so - whatever value was set via normal MCA mechanisms likely won't - be see if it wasn't already see by the getenv() in the - _malloc_init_hook(). */ - *want_hooks = !opal_memory_linux_disable; -} - - - -/* - * Local variables: - * c-basic-offset: 4 - * End: - */ diff --git a/opal/mca/memory/linux/lran2.h b/opal/mca/memory/linux/lran2.h deleted file mode 100644 index cea9920282..0000000000 --- a/opal/mca/memory/linux/lran2.h +++ /dev/null @@ -1,51 +0,0 @@ -/* lran2.h - * by Wolfram Gloger 1996. - * - * A small, portable pseudo-random number generator. 
- */ - -#ifndef _LRAN2_H -#define _LRAN2_H - -#define LRAN2_MAX 714025l /* constants for portable */ -#define IA 1366l /* random number generator */ -#define IC 150889l /* (see e.g. `Numerical Recipes') */ - -struct lran2_st { - long x, y, v[97]; -}; - -static void -lran2_init(struct lran2_st* d, long seed) -{ - long x; - int j; - - x = (IC - seed) % LRAN2_MAX; - if(x < 0) x = -x; - for(j=0; j<97; j++) { - x = (IA*x + IC) % LRAN2_MAX; - d->v[j] = x; - } - d->x = (IA*x + IC) % LRAN2_MAX; - d->y = d->x; -} - -#ifdef __GNUC__ -__inline__ -#endif -static long -lran2(struct lran2_st* d) -{ - int j = (d->y % 97); - - d->y = d->v[j]; - d->x = (IA*d->x + IC) % LRAN2_MAX; - d->v[j] = d->x; - return d->y; -} - -#undef IA -#undef IC - -#endif diff --git a/opal/mca/memory/linux/malloc-stats.c b/opal/mca/memory/linux/malloc-stats.c deleted file mode 100644 index cfb9fb23df..0000000000 --- a/opal/mca/memory/linux/malloc-stats.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * - * Additional copyrights may follow. - */ -/* Malloc implementation for multiple threads; statistics printing. - Copyright (C) 2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Wolfram Gloger , 2004. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -/* $Id: $ */ - -/* OMPI change: Name-shift all the internal symbols */ -#include "opal/mca/memory/linux/rename.h" - -#include /* needed for malloc_stats */ - -#include - -#include "malloc.h" - -/* - Define HAVE_MMAP as true to optionally make malloc() use mmap() to - allocate very large blocks. These will be returned to the - operating system immediately after a free(). Also, if mmap - is available, it is used as a backup strategy in cases where - MORECORE fails to provide space from system. - - This malloc is best tuned to work with mmap for large requests. - If you do not have mmap, operations involving very large chunks (1MB - or so) may be slower than you'd like. -*/ - -#ifndef HAVE_MMAP -#define HAVE_MMAP 1 -#endif - -#ifdef USE_DL_PREFIX - -#define public_mSTATs dlmalloc_stats - -#else /* USE_DL_PREFIX */ -#ifdef _LIBC - -#define public_mSTATs __malloc_stats - -#else /* !_LIBC */ - -#define public_mSTATs malloc_stats - -#endif /* _LIBC */ -#endif /* USE_DL_PREFIX */ - -/* - malloc_stats(); - Prints on stderr the amount of space obtained from the system (both - via sbrk and mmap), the maximum amount (which may be more than - current if malloc_trim and/or munmap got called), and the current - number of bytes allocated via malloc (or realloc, etc) but not yet - freed. Note that this is the number of bytes allocated, not the - number requested. It will be larger than the number requested - because of alignment and bookkeeping overhead. 
Because it includes - alignment wastage as being in use, this figure may be greater than - zero even when no user-level chunks are allocated. - - The reported current and maximum system memory can be inaccurate if - a program makes other calls to system memory allocation functions - (normally sbrk) outside of malloc. - - malloc_stats prints only the most commonly interesting statistics. - More information can be obtained by calling mallinfo. - -*/ -void public_mSTATs __MALLOC_P((void)); - -/* - ------------------------------ malloc_stats ------------------------------ -*/ - -void public_mSTATs() -{ - int i; - mstate ar_ptr; - struct malloc_global_info mgi; - struct malloc_arena_info mai; - unsigned long in_use_b, system_b, avail_b; -#if defined(THREAD_STATS) && THREAD_STATS - long stat_lock_direct = 0, stat_lock_loop = 0, stat_lock_wait = 0; -#endif - -#if 0 - if(__malloc_initialized < 0) - ptmalloc_init (); -#endif - _int_get_global_info(&mgi); - system_b = in_use_b = mgi.mmapped_mem; -#ifdef _LIBC - _IO_flockfile (stderr); - int old_flags2 = ((_IO_FILE *) stderr)->_flags2; - ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL; -#endif - for (i=0; (ar_ptr = _int_get_arena(i)); i++) { - _int_get_arena_info(ar_ptr, &mai); - avail_b = mai.fastavail + mai.binavail + mai.top_size; - fprintf(stderr, "Arena %d:\n", i); - fprintf(stderr, "system bytes = %10lu\n", - (unsigned long)mai.system_mem); - fprintf(stderr, "in use bytes = %10lu\n", - (unsigned long)(mai.system_mem - avail_b)); -#if MALLOC_DEBUG > 1 - if (i > 0) - dump_heap(heap_for_ptr(top(ar_ptr))); -#endif - system_b += mai.system_mem; - in_use_b += mai.system_mem - avail_b; -#if defined(THREAD_STATS) && THREAD_STATS - stat_lock_direct += mai.stat_lock_direct; - stat_lock_loop += mai.stat_lock_loop; - stat_lock_wait += mai.stat_lock_wait; -#endif - } -#if HAVE_MMAP - fprintf(stderr, "Total (incl. mmap):\n"); -#else - fprintf(stderr, "Total:\n"); -#endif - fprintf(stderr, "system bytes = %10lu\n", system_b); - fprintf(stderr, "in use bytes = %10lu\n", in_use_b); -#ifdef NO_THREADS - fprintf(stderr, "max system bytes = %10lu\n", - (unsigned long)mgi.max_total_mem); -#endif -#if HAVE_MMAP - fprintf(stderr, "max mmap regions = %10u\n", (unsigned int)mgi.max_n_mmaps); - fprintf(stderr, "max mmap bytes = %10lu\n", - (unsigned long)mgi.max_mmapped_mem); -#endif -#if defined(THREAD_STATS) && THREAD_STATS - fprintf(stderr, "heaps created = %10d\n", mgi.stat_n_heaps); - fprintf(stderr, "locked directly = %10ld\n", stat_lock_direct); - fprintf(stderr, "locked in loop = %10ld\n", stat_lock_loop); - fprintf(stderr, "locked waiting = %10ld\n", stat_lock_wait); - fprintf(stderr, "locked total = %10ld\n", - stat_lock_direct + stat_lock_loop + stat_lock_wait); -#endif -#ifdef _LIBC - ((_IO_FILE *) stderr)->_flags2 |= old_flags2; - _IO_funlockfile (stderr); -#endif -} - -#ifdef _LIBC -weak_alias (__malloc_stats, malloc_stats) -#endif diff --git a/opal/mca/memory/linux/malloc.c b/opal/mca/memory/linux/malloc.c deleted file mode 100644 index 55ee1f0996..0000000000 --- a/opal/mca/memory/linux/malloc.c +++ /dev/null @@ -1,5599 +0,0 @@ -/********************** BEGIN OMPI CHANGES *****************************/ -/* - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * - * Additional copyrights may follow. 
- */ - -#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1 -#include "opal_config.h" - -#include -#include - -#include "opal/sys/atomic.h" -#include "opal/memoryhooks/memory_internal.h" -#include "opal/mca/memory/linux/memory_linux.h" -/* Name-shift all the internal symbols */ -#include "opal/mca/memory/linux/rename.h" - -/* - * Not all systems have sbrk() declared, since it's technically not a - * POSIX function. - */ -#if !HAVE_DECL_SBRK -void *sbrk(); -#endif - - -static void *opal_memory_linux_free_ptmalloc2_sbrk(int inc) -{ - if (inc < 0) { - long oldp = (long) sbrk(0); - opal_mem_hooks_release_hook((void*) (oldp + inc), -inc, 1); - } - - return sbrk(inc); -} - -/* if we are trying to catch only allocations from and releases to the - operating system, intercept sbrk, mmap, and munmap. If we want to - intercept every call to malloc/realloc/free/etc., don't do this, as - we need to add something into each of those calls anyway. */ -#define MORECORE opal_memory_linux_free_ptmalloc2_sbrk -#define munmap(a,b) opal_memory_linux_free_ptmalloc2_munmap(a,b,1) - -/* make some non-GCC compilers happy */ -#ifndef __GNUC__ -#define __const const -#endif - -/********************* END OMPI CHANGES ******************************/ - - - - -/* Malloc implementation for multiple threads without lock contention. - Copyright (C) 1996-2002, 2003, 2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Wolfram Gloger - and Doug Lea , 2001. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public License as - published by the Free Software Foundation; either version 2.1 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* - This is a version (aka ptmalloc2) of malloc/free/realloc written by - Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger. - -* Version ptmalloc2-20011215 - $Id: malloc.c,v 1.20 2004/11/04 17:31:04 wg Exp $ - based on: - VERSION 2.7.1pre1 Sat May 12 07:41:21 2001 Doug Lea (dl at gee) - - Note: There may be an updated version of this malloc obtainable at - http://www.malloc.de/malloc/ptmalloc2.tar.gz - Check before installing! - -* Quickstart - - In order to compile this implementation, a Makefile is provided with - the ptmalloc2 distribution, which has pre-defined targets for some - popular systems (e.g. "make posix" for Posix threads). All that is - typically required with regard to compiler flags is the selection of - an appropriate malloc-machine.h include file via -I directives. - Many/most systems will additionally require USE_TSD_DATA_HACK to be - defined, so this is the default for "make posix". - -* Why use this malloc? - - This is not the fastest, most space-conserving, most portable, or - most tunable malloc ever written. However it is among the fastest - while also being among the most space-conserving, portable and tunable. - Consistent balance across these factors results in a good general-purpose - allocator for malloc-intensive programs. 
- - The main properties of the algorithms are: - * For large (>= 512 bytes) requests, it is a pure best-fit allocator, - with ties normally decided via FIFO (i.e. least recently used). - * For small (<= 64 bytes by default) requests, it is a caching - allocator, that maintains pools of quickly recycled chunks. - * In between, and for combinations of large and small requests, it does - the best it can trying to meet both goals at once. - * For very large requests (>= 128KB by default), it relies on system - memory mapping facilities, if supported. - - For a longer but slightly out of date high-level description, see - http://gee.cs.oswego.edu/dl/html/malloc.html - - You may already by default be using a C library containing a malloc - that is based on some version of this malloc (for example in - linux). You might still want to use the one in this file in order to - customize settings or to avoid overheads associated with library - versions. - -* Contents, described in more detail in "description of public routines" below. - - Standard (ANSI/SVID/...) functions: - malloc(size_t n); - calloc(size_t n_elements, size_t element_size); - free(Void_t* p); - realloc(Void_t* p, size_t n); - memalign(size_t alignment, size_t n); - valloc(size_t n); - mallinfo() - mallopt(int parameter_number, int parameter_value) - - Additional functions: - independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]); - independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); - pvalloc(size_t n); - cfree(Void_t* p); - malloc_trim(size_t pad); - malloc_usable_size(Void_t* p); - malloc_stats(); - -* Vital statistics: - - Supported pointer representation: 4 or 8 bytes - Supported size_t representation: 4 or 8 bytes - Note that size_t is allowed to be 4 bytes even if pointers are 8. - You can adjust this by defining INTERNAL_SIZE_T - - Alignment: 2 * sizeof(size_t) (default) - (i.e., 8 byte alignment with 4byte size_t). This suffices for - nearly all current machines and C compilers. However, you can - define MALLOC_ALIGNMENT to be wider than this if necessary. - - Minimum overhead per allocated chunk: 4 or 8 bytes - Each malloced chunk has a hidden word of overhead holding size - and status information. - - Minimum allocated size: 4-byte ptrs: 16 bytes (including 4 overhead) - 8-byte ptrs: 24/32 bytes (including, 4/8 overhead) - - When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte - ptrs but 4 byte size) or 24 (for 8/8) additional bytes are - needed; 4 (8) for a trailing size field and 8 (16) bytes for - free list pointers. Thus, the minimum allocatable size is - 16/24/32 bytes. - - Even a request for zero bytes (i.e., malloc(0)) returns a - pointer to something of the minimum allocatable size. - - The maximum overhead wastage (i.e., number of extra bytes - allocated than were requested in malloc) is less than or equal - to the minimum size, except for requests >= mmap_threshold that - are serviced via mmap(), where the worst case wastage is 2 * - sizeof(size_t) bytes plus the remainder from a system page (the - minimal mmap unit); typically 4096 or 8192 bytes. - - Maximum allocated size: 4-byte size_t: 2^32 minus about two pages - 8-byte size_t: 2^64 minus about two pages - - It is assumed that (possibly signed) size_t values suffice to - represent chunk sizes. `Possibly signed' is due to the fact - that `size_t' may be defined on a system as either a signed or - an unsigned type. The ISO C standard says that it must be - unsigned, but a few systems are known not to adhere to this. 
- Additionally, even when size_t is unsigned, sbrk (which is by - default used to obtain memory from system) accepts signed - arguments, and may not be able to handle size_t-wide arguments - with negative sign bit. Generally, values that would - appear as negative after accounting for overhead and alignment - are supported only via mmap(), which does not have this - limitation. - - Requests for sizes outside the allowed range will perform an optional - failure action and then return null. (Requests may also - also fail because a system is out of memory.) - - Thread-safety: thread-safe unless NO_THREADS is defined - - Compliance: I believe it is compliant with the 1997 Single Unix Specification - (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably - others as well. - -* Synopsis of compile-time options: - - People have reported using previous versions of this malloc on all - versions of Unix, sometimes by tweaking some of the defines - below. It has been tested most extensively on Solaris and - Linux. It is also reported to work on WIN32 platforms. - People also report using it in stand-alone embedded systems. - - The implementation is in straight, hand-tuned ANSI C. It is not - at all modular. (Sorry!) It uses a lot of macros. To be at all - usable, this code should be compiled using an optimizing compiler - (for example gcc -O3) that can simplify expressions and control - paths. (FAQ: some macros import variables as arguments rather than - declare locals because people reported that some debuggers - otherwise get confused.) - - OPTION DEFAULT VALUE - - Compilation Environment options: - - __STD_C derived from C compiler defines - WIN32 NOT defined - HAVE_MEMCPY defined - USE_MEMCPY 1 if HAVE_MEMCPY is defined - HAVE_MMAP defined as 1 - MMAP_CLEARS 1 - HAVE_MREMAP 0 unless linux defined - USE_ARENAS the same as HAVE_MMAP - USE_STARTER 1 - malloc_getpagesize derived from system #includes, or 4096 if not - HAVE_USR_INCLUDE_MALLOC_H NOT defined - LACKS_UNISTD_H NOT defined unless WIN32 - LACKS_SYS_PARAM_H NOT defined unless WIN32 - LACKS_SYS_MMAN_H NOT defined unless WIN32 - - Changing default word sizes: - - INTERNAL_SIZE_T size_t - MALLOC_ALIGNMENT 2 * sizeof(INTERNAL_SIZE_T) - - Configuration and functionality options: - - USE_DL_PREFIX NOT defined - USE_PUBLIC_MALLOC_WRAPPERS NOT defined - USE_MALLOC_LOCK NOT defined - MALLOC_DEBUG NOT defined - REALLOC_ZERO_BYTES_FREES 1 - MALLOC_FAILURE_ACTION errno = ENOMEM, if __STD_C defined, else no-op - TRIM_FASTBINS 0 - FIRST_SORTED_BIN_SIZE 512 - - Options for customizing MORECORE: - - MORECORE sbrk - MORECORE_FAILURE -1 - MORECORE_CONTIGUOUS 1 - MORECORE_CANNOT_TRIM NOT defined - MORECORE_CLEARS 1 - MMAP_AS_MORECORE_SIZE (1024 * 1024) - - Tuning options that are also dynamically changeable via mallopt: - - DEFAULT_MXFAST 64 - DEFAULT_TRIM_THRESHOLD 128 * 1024 - DEFAULT_TOP_PAD 0 - DEFAULT_MMAP_THRESHOLD 128 * 1024 - DEFAULT_MMAP_MAX 65536 - - There are several other #defined constants and macros that you - probably don't want to touch unless you are extending or adapting malloc. */ - -/* - __STD_C should be nonzero if using ANSI-standard C compiler, a C++ - compiler, or a C compiler sufficiently close to ANSI to get away - with it. 
-*/ - -#ifndef __STD_C -#if defined(__STDC__) || defined(__cplusplus) -#define __STD_C 1 -#else -#define __STD_C 0 -#endif -#endif /*__STD_C*/ - - -/* - Void_t* is the pointer type that malloc should say it returns -*/ - -#ifndef Void_t -#if (__STD_C || defined(WIN32)) -#define Void_t void -#else -#define Void_t char -#endif -#endif /*Void_t*/ - -#if __STD_C -#include /* for size_t */ -#include /* for getenv(), abort() */ -#else -#include -#endif - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* define LACKS_UNISTD_H if your system does not have a . */ - -/* #define LACKS_UNISTD_H */ - -#ifndef LACKS_UNISTD_H -#include -#endif - -/* define LACKS_SYS_PARAM_H if your system does not have a . */ - -/* #define LACKS_SYS_PARAM_H */ - - -#include /* needed for malloc_stats */ -#include /* needed for optional MALLOC_FAILURE_ACTION */ - - -/* - Debugging: - - Because freed chunks may be overwritten with bookkeeping fields, this - malloc will often die when freed memory is overwritten by user - programs. This can be very effective (albeit in an annoying way) - in helping track down dangling pointers. - - If you compile with -DMALLOC_DEBUG, a number of assertion checks are - enabled that will catch more memory errors. You probably won't be - able to make much sense of the actual assertion errors, but they - should help you locate incorrectly overwritten memory. The checking - is fairly extensive, and will slow down execution - noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set - will attempt to check every non-mmapped allocated and free chunk in - the course of computing the summmaries. (By nature, mmapped regions - cannot be checked very much automatically.) - - Setting MALLOC_DEBUG may also be helpful if you are trying to modify - this code. The assertions in the check routines spell out in more - detail the assumptions and invariants underlying the algorithms. - - Setting MALLOC_DEBUG does NOT provide an automated mechanism for - checking that all accesses to malloced memory stay within their - bounds. However, there are several add-ons and adaptations of this - or other mallocs available that do this. -*/ - -#if MALLOC_DEBUG -#include -#else -#undef assert -#define assert(x) ((void)0) -#endif - - -/* - INTERNAL_SIZE_T is the word-size used for internal bookkeeping - of chunk sizes. - - The default version is the same as size_t. - - While not strictly necessary, it is best to define this as an - unsigned type, even if size_t is a signed type. This may avoid some - artificial size limitations on some systems. - - On a 64-bit machine, you may be able to reduce malloc overhead by - defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the - expense of not being able to handle more than 2^32 of malloced - space. If this limitation is acceptable, you are encouraged to set - this unless you are on a platform requiring 16byte alignments. In - this case the alignment requirements turn out to negate any - potential advantages of decreasing size_t word size. 
- - Implementors: Beware of the possible combinations of: - - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits, - and might be the same width as int or as long - - size_t might have different width and signedness as INTERNAL_SIZE_T - - int and long might be 32 or 64 bits, and might be the same width - To deal with this, most comparisons and difference computations - among INTERNAL_SIZE_Ts should cast them to unsigned long, being - aware of the fact that casting an unsigned int to a wider long does - not sign-extend. (This also makes checking for negative numbers - awkward.) Some of these casts result in harmless compiler warnings - on some systems. -*/ - -#ifndef INTERNAL_SIZE_T -#define INTERNAL_SIZE_T size_t -#endif - -/* The corresponding word size */ -#define SIZE_SZ (sizeof(INTERNAL_SIZE_T)) - - -/* - MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks. - It must be a power of two at least 2 * SIZE_SZ, even on machines - for which smaller alignments would suffice. It may be defined as - larger than this though. Note however that code and data structures - are optimized for the case of 8-byte alignment. -*/ - - -#ifndef MALLOC_ALIGNMENT -#define MALLOC_ALIGNMENT (2 * SIZE_SZ) -#endif - -/* The corresponding bit mask value */ -#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1) - - - -/* - REALLOC_ZERO_BYTES_FREES should be set if a call to - realloc with zero bytes should be the same as a call to free. - This is required by the C standard. Otherwise, since this malloc - returns a unique pointer for malloc(0), so does realloc(p, 0). -*/ - -#ifndef REALLOC_ZERO_BYTES_FREES -#define REALLOC_ZERO_BYTES_FREES 1 -#endif - -/* - TRIM_FASTBINS controls whether free() of a very small chunk can - immediately lead to trimming. Setting to true (1) can reduce memory - footprint, but will almost always slow down programs that use a lot - of small chunks. - - Define this only if you are willing to give up some speed to more - aggressively reduce system-level memory footprint when releasing - memory in programs that use many small chunks. You can get - essentially the same effect by setting MXFAST to 0, but this can - lead to even greater slowdowns in programs using many small chunks. - TRIM_FASTBINS is an in-between compile-time option, that disables - only those chunks bordering topmost memory from being placed in - fastbins. -*/ - -#ifndef TRIM_FASTBINS -#define TRIM_FASTBINS 0 -#endif - - -/* - USE_DL_PREFIX will prefix all public routines with the string 'dl'. - This is necessary when you only want to use this malloc in one part - of a program, using your regular system malloc elsewhere. -*/ - -/* #define USE_DL_PREFIX */ - - -/* - Two-phase name translation. - All of the actual routines are given mangled names. - When wrappers are used, they become the public callable versions. - When DL_PREFIX is used, the callable names are prefixed. 
-*/ - -#ifdef USE_DL_PREFIX -#define public_cALLOc dlcalloc -#define public_fREe dlfree -#define public_cFREe dlcfree -#define public_mALLOc dlmalloc -#define public_mEMALIGn dlmemalign -#define public_rEALLOc dlrealloc -#define public_vALLOc dlvalloc -#define public_pVALLOc dlpvalloc -#define public_mALLINFo dlmallinfo -#define public_mALLOPt dlmallopt -#define public_mTRIm dlmalloc_trim -#define public_mSTATs dlmalloc_stats -#define public_mUSABLe dlmalloc_usable_size -#define public_iCALLOc dlindependent_calloc -#define public_iCOMALLOc dlindependent_comalloc -#define public_gET_STATe dlget_state -#define public_sET_STATe dlset_state -#else /* USE_DL_PREFIX */ -#ifdef _LIBC - -/* Special defines for the GNU C library. */ -#define public_cALLOc __libc_calloc -#define public_fREe __libc_free -#define public_cFREe __libc_cfree -#define public_mALLOc __libc_malloc -#define public_mEMALIGn __libc_memalign -#define public_rEALLOc __libc_realloc -#define public_vALLOc __libc_valloc -#define public_pVALLOc __libc_pvalloc -#define public_mALLINFo __libc_mallinfo -#define public_mALLOPt __libc_mallopt -#define public_mTRIm __malloc_trim -#define public_mSTATs __malloc_stats -#define public_mUSABLe __malloc_usable_size -#define public_iCALLOc __libc_independent_calloc -#define public_iCOMALLOc __libc_independent_comalloc -#define public_gET_STATe __malloc_get_state -#define public_sET_STATe __malloc_set_state -#define malloc_getpagesize __getpagesize() -#define open __open -#define mmap __mmap -#define munmap __munmap -#define mremap __mremap -#define mprotect __mprotect -#define MORECORE (*__morecore) -#define MORECORE_FAILURE 0 - -Void_t * __default_morecore (ptrdiff_t); -Void_t *(*__morecore)(ptrdiff_t) = __default_morecore; - -#else /* !_LIBC */ - /* OMPI change: put "opal_memory_ptmalloc2_" in front of all of these */ -#define public_cALLOc opal_memory_ptmalloc2_calloc -#define public_fREe opal_memory_ptmalloc2_free -#define public_cFREe opal_memory_ptmalloc2_cfree -#define public_mALLOc opal_memory_ptmalloc2_malloc -#define public_mEMALIGn opal_memory_ptmalloc2_memalign -#define public_rEALLOc opal_memory_ptmalloc2_realloc -#define public_vALLOc opal_memory_ptmalloc2_valloc -#define public_pVALLOc opal_memory_ptmalloc2_pvalloc -#define public_mALLINFo opal_memory_ptmalloc2_mallinfo -#define public_mALLOPt opal_memory_ptmalloc2_mallopt -#define public_mTRIm opal_memory_ptmalloc2_malloc_trim -#define public_mSTATs opal_memory_ptmalloc2_malloc_stats -#define public_mUSABLe opal_memory_ptmalloc2_malloc_usable_size -#define public_iCALLOc opal_memory_ptmalloc2_independent_calloc -#define public_iCOMALLOc opal_memory_ptmalloc2_independent_comalloc -#define public_gET_STATe opal_memory_ptmalloc2_malloc_get_state -#define public_sET_STATe opal_memory_ptmalloc2_malloc_set_state -#endif /* _LIBC */ -#endif /* USE_DL_PREFIX */ - - -#if !defined _LIBC && (!defined __GNUC__ || __GNUC__<3) -#ifndef __builtin_expect -#define __builtin_expect(expr, val) (expr) -#endif -#endif - -/* - HAVE_MEMCPY should be defined if you are not otherwise using - ANSI STD C, but still have memcpy and memset in your C library - and want to use them in calloc and realloc. Otherwise simple - macro versions are defined below. - - USE_MEMCPY should be defined as 1 if you actually want to - have memset and memcpy called. People report that the macro - versions are faster than libc versions on some systems. 
- - Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks - (of <= 36 bytes) are manually unrolled in realloc and calloc. -*/ - -#define HAVE_MEMCPY - -#ifndef USE_MEMCPY -#ifdef HAVE_MEMCPY -#define USE_MEMCPY 1 -#else -#define USE_MEMCPY 0 -#endif -#endif - - -#if (__STD_C || defined(HAVE_MEMCPY)) - -#ifdef _LIBC -# include -#else -#ifdef WIN32 -/* On Win32 memset and memcpy are already declared in windows.h */ -#else -#if __STD_C -void* memset(void*, int, size_t); -void* memcpy(void*, const void*, size_t); -#else -Void_t* memset(); -Void_t* memcpy(); -#endif -#endif -#endif -#endif - -/* - MALLOC_FAILURE_ACTION is the action to take before "return 0" when - malloc fails to be able to return memory, either because memory is - exhausted or because of illegal arguments. - - By default, sets errno if running on STD_C platform, else does nothing. -*/ - -#ifndef MALLOC_FAILURE_ACTION -#if __STD_C -#define MALLOC_FAILURE_ACTION \ - errno = ENOMEM; - -#else -#define MALLOC_FAILURE_ACTION -#endif -#endif - -/* - MORECORE-related declarations. By default, rely on sbrk -*/ - - -#ifdef LACKS_UNISTD_H -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) -#if __STD_C -extern Void_t* sbrk(ptrdiff_t); -#else -extern Void_t* sbrk(); -#endif -#endif -#endif - -/* - MORECORE is the name of the routine to call to obtain more memory - from the system. See below for general guidance on writing - alternative MORECORE functions, as well as a version for WIN32 and a - sample version for pre-OSX macos. -*/ - -#ifndef MORECORE -#define MORECORE sbrk -#endif - -/* - MORECORE_FAILURE is the value returned upon failure of MORECORE - as well as mmap. Since it cannot be an otherwise valid memory address, - and must reflect values of standard sys calls, you probably ought not - try to redefine it. -*/ - -#ifndef MORECORE_FAILURE -#define MORECORE_FAILURE (-1) -#endif - -/* - If MORECORE_CONTIGUOUS is true, take advantage of fact that - consecutive calls to MORECORE with positive arguments always return - contiguous increasing addresses. This is true of unix sbrk. Even - if not defined, when regions happen to be contiguous, malloc will - permit allocations spanning regions obtained from different - calls. But defining this when applicable enables some stronger - consistency checks and space efficiencies. -*/ - -#ifndef MORECORE_CONTIGUOUS -#define MORECORE_CONTIGUOUS 1 -#endif - -/* - Define MORECORE_CANNOT_TRIM if your version of MORECORE - cannot release space back to the system when given negative - arguments. This is generally necessary only if you are using - a hand-crafted MORECORE function that cannot handle negative arguments. -*/ - -/* #define MORECORE_CANNOT_TRIM */ - -/* MORECORE_CLEARS (default 1) - The degree to which the routine mapped to MORECORE zeroes out - memory: never (0), only for newly allocated space (1) or always - (2). The distinction between (1) and (2) is necessary because on - some systems, if the application first decrements and then - increments the break value, the contents of the reallocated space - are unspecified. -*/ - -#ifndef MORECORE_CLEARS -#define MORECORE_CLEARS 1 -#endif - - -/* - Define HAVE_MMAP as true to optionally make malloc() use mmap() to - allocate very large blocks. These will be returned to the - operating system immediately after a free(). Also, if mmap - is available, it is used as a backup strategy in cases where - MORECORE fails to provide space from system. - - This malloc is best tuned to work with mmap for large requests. 
- If you do not have mmap, operations involving very large chunks (1MB - or so) may be slower than you'd like. -*/ - -#ifndef HAVE_MMAP -#define HAVE_MMAP 1 - -/* - Standard unix mmap using /dev/zero clears memory so calloc doesn't - need to. -*/ - -#ifndef MMAP_CLEARS -#define MMAP_CLEARS 1 -#endif - -#else /* no mmap */ -#ifndef MMAP_CLEARS -#define MMAP_CLEARS 0 -#endif -#endif - - -/* - MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if - sbrk fails, and mmap is used as a backup (which is done only if - HAVE_MMAP). The value must be a multiple of page size. This - backup strategy generally applies only when systems have "holes" in - address space, so sbrk cannot perform contiguous expansion, but - there is still space available on system. On systems for which - this is known to be useful (i.e. most linux kernels), this occurs - only when programs allocate huge amounts of memory. Between this, - and the fact that mmap regions tend to be limited, the size should - be large, to avoid too many mmap calls and thus avoid running out - of kernel resources. -*/ - -#ifndef MMAP_AS_MORECORE_SIZE -#define MMAP_AS_MORECORE_SIZE (1024 * 1024) -#endif - -/* - Define HAVE_MREMAP to make realloc() use mremap() to re-allocate - large blocks. This is currently only possible on Linux with - kernel versions newer than 1.3.77. -*/ - -#ifndef HAVE_MREMAP -#ifdef linux -#define HAVE_MREMAP 1 -#else -#define HAVE_MREMAP 0 -#endif - -#endif /* HAVE_MMAP */ - -/* Define USE_ARENAS to enable support for multiple `arenas'. These - are allocated using mmap(), are necessary for threads and - occasionally useful to overcome address space limitations affecting - sbrk(). */ - -#ifndef USE_ARENAS -#define USE_ARENAS HAVE_MMAP -#endif - -/* USE_STARTER determines if and when the special "starter" hook - functions are used: not at all (0), during ptmalloc_init (first bit - set), or from the beginning until an explicit call to ptmalloc_init - (second bit set). This is necessary if thread-related - initialization functions (e.g. pthread_key_create) require - malloc() calls (set USE_STARTER=1), or if those functions initially - cannot be used at all (set USE_STARTER=2 and perform an explicit - ptmalloc_init() when the thread library is ready, typically at the - start of main()). */ - -#ifndef USE_STARTER -# ifndef _LIBC -# define USE_STARTER 1 -# else -# if USE___THREAD || (defined USE_TLS && !defined SHARED) - /* These routines are never needed in this configuration. */ -# define USE_STARTER 0 -# else -# define USE_STARTER (USE_TLS ? 4 : 1) -# endif -# endif -#endif - - -/* - The system page size. To the extent possible, this malloc manages - memory from the system in page-size units. Note that this value is - cached during initialization into a field of malloc_state. So even - if malloc_getpagesize is a function, it is only called once. - - The following mechanics for getpagesize were adapted from bsd/gnu - getpagesize.h. If none of the system-probes here apply, a value of - 4096 is used, which should be OK: If they don't apply, then using - the actual value probably doesn't impact performance. 
-*/ - - -#ifndef malloc_getpagesize - -#ifndef LACKS_UNISTD_H -# include -#endif - -# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ -# ifndef _SC_PAGE_SIZE -# define _SC_PAGE_SIZE _SC_PAGESIZE -# endif -# endif - -# ifdef _SC_PAGE_SIZE -# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) -# else -# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) - extern size_t getpagesize(); -# define malloc_getpagesize getpagesize() -# else -# ifdef WIN32 /* use supplied emulation of getpagesize */ -# define malloc_getpagesize getpagesize() -# else -# ifndef LACKS_SYS_PARAM_H -# include -# endif -# ifdef EXEC_PAGESIZE -# define malloc_getpagesize EXEC_PAGESIZE -# else -# ifdef NBPG -# ifndef CLSIZE -# define malloc_getpagesize NBPG -# else -# define malloc_getpagesize (NBPG * CLSIZE) -# endif -# else -# ifdef NBPC -# define malloc_getpagesize NBPC -# else -# ifdef PAGESIZE -# define malloc_getpagesize PAGESIZE -# else /* just guess */ -# define malloc_getpagesize (4096) -# endif -# endif -# endif -# endif -# endif -# endif -# endif -#endif - -/* - This version of malloc supports the standard SVID/XPG mallinfo - routine that returns a struct containing usage properties and - statistics. It should work on any SVID/XPG compliant system that has - a /usr/include/malloc.h defining struct mallinfo. (If you'd like to - install such a thing yourself, cut out the preliminary declarations - as described above and below and save them in a malloc.h file. But - there's no compelling reason to bother to do this.) - - The main declaration needed is the mallinfo struct that is returned - (by-copy) by mallinfo(). The SVID/XPG malloinfo struct contains a - bunch of fields that are not even meaningful in this version of - malloc. These fields are are instead filled by mallinfo() with - other numbers that might be of interest. - - HAVE_USR_INCLUDE_MALLOC_H should be set if you have a - /usr/include/malloc.h file that includes a declaration of struct - mallinfo. If so, it is included; else an SVID2/XPG2 compliant - version is declared below. These must be precisely the same for - mallinfo() to work. The original SVID version of this struct, - defined on most systems with mallinfo, declares all fields as - ints. But some others define as unsigned long. If your system - defines the fields using a type of different width than listed here, - you must #include your system version and #define - HAVE_USR_INCLUDE_MALLOC_H. -*/ - -/* #define HAVE_USR_INCLUDE_MALLOC_H */ - -#ifdef HAVE_USR_INCLUDE_MALLOC_H -#include "/usr/include/malloc.h" -#endif - - -/* ---------- description of public routines ------------ */ - -/* - malloc(size_t n) - Returns a pointer to a newly allocated chunk of at least n bytes, or null - if no space is available. Additionally, on failure, errno is - set to ENOMEM on ANSI C systems. - - If n is zero, malloc returns a minumum-sized chunk. (The minimum - size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit - systems.) On most systems, size_t is an unsigned type, so calls - with negative arguments are interpreted as requests for huge amounts - of space, which will often fail. The maximum supported value of n - differs across systems, but is in all cases less than the maximum - representable value of a size_t. 
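As a hedged illustration of the calling contract just described (null return on failure, errno set to ENOMEM on ANSI C systems, a minimum-sized chunk for n == 0), a caller might wrap malloc like this; xmalloc is only an example name:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Abort with a message when malloc reports exhaustion. */
    static void *xmalloc(size_t n)
    {
      void *p = malloc(n);
      if (p == NULL && n != 0) {        /* n == 0 still yields a minimal chunk */
        fprintf(stderr, "malloc(%zu) failed: %s\n", n, strerror(errno));
        exit(EXIT_FAILURE);
      }
      return p;
    }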
-*/ -#if __STD_C -Void_t* public_mALLOc(size_t); -#else -Void_t* public_mALLOc(); -#endif -#ifdef libc_hidden_proto -libc_hidden_proto (public_mALLOc) -#endif - -/* - free(Void_t* p) - Releases the chunk of memory pointed to by p, that had been previously - allocated using malloc or a related routine such as realloc. - It has no effect if p is null. It can have arbitrary (i.e., bad!) - effects if p has already been freed. - - Unless disabled (using mallopt), freeing very large spaces will - when possible, automatically trigger operations that give - back unused memory to the system, thus reducing program footprint. -*/ -#if __STD_C -void public_fREe(Void_t*); -#else -void public_fREe(); -#endif -#ifdef libc_hidden_proto -libc_hidden_proto (public_fREe) -#endif - -/* - calloc(size_t n_elements, size_t element_size); - Returns a pointer to n_elements * element_size bytes, with all locations - set to zero. -*/ -#if __STD_C -Void_t* public_cALLOc(size_t, size_t); -#else -Void_t* public_cALLOc(); -#endif - -/* - realloc(Void_t* p, size_t n) - Returns a pointer to a chunk of size n that contains the same data - as does chunk p up to the minimum of (n, p's size) bytes, or null - if no space is available. - - The returned pointer may or may not be the same as p. The algorithm - prefers extending p when possible, otherwise it employs the - equivalent of a malloc-copy-free sequence. - - If p is null, realloc is equivalent to malloc. - - If space is not available, realloc returns null, errno is set (if on - ANSI) and p is NOT freed. - - if n is for fewer bytes than already held by p, the newly unused - space is lopped off and freed if possible. Unless the #define - REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of - zero (re)allocates a minimum-sized chunk. - - Large chunks that were internally obtained via mmap will always - be reallocated using malloc-copy-free sequences unless - the system supports MREMAP (currently only linux). - - The old unix realloc convention of allowing the last-free'd chunk - to be used as an argument to realloc is not supported. -*/ -#if __STD_C -Void_t* public_rEALLOc(Void_t*, size_t); -#else -Void_t* public_rEALLOc(); -#endif -#ifdef libc_hidden_proto -libc_hidden_proto (public_rEALLOc) -#endif - -/* - memalign(size_t alignment, size_t n); - Returns a pointer to a newly allocated chunk of n bytes, aligned - in accord with the alignment argument. - - The alignment argument should be a power of two. If the argument is - not a power of two, the nearest greater power is used. - 8-byte alignment is guaranteed by normal malloc calls, so don't - bother calling memalign with an argument of 8 or less. - - Overreliance on memalign is a sure way to fragment space. -*/ -#if __STD_C -Void_t* public_mEMALIGn(size_t, size_t); -#else -Void_t* public_mEMALIGn(); -#endif -#ifdef libc_hidden_proto -libc_hidden_proto (public_mEMALIGn) -#endif - -/* - valloc(size_t n); - Equivalent to memalign(pagesize, n), where pagesize is the page - size of the system. If the pagesize is unknown, 4096 is used. -*/ -#if __STD_C -Void_t* public_vALLOc(size_t); -#else -Void_t* public_vALLOc(); -#endif - - - -/* - mallopt(int parameter_number, int parameter_value) - Sets tunable parameters The format is to provide a - (parameter-number, parameter-value) pair. mallopt then sets the - corresponding parameter to the argument value if it can (i.e., so - long as the value is meaningful), and returns 1 if successful else - 0. 
SVID/XPG/ANSI defines four standard param numbers for mallopt, - normally defined in malloc.h. Only one of these (M_MXFAST) is used - in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply, - so setting them has no effect. But this malloc also supports four - other options in mallopt. See below for details. Briefly, supported - parameters are as follows (listed defaults are for "typical" - configurations). - - Symbol param # default allowed param values - M_MXFAST 1 64 0-80 (0 disables fastbins) - M_TRIM_THRESHOLD -1 128*1024 any (-1U disables trimming) - M_TOP_PAD -2 0 any - M_MMAP_THRESHOLD -3 128*1024 any (or 0 if no MMAP support) - M_MMAP_MAX -4 65536 any (0 disables use of mmap) -*/ -#if __STD_C -int public_mALLOPt(int, int); -#else -int public_mALLOPt(); -#endif - - -/* - mallinfo() - Returns (by copy) a struct containing various summary statistics: - - arena: current total non-mmapped bytes allocated from system - ordblks: the number of free chunks - smblks: the number of fastbin blocks (i.e., small chunks that - have been freed but not use resused or consolidated) - hblks: current number of mmapped regions - hblkhd: total bytes held in mmapped regions - usmblks: the maximum total allocated space. This will be greater - than current total if trimming has occurred. - fsmblks: total bytes held in fastbin blocks - uordblks: current total allocated space (normal or mmapped) - fordblks: total free space - keepcost: the maximum number of bytes that could ideally be released - back to system via malloc_trim. ("ideally" means that - it ignores page restrictions etc.) - - Because these fields are ints, but internal bookkeeping may - be kept as longs, the reported values may wrap around zero and - thus be inaccurate. -*/ -#if __STD_C -struct mallinfo public_mALLINFo(void); -#else -struct mallinfo public_mALLINFo(); -#endif - -/* - independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]); - - independent_calloc is similar to calloc, but instead of returning a - single cleared space, it returns an array of pointers to n_elements - independent elements that can hold contents of size elem_size, each - of which starts out cleared, and can be independently freed, - realloc'ed etc. The elements are guaranteed to be adjacently - allocated (this is not guaranteed to occur with multiple callocs or - mallocs), which may also improve cache locality in some - applications. - - The "chunks" argument is optional (i.e., may be null, which is - probably the most typical usage). If it is null, the returned array - is itself dynamically allocated and should also be freed when it is - no longer needed. Otherwise, the chunks array must be of at least - n_elements in length. It is filled in with the pointers to the - chunks. - - In either case, independent_calloc returns this pointer array, or - null if the allocation failed. If n_elements is zero and "chunks" - is null, it returns a chunk representing an array with zero elements - (which should be freed if not wanted). - - Each element must be individually freed when it is no longer - needed. If you'd like to instead be able to free all at once, you - should instead use regular calloc and assign pointers into this - space to represent elements. (In this case though, you cannot - independently free elements.) - - independent_calloc simplifies and speeds up implementations of many - kinds of pools. 
It may also be useful when constructing large data
-  structures that initially have a fixed number of fixed-sized nodes,
-  but the number is not known at compile time, and some of the nodes
-  may later need to be freed. For example:
-
-  struct Node { int item; struct Node* next; };
-
-  struct Node* build_list() {
-    struct Node** pool;
-    int n = read_number_of_nodes_needed();
-    if (n <= 0) return 0;
-    pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0));
-    if (pool == 0) die();
-    // organize into a linked list...
-    struct Node* first = pool[0];
-    for (int i = 0; i < n-1; ++i)
-      pool[i]->next = pool[i+1];
-    free(pool);     // Can now free the array (or not, if it is needed later)
-    return first;
-  }
-*/
-#if __STD_C
-Void_t** public_iCALLOc(size_t, size_t, Void_t**);
-#else
-Void_t** public_iCALLOc();
-#endif
-
-/*
-  independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
-
-  independent_comalloc allocates, all at once, a set of n_elements
-  chunks with sizes indicated in the "sizes" array. It returns
-  an array of pointers to these elements, each of which can be
-  independently freed, realloc'ed etc. The elements are guaranteed to
-  be adjacently allocated (this is not guaranteed to occur with
-  multiple callocs or mallocs), which may also improve cache locality
-  in some applications.
-
-  The "chunks" argument is optional (i.e., may be null). If it is null
-  the returned array is itself dynamically allocated and should also
-  be freed when it is no longer needed. Otherwise, the chunks array
-  must be of at least n_elements in length. It is filled in with the
-  pointers to the chunks.
-
-  In either case, independent_comalloc returns this pointer array, or
-  null if the allocation failed. If n_elements is zero and chunks is
-  null, it returns a chunk representing an array with zero elements
-  (which should be freed if not wanted).
-
-  Each element must be individually freed when it is no longer
-  needed. If you'd like to instead be able to free all at once, you
-  should instead use a single regular malloc, and assign pointers at
-  particular offsets in the aggregate space. (In this case though, you
-  cannot independently free elements.)
-
-  independent_comalloc differs from independent_calloc in that each
-  element may have a different size, and also that it does not
-  automatically clear elements.
-
-  independent_comalloc can be used to speed up allocation in cases
-  where several structs or objects must always be allocated at the
-  same time. For example:
-
-  struct Head { ... };
-  struct Foot { ... };
-
-  void send_message(char* msg) {
-    int msglen = strlen(msg);
-    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
-    void* chunks[3];
-    if (independent_comalloc(3, sizes, chunks) == 0)
-      die();
-    struct Head* head = (struct Head*)(chunks[0]);
-    char* body = (char*)(chunks[1]);
-    struct Foot* foot = (struct Foot*)(chunks[2]);
-    // ...
-  }
-
-  In general though, independent_comalloc is worth using only for
-  larger values of n_elements. For small values, you probably won't
-  detect enough difference from series of malloc calls to bother.
-
-  Overuse of independent_comalloc can increase overall memory usage,
-  since it cannot reuse existing noncontiguous small chunks that
-  might be available for some of the elements.
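The send_message() fragment above elides the struct bodies; a compilable variant might look like the sketch below. It assumes this allocator exports independent_comalloc() under that name with the documented signature, and the Head/Foot fields are invented for the illustration:

    #include <stdlib.h>
    #include <string.h>

    /* Documented signature; the exported name is assumed from the text above. */
    extern void **independent_comalloc(size_t n_elements, size_t sizes[],
                                       void *chunks[]);

    struct Head { size_t body_len; };      /* made-up fields */
    struct Foot { unsigned checksum; };

    static int send_message(const char *msg)
    {
      size_t msglen = strlen(msg);
      size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
      void *chunks[3];

      if (independent_comalloc(3, sizes, chunks) == 0)
        return -1;                         /* allocation failed */

      struct Head *head = (struct Head *) chunks[0];
      char *body        = (char *) chunks[1];
      struct Foot *foot = (struct Foot *) chunks[2];

      head->body_len = msglen;
      memcpy(body, msg, msglen);
      foot->checksum = 0;

      /* ... transmit, then free each piece individually, as required ... */
      free(foot);
      free(body);
      free(head);
      return 0;
    }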
-*/ -#if __STD_C -Void_t** public_iCOMALLOc(size_t, size_t*, Void_t**); -#else -Void_t** public_iCOMALLOc(); -#endif - - -/* - pvalloc(size_t n); - Equivalent to valloc(minimum-page-that-holds(n)), that is, - round up n to nearest pagesize. - */ -#if __STD_C -Void_t* public_pVALLOc(size_t); -#else -Void_t* public_pVALLOc(); -#endif - -/* - cfree(Void_t* p); - Equivalent to free(p). - - cfree is needed/defined on some systems that pair it with calloc, - for odd historical reasons (such as: cfree is used in example - code in the first edition of K&R). -*/ -#if __STD_C -void public_cFREe(Void_t*); -#else -void public_cFREe(); -#endif - -/* - malloc_trim(size_t pad); - - If possible, gives memory back to the system (via negative - arguments to sbrk) if there is unused memory at the `high' end of - the malloc pool. You can call this after freeing large blocks of - memory to potentially reduce the system-level memory requirements - of a program. However, it cannot guarantee to reduce memory. Under - some allocation patterns, some large free blocks of memory will be - locked between two used chunks, so they cannot be given back to - the system. - - The `pad' argument to malloc_trim represents the amount of free - trailing space to leave untrimmed. If this argument is zero, - only the minimum amount of memory to maintain internal data - structures will be left (one page or less). Non-zero arguments - can be supplied to maintain enough trailing space to service - future expected allocations without having to re-obtain memory - from the system. - - Malloc_trim returns 1 if it actually released any memory, else 0. - On systems that do not support "negative sbrks", it will always - rreturn 0. -*/ -#if __STD_C -int public_mTRIm(size_t); -#else -int public_mTRIm(); -#endif - -/* - malloc_usable_size(Void_t* p); - - Returns the number of bytes you can actually use in - an allocated chunk, which may be more than you requested (although - often not) due to alignment and minimum size constraints. - You can use this many bytes without worrying about - overwriting other allocated objects. This is not a particularly great - programming practice. malloc_usable_size can be more useful in - debugging and assertions, for example: - - p = malloc(n); - assert(malloc_usable_size(p) >= 256); - -*/ -#if __STD_C -size_t public_mUSABLe(Void_t*); -#else -size_t public_mUSABLe(); -#endif - -/* - malloc_stats(); - Prints on stderr the amount of space obtained from the system (both - via sbrk and mmap), the maximum amount (which may be more than - current if malloc_trim and/or munmap got called), and the current - number of bytes allocated via malloc (or realloc, etc) but not yet - freed. Note that this is the number of bytes allocated, not the - number requested. It will be larger than the number requested - because of alignment and bookkeeping overhead. Because it includes - alignment wastage as being in use, this figure may be greater than - zero even when no user-level chunks are allocated. - - The reported current and maximum system memory can be inaccurate if - a program makes other calls to system memory allocation functions - (normally sbrk) outside of malloc. - - malloc_stats prints only the most commonly interesting statistics. - More information can be obtained by calling mallinfo. - -*/ -#if __STD_C -void public_mSTATs(void); -#else -void public_mSTATs(); -#endif - -/* - malloc_get_state(void); - - Returns the state of all malloc variables in an opaque data - structure. 
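A minimal sketch of this snapshot routine paired with malloc_set_state(), which is prototyped just below: the exported names, and the convention that the opaque blob is itself obtained from malloc, are assumptions carried over from this code's glibc lineage, so the snapshot is freed once it has been consumed:

    #include <stdlib.h>

    extern void *malloc_get_state(void);        /* assumed exported names */
    extern int   malloc_set_state(void *state);

    static int snapshot_and_restore(void)
    {
      void *state = malloc_get_state();         /* opaque snapshot */
      if (state == NULL)
        return -1;

      /* ... e.g. dump and later reload the process image here ... */

      int rc = malloc_set_state(state);         /* 0 on success */
      free(state);                              /* snapshot came from malloc */
      return rc;
    }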
-*/ -#if __STD_C -Void_t* public_gET_STATe(void); -#else -Void_t* public_gET_STATe(); -#endif - -/* - malloc_set_state(Void_t* state); - - Restore the state of all malloc variables from data obtained with - malloc_get_state(). -*/ -#if __STD_C -int public_sET_STATe(Void_t*); -#else -int public_sET_STATe(); -#endif - -#ifdef _LIBC -/* - posix_memalign(void **memptr, size_t alignment, size_t size); - - POSIX wrapper like memalign(), checking for validity of size. -*/ -int __posix_memalign(void **, size_t, size_t); -#endif - -/* mallopt tuning options */ - -/* - M_MXFAST is the maximum request size used for "fastbins", special bins - that hold returned chunks without consolidating their spaces. This - enables future requests for chunks of the same size to be handled - very quickly, but can increase fragmentation, and thus increase the - overall memory footprint of a program. - - This malloc manages fastbins very conservatively yet still - efficiently, so fragmentation is rarely a problem for values less - than or equal to the default. The maximum supported value of MXFAST - is 80. You wouldn't want it any higher than this anyway. Fastbins - are designed especially for use with many small structs, objects or - strings -- the default handles structs/objects/arrays with sizes up - to 8 4byte fields, or small strings representing words, tokens, - etc. Using fastbins for larger objects normally worsens - fragmentation without improving speed. - - M_MXFAST is set in REQUEST size units. It is internally used in - chunksize units, which adds padding and alignment. You can reduce - M_MXFAST to 0 to disable all use of fastbins. This causes the malloc - algorithm to be a closer approximation of fifo-best-fit in all cases, - not just for larger requests, but will generally cause it to be - slower. -*/ - - -/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */ -#ifndef M_MXFAST -#define M_MXFAST 1 -#endif - -#ifndef DEFAULT_MXFAST -#define DEFAULT_MXFAST 64 -#endif - - -/* - M_TRIM_THRESHOLD is the maximum amount of unused top-most memory - to keep before releasing via malloc_trim in free(). - - Automatic trimming is mainly useful in long-lived programs. - Because trimming via sbrk can be slow on some systems, and can - sometimes be wasteful (in cases where programs immediately - afterward allocate more large chunks) the value should be high - enough so that your overall system performance would improve by - releasing this much memory. - - The trim threshold and the mmap control parameters (see below) - can be traded off with one another. Trimming and mmapping are - two different ways of releasing unused memory back to the - system. Between these two, it is often possible to keep - system-level demands of a long-lived program down to a bare - minimum. For example, in one test suite of sessions measuring - the XF86 X server on Linux, using a trim threshold of 128K and a - mmap threshold of 192K led to near-minimal long term resource - consumption. - - If you are using this malloc in a long-lived program, it should - pay to experiment with these values. As a rough guide, you - might set to a value close to the average size of a process - (program) running on your system. Releasing this much memory - would allow such a process to run in memory. 
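One hedged way to run such experiments from application code is through the mallopt() interface summarized earlier; the header and the 256 KiB figure below are only examples, assuming a glibc-style <malloc.h> that declares mallopt and M_TRIM_THRESHOLD:

    #include <malloc.h>

    static void tune_trimming(void)
    {
      /* Ask free() to start trimming once 256 KiB of top-most memory is
         unused; mallopt returns 1 if the request was honored, 0 otherwise. */
      if (mallopt(M_TRIM_THRESHOLD, 256 * 1024) == 0) {
        /* value not accepted; defaults remain in effect */
      }
    }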
Generally, it's - worth it to tune for trimming rather tham memory mapping when a - program undergoes phases where several large chunks are - allocated and released in ways that can reuse each other's - storage, perhaps mixed with phases where there are no such - chunks at all. And in well-behaved long-lived programs, - controlling release of large blocks via trimming versus mapping - is usually faster. - - However, in most programs, these parameters serve mainly as - protection against the system-level effects of carrying around - massive amounts of unneeded memory. Since frequent calls to - sbrk, mmap, and munmap otherwise degrade performance, the default - parameters are set to relatively high values that serve only as - safeguards. - - The trim value It must be greater than page size to have any useful - effect. To disable trimming completely, you can set to - (unsigned long)(-1) - - Trim settings interact with fastbin (MXFAST) settings: Unless - TRIM_FASTBINS is defined, automatic trimming never takes place upon - freeing a chunk with size less than or equal to MXFAST. Trimming is - instead delayed until subsequent freeing of larger chunks. However, - you can still force an attempted trim by calling malloc_trim. - - Also, trimming is not generally possible in cases where - the main arena is obtained via mmap. - - Note that the trick some people use of mallocing a huge space and - then freeing it at program startup, in an attempt to reserve system - memory, doesn't have the intended effect under automatic trimming, - since that memory will immediately be returned to the system. -*/ - -#define M_TRIM_THRESHOLD -1 - -#ifndef DEFAULT_TRIM_THRESHOLD -#define DEFAULT_TRIM_THRESHOLD (128 * 1024) -#endif - -/* - M_TOP_PAD is the amount of extra `padding' space to allocate or - retain whenever sbrk is called. It is used in two ways internally: - - * When sbrk is called to extend the top of the arena to satisfy - a new malloc request, this much padding is added to the sbrk - request. - - * When malloc_trim is called automatically from free(), - it is used as the `pad' argument. - - In both cases, the actual amount of padding is rounded - so that the end of the arena is always a system page boundary. - - The main reason for using padding is to avoid calling sbrk so - often. Having even a small pad greatly reduces the likelihood - that nearly every malloc request during program start-up (or - after trimming) will invoke sbrk, which needlessly wastes - time. - - Automatic rounding-up to page-size units is normally sufficient - to avoid measurable overhead, so the default is 0. However, in - systems where sbrk is relatively slow, it can pay to increase - this value, at the expense of carrying around more memory than - the program needs. -*/ - -#define M_TOP_PAD -2 - -#ifndef DEFAULT_TOP_PAD -#define DEFAULT_TOP_PAD (0) -#endif - -/* - M_MMAP_THRESHOLD is the request size threshold for using mmap() - to service a request. Requests of at least this size that cannot - be allocated using already-existing space will be serviced via mmap. - (If enough normal freed space already exists it is used instead.) - - Using mmap segregates relatively large chunks of memory so that - they can be individually obtained and released from the host - system. A request serviced through mmap is never reused by any - other request (at least not directly; the system may just so - happen to remap successive requests to the same locations). - - Segregating space in this way has the benefits that: - - 1. 
Mmapped space can ALWAYS be individually released back - to the system, which helps keep the system level memory - demands of a long-lived program low. - 2. Mapped memory can never become `locked' between - other chunks, as can happen with normally allocated chunks, which - means that even trimming via malloc_trim would not release them. - 3. On some systems with "holes" in address spaces, mmap can obtain - memory that sbrk cannot. - - However, it has the disadvantages that: - - 1. The space cannot be reclaimed, consolidated, and then - used to service later requests, as happens with normal chunks. - 2. It can lead to more wastage because of mmap page alignment - requirements - 3. It causes malloc performance to be more dependent on host - system memory management support routines which may vary in - implementation quality and may impose arbitrary - limitations. Generally, servicing a request via normal - malloc steps is faster than going through a system's mmap. - - The advantages of mmap nearly always outweigh disadvantages for - "large" chunks, but the value of "large" varies across systems. The - default is an empirically derived value that works well in most - systems. -*/ - -#define M_MMAP_THRESHOLD -3 - -#ifndef DEFAULT_MMAP_THRESHOLD -#define DEFAULT_MMAP_THRESHOLD (128 * 1024) -#endif - -/* - M_MMAP_MAX is the maximum number of requests to simultaneously - service using mmap. This parameter exists because - some systems have a limited number of internal tables for - use by mmap, and using more than a few of them may degrade - performance. - - The default is set to a value that serves only as a safeguard. - Setting to 0 disables use of mmap for servicing large requests. If - HAVE_MMAP is not set, the default value is 0, and attempts to set it - to non-zero values in mallopt will fail. -*/ - -#define M_MMAP_MAX -4 - -#ifndef DEFAULT_MMAP_MAX -#if HAVE_MMAP -#define DEFAULT_MMAP_MAX (65536) -#else -#define DEFAULT_MMAP_MAX (0) -#endif -#endif - -#ifdef __cplusplus -} /* end of extern "C" */ -#endif - -#include "malloc.h" - -#ifndef BOUNDED_N -#define BOUNDED_N(ptr, sz) (ptr) -#endif -#ifndef RETURN_ADDRESS -#define RETURN_ADDRESS(X_) (NULL) -#endif - -/* On some platforms we can compile internal, not exported functions better. - Let the environment provide a macro and define it to be empty if it - is not available. */ -#ifndef internal_function -# define internal_function -#endif - -/* Forward declarations. */ -struct malloc_chunk; -typedef struct malloc_chunk* mchunkptr; - -/* Internal routines. 
*/ - -#if __STD_C - -Void_t* _int_malloc(mstate, size_t); -void _int_free(mstate, Void_t*); -Void_t* _int_realloc(mstate, Void_t*, size_t); -Void_t* _int_memalign(mstate, size_t, size_t); -Void_t* _int_valloc(mstate, size_t); -static Void_t* _int_pvalloc(mstate, size_t); -/*static Void_t* cALLOc(size_t, size_t);*/ -static Void_t** _int_icalloc(mstate, size_t, size_t, Void_t**); -static Void_t** _int_icomalloc(mstate, size_t, size_t*, Void_t**); -static int mTRIm(size_t); -static size_t mUSABLe(Void_t*); -static int mALLOPt(int, int); - -static Void_t* internal_function mem2mem_check(Void_t *p, size_t sz); -static int internal_function top_check(void); -static void internal_function munmap_chunk(mchunkptr p); -#if HAVE_MREMAP -static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size); -#endif - -static Void_t* malloc_check(size_t sz, const Void_t *caller); -static void free_check(Void_t* mem, const Void_t *caller); -static Void_t* realloc_check(Void_t* oldmem, size_t bytes, - const Void_t *caller); -static Void_t* memalign_check(size_t alignment, size_t bytes, - const Void_t *caller); -#ifndef NO_THREADS -# if USE_STARTER -static Void_t* malloc_starter(size_t sz, const Void_t *caller); -static Void_t* memalign_starter(size_t aln, size_t sz, const Void_t *caller); -static void free_starter(Void_t* mem, const Void_t *caller); -# endif -static Void_t* malloc_atfork(size_t sz, const Void_t *caller); -static void free_atfork(Void_t* mem, const Void_t *caller); -#endif - -#else - -Void_t* _int_malloc(); -void _int_free(); -Void_t* _int_realloc(); -Void_t* _int_memalign(); -Void_t* _int_valloc(); -Void_t* _int_pvalloc(); -/*static Void_t* cALLOc();*/ -static Void_t** _int_icalloc(); -static Void_t** _int_icomalloc(); -static int mTRIm(); -static size_t mUSABLe(); -static int mALLOPt(); - -#endif - - - - -/* ------------- Optional versions of memcopy ---------------- */ - - -#if USE_MEMCPY - -/* - Note: memcpy is ONLY invoked with non-overlapping regions, - so the (usually slower) memmove is not needed. -*/ - -#define MALLOC_COPY(dest, src, nbytes) memcpy(dest, src, nbytes) -#define MALLOC_ZERO(dest, nbytes) memset(dest, 0, nbytes) - -#else /* !USE_MEMCPY */ - -/* Use Duff's device for good zeroing/copying performance. 
*/ - -#define MALLOC_ZERO(charp, nbytes) \ -do { \ - INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp); \ - unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T); \ - long mcn; \ - if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \ - switch (mctmp) { \ - case 0: for(;;) { *mzp++ = 0; \ - case 7: *mzp++ = 0; \ - case 6: *mzp++ = 0; \ - case 5: *mzp++ = 0; \ - case 4: *mzp++ = 0; \ - case 3: *mzp++ = 0; \ - case 2: *mzp++ = 0; \ - case 1: *mzp++ = 0; if(mcn <= 0) break; mcn--; } \ - } \ -} while(0) - -#define MALLOC_COPY(dest,src,nbytes) \ -do { \ - INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src; \ - INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest; \ - unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T); \ - long mcn; \ - if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \ - switch (mctmp) { \ - case 0: for(;;) { *mcdst++ = *mcsrc++; \ - case 7: *mcdst++ = *mcsrc++; \ - case 6: *mcdst++ = *mcsrc++; \ - case 5: *mcdst++ = *mcsrc++; \ - case 4: *mcdst++ = *mcsrc++; \ - case 3: *mcdst++ = *mcsrc++; \ - case 2: *mcdst++ = *mcsrc++; \ - case 1: *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; } \ - } \ -} while(0) - -#endif - -/* ------------------ MMAP support ------------------ */ - - -#if HAVE_MMAP - -#include -#ifndef LACKS_SYS_MMAN_H -#include -#endif - -#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) -# define MAP_ANONYMOUS MAP_ANON -#endif -#if !defined(MAP_FAILED) -# define MAP_FAILED ((char*)-1) -#endif - -#ifndef MAP_NORESERVE -# ifdef MAP_AUTORESRV -# define MAP_NORESERVE MAP_AUTORESRV -# else -# define MAP_NORESERVE 0 -# endif -#endif - -/* - Nearly all versions of mmap support MAP_ANONYMOUS, - so the following is unlikely to be needed, but is - supplied just in case. -*/ - -#ifndef MAP_ANONYMOUS - -static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ - -#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? \ - (dev_zero_fd = open("/dev/zero", O_RDWR), \ - mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) : \ - mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) - -#else - -#define MMAP(addr, size, prot, flags) \ - (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0)) - -#endif - - -#endif /* HAVE_MMAP */ - - -/* - ----------------------- Chunk representations ----------------------- -*/ - - -/* - This struct declaration is misleading (but accurate and necessary). - It declares a "view" into memory allowing access to necessary - fields at known offsets from a given base. See explanation below. -*/ - -struct malloc_chunk { - - INTERNAL_SIZE_T prev_size; /* Size of previous chunk (if free). */ - INTERNAL_SIZE_T size; /* Size in bytes, including overhead. */ - - struct malloc_chunk* fd; /* double links -- used only if free. */ - struct malloc_chunk* bk; -}; - - -/* - malloc_chunk details: - - (The following includes lightly edited explanations by Colin Plumb.) - - Chunks of memory are maintained using a `boundary tag' method as - described in e.g., Knuth or Standish. (See the paper by Paul - Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a - survey of such techniques.) Sizes of free chunks are stored both - in the front of each chunk and at the end. This makes - consolidating fragmented chunks into bigger chunks very fast. The - size fields also hold bits representing whether chunks are free or - in use. 
- - An allocated chunk looks like this: - - - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of previous chunk, if allocated | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of chunk, in bytes |P| - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | User data starts here... . - . . - . (malloc_usable_space() bytes) . - . | -nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - Where "chunk" is the front of the chunk for the purpose of most of - the malloc code, but "mem" is the pointer that is returned to the - user. "Nextchunk" is the beginning of the next contiguous chunk. - - Chunks always begin on even word boundries, so the mem portion - (which is returned to the user) is also on an even word boundary, and - thus at least double-word aligned. - - Free chunks are stored in circular doubly-linked lists, and look like this: - - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of previous chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `head:' | Size of chunk, in bytes |P| - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Forward pointer to next chunk in list | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Back pointer to previous chunk in list | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Unused space (may be 0 bytes long) . - . . - . | -nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `foot:' | Size of chunk, in bytes | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - The P (PREV_INUSE) bit, stored in the unused low-order bit of the - chunk size (which is always a multiple of two words), is an in-use - bit for the *previous* chunk. If that bit is *clear*, then the - word before the current chunk size contains the previous chunk - size, and can be used to find the front of the previous chunk. - The very first chunk allocated always has this bit set, - preventing access to non-existent (or non-owned) memory. If - prev_inuse is set for any given chunk, then you CANNOT determine - the size of the previous chunk, and might even get a memory - addressing fault when trying to do so. - - Note that the `foot' of the current chunk is actually represented - as the prev_size of the NEXT chunk. This makes it easier to - deal with alignments etc but can be very confusing when trying - to extend or adapt this code. - - The two exceptions to all this are - - 1. The special chunk `top' doesn't bother using the - trailing size field since there is no next contiguous chunk - that would have to index off it. After initialization, `top' - is forced to always exist. If it would become less than - MINSIZE bytes long, it is replenished. - - 2. Chunks allocated via mmap, which have the second-lowest-order - bit (IS_MMAPPED) set in their size fields. Because they are - allocated one-by-one, each must contain its own trailing size field. 
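The arithmetic behind these pictures can be mirrored outside the allocator. The sketch below is purely illustrative: the demo_* names are invented, size_t stands in for INTERNAL_SIZE_T, and only the PREV_INUSE bit is masked, whereas the real macros defined below also strip the mmap/arena bits:

    #include <stddef.h>

    struct demo_chunk {       /* same shape as struct malloc_chunk's size fields */
      size_t prev_size;       /* valid only while the previous chunk is free */
      size_t size;            /* total size including overhead; low bits are flags */
    };

    #define DEMO_PREV_INUSE 0x1UL

    /* "mem", the pointer handed to the user, starts after the two size words. */
    static void *demo_chunk2mem(struct demo_chunk *p)
    {
      return (char *) p + 2 * sizeof(size_t);
    }

    /* The next physical chunk lives at the current address plus the size. */
    static struct demo_chunk *demo_next_chunk(struct demo_chunk *p)
    {
      return (struct demo_chunk *) ((char *) p + (p->size & ~DEMO_PREV_INUSE));
    }

    /* The P bit tells whether the *previous* chunk is in use. */
    static int demo_prev_inuse(struct demo_chunk *p)
    {
      return (int) (p->size & DEMO_PREV_INUSE);
    }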
- -*/ - -/* - ---------- Size and alignment checks and conversions ---------- -*/ - -/* conversion from malloc headers to user pointers, and back */ - -#define chunk2mem(p) ((Void_t*)((char*)(p) + 2*SIZE_SZ)) -#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ)) - -/* The smallest possible chunk */ -#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk)) - -/* The smallest size we can malloc is an aligned minimal chunk */ - -#define MINSIZE \ - (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)) - -/* Check if m has acceptable alignment */ - -#define aligned_OK(m) (((unsigned long)((m)) & (MALLOC_ALIGN_MASK)) == 0) - - -/* - Check if a request is so large that it would wrap around zero when - padded and aligned. To simplify some other code, the bound is made - low enough so that adding MINSIZE will also not wrap around zero. -*/ - -#define REQUEST_OUT_OF_RANGE(req) \ - ((unsigned long)(req) >= \ - (unsigned long)(INTERNAL_SIZE_T)(-2 * MINSIZE)) - -/* pad request bytes into a usable size -- internal version */ - -#define request2size(req) \ - (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \ - MINSIZE : \ - ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK) - -/* Same, except also perform argument check */ - -#define checked_request2size(req, sz) \ - if (REQUEST_OUT_OF_RANGE(req)) { \ - MALLOC_FAILURE_ACTION; \ - return 0; \ - } \ - (sz) = request2size(req); - -/* - --------------- Physical chunk operations --------------- -*/ - - -/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */ -#define PREV_INUSE 0x1 - -/* extract inuse bit of previous chunk */ -#define prev_inuse(p) ((p)->size & PREV_INUSE) - - -/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */ -#define IS_MMAPPED 0x2 - -/* check for mmap()'ed chunk */ -#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED) - - -/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained - from a non-main arena. This is only set immediately before handing - the chunk to the user, if necessary. */ -#define NON_MAIN_ARENA 0x4 - -/* check for chunk from non-main arena */ -#define chunk_non_main_arena(p) ((p)->size & NON_MAIN_ARENA) - - -/* - Bits to mask off when extracting size - - Note: IS_MMAPPED is intentionally not masked off from size field in - macros for which mmapped chunks should never be seen. This should - cause helpful core dumps to occur if it is tried by accident by - people extending or adapting this malloc. -*/ -#define SIZE_BITS (PREV_INUSE|IS_MMAPPED|NON_MAIN_ARENA) - -/* Get size, ignoring use bits */ -#define chunksize(p) ((p)->size & ~(SIZE_BITS)) - - -/* Ptr to next physical malloc_chunk. 
*/ -#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~SIZE_BITS) )) - -/* Ptr to previous physical malloc_chunk */ -#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) )) - -/* Treat space at ptr + offset as a chunk */ -#define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) - -/* extract p's inuse bit */ -#define inuse(p)\ -((((mchunkptr)(((char*)(p))+((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE) - -/* set/clear chunk as being inuse without otherwise disturbing */ -#define set_inuse(p)\ -((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE - -#define clear_inuse(p)\ -((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE) - - -/* check/set/clear inuse bits in known places */ -#define inuse_bit_at_offset(p, s)\ - (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE) - -#define set_inuse_bit_at_offset(p, s)\ - (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE) - -#define clear_inuse_bit_at_offset(p, s)\ - (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE)) - - -/* Set size at head, without disturbing its use bit */ -#define set_head_size(p, s) ((p)->size = (((p)->size & SIZE_BITS) | (s))) - -/* Set size/use field */ -#define set_head(p, s) ((p)->size = (s)) - -/* Set size at footer (only when chunk is not in use) */ -#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s)) - - -/* - -------------------- Internal data structures -------------------- - - All internal state is held in an instance of malloc_state defined - below. There are no other static variables, except in two optional - cases: - * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above. - * If HAVE_MMAP is true, but mmap doesn't support - MAP_ANONYMOUS, a dummy file descriptor for mmap. - - Beware of lots of tricks that minimize the total bookkeeping space - requirements. The result is a little over 1K bytes (for 4byte - pointers and size_t.) -*/ - -/* - Bins - - An array of bin headers for free chunks. Each bin is doubly - linked. The bins are approximately proportionally (log) spaced. - There are a lot of these bins (128). This may look excessive, but - works very well in practice. Most bins hold sizes that are - unusual as malloc request sizes, but are more usual for fragments - and consolidated sets of chunks, which is what these bins hold, so - they can be found quickly. All procedures maintain the invariant - that no consolidated chunk physically borders another one, so each - chunk in a list is known to be preceeded and followed by either - inuse chunks or the ends of memory. - - Chunks in bins are kept in size order, with ties going to the - approximately least recently used chunk. Ordering isn't needed - for the small bins, which all contain the same-sized chunks, but - facilitates best-fit allocation for larger chunks. These lists - are just sequential. Keeping them in order almost never requires - enough traversal to warrant using fancier ordered data - structures. - - Chunks of the same size are linked with the most - recently freed at the front, and allocations are taken from the - back. This results in LRU (FIFO) allocation order, which tends - to give each chunk an equal opportunity to be consolidated with - adjacent freed chunks, resulting in larger free chunks and less - fragmentation. - - To simplify use in double-linked lists, each bin header acts - as a malloc_chunk. This avoids special-casing for headers. 
- But to conserve space and improve locality, we allocate - only the fd/bk pointers of bins, and then use repositioning tricks - to treat these as the fields of a malloc_chunk*. -*/ - -typedef struct malloc_chunk* mbinptr; - -/* addressing -- note that bin_at(0) does not exist */ -#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - (SIZE_SZ<<1))) - -/* analog of ++bin */ -#define next_bin(b) ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1))) - -/* Reminders about list directionality within bins */ -#define first(b) ((b)->fd) -#define last(b) ((b)->bk) - -/* Take a chunk off a bin list */ -#define unlink(P, BK, FD) { \ - FD = P->fd; \ - BK = P->bk; \ - FD->bk = BK; \ - BK->fd = FD; \ -} - -/* - Indexing - - Bins for sizes < 512 bytes contain chunks of all the same size, spaced - 8 bytes apart. Larger bins are approximately logarithmically spaced: - - 64 bins of size 8 - 32 bins of size 64 - 16 bins of size 512 - 8 bins of size 4096 - 4 bins of size 32768 - 2 bins of size 262144 - 1 bin of size what's left - - There is actually a little bit of slop in the numbers in bin_index - for the sake of speed. This makes no difference elsewhere. - - The bins top out around 1MB because we expect to service large - requests via mmap. -*/ - -#define NBINS 128 -#define NSMALLBINS 64 -#define SMALLBIN_WIDTH 8 -#define MIN_LARGE_SIZE 512 - -#define in_smallbin_range(sz) \ - ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE) - -#define smallbin_index(sz) (((unsigned)(sz)) >> 3) - -#define largebin_index(sz) \ -(((((unsigned long)(sz)) >> 6) <= 32)? 56 + (((unsigned long)(sz)) >> 6): \ - ((((unsigned long)(sz)) >> 9) <= 20)? 91 + (((unsigned long)(sz)) >> 9): \ - ((((unsigned long)(sz)) >> 12) <= 10)? 110 + (((unsigned long)(sz)) >> 12): \ - ((((unsigned long)(sz)) >> 15) <= 4)? 119 + (((unsigned long)(sz)) >> 15): \ - ((((unsigned long)(sz)) >> 18) <= 2)? 124 + (((unsigned long)(sz)) >> 18): \ - 126) - -#define bin_index(sz) \ - ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz)) - -/* - FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the - first bin that is maintained in sorted order. This must - be the smallest size corresponding to a given bin. - - Normally, this should be MIN_LARGE_SIZE. But you can weaken - best fit guarantees to sometimes speed up malloc by increasing value. - Doing this means that malloc may choose a chunk that is - non-best-fitting by up to the width of the bin. - - Some useful cutoff values: - 512 - all bins sorted - 2560 - leaves bins <= 64 bytes wide unsorted - 12288 - leaves bins <= 512 bytes wide unsorted - 65536 - leaves bins <= 4096 bytes wide unsorted - 262144 - leaves bins <= 32768 bytes wide unsorted - -1 - no bins sorted (not recommended!) -*/ - -#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE -/* #define FIRST_SORTED_BIN_SIZE 65536 */ - -/* - Unsorted chunks - - All remainders from chunk splits, as well as all returned chunks, - are first placed in the "unsorted" bin. They are then placed - in regular bins after malloc gives them ONE chance to be used before - binning. So, basically, the unsorted_chunks list acts as a queue, - with chunks being placed on it in free (and malloc_consolidate), - and taken off (to be either used or placed in bins) in malloc. - - The NON_MAIN_ARENA flag is never set for unsorted chunks, so it - does not have to be taken into account in size comparisons. -*/ - -/* The otherwise unindexable 1-bin is used to hold unsorted chunks. 
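As a worked check of the indexing formulas above, the following standalone transcription (demo_bin_index is not part of the allocator) computes a few indices; for instance a 48-byte chunk falls in small bin 48>>3 = 6:

    #include <stdio.h>

    static unsigned demo_bin_index(unsigned long sz)
    {
      if (sz < 512)                       /* small bins, 8 bytes apart */
        return (unsigned) (sz >> 3);
      if ((sz >> 6)  <= 32) return (unsigned) (56  + (sz >> 6));
      if ((sz >> 9)  <= 20) return (unsigned) (91  + (sz >> 9));
      if ((sz >> 12) <= 10) return (unsigned) (110 + (sz >> 12));
      if ((sz >> 15) <= 4)  return (unsigned) (119 + (sz >> 15));
      if ((sz >> 18) <= 2)  return (unsigned) (124 + (sz >> 18));
      return 126;
    }

    int main(void)
    {
      printf("48 -> %u, 640 -> %u, 40960 -> %u\n",
             demo_bin_index(48), demo_bin_index(640), demo_bin_index(40960));
      return 0;            /* prints 48 -> 6, 640 -> 66, 40960 -> 120 */
    }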
*/ -#define unsorted_chunks(M) (bin_at(M, 1)) - -/* - Top - - The top-most available chunk (i.e., the one bordering the end of - available memory) is treated specially. It is never included in - any bin, is used only if no other chunk is available, and is - released back to the system if it is very large (see - M_TRIM_THRESHOLD). Because top initially - points to its own bin with initial zero size, thus forcing - extension on the first malloc request, we avoid having any special - code in malloc to check whether it even exists yet. But we still - need to do so when getting memory from system, so we make - initial_top treat the bin as a legal but unusable chunk during the - interval between initialization and the first call to - sYSMALLOc. (This is somewhat delicate, since it relies on - the 2 preceding words to be zero during this interval as well.) -*/ - -/* Conveniently, the unsorted bin can be used as dummy top on first call */ -#define initial_top(M) (unsorted_chunks(M)) - -/* - Binmap - - To help compensate for the large number of bins, a one-level index - structure is used for bin-by-bin searching. `binmap' is a - bitvector recording whether bins are definitely empty so they can - be skipped over during during traversals. The bits are NOT always - cleared as soon as bins are empty, but instead only - when they are noticed to be empty during traversal in malloc. -*/ - -/* Conservatively use 32 bits per map word, even if on 64bit system */ -#define BINMAPSHIFT 5 -#define BITSPERMAP (1U << BINMAPSHIFT) -#define BINMAPSIZE (NBINS / BITSPERMAP) - -#define idx2block(i) ((i) >> BINMAPSHIFT) -#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT)-1)))) - -#define mark_bin(m,i) ((m)->binmap[idx2block(i)] |= idx2bit(i)) -#define unmark_bin(m,i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i))) -#define get_binmap(m,i) ((m)->binmap[idx2block(i)] & idx2bit(i)) - -/* - Fastbins - - An array of lists holding recently freed small chunks. Fastbins - are not doubly linked. It is faster to single-link them, and - since chunks are never removed from the middles of these lists, - double linking is not necessary. Also, unlike regular bins, they - are not even processed in FIFO order (they use faster LIFO) since - ordering doesn't much matter in the transient contexts in which - fastbins are normally used. - - Chunks in fastbins keep their inuse bit set, so they cannot - be consolidated with other free chunks. malloc_consolidate - releases all chunks in fastbins and consolidates them with - other free chunks. -*/ - -typedef struct malloc_chunk* mfastbinptr; - -/* offset 2 to use otherwise unindexable first 2 bins */ -#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2) - -/* The maximum fastbin request size we support */ -#define MAX_FAST_SIZE 80 - -#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE))+1) - -/* - FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free() - that triggers automatic consolidation of possibly-surrounding - fastbin chunks. This is a heuristic, so the exact value should not - matter too much. It is defined at half the default trim threshold as a - compromise heuristic to only attempt consolidation if it is likely - to lead to trimming. However, it is not dynamically tunable, since - consolidation reduces fragmentation surrounding large chunks even - if trimming is not used. -*/ - -#define FASTBIN_CONSOLIDATION_THRESHOLD (65536UL) - -/* - Since the lowest 2 bits in max_fast don't matter in size comparisons, - they are used as flags. 
-*/ - -/* - FASTCHUNKS_BIT held in max_fast indicates that there are probably - some fastbin chunks. It is set true on entering a chunk into any - fastbin, and cleared only in malloc_consolidate. - - The truth value is inverted so that have_fastchunks will be true - upon startup (since statics are zero-filled), simplifying - initialization checks. -*/ - -#define FASTCHUNKS_BIT (1U) - -#define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT) == 0) -#define clear_fastchunks(M) ((M)->max_fast |= FASTCHUNKS_BIT) -#define set_fastchunks(M) ((M)->max_fast &= ~FASTCHUNKS_BIT) - -/* - NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous - regions. Otherwise, contiguity is exploited in merging together, - when possible, results from consecutive MORECORE calls. - - The initial value comes from MORECORE_CONTIGUOUS, but is - changed dynamically if mmap is ever used as an sbrk substitute. -*/ - -#define NONCONTIGUOUS_BIT (2U) - -#define contiguous(M) (((M)->max_fast & NONCONTIGUOUS_BIT) == 0) -#define noncontiguous(M) (((M)->max_fast & NONCONTIGUOUS_BIT) != 0) -#define set_noncontiguous(M) ((M)->max_fast |= NONCONTIGUOUS_BIT) -#define set_contiguous(M) ((M)->max_fast &= ~NONCONTIGUOUS_BIT) - -/* - Set value of max_fast. - Use impossibly small value if 0. - Precondition: there are no existing fastbin chunks. - Setting the value clears fastchunk bit but preserves noncontiguous bit. -*/ - -#define set_max_fast(M, s) \ - (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \ - FASTCHUNKS_BIT | \ - ((M)->max_fast & NONCONTIGUOUS_BIT) - - -/* - ----------- Internal state representation and initialization ----------- -*/ - -struct malloc_state { - /* Serialize access. */ - mutex_t mutex; - - /* Statistics for locking. Only used if THREAD_STATS is defined. */ - long stat_lock_direct, stat_lock_loop, stat_lock_wait; - long pad0_[1]; /* try to give the mutex its own cacheline */ - - /* The maximum chunk size to be eligible for fastbin */ - INTERNAL_SIZE_T max_fast; /* low 2 bits used as flags */ - - /* Fastbins */ - mfastbinptr fastbins[NFASTBINS]; - - /* Base of the topmost chunk -- not otherwise kept in a bin */ - mchunkptr top; - - /* The remainder from the most recent split of a small request */ - mchunkptr last_remainder; - - /* Normal bins packed as described above */ - mchunkptr bins[NBINS * 2]; - - /* Bitmap of bins */ - unsigned int binmap[BINMAPSIZE]; - - /* Linked list */ - struct malloc_state *next; - - /* Memory allocated from the system in this arena. */ - INTERNAL_SIZE_T system_mem; - INTERNAL_SIZE_T max_system_mem; -}; - -struct malloc_par { - /* Tunable parameters */ - unsigned long trim_threshold; - INTERNAL_SIZE_T top_pad; - INTERNAL_SIZE_T mmap_threshold; - - /* Memory map support */ - int n_mmaps; - int n_mmaps_max; - int max_n_mmaps; - - /* Cache malloc_getpagesize */ - unsigned int pagesize; - - /* Statistics */ - INTERNAL_SIZE_T mmapped_mem; - /*INTERNAL_SIZE_T sbrked_mem;*/ - /*INTERNAL_SIZE_T max_sbrked_mem;*/ - INTERNAL_SIZE_T max_mmapped_mem; - INTERNAL_SIZE_T max_total_mem; /* only kept for NO_THREADS */ - - /* First address handed out by MORECORE/sbrk. */ - char* sbrk_base; -}; - -/* There are several instances of this struct ("arenas") in this - malloc. If you are adapting this malloc in a way that does NOT use - a static or mmapped malloc_state, you MUST explicitly zero-fill it - before using. This malloc relies on the property that malloc_state - is initialized to all zeroes (as is true of C statics). 
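A small standalone illustration of why zero-filling works out for these flags (the DEMO_* constants simply restate FASTCHUNKS_BIT and NONCONTIGUOUS_BIT; the chunk-size rounding performed by set_max_fast is elided):

    #include <stdio.h>

    #define DEMO_FASTCHUNKS_BIT    1UL
    #define DEMO_NONCONTIGUOUS_BIT 2UL

    int main(void)
    {
      unsigned long max_fast = 0;    /* a freshly zeroed arena */

      /* Both predicates test for a *clear* bit, so they are true at startup. */
      printf("have_fastchunks=%d contiguous=%d\n",
             (max_fast & DEMO_FASTCHUNKS_BIT) == 0,
             (max_fast & DEMO_NONCONTIGUOUS_BIT) == 0);

      /* set_max_fast(av, 64): install the limit, set FASTCHUNKS_BIT (meaning
         "no fastbin chunks yet"), and preserve the contiguity flag. */
      max_fast = 64 | DEMO_FASTCHUNKS_BIT | (max_fast & DEMO_NONCONTIGUOUS_BIT);

      printf("after set_max_fast: have_fastchunks=%d\n",
             (max_fast & DEMO_FASTCHUNKS_BIT) == 0);
      return 0;
    }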
*/ - -static struct malloc_state main_arena; - -/* There is only one instance of the malloc parameters. */ - -static struct malloc_par mp_; - -/* - Initialize a malloc_state struct. - - This is called only from within malloc_consolidate, which needs - be called in the same contexts anyway. It is never called directly - outside of malloc_consolidate because some optimizing compilers try - to inline it at all call points, which turns out not to be an - optimization at all. (Inlining it in malloc_consolidate is fine though.) -*/ - -#if __STD_C -static void malloc_init_state(mstate av) -#else -static void malloc_init_state(av) mstate av; -#endif -{ - int i; - mbinptr bin; - - /* Establish circular links for normal bins */ - for (i = 1; i < NBINS; ++i) { - bin = bin_at(av,i); - bin->fd = bin->bk = bin; - } - -#if MORECORE_CONTIGUOUS - if (av != &main_arena) -#endif - set_noncontiguous(av); - - set_max_fast(av, DEFAULT_MXFAST); - - av->top = initial_top(av); -} - -/* - Other internal utilities operating on mstates -*/ - -#if __STD_C -static Void_t* sYSMALLOc(INTERNAL_SIZE_T, mstate); -#ifndef MORECORE_CANNOT_TRIM -static int sYSTRIm(size_t, mstate); -#endif -static void malloc_consolidate(mstate); -static Void_t** iALLOc(mstate, size_t, size_t*, int, Void_t**); -#else -static Void_t* sYSMALLOc(); -#ifndef MORECORE_CANNOT_TRIM -static int sYSTRIm(); -#endif -static void malloc_consolidate(); -static Void_t** iALLOc(); -#endif - - -/* -------------- Early definitions for debugging hooks ---------------- */ - -/* Define and initialize the hook variables. These weak definitions must - appear before any use of the variables in a function (arena.c uses one). */ -#ifndef weak_variable -#ifndef _LIBC -#define weak_variable /**/ -#else -/* In GNU libc we want the hook variables to be weak definitions to - avoid a problem with Emacs. */ -#define weak_variable weak_function -#endif -#endif - -/* OMPI change: these aren't used */ -#if 0 -#if !(USE_STARTER & 2) -# define free_hook_ini NULL -/* Forward declarations. 
*/ -static Void_t* malloc_hook_ini __MALLOC_P ((size_t sz, - const __malloc_ptr_t caller)); -static Void_t* realloc_hook_ini __MALLOC_P ((Void_t* ptr, size_t sz, - const __malloc_ptr_t caller)); -static Void_t* memalign_hook_ini __MALLOC_P ((size_t alignment, size_t sz, - const __malloc_ptr_t caller)); -#else -# define free_hook_ini free_starter -# define malloc_hook_ini malloc_starter -# define realloc_hook_ini NULL -# define memalign_hook_ini memalign_starter -#endif -#endif - -/* OMPI change: we don't want any of these -- we want to use the - underlying allocator's symbols */ -#if 0 -void weak_variable (*__malloc_initialize_hook) __MALLOC_PMT ((void)) = NULL; -void weak_variable (*__free_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr, - const __malloc_ptr_t)) - = free_hook_ini; -__malloc_ptr_t weak_variable (*__malloc_hook) - __MALLOC_PMT ((size_t __size, const __malloc_ptr_t)) = malloc_hook_ini; -__malloc_ptr_t weak_variable (*__realloc_hook) - __MALLOC_PMT ((__malloc_ptr_t __ptr, size_t __size, const __malloc_ptr_t)) - = realloc_hook_ini; -__malloc_ptr_t weak_variable (*__memalign_hook) - __MALLOC_PMT ((size_t __alignment, size_t __size, const __malloc_ptr_t)) - = memalign_hook_ini; -void weak_variable (*__after_morecore_hook) __MALLOC_P ((void)) = NULL; -#endif - - -/* ------------------- Support for multiple arenas -------------------- */ -#include "arena.c" - -/* - Debugging support - - These routines make a number of assertions about the states - of data structures that should be true at all times. If any - are not true, it's very likely that a user program has somehow - trashed memory. (It's also possible that there is a coding error - in malloc. In which case, please report it!) -*/ - -#if ! MALLOC_DEBUG - -#define check_chunk(A,P) -#define check_free_chunk(A,P) -#define check_inuse_chunk(A,P) -#define check_remalloced_chunk(A,P,N) -#define check_malloced_chunk(A,P,N) -#define check_malloc_state(A) - -#else - -#define check_chunk(A,P) do_check_chunk(A,P) -#define check_free_chunk(A,P) do_check_free_chunk(A,P) -#define check_inuse_chunk(A,P) do_check_inuse_chunk(A,P) -#define check_remalloced_chunk(A,P,N) do_check_remalloced_chunk(A,P,N) -#define check_malloced_chunk(A,P,N) do_check_malloced_chunk(A,P,N) -#define check_malloc_state(A) do_check_malloc_state(A) - -/* - Properties of all chunks -*/ - -#if __STD_C -static void do_check_chunk(mstate av, mchunkptr p) -#else -static void do_check_chunk(av, p) mstate av; mchunkptr p; -#endif -{ - unsigned long sz = chunksize(p); - /* min and max possible addresses assuming contiguous allocation */ - char* max_address = (char*)(av->top) + chunksize(av->top); - char* min_address = max_address - av->system_mem; - - if (!chunk_is_mmapped(p)) { - - /* Has legal address ... 
*/ - if (p != av->top) { - if (contiguous(av)) { - assert(((char*)p) >= min_address); - assert(((char*)p + sz) <= ((char*)(av->top))); - } - } - else { - /* top size is always at least MINSIZE */ - assert((unsigned long)(sz) >= MINSIZE); - /* top predecessor always marked inuse */ - assert(prev_inuse(p)); - } - - } - else { -#if HAVE_MMAP - /* address is outside main heap */ - if (contiguous(av) && av->top != initial_top(av)) { - assert(((char*)p) < min_address || ((char*)p) > max_address); - } - /* chunk is page-aligned */ - assert(((p->prev_size + sz) & (mp_.pagesize-1)) == 0); - /* mem is aligned */ - assert(aligned_OK(chunk2mem(p))); -#else - /* force an appropriate assert violation if debug set */ - assert(!chunk_is_mmapped(p)); -#endif - } -} - -/* - Properties of free chunks -*/ - -#if __STD_C -static void do_check_free_chunk(mstate av, mchunkptr p) -#else -static void do_check_free_chunk(av, p) mstate av; mchunkptr p; -#endif -{ - INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE|NON_MAIN_ARENA); - mchunkptr next = chunk_at_offset(p, sz); - - do_check_chunk(av, p); - - /* Chunk must claim to be free ... */ - assert(!inuse(p)); - assert (!chunk_is_mmapped(p)); - - /* Unless a special marker, must have OK fields */ - if ((unsigned long)(sz) >= MINSIZE) - { - assert((sz & MALLOC_ALIGN_MASK) == 0); - assert(aligned_OK(chunk2mem(p))); - /* ... matching footer field */ - assert(next->prev_size == sz); - /* ... and is fully consolidated */ - assert(prev_inuse(p)); - assert (next == av->top || inuse(next)); - - /* ... and has minimally sane links */ - assert(p->fd->bk == p); - assert(p->bk->fd == p); - } - else /* markers are always of size SIZE_SZ */ - assert(sz == SIZE_SZ); -} - -/* - Properties of inuse chunks -*/ - -#if __STD_C -static void do_check_inuse_chunk(mstate av, mchunkptr p) -#else -static void do_check_inuse_chunk(av, p) mstate av; mchunkptr p; -#endif -{ - mchunkptr next; - - do_check_chunk(av, p); - - if (chunk_is_mmapped(p)) - return; /* mmapped chunks have no next/prev */ - - /* Check whether it claims to be in use ... */ - assert(inuse(p)); - - next = next_chunk(p); - - /* ... and is surrounded by OK chunks. - Since more things can be checked with free chunks than inuse ones, - if an inuse chunk borders them and debug is on, it's worth doing them. - */ - if (!prev_inuse(p)) { - /* Note that we cannot even look at prev unless it is not inuse */ - mchunkptr prv = prev_chunk(p); - assert(next_chunk(prv) == p); - do_check_free_chunk(av, prv); - } - - if (next == av->top) { - assert(prev_inuse(next)); - assert(chunksize(next) >= MINSIZE); - } - else if (!inuse(next)) - do_check_free_chunk(av, next); -} - -/* - Properties of chunks recycled from fastbins -*/ - -#if __STD_C -static void do_check_remalloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s) -#else -static void do_check_remalloced_chunk(av, p, s) -mstate av; mchunkptr p; INTERNAL_SIZE_T s; -#endif -{ - INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE|NON_MAIN_ARENA); - - if (!chunk_is_mmapped(p)) { - assert(av == arena_for_chunk(p)); - if (chunk_non_main_arena(p)) - assert(av != &main_arena); - else - assert(av == &main_arena); - } - - do_check_inuse_chunk(av, p); - - /* Legal size ... */ - assert((sz & MALLOC_ALIGN_MASK) == 0); - assert((unsigned long)(sz) >= MINSIZE); - /* ... 
and alignment */ - assert(aligned_OK(chunk2mem(p))); - /* chunk is less than MINSIZE more than request */ - assert((long)(sz) - (long)(s) >= 0); - assert((long)(sz) - (long)(s + MINSIZE) < 0); -} - -/* - Properties of nonrecycled chunks at the point they are malloced -*/ - -#if __STD_C -static void do_check_malloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s) -#else -static void do_check_malloced_chunk(av, p, s) -mstate av; mchunkptr p; INTERNAL_SIZE_T s; -#endif -{ - /* same as recycled case ... */ - do_check_remalloced_chunk(av, p, s); - - /* - ... plus, must obey implementation invariant that prev_inuse is - always true of any allocated chunk; i.e., that each allocated - chunk borders either a previously allocated and still in-use - chunk, or the base of its memory arena. This is ensured - by making all allocations from the the `lowest' part of any found - chunk. This does not necessarily hold however for chunks - recycled via fastbins. - */ - - assert(prev_inuse(p)); -} - - -/* - Properties of malloc_state. - - This may be useful for debugging malloc, as well as detecting user - programmer errors that somehow write into malloc_state. - - If you are extending or experimenting with this malloc, you can - probably figure out how to hack this routine to print out or - display chunk addresses, sizes, bins, and other instrumentation. -*/ - -static void do_check_malloc_state(mstate av) -{ - int i; - mchunkptr p; - mchunkptr q; - mbinptr b; - unsigned int binbit; - int empty; - unsigned int idx; - INTERNAL_SIZE_T size; - unsigned long total = 0; - int max_fast_bin; - - /* internal size_t must be no wider than pointer type */ - assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char*)); - - /* alignment is a power of 2 */ - assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-1)) == 0); - - /* cannot run remaining checks until fully initialized */ - if (av->top == 0 || av->top == initial_top(av)) - return; - - /* pagesize is a power of 2 */ - assert((mp_.pagesize & (mp_.pagesize-1)) == 0); - - /* A contiguous main_arena is consistent with sbrk_base. 
*/ - if (av == &main_arena && contiguous(av)) - assert((char*)mp_.sbrk_base + av->system_mem == - (char*)av->top + chunksize(av->top)); - - /* properties of fastbins */ - - /* max_fast is in allowed range */ - assert((av->max_fast & ~1) <= request2size(MAX_FAST_SIZE)); - - max_fast_bin = fastbin_index(av->max_fast); - - for (i = 0; i < NFASTBINS; ++i) { - p = av->fastbins[i]; - - /* all bins past max_fast are empty */ - if (i > max_fast_bin) - assert(p == 0); - - while (p != 0) { - /* each chunk claims to be inuse */ - do_check_inuse_chunk(av, p); - total += chunksize(p); - /* chunk belongs in this bin */ - assert(fastbin_index(chunksize(p)) == i); - p = p->fd; - } - } - - if (total != 0) - assert(have_fastchunks(av)); - else if (!have_fastchunks(av)) - assert(total == 0); - - /* check normal bins */ - for (i = 1; i < NBINS; ++i) { - b = bin_at(av,i); - - /* binmap is accurate (except for bin 1 == unsorted_chunks) */ - if (i >= 2) { - binbit = get_binmap(av,i); - empty = last(b) == b; - if (!binbit) - assert(empty); - else if (!empty) - assert(binbit); - } - - for (p = last(b); p != b; p = p->bk) { - /* each chunk claims to be free */ - do_check_free_chunk(av, p); - size = chunksize(p); - total += size; - if (i >= 2) { - /* chunk belongs in bin */ - idx = bin_index(size); - assert(idx == i); - /* lists are sorted */ - if ((unsigned long) size >= (unsigned long)(FIRST_SORTED_BIN_SIZE)) { - assert(p->bk == b || - (unsigned long)chunksize(p->bk) >= - (unsigned long)chunksize(p)); - } - } - /* chunk is followed by a legal chain of inuse chunks */ - for (q = next_chunk(p); - (q != av->top && inuse(q) && - (unsigned long)(chunksize(q)) >= MINSIZE); - q = next_chunk(q)) - do_check_inuse_chunk(av, q); - } - } - - /* top chunk is OK */ - check_chunk(av, av->top); - - /* sanity checks for statistics */ - -#ifdef NO_THREADS - assert(total <= (unsigned long)(mp_.max_total_mem)); - assert(mp_.n_mmaps >= 0); -#endif - assert(mp_.n_mmaps <= mp_.n_mmaps_max); - assert(mp_.n_mmaps <= mp_.max_n_mmaps); - - assert((unsigned long)(av->system_mem) <= - (unsigned long)(av->max_system_mem)); - - assert((unsigned long)(mp_.mmapped_mem) <= - (unsigned long)(mp_.max_mmapped_mem)); - -#ifdef NO_THREADS - assert((unsigned long)(mp_.max_total_mem) >= - (unsigned long)(mp_.mmapped_mem) + (unsigned long)(av->system_mem)); -#endif -} -#endif - - -/* ----------------- Support for debugging hooks -------------------- */ -#include "hooks.c" - -/* ----------- Routines dealing with system allocation -------------- */ - -/* - sysmalloc handles malloc cases requiring more memory from the system. - On entry, it is assumed that av->top does not have enough - space to service request for nb bytes, thus requiring that av->top - be extended or replaced. 
-*/ - -#if __STD_C -static Void_t* sYSMALLOc(INTERNAL_SIZE_T nb, mstate av) -#else -static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av; -#endif -{ - mchunkptr old_top; /* incoming value of av->top */ - INTERNAL_SIZE_T old_size; /* its size */ - char* old_end; /* its end address */ - - long size; /* arg to first MORECORE or mmap call */ - char* brk; /* return value from MORECORE */ - - long correction; /* arg to 2nd MORECORE call */ - char* snd_brk; /* 2nd return val */ - - INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */ - INTERNAL_SIZE_T end_misalign; /* partial page left at end of new space */ - char* aligned_brk; /* aligned offset into brk */ - - mchunkptr p; /* the allocated/returned chunk */ - mchunkptr remainder; /* remainder from allocation */ - unsigned long remainder_size; /* its size */ - - unsigned long sum; /* for updating stats */ - - size_t pagemask = mp_.pagesize - 1; - - -#if HAVE_MMAP - - /* - If have mmap, and the request size meets the mmap threshold, and - the system supports mmap, and there are few enough currently - allocated mmapped regions, try to directly map this request - rather than expanding top. - */ - - if ((unsigned long)(nb) >= (unsigned long)(mp_.mmap_threshold) && - (mp_.n_mmaps < mp_.n_mmaps_max)) { - - char* mm; /* return value from mmap call*/ - - /* - Round up size to nearest page. For mmapped chunks, the overhead - is one SIZE_SZ unit larger than for normal chunks, because there - is no following chunk whose prev_size field could be used. - */ - size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask; - - /* Don't try if size wraps around 0 */ - if ((unsigned long)(size) > (unsigned long)(nb)) { - - mm = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE)); - - if (mm != MAP_FAILED) { - - /* - The offset to the start of the mmapped region is stored - in the prev_size field of the chunk. This allows us to adjust - returned start address to meet alignment requirements here - and in memalign(), and still be able to compute proper - address argument for later munmap in free() and realloc(). - */ - - front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK; - if (front_misalign > 0) { - correction = MALLOC_ALIGNMENT - front_misalign; - p = (mchunkptr)(mm + correction); - p->prev_size = correction; - set_head(p, (size - correction) |IS_MMAPPED); - } - else { - p = (mchunkptr)mm; - set_head(p, size|IS_MMAPPED); - } - - /* update statistics */ - - if (++mp_.n_mmaps > mp_.max_n_mmaps) - mp_.max_n_mmaps = mp_.n_mmaps; - - sum = mp_.mmapped_mem += size; - if (sum > (unsigned long)(mp_.max_mmapped_mem)) - mp_.max_mmapped_mem = sum; -#ifdef NO_THREADS - sum += av->system_mem; - if (sum > (unsigned long)(mp_.max_total_mem)) - mp_.max_total_mem = sum; -#endif - - check_chunk(av, p); - - return chunk2mem(p); - } - } - } -#endif - - /* Record incoming configuration of top */ - - old_top = av->top; - old_size = chunksize(old_top); - old_end = (char*)(chunk_at_offset(old_top, old_size)); - - brk = snd_brk = (char*)(MORECORE_FAILURE); - - /* - If not the first time through, we require old_size to be - at least MINSIZE and to have prev_inuse set. 
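As a rough illustration of the arithmetic in the mmap path above (rounding the padded request up to whole pages and recording any front alignment correction in the chunk's prev_size field), here is a small standalone sketch; the constants are assumed stand-ins, not ptmalloc2's real configuration:

    /* Standalone sketch only -- not part of the ptmalloc2 source removed
       in this diff.  PAGE_SIZE/SIZE_SZ/MALLOC_ALIGNMENT are stand-ins. */
    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    #define PAGE_SIZE        4096u
    #define SIZE_SZ          sizeof(size_t)
    #define MALLOC_ALIGNMENT (2 * SIZE_SZ)
    #define ALIGN_MASK       (MALLOC_ALIGNMENT - 1)

    int main(void)
    {
        size_t nb = 200 * 1024 + 24;          /* some padded request size */
        size_t pagemask = PAGE_SIZE - 1;

        /* Round up to whole pages, with one extra SIZE_SZ of overhead,
           as the deleted code does for directly mmapped chunks. */
        size_t size = (nb + SIZE_SZ + ALIGN_MASK + pagemask) & ~pagemask;
        assert(size % PAGE_SIZE == 0 && size >= nb);

        /* If the user pointer (chunk address + 2*SIZE_SZ) would be
           misaligned, skip forward and remember the skipped bytes so the
           original mapping base can be recovered at munmap time.  With a
           page-aligned mapping and this alignment the correction is zero. */
        size_t mm = 0x1000u;                   /* pretend mmap() result */
        size_t front_misalign = (mm + 2 * SIZE_SZ) & ALIGN_MASK;
        size_t correction =
            front_misalign ? MALLOC_ALIGNMENT - front_misalign : 0;

        printf("rounded size %zu, front correction %zu\n", size, correction);
        return 0;
    }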
- */ - - assert((old_top == initial_top(av) && old_size == 0) || - ((unsigned long) (old_size) >= MINSIZE && - prev_inuse(old_top) && - ((unsigned long)old_end & pagemask) == 0)); - - /* Precondition: not enough current space to satisfy nb request */ - assert((unsigned long)(old_size) < (unsigned long)(nb + MINSIZE)); - - /* Precondition: all fastbins are consolidated */ - assert(!have_fastchunks(av)); - - - if (av != &main_arena) { - - heap_info *old_heap, *heap; - size_t old_heap_size; - - /* First try to extend the current heap. */ - old_heap = heap_for_ptr(old_top); - old_heap_size = old_heap->size; - if (grow_heap(old_heap, MINSIZE + nb - old_size) == 0) { - av->system_mem += old_heap->size - old_heap_size; - arena_mem += old_heap->size - old_heap_size; -#if 0 - if(mmapped_mem + arena_mem + sbrked_mem > max_total_mem) - max_total_mem = mmapped_mem + arena_mem + sbrked_mem; -#endif - set_head(old_top, (((char *)old_heap + old_heap->size) - (char *)old_top) - | PREV_INUSE); - } - else if ((heap = new_heap(nb + (MINSIZE + sizeof(*heap)), mp_.top_pad))) { - /* Use a newly allocated heap. */ - heap->ar_ptr = av; - heap->prev = old_heap; - av->system_mem += heap->size; - arena_mem += heap->size; -#if 0 - if((unsigned long)(mmapped_mem + arena_mem + sbrked_mem) > max_total_mem) - max_total_mem = mmapped_mem + arena_mem + sbrked_mem; -#endif - /* Set up the new top. */ - top(av) = chunk_at_offset(heap, sizeof(*heap)); - set_head(top(av), (heap->size - sizeof(*heap)) | PREV_INUSE); - - /* Setup fencepost and free the old top chunk. */ - /* The fencepost takes at least MINSIZE bytes, because it might - become the top chunk again later. Note that a footer is set - up, too, although the chunk is marked in use. */ - old_size -= MINSIZE; - set_head(chunk_at_offset(old_top, old_size + 2*SIZE_SZ), 0|PREV_INUSE); - if (old_size >= MINSIZE) { - set_head(chunk_at_offset(old_top, old_size), (2*SIZE_SZ)|PREV_INUSE); - set_foot(chunk_at_offset(old_top, old_size), (2*SIZE_SZ)); - set_head(old_top, old_size|PREV_INUSE|NON_MAIN_ARENA); - _int_free(av, chunk2mem(old_top)); - } else { - set_head(old_top, (old_size + 2*SIZE_SZ)|PREV_INUSE); - set_foot(old_top, (old_size + 2*SIZE_SZ)); - } - } - - } else { /* av == main_arena */ - - - /* Request enough space for nb + pad + overhead */ - - size = nb + mp_.top_pad + MINSIZE; - - /* - If contiguous, we can subtract out existing space that we hope to - combine with new space. We add it back later only if - we don't actually get contiguous space. - */ - - if (contiguous(av)) - size -= old_size; - - /* - Round to a multiple of page size. - If MORECORE is not contiguous, this ensures that we only call it - with whole-page arguments. And if MORECORE is contiguous and - this is not first time through, this preserves page-alignment of - previous calls. Otherwise, we correct to page-align below. - */ - - size = (size + pagemask) & ~pagemask; - - /* - Don't try to call MORECORE if argument is so big as to appear - negative. Note that since mmap takes size_t arg, it may succeed - below even if we cannot call MORECORE. - */ - - if (size > 0) - brk = (char*)(MORECORE(size)); - - if (brk != (char*)(MORECORE_FAILURE)) { - /* Call the `morecore' hook if necessary. */ - if (__after_morecore_hook) - (*__after_morecore_hook) (); - } else { - /* - If have mmap, try using it as a backup when MORECORE fails or - cannot be used. This is worth doing on systems that have "holes" in - address space, so sbrk cannot extend to give contiguous space, but - space is available elsewhere. 
Note that we ignore mmap max count - and threshold limits, since the space will not be used as a - segregated mmap region. - */ - -#if HAVE_MMAP - /* Cannot merge with old top, so add its size back in */ - if (contiguous(av)) - size = (size + old_size + pagemask) & ~pagemask; - - /* If we are relying on mmap as backup, then use larger units */ - if ((unsigned long)(size) < (unsigned long)(MMAP_AS_MORECORE_SIZE)) - size = MMAP_AS_MORECORE_SIZE; - - /* Don't try if size wraps around 0 */ - if ((unsigned long)(size) > (unsigned long)(nb)) { - - char *mbrk = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE)); - - if (mbrk != MAP_FAILED) { - - /* We do not need, and cannot use, another sbrk call to find end */ - brk = mbrk; - snd_brk = brk + size; - - /* - Record that we no longer have a contiguous sbrk region. - After the first time mmap is used as backup, we do not - ever rely on contiguous space since this could incorrectly - bridge regions. - */ - set_noncontiguous(av); - } - } -#endif - } - - if (brk != (char*)(MORECORE_FAILURE)) { - if (mp_.sbrk_base == 0) - mp_.sbrk_base = brk; - av->system_mem += size; - - /* - If MORECORE extends previous space, we can likewise extend top size. - */ - - if (brk == old_end && snd_brk == (char*)(MORECORE_FAILURE)) - set_head(old_top, (size + old_size) | PREV_INUSE); - - else if (contiguous(av) && old_size && brk < old_end) { - /* Oops! Someone else killed our space.. Can't touch anything. */ - assert(0); - } - - /* - Otherwise, make adjustments: - - * If the first time through or noncontiguous, we need to call sbrk - just to find out where the end of memory lies. - - * We need to ensure that all returned chunks from malloc will meet - MALLOC_ALIGNMENT - - * If there was an intervening foreign sbrk, we need to adjust sbrk - request size to account for fact that we will not be able to - combine new space with existing space in old_top. - - * Almost all systems internally allocate whole pages at a time, in - which case we might as well use the whole last page of request. - So we allocate enough more memory to hit a page boundary now, - which in turn causes future contiguous calls to page-align. - */ - - else { - front_misalign = 0; - end_misalign = 0; - correction = 0; - aligned_brk = brk; - - /* handle contiguous cases */ - if (contiguous(av)) { - - /* Count foreign sbrk as system_mem. */ - if (old_size) - av->system_mem += brk - old_end; - - /* Guarantee alignment of first new chunk made from this space */ - - front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK; - if (front_misalign > 0) { - - /* - Skip over some bytes to arrive at an aligned position. - We don't need to specially mark these wasted front bytes. - They will never be accessed anyway because - prev_inuse of av->top (and any chunk created from its start) - is always true after initialization. - */ - - correction = MALLOC_ALIGNMENT - front_misalign; - aligned_brk += correction; - } - - /* - If this isn't adjacent to existing space, then we will not - be able to merge with old_top space, so must add to 2nd request. - */ - - correction += old_size; - - /* Extend the end address to hit a page boundary */ - end_misalign = (INTERNAL_SIZE_T)(brk + size + correction); - correction += ((end_misalign + pagemask) & ~pagemask) - end_misalign; - - assert(correction >= 0); - snd_brk = (char*)(MORECORE(correction)); - - /* - If can't allocate correction, try to at least find out current - brk. It might be enough to proceed without failing. 
- - Note that if second sbrk did NOT fail, we assume that space - is contiguous with first sbrk. This is a safe assumption unless - program is multithreaded but doesn't use locks and a foreign sbrk - occurred between our first and second calls. - */ - - if (snd_brk == (char*)(MORECORE_FAILURE)) { - correction = 0; - snd_brk = (char*)(MORECORE(0)); - } else - /* Call the `morecore' hook if necessary. */ - if (__after_morecore_hook) - (*__after_morecore_hook) (); - } - - /* handle non-contiguous cases */ - else { - /* MORECORE/mmap must correctly align */ - assert(((unsigned long)chunk2mem(brk) & MALLOC_ALIGN_MASK) == 0); - - /* Find out current end of memory */ - if (snd_brk == (char*)(MORECORE_FAILURE)) { - snd_brk = (char*)(MORECORE(0)); - } - } - - /* Adjust top based on results of second sbrk */ - if (snd_brk != (char*)(MORECORE_FAILURE)) { - av->top = (mchunkptr)aligned_brk; - set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE); - av->system_mem += correction; - - /* - If not the first time through, we either have a - gap due to foreign sbrk or a non-contiguous region. Insert a - double fencepost at old_top to prevent consolidation with space - we don't own. These fenceposts are artificial chunks that are - marked as inuse and are in any case too small to use. We need - two to make sizes and alignments work out. - */ - - if (old_size != 0) { - /* - Shrink old_top to insert fenceposts, keeping size a - multiple of MALLOC_ALIGNMENT. We know there is at least - enough space in old_top to do this. - */ - old_size = (old_size - 4*SIZE_SZ) & ~MALLOC_ALIGN_MASK; - set_head(old_top, old_size | PREV_INUSE); - - /* - Note that the following assignments completely overwrite - old_top when old_size was previously MINSIZE. This is - intentional. We need the fencepost, even if old_top otherwise gets - lost. - */ - chunk_at_offset(old_top, old_size )->size = - (2*SIZE_SZ)|PREV_INUSE; - - chunk_at_offset(old_top, old_size + 2*SIZE_SZ)->size = - (2*SIZE_SZ)|PREV_INUSE; - - /* If possible, release the rest. */ - if (old_size >= MINSIZE) { - _int_free(av, chunk2mem(old_top)); - } - - } - } - } - - /* Update statistics */ -#ifdef NO_THREADS - sum = av->system_mem + mp_.mmapped_mem; - if (sum > (unsigned long)(mp_.max_total_mem)) - mp_.max_total_mem = sum; -#endif - - } - - } /* if (av != &main_arena) */ - - if ((unsigned long)av->system_mem > (unsigned long)(av->max_system_mem)) - av->max_system_mem = av->system_mem; - check_malloc_state(av); - - /* finally, do the allocation */ - p = av->top; - size = chunksize(p); - - /* check that one of the above allocation paths succeeded */ - if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) { - remainder_size = size - nb; - remainder = chunk_at_offset(p, nb); - av->top = remainder; - set_head(p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE); - check_malloced_chunk(av, p, nb); - return chunk2mem(p); - } - - /* catch all failure paths */ - MALLOC_FAILURE_ACTION; - return 0; -} - - -#ifndef MORECORE_CANNOT_TRIM -/* - sYSTRIm is an inverse of sorts to sYSMALLOc. It gives memory back - to the system (via negative arguments to sbrk) if there is unused - memory at the `high' end of the malloc pool. It is called - automatically by free() when top space exceeds the trim - threshold. It is also called by the public malloc_trim routine. It - returns 1 if it actually released any memory, else 0. 
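The sYSTRIm body that follows releases memory in whole-page units; as a worked example of its sizing computation, under an assumed 4 KiB page and a stand-in MINSIZE:

    /* Standalone sketch only -- not part of the ptmalloc2 source removed
       in this diff.  Page size and MINSIZE are stand-in values. */
    #include <stdio.h>

    int main(void)
    {
        long pagesz   = 4096;                /* assumed page size */
        long minsize  = 32;                  /* stand-in for MINSIZE */
        long pad      = 0;                   /* malloc_trim() pad argument */
        long top_size = 3 * 4096 + 100;      /* pretend size of av->top */

        /* Same shape as the deleted computation: how many whole pages can
           be handed back while still leaving more than pad + MINSIZE
           bytes in the top chunk. */
        long extra =
            ((top_size - pad - minsize + (pagesz - 1)) / pagesz - 1) * pagesz;

        if (extra > 0)
            printf("top %ld bytes -> release %ld bytes (%ld pages)\n",
                   top_size, extra, extra / pagesz);
        else
            printf("top %ld bytes -> nothing to release\n", top_size);
        return 0;
    }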
-*/ - -#if __STD_C -static int sYSTRIm(size_t pad, mstate av) -#else -static int sYSTRIm(pad, av) size_t pad; mstate av; -#endif -{ - long top_size; /* Amount of top-most memory */ - long extra; /* Amount to release */ - long released; /* Amount actually released */ - char* current_brk; /* address returned by pre-check sbrk call */ - char* new_brk; /* address returned by post-check sbrk call */ - size_t pagesz; - - pagesz = mp_.pagesize; - top_size = chunksize(av->top); - - /* Release in pagesize units, keeping at least one page */ - extra = ((top_size - pad - MINSIZE + (pagesz-1)) / pagesz - 1) * pagesz; - - if (extra > 0) { - - /* - Only proceed if end of memory is where we last set it. - This avoids problems if there were foreign sbrk calls. - */ - current_brk = (char*)(MORECORE(0)); - if (current_brk == (char*)(av->top) + top_size) { - - /* - Attempt to release memory. We ignore MORECORE return value, - and instead call again to find out where new end of memory is. - This avoids problems if first call releases less than we asked, - of if failure somehow altered brk value. (We could still - encounter problems if it altered brk in some very bad way, - but the only thing we can do is adjust anyway, which will cause - some downstream failure.) - */ - - MORECORE(-extra); - /* Call the `morecore' hook if necessary. */ - if (__after_morecore_hook) - (*__after_morecore_hook) (); - new_brk = (char*)(MORECORE(0)); - - if (new_brk != (char*)MORECORE_FAILURE) { - released = (long)(current_brk - new_brk); - - if (released != 0) { - /* Success. Adjust top. */ - av->system_mem -= released; - set_head(av->top, (top_size - released) | PREV_INUSE); - check_malloc_state(av); - return 1; - } - } - } - } - return 0; -} -#endif - -#ifdef HAVE_MMAP - -static void -internal_function -#if __STD_C -munmap_chunk(mchunkptr p) -#else -munmap_chunk(p) mchunkptr p; -#endif -{ - INTERNAL_SIZE_T size = chunksize(p); - int ret; - - assert (chunk_is_mmapped(p)); -#if 0 - assert(! ((char*)p >= mp_.sbrk_base && (char*)p < mp_.sbrk_base + mp_.sbrked_mem)); - assert((mp_.n_mmaps > 0)); -#endif - assert(((p->prev_size + size) & (mp_.pagesize-1)) == 0); - - mp_.n_mmaps--; - mp_.mmapped_mem -= (size + p->prev_size); - - ret = munmap((char *)p - p->prev_size, size + p->prev_size); - - /* munmap returns non-zero on failure */ - assert(ret == 0); -} - -#if HAVE_MREMAP - -static mchunkptr -internal_function -#if __STD_C -mremap_chunk(mchunkptr p, size_t new_size) -#else -mremap_chunk(p, new_size) mchunkptr p; size_t new_size; -#endif -{ - size_t page_mask = mp_.pagesize - 1; - INTERNAL_SIZE_T offset = p->prev_size; - INTERNAL_SIZE_T size = chunksize(p); - char *cp; - - assert (chunk_is_mmapped(p)); -#if 0 - assert(! ((char*)p >= mp_.sbrk_base && (char*)p < mp_.sbrk_base + mp_.sbrked_mem)); - assert((mp_.n_mmaps > 0)); -#endif - assert(((size + offset) & (mp_.pagesize-1)) == 0); - - /* Note the extra SIZE_SZ overhead as in mmap_chunk(). 
*/ - new_size = (new_size + offset + SIZE_SZ + page_mask) & ~page_mask; - - /* OMPI change - take pessimistic approach and assume going to move */ - opal_mem_hooks_release_hook((char *)p - offset, size, 1); - cp = (char *)mremap((char *)p - offset, size + offset, new_size, - MREMAP_MAYMOVE); - - if (cp == MAP_FAILED) return 0; - - p = (mchunkptr)(cp + offset); - - assert(aligned_OK(chunk2mem(p))); - - assert((p->prev_size == offset)); - set_head(p, (new_size - offset)|IS_MMAPPED); - - mp_.mmapped_mem -= size + offset; - mp_.mmapped_mem += new_size; - if ((unsigned long)mp_.mmapped_mem > (unsigned long)mp_.max_mmapped_mem) - mp_.max_mmapped_mem = mp_.mmapped_mem; -#ifdef NO_THREADS - if ((unsigned long)(mp_.mmapped_mem + arena_mem + main_arena.system_mem) > - mp_.max_total_mem) - mp_.max_total_mem = mp_.mmapped_mem + arena_mem + main_arena.system_mem; -#endif - return p; -} - -#endif /* HAVE_MREMAP */ - -#endif /* HAVE_MMAP */ - -/*------------------------ Public wrappers. --------------------------------*/ - -Void_t* -public_mALLOc(size_t bytes) -{ - mstate ar_ptr; - Void_t *victim; - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - __malloc_ptr_t (*hook) __MALLOC_P ((size_t, __const __malloc_ptr_t)) = - __malloc_hook; - if (hook != NULL) - return (*hook)(bytes, RETURN_ADDRESS (0)); -#endif - - /* OMPI change: put in a flag so that we can know that this function - was invoked. This flag is checked in the memory/linux component - init to ensure that this ptmalloc is actually being used. Used a - simple "extern" here to get the flag symbol rather than putting - it in a new .h file that would only contain a small number of - symbols. */ - mca_memory_linux_component.malloc_invoked = true; - - arena_get(ar_ptr, bytes); - if(!ar_ptr) - return 0; - victim = _int_malloc(ar_ptr, bytes); - if(!victim) { - /* Maybe the failure is due to running out of mmapped areas. */ - if(ar_ptr != &main_arena) { - (void)mutex_unlock(&ar_ptr->mutex); - (void)mutex_lock(&main_arena.mutex); - victim = _int_malloc(&main_arena, bytes); - (void)mutex_unlock(&main_arena.mutex); - } else { -#if USE_ARENAS - /* ... or sbrk() has failed and there is still a chance to mmap() */ - ar_ptr = arena_get2(ar_ptr->next ? ar_ptr : 0, bytes); - (void)mutex_unlock(&main_arena.mutex); - if(ar_ptr) { - victim = _int_malloc(ar_ptr, bytes); - (void)mutex_unlock(&ar_ptr->mutex); - } -#endif - } - } else - (void)mutex_unlock(&ar_ptr->mutex); - assert(!victim || chunk_is_mmapped(mem2chunk(victim)) || - ar_ptr == arena_for_chunk(mem2chunk(victim))); - - return victim; -} -#ifdef libc_hidden_def -libc_hidden_def(public_mALLOc) -#endif - -void -public_fREe(Void_t* mem) -{ - mstate ar_ptr; - mchunkptr p; /* chunk corresponding to mem */ - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - void (*hook) __MALLOC_P ((__malloc_ptr_t, __const __malloc_ptr_t)) = - __free_hook; - if (hook != NULL) { - (*hook)(mem, RETURN_ADDRESS (0)); - return; - } -#endif - - /* OMPI change: put in a flag so that we can know that this function - was invoked. This flag is checked in the memory/linux component - init to ensure that this ptmalloc is actually being used. Used a - simple "extern" here to get the flag symbol rather than putting - it in a new .h file that would only contain a small number of - symbols. */ - mca_memory_linux_component.free_invoked = true; - - if (mem == 0) /* free(0) has no effect */ - return; - - p = mem2chunk(mem); - -#if HAVE_MMAP - if (chunk_is_mmapped(p)) /* release mmapped memory. 
*/ - { - munmap_chunk(p); - return; - } -#endif - - ar_ptr = arena_for_chunk(p); -#if THREAD_STATS - if(!mutex_trylock(&ar_ptr->mutex)) - ++(ar_ptr->stat_lock_direct); - else { - (void)mutex_lock(&ar_ptr->mutex); - ++(ar_ptr->stat_lock_wait); - } -#else - (void)mutex_lock(&ar_ptr->mutex); -#endif - _int_free(ar_ptr, mem); - (void)mutex_unlock(&ar_ptr->mutex); -} -#ifdef libc_hidden_def -libc_hidden_def (public_fREe) -#endif - -Void_t* -public_rEALLOc(Void_t* oldmem, size_t bytes) -{ - mstate ar_ptr; - INTERNAL_SIZE_T nb; /* padded request size */ - - mchunkptr oldp; /* chunk corresponding to oldmem */ - INTERNAL_SIZE_T oldsize; /* its size */ - - Void_t* newp; /* chunk to return */ - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - __malloc_ptr_t (*hook) __MALLOC_P ((__malloc_ptr_t, size_t, - __const __malloc_ptr_t)) = - __realloc_hook; - if (hook != NULL) - return (*hook)(oldmem, bytes, RETURN_ADDRESS (0)); -#endif - - /* OMPI change: put in a flag so that we can know that this function - was invoked. This flag is checked in the memory/linux component - init to ensure that this ptmalloc is actually being used. Used a - simple "extern" here to get the flag symbol rather than putting - it in a new .h file that would only contain a small number of - symbols. */ - mca_memory_linux_component.realloc_invoked = true; - -#if REALLOC_ZERO_BYTES_FREES - if (bytes == 0 && oldmem != NULL) { public_fREe(oldmem); return 0; } -#endif - - /* realloc of null is supposed to be same as malloc */ - if (oldmem == 0) return public_mALLOc(bytes); - - oldp = mem2chunk(oldmem); - oldsize = chunksize(oldp); - - checked_request2size(bytes, nb); - -#if HAVE_MMAP - if (chunk_is_mmapped(oldp)) - { - Void_t* newmem; - -#if HAVE_MREMAP - newp = mremap_chunk(oldp, nb); - if(newp) { - return chunk2mem(newp); - } -#endif - /* Note the extra SIZE_SZ overhead. */ - if(oldsize - SIZE_SZ >= nb) { - return oldmem; /* do nothing */ - } - /* Must alloc, copy, free. */ - newmem = public_mALLOc(bytes); - if (newmem == 0) return 0; /* propagate failure */ - MALLOC_COPY(newmem, oldmem, oldsize - 2*SIZE_SZ); - munmap_chunk(oldp); - return newmem; - } -#endif - - ar_ptr = arena_for_chunk(oldp); -#if THREAD_STATS - if(!mutex_trylock(&ar_ptr->mutex)) - ++(ar_ptr->stat_lock_direct); - else { - (void)mutex_lock(&ar_ptr->mutex); - ++(ar_ptr->stat_lock_wait); - } -#else - (void)mutex_lock(&ar_ptr->mutex); -#endif - -#ifndef NO_THREADS - /* As in malloc(), remember this arena for the next allocation. */ - tsd_setspecific(arena_key, (Void_t *)ar_ptr); -#endif - - newp = _int_realloc(ar_ptr, oldmem, bytes); - - (void)mutex_unlock(&ar_ptr->mutex); - assert(!newp || chunk_is_mmapped(mem2chunk(newp)) || - ar_ptr == arena_for_chunk(mem2chunk(newp))); - return newp; -} -#ifdef libc_hidden_def -libc_hidden_def (public_rEALLOc) -#endif - -Void_t* -public_mEMALIGn(size_t alignment, size_t bytes) -{ - mstate ar_ptr; - Void_t *p; - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - __malloc_ptr_t (*hook) __MALLOC_PMT ((size_t, size_t, - __const __malloc_ptr_t)) = - __memalign_hook; - if (hook != NULL) - return (*hook)(alignment, bytes, RETURN_ADDRESS (0)); -#endif - - /* OMPI change: put in a flag so that we can know that this function - was invoked. This flag is checked in the memory/linux component - init to ensure that this ptmalloc is actually being used. Used a - simple "extern" here to get the flag symbol rather than putting - it in a new .h file that would only contain a small number of - symbols. 
*/ - mca_memory_linux_component.memalign_invoked = true; - - /* If need less alignment than we give anyway, just relay to malloc */ - if (alignment <= MALLOC_ALIGNMENT) return public_mALLOc(bytes); - - /* Otherwise, ensure that it is at least a minimum chunk size */ - if (alignment < MINSIZE) alignment = MINSIZE; - - arena_get(ar_ptr, bytes + alignment + MINSIZE); - if(!ar_ptr) - return 0; - p = _int_memalign(ar_ptr, alignment, bytes); - (void)mutex_unlock(&ar_ptr->mutex); - if(!p) { - /* Maybe the failure is due to running out of mmapped areas. */ - if(ar_ptr != &main_arena) { - (void)mutex_lock(&main_arena.mutex); - p = _int_memalign(&main_arena, alignment, bytes); - (void)mutex_unlock(&main_arena.mutex); - } else { -#if USE_ARENAS - /* ... or sbrk() has failed and there is still a chance to mmap() */ - ar_ptr = arena_get2(ar_ptr->next ? ar_ptr : 0, bytes); - if(ar_ptr) { - p = _int_memalign(ar_ptr, alignment, bytes); - (void)mutex_unlock(&ar_ptr->mutex); - } -#endif - } - } - assert(!p || chunk_is_mmapped(mem2chunk(p)) || - ar_ptr == arena_for_chunk(mem2chunk(p))); - return p; -} -#ifdef libc_hidden_def -libc_hidden_def (public_mEMALIGn) -#endif - -Void_t* -public_vALLOc(size_t bytes) -{ - mstate ar_ptr; - Void_t *p; - - if(__malloc_initialized < 0) - ptmalloc_init (); - arena_get(ar_ptr, bytes + mp_.pagesize + MINSIZE); - if(!ar_ptr) - return 0; - p = _int_valloc(ar_ptr, bytes); - (void)mutex_unlock(&ar_ptr->mutex); - return p; -} - -Void_t* -public_pVALLOc(size_t bytes) -{ - mstate ar_ptr; - Void_t *p; - - if(__malloc_initialized < 0) - ptmalloc_init (); - arena_get(ar_ptr, bytes + 2*mp_.pagesize + MINSIZE); - p = _int_pvalloc(ar_ptr, bytes); - (void)mutex_unlock(&ar_ptr->mutex); - return p; -} - -Void_t* -public_cALLOc(size_t n, size_t elem_size) -{ - mstate av; - mchunkptr oldtop, p; - INTERNAL_SIZE_T bytes, sz, csz, oldtopsize; - Void_t* mem; - unsigned long clearsize; - unsigned long nclears; - INTERNAL_SIZE_T* d; - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - __malloc_ptr_t (*hook) __MALLOC_PMT ((size_t, __const __malloc_ptr_t)) = - __malloc_hook; -#endif - - /* size_t is unsigned so the behavior on overflow is defined. */ - bytes = n * elem_size; -#define HALF_INTERNAL_SIZE_T \ - (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2)) - if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0)) { - if (elem_size != 0 && bytes / elem_size != n) { - MALLOC_FAILURE_ACTION; - return 0; - } - } - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - if (hook != NULL) { - sz = bytes; - mem = (*hook)(sz, RETURN_ADDRESS (0)); - if(mem == 0) - return 0; -#ifdef HAVE_MEMCPY - return memset(mem, 0, sz); -#else - while(sz > 0) ((char*)mem)[--sz] = 0; /* rather inefficient */ - return mem; -#endif - } -#endif - - sz = bytes; - - arena_get(av, sz); - if(!av) - return 0; - - /* Check if we hand out the top chunk, in which case there may be no - need to clear. */ -#if MORECORE_CLEARS - oldtop = top(av); - oldtopsize = chunksize(top(av)); -#if MORECORE_CLEARS < 2 - /* Only newly allocated memory is guaranteed to be cleared. */ - if (av == &main_arena && - oldtopsize < (INTERNAL_SIZE_T)(mp_.sbrk_base + av->max_system_mem - (char *)oldtop)) - oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *)oldtop); -#endif -#endif - mem = _int_malloc(av, sz); - - /* Only clearing follows, so we can unlock early. 
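The overflow guard near the top of public_cALLOc above only pays for a division in the rare case: when both factors are below 2^(bits/2), their product cannot wrap, so the common path skips the check entirely. A minimal standalone sketch of that reasoning, using a hypothetical helper name:

    /* Standalone sketch only -- not part of the ptmalloc2 source removed
       in this diff.  mul_overflows() is a hypothetical helper. */
    #include <stddef.h>
    #include <stdio.h>

    /* 1 << (half the bits of size_t): if both factors are below this
       bound, their product fits without wrapping. */
    #define HALF_SIZE_T (((size_t)1) << (8 * sizeof(size_t) / 2))

    static int mul_overflows(size_t n, size_t elem_size)
    {
        size_t bytes = n * elem_size;        /* unsigned multiply: wraps, no UB */
        if ((n | elem_size) >= HALF_SIZE_T)  /* rare path: verify by division */
            return elem_size != 0 && bytes / elem_size != n;
        return 0;                            /* common path: provably safe */
    }

    int main(void)
    {
        printf("%d\n", mul_overflows(1000, 1000));       /* 0: no overflow */
        printf("%d\n", mul_overflows((size_t)-1, 16));   /* 1: would wrap  */
        return 0;
    }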
*/ - (void)mutex_unlock(&av->mutex); - - assert(!mem || chunk_is_mmapped(mem2chunk(mem)) || - av == arena_for_chunk(mem2chunk(mem))); - - if (mem == 0) { - /* Maybe the failure is due to running out of mmapped areas. */ - if(av != &main_arena) { - (void)mutex_lock(&main_arena.mutex); - mem = _int_malloc(&main_arena, sz); - (void)mutex_unlock(&main_arena.mutex); - } else { -#if USE_ARENAS - /* ... or sbrk() has failed and there is still a chance to mmap() */ - (void)mutex_lock(&main_arena.mutex); - av = arena_get2(av->next ? av : 0, sz); - (void)mutex_unlock(&main_arena.mutex); - if(av) { - mem = _int_malloc(av, sz); - (void)mutex_unlock(&av->mutex); - } -#endif - } - if (mem == 0) return 0; - } - p = mem2chunk(mem); - - /* Two optional cases in which clearing not necessary */ -#if HAVE_MMAP - if (chunk_is_mmapped(p)) { - return mem; - } -#endif - - csz = chunksize(p); - -#if MORECORE_CLEARS - if (p == oldtop && csz > oldtopsize) { - /* clear only the bytes from non-freshly-sbrked memory */ - csz = oldtopsize; - } -#endif - - /* Unroll clear of <= 36 bytes (72 if 8byte sizes). We know that - contents have an odd number of INTERNAL_SIZE_T-sized words; - minimally 3. */ - d = (INTERNAL_SIZE_T*)mem; - clearsize = csz - SIZE_SZ; - nclears = clearsize / sizeof(INTERNAL_SIZE_T); - assert(nclears >= 3); - - if (nclears > 9) - MALLOC_ZERO(d, clearsize); - - else { - *(d+0) = 0; - *(d+1) = 0; - *(d+2) = 0; - if (nclears > 4) { - *(d+3) = 0; - *(d+4) = 0; - if (nclears > 6) { - *(d+5) = 0; - *(d+6) = 0; - if (nclears > 8) { - *(d+7) = 0; - *(d+8) = 0; - } - } - } - } - - return mem; -} - -Void_t** -public_iCALLOc(size_t n, size_t elem_size, Void_t** chunks) -{ - mstate ar_ptr; - Void_t** m; - - arena_get(ar_ptr, n*elem_size); - if(!ar_ptr) - return 0; - - m = _int_icalloc(ar_ptr, n, elem_size, chunks); - (void)mutex_unlock(&ar_ptr->mutex); - return m; -} - -Void_t** -public_iCOMALLOc(size_t n, size_t sizes[], Void_t** chunks) -{ - mstate ar_ptr; - Void_t** m; - - arena_get(ar_ptr, 0); - if(!ar_ptr) - return 0; - - m = _int_icomalloc(ar_ptr, n, sizes, chunks); - (void)mutex_unlock(&ar_ptr->mutex); - return m; -} - -#ifndef _LIBC - -void -public_cFREe(Void_t* m) -{ - public_fREe(m); -} - -#endif /* _LIBC */ - -int -public_mTRIm(size_t s) -{ - int result; - - (void)mutex_lock(&main_arena.mutex); - result = mTRIm(s); - (void)mutex_unlock(&main_arena.mutex); - return result; -} - -size_t -public_mUSABLe(Void_t* m) -{ - size_t result; - - result = mUSABLe(m); - return result; -} - -/* This exists mainly for backward compatibility. Calling - _int_get_arena_info() directly is more useful. 
*/ -struct mallinfo -public_mALLINFo() -{ - struct malloc_arena_info mai; - struct mallinfo m; - size_t avail; - - if(__malloc_initialized < 0) - ptmalloc_init (); - _int_get_arena_info(&main_arena, &mai); - /* Account for top */ - avail = mai.fastavail + mai.binavail + mai.top_size; - m.smblks = mai.nfastblocks; - m.ordblks = mai.nbinblocks + 1; - m.fordblks = avail; - m.uordblks = mai.system_mem - avail; - m.arena = mai.system_mem; - m.hblks = mp_.n_mmaps; - m.hblkhd = mp_.mmapped_mem; - m.fsmblks = mai.fastavail; - m.keepcost = mai.top_size; - m.usmblks = mp_.max_total_mem; - return m; -} - -int -public_mALLOPt(int p, int v) -{ - int result; - result = mALLOPt(p, v); - return result; -} - -/* - ------------------------------ malloc ------------------------------ -*/ - -/* With Intel Composer XE V12.1.0, release 2011.6.233, any launch */ -/* fails, even before main(), due to a bug in the vectorizer (see */ -/* https://svn.open-mpi.org/trac/ompi/changeset/25290). The fix is */ -/* to disable vectorization by reducing the optimization level to */ -/* -O1 for _int_malloc(). The only reliable method to identify */ -/* release 2011.6.233 is the predefined __INTEL_COMPILER_BUILD_DATE */ -/* macro, which will have the value 20110811 (Linux, Windows, and */ -/* Mac OS X). (The predefined __INTEL_COMPILER macro is nonsense, */ -/* 9999, and both the 2011.6.233 and 2011.7.256 releases identify */ -/* themselves as V12.1.0 from the -v command line option.) */ - -#ifdef __INTEL_COMPILER_BUILD_DATE -# if __INTEL_COMPILER_BUILD_DATE == 20110811 -# pragma GCC optimization_level 1 -# endif -#endif - -Void_t* -_int_malloc(mstate av, size_t bytes) -{ - INTERNAL_SIZE_T nb; /* normalized request size */ - unsigned int idx; /* associated bin index */ - mbinptr bin; /* associated bin */ - mfastbinptr* fb; /* associated fastbin */ - - mchunkptr victim; /* inspected/selected chunk */ - INTERNAL_SIZE_T size; /* its size */ - int victim_index; /* its bin index */ - - mchunkptr remainder; /* remainder from a split */ - unsigned long remainder_size; /* its size */ - - unsigned int block; /* bit map traverser */ - unsigned int bit; /* bit map traverser */ - unsigned int map; /* current word of binmap */ - - mchunkptr fwd; /* misc temp for linking */ - mchunkptr bck; /* misc temp for linking */ - - /* - Convert request size to internal form by adding SIZE_SZ bytes - overhead plus possibly more to obtain necessary alignment and/or - to obtain a size of at least MINSIZE, the smallest allocatable - size. Also, checked_request2size traps (returning 0) request sizes - that are so large that they wrap around zero when padded and - aligned. - */ - - checked_request2size(bytes, nb); - - /* - If the size qualifies as a fastbin, first check corresponding bin. - This code is safe to execute even if av is not yet initialized, so we - can try it without checking, which saves some time on this fast path. - */ - - if ((unsigned long)(nb) <= (unsigned long)(av->max_fast)) { - fb = &(av->fastbins[(fastbin_index(nb))]); - if ( (victim = *fb) != 0) { - *fb = victim->fd; - check_remalloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - } - - /* - If a small request, check regular bin. Since these "smallbins" - hold one size each, no searching within bins is necessary. - (For a large request, we need to wait until unsorted chunks are - processed to find best fit. But for small ones, fits are exact - anyway, so we can check now, which is faster.) 
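checked_request2size() and the fastbin probe above turn a caller's byte count into an aligned internal chunk size and then index a size-class bin. The sketch below reimplements that idea with simplified stand-in definitions (the real macros additionally trap requests that would wrap past zero when padded):

    /* Standalone sketch only -- not part of the ptmalloc2 source removed
       in this diff.  request2size() here is a simplified stand-in. */
    #include <stddef.h>
    #include <stdio.h>

    #define SIZE_SZ    sizeof(size_t)
    #define ALIGNMENT  (2 * SIZE_SZ)
    #define ALIGN_MASK (ALIGNMENT - 1)
    #define MINSIZE    (4 * SIZE_SZ)

    static size_t request2size(size_t req)   /* stand-in, not the real macro */
    {
        size_t nb = (req + SIZE_SZ + ALIGN_MASK) & ~ALIGN_MASK;
        return nb < MINSIZE ? MINSIZE : nb;
    }

    int main(void)
    {
        for (size_t req = 1; req <= 64; req += 21) {
            size_t nb  = request2size(req);
            /* one bin per ALIGNMENT-sized step above MINSIZE (stand-in rule) */
            size_t bin = (nb - MINSIZE) / ALIGNMENT;
            printf("request %2zu -> chunk size %2zu -> size-class bin %zu\n",
                   req, nb, bin);
        }
        return 0;
    }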
- */ - - if (in_smallbin_range(nb)) { - idx = smallbin_index(nb); - bin = bin_at(av,idx); - - if ( (victim = last(bin)) != bin) { - if (victim == 0) /* initialization check */ - malloc_consolidate(av); - else { - bck = victim->bk; - set_inuse_bit_at_offset(victim, nb); - bin->bk = bck; - bck->fd = bin; - - if (av != &main_arena) - victim->size |= NON_MAIN_ARENA; - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - } - } - - /* - If this is a large request, consolidate fastbins before continuing. - While it might look excessive to kill all fastbins before - even seeing if there is space available, this avoids - fragmentation problems normally associated with fastbins. - Also, in practice, programs tend to have runs of either small or - large requests, but less often mixtures, so consolidation is not - invoked all that often in most programs. And the programs that - it is called frequently in otherwise tend to fragment. - */ - - else { - idx = largebin_index(nb); - if (have_fastchunks(av)) - malloc_consolidate(av); - } - - /* - Process recently freed or remaindered chunks, taking one only if - it is exact fit, or, if this a small request, the chunk is remainder from - the most recent non-exact fit. Place other traversed chunks in - bins. Note that this step is the only place in any routine where - chunks are placed in bins. - - The outer loop here is needed because we might not realize until - near the end of malloc that we should have consolidated, so must - do so and retry. This happens at most once, and only when we would - otherwise need to expand memory to service a "small" request. - */ - - for(;;) { - - while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) { - bck = victim->bk; - size = chunksize(victim); - - /* - If a small request, try to use last remainder if it is the - only chunk in unsorted bin. This helps promote locality for - runs of consecutive small requests. This is the only - exception to best-fit, and applies only when there is - no exact fit for a small chunk. - */ - - if (in_smallbin_range(nb) && - bck == unsorted_chunks(av) && - victim == av->last_remainder && - (unsigned long)(size) > (unsigned long)(nb + MINSIZE)) { - - /* split and reattach remainder */ - remainder_size = size - nb; - remainder = chunk_at_offset(victim, nb); - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - av->last_remainder = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - - set_head(victim, nb | PREV_INUSE | - (av != &main_arena ? 
NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - - /* remove from unsorted list */ - unsorted_chunks(av)->bk = bck; - bck->fd = unsorted_chunks(av); - - /* Take now instead of binning if exact fit */ - - if (size == nb) { - set_inuse_bit_at_offset(victim, size); - if (av != &main_arena) - victim->size |= NON_MAIN_ARENA; - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - - /* place chunk in bin */ - - if (in_smallbin_range(size)) { - victim_index = smallbin_index(size); - bck = bin_at(av, victim_index); - fwd = bck->fd; - } - else { - victim_index = largebin_index(size); - bck = bin_at(av, victim_index); - fwd = bck->fd; - - if (fwd != bck) { - /* if smaller than smallest, place first */ - assert((bck->bk->size & NON_MAIN_ARENA) == 0); - if ((unsigned long)(size) < (unsigned long)(bck->bk->size)) { - fwd = bck; - bck = bck->bk; - } - else if ((unsigned long)(size) >= - (unsigned long)(FIRST_SORTED_BIN_SIZE)) { - - /* maintain large bins in sorted order */ - size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */ - assert((fwd->size & NON_MAIN_ARENA) == 0); - while ((unsigned long)(size) < (unsigned long)(fwd->size)) { - fwd = fwd->fd; - assert((fwd->size & NON_MAIN_ARENA) == 0); - } - bck = fwd->bk; - } - } - } - - mark_bin(av, victim_index); - victim->bk = bck; - victim->fd = fwd; - fwd->bk = victim; - bck->fd = victim; - } - - /* - If a large request, scan through the chunks of current bin in - sorted order to find smallest that fits. This is the only step - where an unbounded number of chunks might be scanned without doing - anything useful with them. However the lists tend to be short. - */ - - if (!in_smallbin_range(nb)) { - bin = bin_at(av, idx); - - for (victim = last(bin); victim != bin; victim = victim->bk) { - size = chunksize(victim); - - if ((unsigned long)(size) >= (unsigned long)(nb)) { - remainder_size = size - nb; - unlink(victim, bck, fwd); - - /* Exhaust */ - if (remainder_size < MINSIZE) { - set_inuse_bit_at_offset(victim, size); - if (av != &main_arena) - victim->size |= NON_MAIN_ARENA; - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - /* Split */ - else { - remainder = chunk_at_offset(victim, nb); - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - set_head(victim, nb | PREV_INUSE | - (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - } - } - } - - /* - Search for a chunk by scanning bins, starting with next largest - bin. This search is strictly by best-fit; i.e., the smallest - (with ties going to approximately the least recently used) chunk - that fits is selected. - - The bitmap avoids needing to check that most blocks are nonempty. - The particular case of skipping all bins during warm-up phases - when no chunks have been returned yet is faster than it might look. - */ - - ++idx; - bin = bin_at(av,idx); - block = idx2block(idx); - map = av->binmap[block]; - bit = idx2bit(idx); - - for (;;) { - - /* Skip rest of block if there are no more set bits in this block. 
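The binmap scan entered just above keeps one bit per bin, packed 32 to a word, so a whole run of empty bins is skipped with a single compare. A simplified standalone reimplementation of that lookup (constants and helpers are stand-ins, not the deleted macros):

    /* Standalone sketch only -- not part of the ptmalloc2 source removed
       in this diff.  Constants and helpers are simplified stand-ins. */
    #include <stdio.h>

    #define BINMAPSHIFT 5                    /* 32 bins per unsigned word */
    #define BINMAPSIZE  4                    /* room for 128 bins */

    static unsigned binmap[BINMAPSIZE];

    static void mark_bin(unsigned idx)
    {
        binmap[idx >> BINMAPSHIFT] |= 1u << (idx & 31);
    }

    /* Index of the first marked bin at or after idx, or -1 if none. */
    static int next_nonempty(unsigned idx)
    {
        for (unsigned block = idx >> BINMAPSHIFT; block < BINMAPSIZE; ++block) {
            unsigned map = binmap[block];
            if (map == 0)
                continue;                    /* skip 32 empty bins at once */
            unsigned start = (block == idx >> BINMAPSHIFT) ? (idx & 31) : 0;
            for (unsigned b = start; b < 32; ++b)
                if (map & (1u << b))
                    return (int)((block << BINMAPSHIFT) | b);
        }
        return -1;
    }

    int main(void)
    {
        mark_bin(3);
        mark_bin(70);
        printf("%d %d %d\n",
               next_nonempty(0), next_nonempty(4), next_nonempty(71));
        return 0;
    }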
*/ - if (bit > map || bit == 0) { - do { - if (++block >= BINMAPSIZE) /* out of bins */ - goto use_top; - } while ( (map = av->binmap[block]) == 0); - - bin = bin_at(av, (block << BINMAPSHIFT)); - bit = 1; - } - - /* Advance to bin with set bit. There must be one. */ - while ((bit & map) == 0) { - bin = next_bin(bin); - bit <<= 1; - assert(bit != 0); - } - - /* Inspect the bin. It is likely to be non-empty */ - victim = last(bin); - - /* If a false alarm (empty bin), clear the bit. */ - if (victim == bin) { - av->binmap[block] = map &= ~bit; /* Write through */ - bin = next_bin(bin); - bit <<= 1; - } - - else { - size = chunksize(victim); - - /* We know the first chunk in this bin is big enough to use. */ - assert((unsigned long)(size) >= (unsigned long)(nb)); - - remainder_size = size - nb; - - /* unlink */ - bck = victim->bk; - bin->bk = bck; - bck->fd = bin; - - /* Exhaust */ - if (remainder_size < MINSIZE) { - set_inuse_bit_at_offset(victim, size); - if (av != &main_arena) - victim->size |= NON_MAIN_ARENA; - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - - /* Split */ - else { - remainder = chunk_at_offset(victim, nb); - - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); - /* advertise as last remainder */ - if (in_smallbin_range(nb)) - av->last_remainder = remainder; - - set_head(victim, nb | PREV_INUSE | - (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE); - set_foot(remainder, remainder_size); - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - } - } - - use_top: - /* - If large enough, split off the chunk bordering the end of memory - (held in av->top). Note that this is in accord with the best-fit - search rule. In effect, av->top is treated as larger (and thus - less well fitting) than any other available chunk since it can - be extended to be as large as necessary (up to system - limitations). - - We require that av->top always exists (i.e., has size >= - MINSIZE) after initialization, so if it would otherwise be - exhuasted by current request, it is replenished. (The main - reason for ensuring it exists is that we may need MINSIZE space - to put in fenceposts in sysmalloc.) - */ - - victim = av->top; - size = chunksize(victim); - - if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) { - remainder_size = size - nb; - remainder = chunk_at_offset(victim, nb); - av->top = remainder; - set_head(victim, nb | PREV_INUSE | - (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE); - - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); - } - - /* - If there is space available in fastbins, consolidate and retry, - to possibly avoid expanding memory. This can occur only if nb is - in smallbin range so we didn't consolidate upon entry. 
- */ - - else if (have_fastchunks(av)) { - assert(in_smallbin_range(nb)); - malloc_consolidate(av); - idx = smallbin_index(nb); /* restore original bin index */ - } - - /* - Otherwise, relay to handle system-dependent cases - */ - else - return sYSMALLOc(nb, av); - } -} - -/* - ------------------------------ free ------------------------------ -*/ - -void -_int_free(mstate av, Void_t* mem) -{ - mchunkptr p; /* chunk corresponding to mem */ - INTERNAL_SIZE_T size; /* its size */ - mfastbinptr* fb; /* associated fastbin */ - mchunkptr nextchunk; /* next contiguous chunk */ - INTERNAL_SIZE_T nextsize; /* its size */ - int nextinuse; /* true if nextchunk is used */ - INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */ - mchunkptr bck; /* misc temp for linking */ - mchunkptr fwd; /* misc temp for linking */ - - - /* free(0) has no effect */ - if (mem != 0) { - p = mem2chunk(mem); - size = chunksize(p); - - check_inuse_chunk(av, p); - - /* - If eligible, place chunk on a fastbin so it can be found - and used quickly in malloc. - */ - - if ((unsigned long)(size) <= (unsigned long)(av->max_fast) - -#if TRIM_FASTBINS - /* - If TRIM_FASTBINS set, don't place chunks - bordering top into fastbins - */ - && (chunk_at_offset(p, size) != av->top) -#endif - ) { - - set_fastchunks(av); - fb = &(av->fastbins[fastbin_index(size)]); - p->fd = *fb; - *fb = p; - } - - /* - Consolidate other non-mmapped chunks as they arrive. - */ - - else if (!chunk_is_mmapped(p)) { - nextchunk = chunk_at_offset(p, size); - nextsize = chunksize(nextchunk); - assert(nextsize > 0); - - /* consolidate backward */ - if (!prev_inuse(p)) { - prevsize = p->prev_size; - size += prevsize; - p = chunk_at_offset(p, -((long) prevsize)); - unlink(p, bck, fwd); - } - - if (nextchunk != av->top) { - /* get and clear inuse bit */ - nextinuse = inuse_bit_at_offset(nextchunk, nextsize); - - /* consolidate forward */ - if (!nextinuse) { - unlink(nextchunk, bck, fwd); - size += nextsize; - } else - clear_inuse_bit_at_offset(nextchunk, 0); - - /* - Place the chunk in unsorted chunk list. Chunks are - not placed into regular bins until after they have - been given one chance to be used in malloc. - */ - - bck = unsorted_chunks(av); - fwd = bck->fd; - p->bk = bck; - p->fd = fwd; - bck->fd = p; - fwd->bk = p; - - set_head(p, size | PREV_INUSE); - set_foot(p, size); - - check_free_chunk(av, p); - } - - /* - If the chunk borders the current high end of memory, - consolidate into top - */ - - else { - size += nextsize; - set_head(p, size | PREV_INUSE); - av->top = p; - check_chunk(av, p); - } - - /* - If freeing a large space, consolidate possibly-surrounding - chunks. Then, if the total unused topmost memory exceeds trim - threshold, ask malloc_trim to reduce top. - - Unless max_fast is 0, we don't know if there are fastbins - bordering top, so we cannot tell for sure whether threshold - has been reached unless fastbins are consolidated. But we - don't want to consolidate on each free. As a compromise, - consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD - is reached. - */ - - if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { - if (have_fastchunks(av)) - malloc_consolidate(av); - - if (av == &main_arena) { -#ifndef MORECORE_CANNOT_TRIM - if ((unsigned long)(chunksize(av->top)) >= - (unsigned long)(mp_.trim_threshold)) - sYSTRIm(mp_.top_pad, av); -#endif - } else { - /* Always try heap_trim(), even if the top chunk is not - large, because the corresponding heap might go away. 
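The fastbin placement in _int_free above is a LIFO push onto a singly linked list headed in the arena, which is why the matching pop in _int_malloc is so cheap. A minimal standalone sketch of that discipline (struct and function names are illustrative only):

    /* Standalone sketch only -- not part of the ptmalloc2 source removed
       in this diff.  Names are illustrative, not the real structures. */
    #include <stddef.h>
    #include <stdio.h>

    struct chunk { struct chunk *fd; };      /* only the forward link matters */

    static struct chunk *fastbin;            /* one bin head, for illustration */

    static void fast_free(struct chunk *p)   /* push: what _int_free does */
    {
        p->fd = fastbin;
        fastbin = p;
    }

    static struct chunk *fast_alloc(void)    /* pop: what _int_malloc does */
    {
        struct chunk *victim = fastbin;
        if (victim != NULL)
            fastbin = victim->fd;
        return victim;
    }

    int main(void)
    {
        struct chunk a, b;
        fast_free(&a);
        fast_free(&b);                       /* most recently freed comes back first */
        printf("%d %d %d\n",
               fast_alloc() == &b, fast_alloc() == &a, fast_alloc() == NULL);
        return 0;
    }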
*/ - heap_info *heap = heap_for_ptr(top(av)); - - assert(heap->ar_ptr == av); - heap_trim(heap, mp_.top_pad); - } - } - - } - /* - If the chunk was allocated via mmap, release via munmap(). Note - that if HAVE_MMAP is false but chunk_is_mmapped is true, then - user must have overwritten memory. There's nothing we can do to - catch this error unless MALLOC_DEBUG is set, in which case - check_inuse_chunk (above) will have triggered error. - */ - - else { -#if HAVE_MMAP - int ret; - INTERNAL_SIZE_T offset = p->prev_size; - mp_.n_mmaps--; - mp_.mmapped_mem -= (size + offset); - ret = munmap((char*)p - offset, size + offset); - /* munmap returns non-zero on failure */ - assert(ret == 0); -#endif - } - } -} - -/* - ------------------------- malloc_consolidate ------------------------- - - malloc_consolidate is a specialized version of free() that tears - down chunks held in fastbins. Free itself cannot be used for this - purpose since, among other things, it might place chunks back onto - fastbins. So, instead, we need to use a minor variant of the same - code. - - Also, because this routine needs to be called the first time through - malloc anyway, it turns out to be the perfect place to trigger - initialization code. -*/ - -#if __STD_C -static void malloc_consolidate(mstate av) -#else -static void malloc_consolidate(av) mstate av; -#endif -{ - mfastbinptr* fb; /* current fastbin being consolidated */ - mfastbinptr* maxfb; /* last fastbin (for loop control) */ - mchunkptr p; /* current chunk being consolidated */ - mchunkptr nextp; /* next chunk to consolidate */ - mchunkptr unsorted_bin; /* bin header */ - mchunkptr first_unsorted; /* chunk to link to */ - - /* These have same use as in free() */ - mchunkptr nextchunk; - INTERNAL_SIZE_T size; - INTERNAL_SIZE_T nextsize; - INTERNAL_SIZE_T prevsize; - int nextinuse; - mchunkptr bck; - mchunkptr fwd; - - /* - If max_fast is 0, we know that av hasn't - yet been initialized, in which case do so below - */ - - if (av->max_fast != 0) { - clear_fastchunks(av); - - unsorted_bin = unsorted_chunks(av); - - /* - Remove each chunk from fast bin and consolidate it, placing it - then in unsorted bin. Among other reasons for doing this, - placing in unsorted bin avoids needing to calculate actual bins - until malloc is sure that chunks aren't immediately going to be - reused anyway. 
- */ - - maxfb = &(av->fastbins[fastbin_index(av->max_fast)]); - fb = &(av->fastbins[0]); - do { - if ( (p = *fb) != 0) { - *fb = 0; - - do { - check_inuse_chunk(av, p); - nextp = p->fd; - - /* Slightly streamlined version of consolidation code in free() */ - size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA); - nextchunk = chunk_at_offset(p, size); - nextsize = chunksize(nextchunk); - - if (!prev_inuse(p)) { - prevsize = p->prev_size; - size += prevsize; - p = chunk_at_offset(p, -((long) prevsize)); - unlink(p, bck, fwd); - } - - if (nextchunk != av->top) { - nextinuse = inuse_bit_at_offset(nextchunk, nextsize); - - if (!nextinuse) { - size += nextsize; - unlink(nextchunk, bck, fwd); - } else - clear_inuse_bit_at_offset(nextchunk, 0); - - first_unsorted = unsorted_bin->fd; - unsorted_bin->fd = p; - first_unsorted->bk = p; - - set_head(p, size | PREV_INUSE); - p->bk = unsorted_bin; - p->fd = first_unsorted; - set_foot(p, size); - } - - else { - size += nextsize; - set_head(p, size | PREV_INUSE); - av->top = p; - } - - } while ( (p = nextp) != 0); - - } - } while (fb++ != maxfb); - } - else { - malloc_init_state(av); - check_malloc_state(av); - } -} - -/* - ------------------------------ realloc ------------------------------ -*/ - -Void_t* -_int_realloc(mstate av, Void_t* oldmem, size_t bytes) -{ - INTERNAL_SIZE_T nb; /* padded request size */ - - mchunkptr oldp; /* chunk corresponding to oldmem */ - INTERNAL_SIZE_T oldsize; /* its size */ - - mchunkptr newp; /* chunk to return */ - INTERNAL_SIZE_T newsize; /* its size */ - Void_t* newmem; /* corresponding user mem */ - - mchunkptr next; /* next contiguous chunk after oldp */ - - mchunkptr remainder; /* extra space at end of newp */ - unsigned long remainder_size; /* its size */ - - mchunkptr bck; /* misc temp for linking */ - mchunkptr fwd; /* misc temp for linking */ - - unsigned long copysize; /* bytes to copy */ - unsigned int ncopies; /* INTERNAL_SIZE_T words to copy */ - INTERNAL_SIZE_T* s; /* copy source */ - INTERNAL_SIZE_T* d; /* copy destination */ - - -#if REALLOC_ZERO_BYTES_FREES - if (bytes == 0) { - _int_free(av, oldmem); - return 0; - } -#endif - - /* realloc of null is supposed to be same as malloc */ - if (oldmem == 0) return _int_malloc(av, bytes); - - checked_request2size(bytes, nb); - - oldp = mem2chunk(oldmem); - oldsize = chunksize(oldp); - - check_inuse_chunk(av, oldp); - - if (!chunk_is_mmapped(oldp)) { - - if ((unsigned long)(oldsize) >= (unsigned long)(nb)) { - /* already big enough; split below */ - newp = oldp; - newsize = oldsize; - } - - else { - next = chunk_at_offset(oldp, oldsize); - - /* Try to expand forward into top */ - if (next == av->top && - (unsigned long)(newsize = oldsize + chunksize(next)) >= - (unsigned long)(nb + MINSIZE)) { - set_head_size(oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0)); - av->top = chunk_at_offset(oldp, nb); - set_head(av->top, (newsize - nb) | PREV_INUSE); - check_inuse_chunk(av, oldp); - return chunk2mem(oldp); - } - - /* Try to expand forward into next chunk; split off remainder below */ - else if (next != av->top && - !inuse(next) && - (unsigned long)(newsize = oldsize + chunksize(next)) >= - (unsigned long)(nb)) { - newp = oldp; - unlink(next, bck, fwd); - } - - /* allocate, copy, free */ - else { - newmem = _int_malloc(av, nb - MALLOC_ALIGN_MASK); - if (newmem == 0) - return 0; /* propagate failure */ - - newp = mem2chunk(newmem); - newsize = chunksize(newp); - - /* - Avoid copy if newp is next chunk after oldp. 
- */ - if (newp == next) { - newsize += oldsize; - newp = oldp; - } - else { - /* - Unroll copy of <= 36 bytes (72 if 8byte sizes) - We know that contents have an odd number of - INTERNAL_SIZE_T-sized words; minimally 3. - */ - - copysize = oldsize - SIZE_SZ; - s = (INTERNAL_SIZE_T*)(oldmem); - d = (INTERNAL_SIZE_T*)(newmem); - ncopies = copysize / sizeof(INTERNAL_SIZE_T); - assert(ncopies >= 3); - - if (ncopies > 9) - MALLOC_COPY(d, s, copysize); - - else { - *(d+0) = *(s+0); - *(d+1) = *(s+1); - *(d+2) = *(s+2); - if (ncopies > 4) { - *(d+3) = *(s+3); - *(d+4) = *(s+4); - if (ncopies > 6) { - *(d+5) = *(s+5); - *(d+6) = *(s+6); - if (ncopies > 8) { - *(d+7) = *(s+7); - *(d+8) = *(s+8); - } - } - } - } - - _int_free(av, oldmem); - check_inuse_chunk(av, newp); - return chunk2mem(newp); - } - } - } - - /* If possible, free extra space in old or extended chunk */ - - assert((unsigned long)(newsize) >= (unsigned long)(nb)); - - remainder_size = newsize - nb; - - if (remainder_size < MINSIZE) { /* not enough extra to split off */ - set_head_size(newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_inuse_bit_at_offset(newp, newsize); - } - else { /* split remainder */ - remainder = chunk_at_offset(newp, nb); - set_head_size(newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_head(remainder, remainder_size | PREV_INUSE | - (av != &main_arena ? NON_MAIN_ARENA : 0)); - /* Mark remainder as inuse so free() won't complain */ - set_inuse_bit_at_offset(remainder, remainder_size); - _int_free(av, chunk2mem(remainder)); - } - - check_inuse_chunk(av, newp); - return chunk2mem(newp); - } - - /* - Handle mmap cases - */ - - else { -#if HAVE_MMAP - -#if HAVE_MREMAP - INTERNAL_SIZE_T offset = oldp->prev_size; - size_t pagemask = mp_.pagesize - 1; - char *cp; - unsigned long sum; - - /* Note the extra SIZE_SZ overhead */ - newsize = (nb + offset + SIZE_SZ + pagemask) & ~pagemask; - - /* don't need to remap if still within same page */ - if (oldsize == newsize - offset) - return oldmem; - - /* OMPI change - take pessimistic approach and assume going to move */ - opal_mem_hooks_release_hook((char *)oldp - offset, oldsize + offset, 1); - cp = (char*)mremap((char*)oldp - offset, oldsize + offset, newsize, 1); - - if (cp != MAP_FAILED) { - - newp = (mchunkptr)(cp + offset); - set_head(newp, (newsize - offset)|IS_MMAPPED); - - assert(aligned_OK(chunk2mem(newp))); - assert((newp->prev_size == offset)); - - /* update statistics */ - sum = mp_.mmapped_mem += newsize - oldsize; - if (sum > (unsigned long)(mp_.max_mmapped_mem)) - mp_.max_mmapped_mem = sum; -#ifdef NO_THREADS - sum += main_arena.system_mem; - if (sum > (unsigned long)(mp_.max_total_mem)) - mp_.max_total_mem = sum; -#endif - - return chunk2mem(newp); - } -#endif - - /* Note the extra SIZE_SZ overhead. */ - if ((unsigned long)(oldsize) >= (unsigned long)(nb + SIZE_SZ)) - newmem = oldmem; /* do nothing */ - else { - /* Must alloc, copy, free. 
*/ - newmem = _int_malloc(av, nb - MALLOC_ALIGN_MASK); - if (newmem != 0) { - MALLOC_COPY(newmem, oldmem, oldsize - 2*SIZE_SZ); - _int_free(av, oldmem); - } - } - return newmem; - -#else - /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */ - check_malloc_state(av); - MALLOC_FAILURE_ACTION; - return 0; -#endif - } -} - -/* - ------------------------------ memalign ------------------------------ -*/ - -Void_t* -_int_memalign(mstate av, size_t alignment, size_t bytes) -{ - INTERNAL_SIZE_T nb; /* padded request size */ - char* m; /* memory returned by malloc call */ - mchunkptr p; /* corresponding chunk */ - char* brk; /* alignment point within p */ - mchunkptr newp; /* chunk to return */ - INTERNAL_SIZE_T newsize; /* its size */ - INTERNAL_SIZE_T leadsize; /* leading space before alignment point */ - mchunkptr remainder; /* spare room at end to split off */ - unsigned long remainder_size; /* its size */ - INTERNAL_SIZE_T size; - - /* If need less alignment than we give anyway, just relay to malloc */ - - if (alignment <= MALLOC_ALIGNMENT) return _int_malloc(av, bytes); - - /* Otherwise, ensure that it is at least a minimum chunk size */ - - if (alignment < MINSIZE) alignment = MINSIZE; - - /* Make sure alignment is power of 2 (in case MINSIZE is not). */ - if ((alignment & (alignment - 1)) != 0) { - size_t a = MALLOC_ALIGNMENT * 2; - while ((unsigned long)a < (unsigned long)alignment) a <<= 1; - alignment = a; - } - - checked_request2size(bytes, nb); - - /* - Strategy: find a spot within that chunk that meets the alignment - request, and then possibly free the leading and trailing space. - */ - - - /* Call malloc with worst case padding to hit alignment. */ - - m = (char*)(_int_malloc(av, nb + alignment + MINSIZE)); - - if (m == 0) return 0; /* propagate failure */ - - p = mem2chunk(m); - - if ((((unsigned long)(m)) % alignment) != 0) { /* misaligned */ - - /* - Find an aligned spot inside chunk. Since we need to give back - leading space in a chunk of at least MINSIZE, if the first - calculation places us at a spot with less than MINSIZE leader, - we can move to the next aligned spot -- we've allocated enough - total room so that this is always possible. - */ - - brk = (char*)mem2chunk(((unsigned long)(m + alignment - 1)) & - -((signed long) alignment)); - if ((unsigned long)(brk - (char*)(p)) < MINSIZE) - brk += alignment; - - newp = (mchunkptr)brk; - leadsize = brk - (char*)(p); - newsize = chunksize(p) - leadsize; - - /* For mmapped chunks, just adjust offset */ - if (chunk_is_mmapped(p)) { - newp->prev_size = p->prev_size + leadsize; - set_head(newp, newsize|IS_MMAPPED); - return chunk2mem(newp); - } - - /* Otherwise, give back leader, use the rest */ - set_head(newp, newsize | PREV_INUSE | - (av != &main_arena ? NON_MAIN_ARENA : 0)); - set_inuse_bit_at_offset(newp, newsize); - set_head_size(p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0)); - _int_free(av, chunk2mem(p)); - p = newp; - - assert (newsize >= nb && - (((unsigned long)(chunk2mem(p))) % alignment) == 0); - } - - /* Also give back spare room at the end */ - if (!chunk_is_mmapped(p)) { - size = chunksize(p); - if ((unsigned long)(size) > (unsigned long)(nb + MINSIZE)) { - remainder_size = size - nb; - remainder = chunk_at_offset(p, nb); - set_head(remainder, remainder_size | PREV_INUSE | - (av != &main_arena ? 
NON_MAIN_ARENA : 0)); - set_head_size(p, nb); - _int_free(av, chunk2mem(remainder)); - } - } - - check_inuse_chunk(av, p); - return chunk2mem(p); -} - -#if 0 -/* - ------------------------------ calloc ------------------------------ -*/ - -#if __STD_C -Void_t* cALLOc(size_t n_elements, size_t elem_size) -#else -Void_t* cALLOc(n_elements, elem_size) size_t n_elements; size_t elem_size; -#endif -{ - mchunkptr p; - unsigned long clearsize; - unsigned long nclears; - INTERNAL_SIZE_T* d; - - Void_t* mem = mALLOc(n_elements * elem_size); - - if (mem != 0) { - p = mem2chunk(mem); - -#if MMAP_CLEARS - if (!chunk_is_mmapped(p)) /* don't need to clear mmapped space */ -#endif - { - /* - Unroll clear of <= 36 bytes (72 if 8byte sizes) - We know that contents have an odd number of - INTERNAL_SIZE_T-sized words; minimally 3. - */ - - d = (INTERNAL_SIZE_T*)mem; - clearsize = chunksize(p) - SIZE_SZ; - nclears = clearsize / sizeof(INTERNAL_SIZE_T); - assert(nclears >= 3); - - if (nclears > 9) - MALLOC_ZERO(d, clearsize); - - else { - *(d+0) = 0; - *(d+1) = 0; - *(d+2) = 0; - if (nclears > 4) { - *(d+3) = 0; - *(d+4) = 0; - if (nclears > 6) { - *(d+5) = 0; - *(d+6) = 0; - if (nclears > 8) { - *(d+7) = 0; - *(d+8) = 0; - } - } - } - } - } - } - return mem; -} -#endif /* 0 */ - -/* - ------------------------- independent_calloc ------------------------- -*/ - -Void_t** -#if __STD_C -_int_icalloc(mstate av, size_t n_elements, size_t elem_size, Void_t* chunks[]) -#else -_int_icalloc(av, n_elements, elem_size, chunks) -mstate av; size_t n_elements; size_t elem_size; Void_t* chunks[]; -#endif -{ - size_t sz[1] = {elem_size}; - /* opts arg of 3 means all elements are same size, and should be cleared */ - return iALLOc(av, n_elements, sz, 3, chunks); -} - -/* - ------------------------- independent_comalloc ------------------------- -*/ - -Void_t** -#if __STD_C -_int_icomalloc(mstate av, size_t n_elements, size_t sizes[], Void_t* chunks[]) -#else -_int_icomalloc(av, n_elements, sizes, chunks) -mstate av; size_t n_elements; size_t sizes[]; Void_t* chunks[]; -#endif -{ - return iALLOc(av, n_elements, sizes, 0, chunks); -} - - -/* - ------------------------------ ialloc ------------------------------ - ialloc provides common support for independent_X routines, handling all of - the combinations that can result. 
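To make the caller-visible contract concrete before the implementation details: for the all-same-size, cleared case (_int_icalloc above), the routine hands back an array of n_elements pointers, each referring to a zeroed block of elem_size usable bytes, all carved out of a single contiguous chunk. The sketch below models only that observable behavior with ordinary calloc calls; it is not how iALLOc works internally, and the helper name is invented for illustration.

    #include <stdlib.h>

    /* Semantic model of the independent, all-same-size, cleared case.
       Illustrative only: the real routine satisfies all elements (and,
       if needed, the pointer array itself) from ONE underlying chunk. */
    static void **model_independent_calloc(size_t n_elements, size_t elem_size)
    {
        void **marray = calloc(n_elements, sizeof(void *));
        size_t i;

        if (marray == NULL)
            return NULL;
        for (i = 0; i < n_elements; ++i) {
            marray[i] = calloc(1, elem_size);     /* each element zeroed */
            if (marray[i] == NULL) {              /* unwind on failure */
                while (i-- > 0)
                    free(marray[i]);
                free(marray);
                return NULL;
            }
        }
        return marray;
    }

Each element behaves as an independently freeable allocation; what the real implementation adds is that the elements are also adjacent in memory, which is the point of the interface.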
- - The opts arg has: - bit 0 set if all elements are same size (using sizes[0]) - bit 1 set if elements should be zeroed -*/ - - -static Void_t** -#if __STD_C -iALLOc(mstate av, size_t n_elements, size_t* sizes, int opts, Void_t* chunks[]) -#else -iALLOc(av, n_elements, sizes, opts, chunks) -mstate av; size_t n_elements; size_t* sizes; int opts; Void_t* chunks[]; -#endif -{ - INTERNAL_SIZE_T element_size; /* chunksize of each element, if all same */ - INTERNAL_SIZE_T contents_size; /* total size of elements */ - INTERNAL_SIZE_T array_size; /* request size of pointer array */ - Void_t* mem; /* malloced aggregate space */ - mchunkptr p; /* corresponding chunk */ - INTERNAL_SIZE_T remainder_size; /* remaining bytes while splitting */ - Void_t** marray; /* either "chunks" or malloced ptr array */ - mchunkptr array_chunk; /* chunk for malloced ptr array */ - int mmx; /* to disable mmap */ - INTERNAL_SIZE_T size; - INTERNAL_SIZE_T size_flags; - size_t i; - - /* Ensure initialization/consolidation */ - if (have_fastchunks(av)) malloc_consolidate(av); - - /* compute array length, if needed */ - if (chunks != 0) { - if (n_elements == 0) - return chunks; /* nothing to do */ - marray = chunks; - array_size = 0; - } - else { - /* if empty req, must still return chunk representing empty array */ - if (n_elements == 0) - return (Void_t**) _int_malloc(av, 0); - marray = 0; - array_size = request2size(n_elements * (sizeof(Void_t*))); - } - - /* compute total element size */ - if (opts & 0x1) { /* all-same-size */ - element_size = request2size(*sizes); - contents_size = n_elements * element_size; - } - else { /* add up all the sizes */ - element_size = 0; - contents_size = 0; - for (i = 0; i != n_elements; ++i) - contents_size += request2size(sizes[i]); - } - - /* subtract out alignment bytes from total to minimize overallocation */ - size = contents_size + array_size - MALLOC_ALIGN_MASK; - - /* - Allocate the aggregate chunk. - But first disable mmap so malloc won't use it, since - we would not be able to later free/realloc space internal - to a segregated mmap region. - */ - mmx = mp_.n_mmaps_max; /* disable mmap */ - mp_.n_mmaps_max = 0; - mem = _int_malloc(av, size); - mp_.n_mmaps_max = mmx; /* reset mmap */ - if (mem == 0) - return 0; - - p = mem2chunk(mem); - assert(!chunk_is_mmapped(p)); - remainder_size = chunksize(p); - - if (opts & 0x2) { /* optionally clear the elements */ - MALLOC_ZERO(mem, remainder_size - SIZE_SZ - array_size); - } - - size_flags = PREV_INUSE | (av != &main_arena ? 
NON_MAIN_ARENA : 0); - - /* If not provided, allocate the pointer array as final part of chunk */ - if (marray == 0) { - array_chunk = chunk_at_offset(p, contents_size); - marray = (Void_t**) (chunk2mem(array_chunk)); - set_head(array_chunk, (remainder_size - contents_size) | size_flags); - remainder_size = contents_size; - } - - /* split out elements */ - for (i = 0; ; ++i) { - marray[i] = chunk2mem(p); - if (i != n_elements-1) { - if (element_size != 0) - size = element_size; - else - size = request2size(sizes[i]); - remainder_size -= size; - set_head(p, size | size_flags); - p = chunk_at_offset(p, size); - } - else { /* the final element absorbs any overallocation slop */ - set_head(p, remainder_size | size_flags); - break; - } - } - -#if MALLOC_DEBUG - if (marray != chunks) { - /* final element must have exactly exhausted chunk */ - if (element_size != 0) - assert(remainder_size == element_size); - else - assert(remainder_size == request2size(sizes[i])); - check_inuse_chunk(av, mem2chunk(marray)); - } - - for (i = 0; i != n_elements; ++i) - check_inuse_chunk(av, mem2chunk(marray[i])); -#endif - - return marray; -} - - -/* - ------------------------------ valloc ------------------------------ -*/ - -Void_t* -#if __STD_C -_int_valloc(mstate av, size_t bytes) -#else -_int_valloc(av, bytes) mstate av; size_t bytes; -#endif -{ - /* Ensure initialization/consolidation */ - if (have_fastchunks(av)) malloc_consolidate(av); - return _int_memalign(av, mp_.pagesize, bytes); -} - -/* - ------------------------------ pvalloc ------------------------------ -*/ - - -Void_t* -#if __STD_C -_int_pvalloc(mstate av, size_t bytes) -#else -_int_pvalloc(av, bytes) mstate av, size_t bytes; -#endif -{ - size_t pagesz; - - /* Ensure initialization/consolidation */ - if (have_fastchunks(av)) malloc_consolidate(av); - pagesz = mp_.pagesize; - return _int_memalign(av, pagesz, (bytes + pagesz - 1) & ~(pagesz - 1)); -} - - -/* - ------------------------------ malloc_trim ------------------------------ -*/ - -#if __STD_C -int mTRIm(size_t pad) -#else -int mTRIm(pad) size_t pad; -#endif -{ - mstate av = &main_arena; /* already locked */ - - /* Ensure initialization/consolidation */ - malloc_consolidate(av); - -#ifndef MORECORE_CANNOT_TRIM - return sYSTRIm(pad, av); -#else - return 0; -#endif -} - - -/* - ------------------------- malloc_usable_size ------------------------- -*/ - -#if __STD_C -size_t mUSABLe(Void_t* mem) -#else -size_t mUSABLe(mem) Void_t* mem; -#endif -{ - mchunkptr p; - if (mem != 0) { - p = mem2chunk(mem); - if (chunk_is_mmapped(p)) - return chunksize(p) - 2*SIZE_SZ; - else if (inuse(p)) - return chunksize(p) - SIZE_SZ; - } - return 0; -} - -/* - ---------------------- internal mallinfo ----------------------------- -*/ - -void _int_get_arena_info(mstate av, struct malloc_arena_info *mai) -{ - size_t i; - mbinptr b; - mchunkptr p; - size_t binavail = 0; - size_t fastavail = 0; - int nbinblocks = 0; - int nfastblocks = 0; - - (void)mutex_lock(&av->mutex); - - /* Ensure initialization */ - if (av->top == 0) malloc_consolidate(av); - - check_malloc_state(av); - - /* traverse fastbins */ - for (i = 0; i < NFASTBINS; ++i) { - for (p = av->fastbins[i]; p != 0; p = p->fd) { - ++nfastblocks; - fastavail += chunksize(p); - } - } - - /* traverse regular bins */ - for (i = 1; i < NBINS; ++i) { - b = bin_at(av, i); - for (p = last(b); p != b; p = p->bk) { - ++nbinblocks; - binavail += chunksize(p); - } - } - - mai->nfastblocks = nfastblocks; - mai->nbinblocks = nbinblocks; - mai->fastavail = fastavail; - 
mai->binavail = binavail; - mai->top_size = chunksize(av->top); - mai->system_mem = av->system_mem; - mai->max_system_mem = av->max_system_mem; - mai->stat_lock_direct = av->stat_lock_direct; - mai->stat_lock_loop = av->stat_lock_loop; - mai->stat_lock_wait = av->stat_lock_wait; - - (void)mutex_unlock(&av->mutex); -} - -void -_int_get_global_info (struct malloc_global_info *mgi) -{ - mgi->n_mmaps = mp_.n_mmaps; - mgi->max_n_mmaps = mp_.max_n_mmaps; - mgi->mmapped_mem = mp_.mmapped_mem; - mgi->max_mmapped_mem = mp_.max_mmapped_mem; - mgi->max_total_mem = mp_.max_total_mem; -#if THREAD_STATS - mgi->stat_n_heaps = stat_n_heaps; -#else - mgi->stat_n_heaps = 0; -#endif -} - -/* - ------------------------------ malloc_stats ------------------------------ -*/ - -/* Now in separate file, malloc-stats.c. */ - -/* - ------------------------------ mallopt ------------------------------ -*/ - -#if __STD_C -int mALLOPt(int param_number, int value) -#else -int mALLOPt(param_number, value) int param_number; int value; -#endif -{ - mstate av = &main_arena; - int res = 1; - - if(__malloc_initialized < 0) - ptmalloc_init (); - (void)mutex_lock(&av->mutex); - /* Ensure initialization/consolidation */ - malloc_consolidate(av); - - switch(param_number) { - case M_MXFAST: - if (value >= 0 && value <= MAX_FAST_SIZE) { - set_max_fast(av, value); - } - else - res = 0; - break; - - case M_TRIM_THRESHOLD: - mp_.trim_threshold = value; - break; - - case M_TOP_PAD: - mp_.top_pad = value; - break; - - case M_MMAP_THRESHOLD: -#if USE_ARENAS - /* Forbid setting the threshold too high. */ - if((unsigned long)value > HEAP_MAX_SIZE/2) - res = 0; - else -#endif - mp_.mmap_threshold = value; - break; - - case M_MMAP_MAX: -#if !HAVE_MMAP - if (value != 0) - res = 0; - else -#endif - mp_.n_mmaps_max = value; - break; - - case M_CHECK_ACTION: - check_action = value; - break; - } - (void)mutex_unlock(&av->mutex); - return res; -} - - -/* - -------------------- Alternative MORECORE functions -------------------- -*/ - - -/* - General Requirements for MORECORE. - - The MORECORE function must have the following properties: - - If MORECORE_CONTIGUOUS is false: - - * MORECORE must allocate in multiples of pagesize. It will - only be called with arguments that are multiples of pagesize. - - * MORECORE(0) must return an address that is at least - MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.) - - else (i.e. If MORECORE_CONTIGUOUS is true): - - * Consecutive calls to MORECORE with positive arguments - return increasing addresses, indicating that space has been - contiguously extended. - - * MORECORE need not allocate in multiples of pagesize. - Calls to MORECORE need not have args of multiples of pagesize. - - * MORECORE need not page-align. - - In either case: - - * MORECORE may allocate more memory than requested. (Or even less, - but this will generally result in a malloc failure.) - - * MORECORE must not allocate memory when given argument zero, but - instead return one past the end address of memory from previous - nonzero call. This malloc does NOT call MORECORE(0) - until at least one call with positive arguments is made, so - the initial value returned is not important. - - * Even though consecutive calls to MORECORE need not return contiguous - addresses, it must be OK for malloc'ed chunks to span multiple - regions in those cases where they do happen to be contiguous. - - * MORECORE need not handle negative arguments -- it may instead - just return MORECORE_FAILURE when given negative arguments. 
- Negative arguments are always multiples of pagesize. MORECORE - must not misinterpret negative args as large positive unsigned - args. You can suppress all such calls from even occurring by defining - MORECORE_CANNOT_TRIM, - - There is some variation across systems about the type of the - argument to sbrk/MORECORE. If size_t is unsigned, then it cannot - actually be size_t, because sbrk supports negative args, so it is - normally the signed type of the same width as size_t (sometimes - declared as "intptr_t", and sometimes "ptrdiff_t"). It doesn't much - matter though. Internally, we use "long" as arguments, which should - work across all reasonable possibilities. - - Additionally, if MORECORE ever returns failure for a positive - request, and HAVE_MMAP is true, then mmap is used as a noncontiguous - system allocator. This is a useful backup strategy for systems with - holes in address spaces -- in this case sbrk cannot contiguously - expand the heap, but mmap may be able to map noncontiguous space. - - If you'd like mmap to ALWAYS be used, you can define MORECORE to be - a function that always returns MORECORE_FAILURE. - - If you are using this malloc with something other than sbrk (or its - emulation) to supply memory regions, you probably want to set - MORECORE_CONTIGUOUS as false. As an example, here is a custom - allocator kindly contributed for pre-OSX macOS. It uses virtually - but not necessarily physically contiguous non-paged memory (locked - in, present and won't get swapped out). You can use it by - uncommenting this section, adding some #includes, and setting up the - appropriate defines above: - - #define MORECORE osMoreCore - #define MORECORE_CONTIGUOUS 0 - - There is also a shutdown routine that should somehow be called for - cleanup upon program exit. - - #define MAX_POOL_ENTRIES 100 - #define MINIMUM_MORECORE_SIZE (64 * 1024) - static int next_os_pool; - void *our_os_pools[MAX_POOL_ENTRIES]; - - void *osMoreCore(int size) - { - void *ptr = 0; - static void *sbrk_top = 0; - - if (size > 0) - { - if (size < MINIMUM_MORECORE_SIZE) - size = MINIMUM_MORECORE_SIZE; - if (CurrentExecutionLevel() == kTaskLevel) - ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); - if (ptr == 0) - { - return (void *) MORECORE_FAILURE; - } - // save ptrs so they can be freed during cleanup - our_os_pools[next_os_pool] = ptr; - next_os_pool++; - ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); - sbrk_top = (char *) ptr + size; - return ptr; - } - else if (size < 0) - { - // we don't currently support shrink behavior - return (void *) MORECORE_FAILURE; - } - else - { - return sbrk_top; - } - } - - // cleanup any allocated memory pools - // called as last thing before shutting down driver - - void osCleanupMem(void) - { - void **ptr; - - for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) - if (*ptr) - { - PoolDeallocate(*ptr); - *ptr = 0; - } - } - -*/ - -/* OMPI: Need to expose our own posix_memalign, or the wrong one will - be used */ -# include - -/* We need a wrapper function for one of the additions of POSIX. */ -int -posix_memalign (void **memptr, size_t alignment, size_t size) -{ - void *mem; - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - __malloc_ptr_t (*hook) __MALLOC_PMT ((size_t, size_t, - __const __malloc_ptr_t)) = - __memalign_hook; -#endif - - /* Test whether the SIZE argument is valid. It must be a power of - two multiple of sizeof (void *). 
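Stated from the caller's side, that requirement (and the equivalent check performed just below) accepts exactly the alignments that are powers of two and multiples of sizeof (void *) -- on common platforms, the powers of two from sizeof (void *) upwards. A small self-contained sketch, with the helper name invented for illustration:

    #include <errno.h>
    #include <stddef.h>

    /* Returns 0 if 'alignment' would be acceptable to posix_memalign:
       a power of two that is also a multiple of sizeof (void *). */
    static int alignment_ok(size_t alignment)
    {
        if (alignment == 0
            || alignment % sizeof (void *) != 0
            || (alignment & (alignment - 1)) != 0)
            return EINVAL;
        return 0;
    }

    /* On a typical 64-bit system: 8, 16, 64 and 4096 pass; 24 fails
       (not a power of two) and 4 fails (smaller than sizeof (void *)). */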
*/ - if (alignment % sizeof (void *) != 0 - || !powerof2 (alignment / sizeof (void *)) != 0 - || alignment == 0) - return EINVAL; - - /* OMPI change: the hook is us -- don't call the hook */ -#if 0 - /* Call the hook here, so that caller is posix_memalign's caller - and not posix_memalign itself. */ - if (hook != NULL) - mem = (*hook)(alignment, size, RETURN_ADDRESS (0)); - else - mem = public_mEMALIGn (alignment, size); -#else - mem = public_mEMALIGn (alignment, size); -#endif - - if (mem != NULL) { - *memptr = mem; - return 0; - } - - return ENOMEM; -} - -#ifdef _LIBC -weak_alias (__posix_memalign, posix_memalign) - -strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc) -strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree) -strong_alias (__libc_free, __free) strong_alias (__libc_free, free) -strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc) -strong_alias (__libc_memalign, __memalign) -weak_alias (__libc_memalign, memalign) -strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc) -strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc) -strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc) -strong_alias (__libc_mallinfo, __mallinfo) -weak_alias (__libc_mallinfo, mallinfo) -strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt) - -weak_alias (__malloc_usable_size, malloc_usable_size) -weak_alias (__malloc_trim, malloc_trim) -weak_alias (__malloc_get_state, malloc_get_state) -weak_alias (__malloc_set_state, malloc_set_state) - -#endif /* _LIBC */ - -/* ------------------------------------------------------------ -History: - -[see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc] - -*/ -/* - * Local variables: - * c-basic-offset: 2 - * End: - */ diff --git a/opal/mca/memory/linux/malloc.h b/opal/mca/memory/linux/malloc.h deleted file mode 100644 index bd0430e3b2..0000000000 --- a/opal/mca/memory/linux/malloc.h +++ /dev/null @@ -1,300 +0,0 @@ -/* Prototypes and definition for malloc implementation. - Copyright (C) 1996,97,99,2000,2002,2003,2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#ifndef _MALLOC_H -#define _MALLOC_H 1 - -/* add the opal config header file for the OPAL_DECLSPEC */ -#include "opal_config.h" - - -#ifdef _LIBC -#include -#endif - -/* - $Id: malloc.h,v 1.7 2004/08/08 12:34:57 wg Exp $ - `ptmalloc2', a malloc implementation for multiple threads without - lock contention, by Wolfram Gloger . - - VERSION 2.7.0 - - This work is mainly derived from malloc-2.7.0 by Doug Lea - , which is available from: - - ftp://gee.cs.oswego.edu/pub/misc/malloc.c - - This trimmed-down header file only provides function prototypes and - the exported data structures. 
For more detailed function - descriptions and compile-time options, see the source file - `malloc.c'. -*/ - -#if defined(__STDC__) || defined (__cplusplus) -# include -# define __malloc_ptr_t void * -#else -# undef size_t -# define size_t unsigned int -# undef ptrdiff_t -# define ptrdiff_t int -# define __malloc_ptr_t char * -#endif - -#ifdef _LIBC -/* Used by GNU libc internals. */ -# define __malloc_size_t size_t -# define __malloc_ptrdiff_t ptrdiff_t -#elif !defined __attribute_malloc__ -# define __attribute_malloc__ -#endif - -#ifdef __GNUC__ - -/* GCC can always grok prototypes. For C++ programs we add throw() - to help it optimize the function calls. But this works only with - gcc 2.8.x and egcs. */ -# if defined __cplusplus && (__GNUC__ >= 3 || __GNUC_MINOR__ >= 8) -# define __THROW throw () -# else -#ifdef __THROW -# undef __THROW -#endif -# define __THROW -# endif -# define __MALLOC_P(args) args __THROW -/* This macro will be used for functions which might take C++ callback - functions. */ -# define __MALLOC_PMT(args) args - -#else /* Not GCC. */ - -# define __THROW - -# if (defined __STDC__ && __STDC__) || defined __cplusplus - -# define __MALLOC_P(args) args -# define __MALLOC_PMT(args) args - -# ifndef __const -# define __const const -# endif - -# else /* Not ANSI C or C++. */ - -# define __MALLOC_P(args) () /* No prototypes. */ -# define __MALLOC_PMT(args) () - -# ifndef __const -# define __const -# endif - -# endif /* ANSI C or C++. */ - -#endif /* GCC. */ - -#ifndef NULL -# ifdef __cplusplus -# define NULL 0 -# else -# define NULL ((__malloc_ptr_t) 0) -# endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* Nonzero if the malloc is already initialized. */ -#ifdef _LIBC -/* In the GNU libc we rename the global variable - `__malloc_initialized' to `__libc_malloc_initialized'. */ -# define __malloc_initialized __libc_malloc_initialized -#endif -extern int __malloc_initialized; - -/* Allocate SIZE bytes of memory. */ -OPAL_DECLSPEC extern __malloc_ptr_t malloc __MALLOC_P ((size_t __size)) __attribute_malloc__; - -/* Allocate NMEMB elements of SIZE bytes each, all initialized to 0. */ -OPAL_DECLSPEC extern __malloc_ptr_t calloc __MALLOC_P ((size_t __nmemb, size_t __size)) - __attribute_malloc__; - -/* Re-allocate the previously allocated block in __ptr, making the new - block SIZE bytes long. */ -OPAL_DECLSPEC extern __malloc_ptr_t realloc __MALLOC_P ((__malloc_ptr_t __ptr, - size_t __size)) - __attribute_malloc__; - -/* Free a block allocated by `malloc', `realloc' or `calloc'. */ -OPAL_DECLSPEC extern void free __MALLOC_P ((__malloc_ptr_t __ptr)); - -/* Free a block allocated by `calloc'. */ -OPAL_DECLSPEC extern void cfree __MALLOC_P ((__malloc_ptr_t __ptr)); - -/* Allocate SIZE bytes allocated to ALIGNMENT bytes. */ -OPAL_DECLSPEC extern __malloc_ptr_t memalign __MALLOC_P ((size_t __alignment, size_t __size)); - -/* Allocate SIZE bytes on a page boundary. */ -OPAL_DECLSPEC extern __malloc_ptr_t valloc __MALLOC_P ((size_t __size)) __attribute_malloc__; - -/* Equivalent to valloc(minimum-page-that-holds(n)), that is, round up - __size to nearest pagesize. */ -OPAL_DECLSPEC extern __malloc_ptr_t pvalloc __MALLOC_P ((size_t __size)) - __attribute_malloc__; - -/* Underlying allocation function; successive calls should return - contiguous pieces of memory. */ -OPAL_DECLSPEC extern __malloc_ptr_t (*__morecore) __MALLOC_PMT ((ptrdiff_t __size)); - -/* Default value of `__morecore'. 
*/ -OPAL_DECLSPEC extern __malloc_ptr_t __default_morecore __MALLOC_P ((ptrdiff_t __size)) - __attribute_malloc__; - -/* SVID2/XPG mallinfo structure */ - -struct mallinfo { - int arena; /* non-mmapped space allocated from system */ - int ordblks; /* number of free chunks */ - int smblks; /* number of fastbin blocks */ - int hblks; /* number of mmapped regions */ - int hblkhd; /* space in mmapped regions */ - int usmblks; /* maximum total allocated space */ - int fsmblks; /* space available in freed fastbin blocks */ - int uordblks; /* total allocated space */ - int fordblks; /* total free space */ - int keepcost; /* top-most, releasable (via malloc_trim) space */ -}; - -/* Returns a copy of the updated current mallinfo. */ -OPAL_DECLSPEC extern struct mallinfo mallinfo __MALLOC_P ((void)); - -/* SVID2/XPG mallopt options */ -#ifndef M_MXFAST -# define M_MXFAST 1 /* maximum request size for "fastbins" */ -#endif -#ifndef M_NLBLKS -# define M_NLBLKS 2 /* UNUSED in this malloc */ -#endif -#ifndef M_GRAIN -# define M_GRAIN 3 /* UNUSED in this malloc */ -#endif -#ifndef M_KEEP -# define M_KEEP 4 /* UNUSED in this malloc */ -#endif - -/* mallopt options that actually do something */ -#define M_TRIM_THRESHOLD -1 -#define M_TOP_PAD -2 -#define M_MMAP_THRESHOLD -3 -#define M_MMAP_MAX -4 -#define M_CHECK_ACTION -5 - -/* General SVID/XPG interface to tunable parameters. */ -OPAL_DECLSPEC extern int mallopt __MALLOC_P ((int __param, int __val)); - -/* Release all but __pad bytes of freed top-most memory back to the - system. Return 1 if successful, else 0. */ -OPAL_DECLSPEC extern int malloc_trim __MALLOC_P ((size_t __pad)); - -/* Report the number of usable allocated bytes associated with allocated - chunk __ptr. */ -OPAL_DECLSPEC extern size_t malloc_usable_size __MALLOC_P ((__malloc_ptr_t __ptr)); - -/* Prints brief summary statistics on stderr. */ -OPAL_DECLSPEC extern void malloc_stats __MALLOC_P ((void)); - -/* Record the state of all malloc variables in an opaque data structure. */ -OPAL_DECLSPEC extern __malloc_ptr_t malloc_get_state __MALLOC_P ((void)); - -/* Restore the state of all malloc variables from data obtained with - malloc_get_state(). */ -OPAL_DECLSPEC extern int malloc_set_state __MALLOC_P ((__malloc_ptr_t __ptr)); - -/* Called once when malloc is initialized; redefining this variable in - the application provides the preferred way to set up the hook - pointers. */ -OPAL_DECLSPEC extern void (*__malloc_initialize_hook) __MALLOC_PMT ((void)); -/* Hooks for debugging and user-defined versions. */ -OPAL_DECLSPEC extern void (*__free_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr, - __const __malloc_ptr_t)); -OPAL_DECLSPEC extern __malloc_ptr_t (*__malloc_hook) __MALLOC_PMT ((size_t __size, - __const __malloc_ptr_t)); -OPAL_DECLSPEC extern __malloc_ptr_t (*__realloc_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr, - size_t __size, - __const __malloc_ptr_t)); -OPAL_DECLSPEC extern __malloc_ptr_t (*__memalign_hook) __MALLOC_PMT ((size_t __alignment, - size_t __size, - __const __malloc_ptr_t)); -OPAL_DECLSPEC extern void (*__after_morecore_hook) __MALLOC_PMT ((void)); - -/* Activate a standard set of debugging hooks. */ -OPAL_DECLSPEC extern void __malloc_check_init __MALLOC_P ((void)); - -/* Internal routines, operating on "arenas". 
*/ -struct malloc_state; -typedef struct malloc_state *mstate; - -OPAL_DECLSPEC extern mstate _int_new_arena __MALLOC_P ((size_t __ini_size)); -OPAL_DECLSPEC extern __malloc_ptr_t _int_malloc __MALLOC_P ((mstate __m, size_t __size)); -OPAL_DECLSPEC extern void _int_free __MALLOC_P ((mstate __m, __malloc_ptr_t __ptr)); -OPAL_DECLSPEC extern __malloc_ptr_t _int_realloc __MALLOC_P ((mstate __m, - __malloc_ptr_t __ptr, - size_t __size)); -OPAL_DECLSPEC extern __malloc_ptr_t _int_memalign __MALLOC_P ((mstate __m, size_t __alignment, - size_t __size)); -/* Return arena number __n, or 0 if out of bounds. Arena 0 is the - main arena. */ -OPAL_DECLSPEC extern mstate _int_get_arena __MALLOC_P ((int __n)); - -/* Implementation-specific mallinfo. More detailed than mallinfo, and - also works for size_t wider than int. */ -struct malloc_arena_info { - int nfastblocks; /* number of freed "fastchunks" */ - int nbinblocks; /* number of available chunks in bins */ - size_t fastavail; /* total space in freed "fastchunks" */ - size_t binavail; /* total space in binned chunks */ - size_t top_size; /* size of top chunk */ - size_t system_mem; /* bytes allocated from system in this arena */ - size_t max_system_mem; /* max. bytes allocated from system */ - /* Statistics for locking. Only kept if THREAD_STATS is defined - at compile time. */ - long stat_lock_direct, stat_lock_loop, stat_lock_wait; -}; - -struct malloc_global_info { - int n_mmaps; /* number of mmap'ed chunks */ - int max_n_mmaps; /* max. number of mmap'ed chunks reached */ - size_t mmapped_mem; /* total bytes allocated in mmap'ed chunks */ - size_t max_mmapped_mem; /* max. bytes allocated in mmap'ed chunks */ - size_t max_total_mem; /* only kept for NO_THREADS */ - int stat_n_heaps; /* only kept if THREAD_STATS is defined */ -}; - -OPAL_DECLSPEC extern void _int_get_arena_info __MALLOC_P ((mstate __m, - struct malloc_arena_info *__ma)); -OPAL_DECLSPEC extern void _int_get_global_info __MALLOC_P ((struct malloc_global_info *__m)); - -OPAL_DECLSPEC extern int posix_memalign (void **memptr, size_t alignment, size_t size); - -#ifdef __cplusplus -} /* end of extern "C" */ -#endif - -#endif /* malloc.h */ diff --git a/opal/mca/memory/linux/memory_linux.h b/opal/mca/memory/linux/memory_linux.h deleted file mode 100644 index 9334f3ae82..0000000000 --- a/opal/mca/memory/linux/memory_linux.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_MEMORY_LINUX_H -#define OPAL_MEMORY_LINUX_H - -#include "opal_config.h" - -#include "opal/mca/memory/memory.h" - -BEGIN_C_DECLS - -/* Component structure */ - -typedef struct opal_memory_linux_component_t { - opal_memory_base_component_2_0_0_t super; - - /* Component data */ - int verbose_level; - int enable_ummunotify; - int enable_ptmalloc2; - -#if MEMORY_LINUX_UMMUNOTIFY - /* Ummunotify-specific data */ - int ummunotify_fd; -#endif - -#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED - int use_memalign; - size_t memalign_threshold; -#endif - -#if MEMORY_LINUX_PTMALLOC2 - /* Ptmalloc2-specific data. Note that these variables are all marked as volatile. - * This is needed because of what may be a buggy optimization in the GCC 4.9.2 - * compilers and later. These variables are used in different code paths which the - * compiler is not aware of. 
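Concretely, the flags are written inside the interposed allocator entry points but read back by probe code that only triggers those writes indirectly, through ordinary malloc()/free() calls; without volatile, the compiler may assume the flag cannot have changed across such a call and cache its value. A stripped-down sketch of that probe pattern (the real probe lives in memory_linux_ptmalloc2.c; the helper name here is invented):

    #include <stdbool.h>
    #include <stdlib.h>

    static volatile bool free_invoked;   /* set to true by the interposed free() */

    /* Check whether our interposed allocator is really the one being called:
       clear the flag, force a malloc/free pair, then re-read the flag. */
    static bool our_hooks_are_live(void)
    {
        void *p;

        free_invoked = false;
        p = malloc(64);
        if (p == NULL)
            return false;
        free(p);               /* sets free_invoked only if our free() ran */
        return free_invoked;   /* volatile forces an actual re-read here */
    }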
- * Extra details located at these URLs: - * Open MPI User List: http://www.open-mpi.org/community/lists/users/2015/06/27039.php - * Bug Discussion: https://github.com/open-mpi/ompi/pull/625 - * GCC Discussion: https://gcc.gnu.org/ml/gcc-bugs/2015-06/msg00757.html - */ - volatile bool free_invoked; - volatile bool malloc_invoked; - volatile bool realloc_invoked; - volatile bool memalign_invoked; - volatile bool munmap_invoked; -#endif -} opal_memory_linux_component_t; - -/* memory_linux_component.c */ - -extern opal_memory_linux_component_t mca_memory_linux_component; - - -#if MEMORY_LINUX_UMMUNOTIFY -/* memory_linux_ummunotify.c */ -int opal_memory_linux_ummunotify_open(void); -int opal_memory_linux_ummunotify_close(void); -#endif /* MEMORY_LINUX_UMMUNOTIFY */ - -#if MEMORY_LINUX_PTMALLOC2 -/* memory_linux_ptmalloc2.c */ -int opal_memory_linux_ptmalloc2_open(void); -int opal_memory_linux_ptmalloc2_close(void); - -/* memory_linux_munmap.c */ -OPAL_DECLSPEC int opal_memory_linux_free_ptmalloc2_munmap(void *start, size_t length, int from_alloc); -OPAL_DECLSPEC int munmap(void* addr, size_t len); -#endif /* !MEMORY_LINUX_PTMALLOC2 */ - -#if MEMORY_LINUX_HAVE_MALLOC_HOOK_SUPPORT -OPAL_DECLSPEC void opal_memory_linux_malloc_init_hook(void); -#endif /* MEMORY_LINUX_HAVE_MALLOC_HOOK_SUPPORT */ - -#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED -OPAL_DECLSPEC void opal_memory_linux_malloc_set_alignment(int use_memalign, size_t memalign_threshold); -#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ - -extern bool opal_memory_linux_opened; - -END_C_DECLS - -#endif diff --git a/opal/mca/memory/linux/memory_linux_component.c b/opal/mca/memory/linux/memory_linux_component.c deleted file mode 100644 index 323ab19134..0000000000 --- a/opal/mca/memory/linux/memory_linux_component.c +++ /dev/null @@ -1,407 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* This component basically fronts two different memory management - schemes: the Linux "ummunotify" kernel module and hooking in a - substitute ptmalloc2 allocator. Both of these mechanisms are - unified under a single component because the "memory" framework - both only allows one component to be selected, and that one - component must be compile-time linked into libopen-pal. Hence, if - we want to try to use either one of these mechanisms, we have to - have them both in a single component. - - When using ptmalloc2, the goal of this component is to wholly - replace the underlying allocator with our internal ptmalloc2 - allocator. See the file README-open-mpi.txt for details of how it - works. 
- - When using ummunotify, we can probe to find out when the MMU map - has been changed (i.e., memory has been released back to the OS). */ - -#include "opal_config.h" - -#if HAVE_MALLOC_H -#include -#endif - -#include "opal/constants.h" -#include "opal/mca/base/mca_base_var.h" -#include "opal/mca/memory/memory.h" -#include "opal/mca/memory/base/empty.h" -#include "opal/memoryhooks/memory.h" -#include "opal/util/output.h" -#include "opal/util/show_help.h" - -#include "opal/mca/memory/linux/memory_linux.h" -#undef opal_memory_changed -#include "opal/mca/memory/linux/public.h" - -static int linux_open(void); -static int linux_close(void); -static int linux_register(void); -static int linux_query(int *); - -#if MEMORY_LINUX_UMMUNOTIFY -static bool ummunotify_opened = false; -#endif -#if MEMORY_LINUX_PTMALLOC2 -static bool ptmalloc2_opened = false; -#endif - -bool opal_memory_linux_disable = false; -static int mca_memory_linux_priority; - -bool opal_memory_linux_opened = false; - -opal_memory_linux_component_t mca_memory_linux_component = { - /* First, the opal_memory_base_component_2_0_0_t */ - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - .memoryc_version = { - OPAL_MEMORY_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "linux", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = linux_open, - .mca_close_component = linux_close, - .mca_register_component_params = linux_register, - }, - .memoryc_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - /* Memory framework functions. These function pointer values - are replaced by memory_linux_ummunotify.c at run time if we - end up using ummunotify support. */ - .memoryc_init_hook = opal_memory_linux_malloc_init_hook, - .memoryc_query = linux_query, - .memoryc_register = opal_memory_base_component_register_empty, - .memoryc_deregister = opal_memory_base_component_deregister_empty, -#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED - .memoryc_set_alignment = opal_memory_linux_malloc_set_alignment, -#else - .memoryc_set_alignment = opal_memory_base_component_set_alignment_empty, -#endif - }, - - /* Component-specific data, filled in later (compiler will 0/NULL - it out) */ -}; - -static bool ptmalloc2_available = MEMORY_LINUX_PTMALLOC2; -static bool ummunotify_available = MEMORY_LINUX_UMMUNOTIFY; - -#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED - -static void *(*prev_malloc_hook)(size_t, const void *); - -/* This is a memory allocator hook. The purpose of this is to make - * every malloc aligned. - * There two basic cases here: - * - * 1. Memory manager for Open MPI is enabled. Then memalign below will - * be overridden by __memalign_hook which is set to - * opal_memory_linux_memalign_hook. Thus, _malloc_hook is going to - * use opal_memory_linux_memalign_hook. - * - * 2. No memory manager support. The memalign below is just regular glibc - * memalign which will be called through __malloc_hook instead of malloc. 
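The chaining itself follows the classic glibc __malloc_hook protocol (declared in the malloc.h removed above, and deprecated in modern glibc): save the previous hook, install your own, and temporarily restore the saved one while calling back into malloc so the hook does not recurse into itself. A minimal standalone illustration of that protocol -- independent of the alignment logic used by this component, and not thread-safe:

    #include <malloc.h>   /* __malloc_hook: glibc-specific, deprecated */
    #include <stdio.h>
    #include <stdlib.h>

    static void *(*saved_malloc_hook)(size_t, const void *);

    static void *logging_malloc_hook(size_t sz, const void *caller)
    {
        void *p;

        __malloc_hook = saved_malloc_hook;        /* step out of the way */
        p = malloc(sz);                           /* real allocation */
        fprintf(stderr, "malloc(%zu) -> %p from %p\n", sz, p, (void *) caller);
        saved_malloc_hook = __malloc_hook;        /* glibc may have swapped it */
        __malloc_hook = logging_malloc_hook;      /* re-install ourselves */
        return p;
    }

    static void install_logging_hook(void)
    {
        saved_malloc_hook = __malloc_hook;
        __malloc_hook = logging_malloc_hook;
    }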
- */ -static void *_opal_memory_linux_malloc_align_hook(size_t sz, const void* caller); - -static mca_base_var_enum_value_t align_values[] = { - {-1, "disabled"}, - {0, "0"}, - {32, "32"}, - {64, "64"}, - {0, NULL} -}; -#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ - - -/* - * Register MCA params - */ -static int linux_register(void) -{ -#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED - mca_base_var_enum_t *new_enum; -#endif - int ret; - /* Information only */ - ret = mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, - "ptmalloc2_available", - "Whether ptmalloc2 support is included in Open MPI or not (1 = yes, 0 = no)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_CONSTANT, - &ptmalloc2_available); - if (0 > ret) { - return ret; - } - - ret = mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, - "ummunotify_available", - "Whether ummunotify support is included in Open MPI or not (1 = yes, 0 = no)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_CONSTANT, - &ummunotify_available); - if (0 > ret) { - return ret; - } - - /* Allow user to manually enable/disable */ - mca_memory_linux_component.enable_ptmalloc2 = -1; - ret = mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, - "ptmalloc2_enable", - "Whether to enable ptmalloc2 support or not (negative = try to enable, but continue even if support is not available, 0 = do not enable support, positive = try to enable and fail if support is not available)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_memory_linux_component.enable_ptmalloc2); - if (0 > ret) { - return ret; - } - - mca_memory_linux_component.enable_ummunotify = -1; - ret = mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, - "ummunotify_enable", - "Whether to enable ummunotify support or not (negative = try to enable, but continue even if support is not available, 0 = do not enable support, positive = try to enable and fail if support is not available)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_memory_linux_component.enable_ummunotify); - if (0 > ret) { - return ret; - } - - opal_memory_linux_disable = false; - (void) mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, - "disable", - "If this MCA parameter is set to 1 **VIA ENVIRONMENT VARIABLE ONLY*** (this MCA parameter *CANNOT* be set in a file or on the mpirun command line!), this component will be disabled and will not attempt to use either ummunotify or memory hook support", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_ENVIRONMENT_ONLY, - OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_READONLY, - &opal_memory_linux_disable); - if (0 > ret) { - return ret; - } - -#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED - (void)mca_base_var_enum_create("memory_linux_memalign", align_values, &new_enum); - mca_memory_linux_component.use_memalign = -1; - ret = mca_base_component_var_register(&mca_memory_linux_component.super.memoryc_version, - "memalign", - "[64 | 32 | 0] - Enable memory alignment for all malloc calls.", - MCA_BASE_VAR_TYPE_INT, - new_enum, - 0, - 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_memory_linux_component.use_memalign); - OBJ_RELEASE(new_enum); - if (0 > ret) { - return ret; - } - - mca_memory_linux_component.memalign_threshold = 12288; - ret = 
mca_base_component_var_register(&mca_memory_linux_component.super.memoryc_version, - "memalign_threshold", - "Allocating memory more than memory_linux_memalign_threshold" - "bytes will automatically be aligned to the value of memory_linux_memalign bytes." - "(default: 12288)", - MCA_BASE_VAR_TYPE_SIZE_T, - NULL, - 0, - 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_memory_linux_component.memalign_threshold); - if (0 > ret) { - return ret; - } - - mca_memory_linux_priority = 50; - ret = mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, - "priority", "Priority of the linux memory hook component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, &mca_memory_linux_priority); - if (0 > ret) { - return ret; - } -#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ - - return (0 > ret) ? ret : OPAL_SUCCESS; -} - -static int linux_query (int *priority) -{ - *priority = mca_memory_linux_priority; - return OPAL_SUCCESS; -} - -static int linux_open(void) -{ - const int *verbose = NULL; - int i; - - i = mca_base_var_find("opal", "memory", NULL, "base_verbose"); - mca_base_var_get_value(i, &verbose, NULL, NULL); - mca_memory_linux_component.verbose_level = verbose ? verbose[0] : 0; - - /* Try initializing ummunotify first; if that fails, try - ptmalloc2. */ -#if MEMORY_LINUX_UMMUNOTIFY - if (mca_memory_linux_component.enable_ummunotify) { - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: attempting to initialize ummunotify support"); - } - if (OPAL_SUCCESS == opal_memory_linux_ummunotify_open()) { - ummunotify_opened = true; - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: ummunotify successfully initialized; we'll use that"); - } - goto done; - } - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: ummunotify failed to initialize"); - } - } -#endif - -#if MEMORY_LINUX_PTMALLOC2 - if (mca_memory_linux_component.enable_ptmalloc2) { - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: attempting to initialize ptmalloc2 support"); - } - if (OPAL_SUCCESS == opal_memory_linux_ptmalloc2_open()) { - ptmalloc2_opened = true; - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: ptmalloc2 successfully initialized; we'll use that"); - } - goto done; - } - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: ptmalloc2 failed to initialize"); - } - } -#endif - - /* We can return OPAL_ERR_NOT_AVAILABLE if nothing is - available; that will make the MCA base silently disregard this - component. 
*/ - - if (mca_memory_linux_component.verbose_level >= 10) { - opal_output(0, "memory:linux: no memory hooks available in this process"); - } - return OPAL_ERR_NOT_AVAILABLE; - -done: - -#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED - /* save original call */ - prev_malloc_hook = NULL; - - if (mca_memory_linux_component.use_memalign > 0 && - (opal_mem_hooks_support_level() & - (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_CHUNK_SUPPORT)) != 0) { - prev_malloc_hook = __malloc_hook; - __malloc_hook = _opal_memory_linux_malloc_align_hook; - } -#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ - opal_memory_linux_opened = true; - - return OPAL_SUCCESS; -} - -static int linux_close(void) -{ - int v = mca_memory_linux_component.verbose_level; - -#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED - /* restore original call */ - if (prev_malloc_hook) { - __malloc_hook = prev_malloc_hook; - prev_malloc_hook = NULL; - } -#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ - -#if MEMORY_LINUX_UMMUNOTIFY - if (ummunotify_opened) { - if (v >= 10) { - opal_output(0, "memory:linux: shutting down ummunotify support"); - } - opal_memory_linux_ummunotify_close(); - ummunotify_opened = false; - } -#endif -#if MEMORY_LINUX_PTMALLOC2 - if (ptmalloc2_opened) { - if (v >= 10) { - opal_output(0, "memory:linux: shutting down ptmalloc2 support"); - } - opal_memory_linux_ptmalloc2_close(); - ptmalloc2_opened = false; - } -#endif - - return OPAL_SUCCESS; -} - -#if MEMORY_LINUX_MALLOC_ALIGN_ENABLED -void opal_memory_linux_malloc_set_alignment(int use_memalign, size_t memalign_threshold) -{ - /* ignore cases when this capability is enabled explicitly using - * mca variables - */ - if ((NULL == prev_malloc_hook) && (-1 == mca_memory_linux_component.use_memalign)) { - if (use_memalign == 0 || use_memalign == 32 || use_memalign == 64) { - mca_memory_linux_component.use_memalign = use_memalign; - mca_memory_linux_component.memalign_threshold = memalign_threshold; - if ((opal_mem_hooks_support_level() & - (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_CHUNK_SUPPORT)) != 0) { - prev_malloc_hook = __malloc_hook; - __malloc_hook = _opal_memory_linux_malloc_align_hook; - } - } - } -} - -static void *_opal_memory_linux_malloc_align_hook(size_t sz, const void* caller) -{ - if (sz < mca_memory_linux_component.memalign_threshold) { - return prev_malloc_hook(sz, caller); - } else { - return memalign(mca_memory_linux_component.use_memalign, sz); - } -} -#endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ diff --git a/opal/mca/memory/linux/memory_linux_munmap.c b/opal/mca/memory/linux/memory_linux_munmap.c deleted file mode 100644 index bb1783973a..0000000000 --- a/opal/mca/memory/linux/memory_linux_munmap.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#if defined(HAVE___MUNMAP) -/* here so we only include others if we absolutely have to */ -#elif defined(HAVE_SYSCALL) -#include -#include -#endif -#if defined(HAVE_DLSYM) -#ifndef __USE_GNU -#define __USE_GNU -#endif -#include -#endif - -#include "opal/memoryhooks/memory_internal.h" - -#include "memory_linux.h" - -/* - * munmap is always intercepted - */ -#if defined(HAVE___MUNMAP) -int __munmap(void* addr, size_t len); -#endif - - -/* intercept munmap, as the user can give back memory that way as well. */ -OPAL_DECLSPEC int munmap(void* addr, size_t len) -{ - return opal_memory_linux_free_ptmalloc2_munmap(addr, len, 0); -} - - -/* three ways to call munmap. Prefered is to just call syscall, so - that we can intercept both munmap and __munmap. If that isn't - possible, try calling __munmap from munmap and let __munmap go. If - that doesn't work, try dlsym */ -int opal_memory_linux_free_ptmalloc2_munmap(void *start, size_t length, - int from_alloc) -{ -#if !defined(HAVE___MUNMAP) && \ - !(defined(HAVE_SYSCALL) && defined(__NR_munmap)) && defined(HAVE_DLSYM) - static int (*realmunmap)(void*, size_t); -#endif - - mca_memory_linux_component.munmap_invoked = true; - - opal_mem_hooks_release_hook(start, length, from_alloc); - -#if defined(HAVE___MUNMAP) - return __munmap(start, length); -#elif defined(HAVE_SYSCALL) && defined(__NR_munmap) - return syscall(__NR_munmap, start, length); -#elif defined(HAVE_DLSYM) - if (NULL == realmunmap) { - union { - int (*munmap_fp)(void*, size_t); - void *munmap_p; - } tmp; - - tmp.munmap_p = dlsym(RTLD_NEXT, "munmap"); - realmunmap = tmp.munmap_fp; - } - - return realmunmap(start, length); -#else - #error "Can not determine how to call munmap" -#endif -} diff --git a/opal/mca/memory/linux/memory_linux_ptmalloc2.c b/opal/mca/memory/linux/memory_linux_ptmalloc2.c deleted file mode 100644 index 00a50dc77c..0000000000 --- a/opal/mca/memory/linux/memory_linux_ptmalloc2.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include - -#include "opal/constants.h" -#include "opal/util/output.h" -#include "opal/memoryhooks/memory.h" -#include "opal/memoryhooks/memory_internal.h" - -#include "opal/mca/memory/linux/memory_linux.h" - - -/* Need to call a function in hooks.c to ensure that all those symbols - get pulled in at link time (e.g., when building libmpi.a, so that - those symbols end up in the final executable -- especially if we - use --disable-dlopen and therefore -Wl,--export-dynamic isn't used - when we build OMPI). 
*/ -extern void opal_memory_linux_hook_pull(bool *want_hooks); - - -/* - * Try to initialize ptmalloc2 - */ -int opal_memory_linux_ptmalloc2_open(void) -{ - int val = 0; - void *p; - bool want_hooks = true; - - /* Call a [somewhat] dummy function in hooks.c. ***Do not remove - this call!*** See comment at the beginning of this file - explaining why it is here. It will also check to see if an - environment variable has been set to disable this component - (note that OPAL_ERR_NOT_AVAILABLE is a special return value - that will silently fail the open component call; all others - will issue an error). */ - opal_memory_linux_hook_pull(&want_hooks); - if (!want_hooks) { - return OPAL_ERR_NOT_AVAILABLE; - } - - /* We will also provide malloc/free support if we've been - activated. We don't exclusively rely on the - __malloc_initialize_hook() previously being called because it's - possible that our hook was called, but then someone else reset - the hooks to point to something else (i.e., before MPI_INIT). - So explicitly test here if our hooks are still in place. If - they are, then enable FREE|CHUNK_SUPPORT. If not, then don't - enable that support -- just leave it at MUNMAP_SUPPORT. - - (Look in hooks.c for the __malloc_initialize_hook setup) */ - - /* Do a simple set of tests to see if our hooks are still the ones - installed. Explicitly reset the flags indicating that our - functions were invoked */ - p = malloc(1024 * 1024 * 4); - if (NULL == p) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - p = realloc(p, 1024 * 1024 * 4 + 32); - if (NULL == p) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - free(p); - p = memalign(4, 1024 * 1024); - if (NULL == p) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - free(p); - -#if HAVE_POSIX_MEMALIGN - /* Double check for posix_memalign, too */ - if (mca_memory_linux_component.memalign_invoked) { - mca_memory_linux_component.memalign_invoked = false; - if (0 != posix_memalign(&p, sizeof(void*), 1024 * 1024)) { - return OPAL_ERR_IN_ERRNO; - } - free(p); - } -#endif - - if (mca_memory_linux_component.malloc_invoked && - mca_memory_linux_component.realloc_invoked && - mca_memory_linux_component.memalign_invoked && - mca_memory_linux_component.free_invoked) { - /* Happiness; our functions were invoked */ - val |= OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_CHUNK_SUPPORT; - } - - /* Check if our mmap layering is working */ - p = mmap(NULL, 4096, PROT_READ, (MAP_ANONYMOUS | MAP_PRIVATE), -1, 0); - if (MAP_FAILED == p) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - munmap(p, 4096); - if (mca_memory_linux_component.munmap_invoked) { - val |= OPAL_MEMORY_MUNMAP_SUPPORT; - } - - /* All done */ - if (val > 0) { - opal_mem_hooks_set_support(val); - return OPAL_SUCCESS; - } - return OPAL_ERR_NOT_AVAILABLE; -} - - -int opal_memory_linux_ptmalloc2_close(void) -{ - /* Nothing to do, really. This function exists just for - symmetry. */ - - return OPAL_SUCCESS; -} diff --git a/opal/mca/memory/linux/memory_linux_ummunotify.c b/opal/mca/memory/linux/memory_linux_ummunotify.c deleted file mode 100644 index 4f85416d0c..0000000000 --- a/opal/mca/memory/linux/memory_linux_ummunotify.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. 
All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include -#include -#include -#include -#ifdef HAVE_STROPTS_H -#include -#endif -#include -#include -#include - -#include - -#include "opal_stdint.h" -#include "opal/constants.h" -#include "opal/util/output.h" -#include "opal/util/show_help.h" -#include "opal/mca/memory/memory.h" -#include "opal/memoryhooks/memory.h" -#include "opal/memoryhooks/memory_internal.h" - -#include "opal/mca/memory/linux/memory_linux.h" -#include "opal/mca/memory/linux/public.h" - -#define DEV_UMMUNOTIFY "/dev/ummunotify" - - -/* - * Local functions - */ -static int ummunotify_process(void); -static int ummunotify_register(void *start, size_t len, uint64_t cookie); -static int ummunotify_deregister(void *start, size_t len, uint64_t cookie); - - -/* - * Local variables - */ -static bool initialized = false; - - -/* - * Global variables (these need to be global variables rather than in - * the component struct because they are accessed in the - * opal_memory_changed() macro defined in public.h, and we don't want - * to have to include the component structure definition in public.h). - */ -uint64_t opal_memory_linux_ummunotify_counter_last_value = 0; -volatile uint64_t *opal_memory_linux_ummunotify_counter = - &opal_memory_linux_ummunotify_counter_last_value; - - -int opal_memory_linux_ummunotify_open(void) -{ - uint64_t *p; - - /* Just to be safe... */ - opal_memory_linux_ummunotify_counter_last_value = 0; - opal_memory_linux_ummunotify_counter = - &opal_memory_linux_ummunotify_counter_last_value; - - /* Open the device. Try to give a meaningful error message if - we're unable to open it. 
*/ - mca_memory_linux_component.ummunotify_fd = - open(DEV_UMMUNOTIFY, O_RDONLY | O_NONBLOCK); - if (mca_memory_linux_component.ummunotify_fd < 0) { - char hostname[HOST_NAME_MAX]; - gethostname(hostname, sizeof(hostname)); - - if (EACCES == errno) { - /* This will get a proper show_help when merged into the - linux component */ - opal_show_help("help-opal-memory-linux.txt", - "ummunotify eaccess", true, - hostname, DEV_UMMUNOTIFY); - } else if (ENOENT != errno) { - /* Don't print an error if DEV_UMMUNOTIFY simply doesn't exist */ - opal_show_help("help-opal-memory-linux.txt", - "ummunotify open error", true, - hostname, DEV_UMMUNOTIFY, - strerror(errno), errno); - } - return OPAL_ERR_NOT_SUPPORTED; - } - - p = mmap(NULL, sizeof(*opal_memory_linux_ummunotify_counter), - PROT_READ, MAP_SHARED, - mca_memory_linux_component.ummunotify_fd, 0); - if (MAP_FAILED == opal_memory_linux_ummunotify_counter) { - close(mca_memory_linux_component.ummunotify_fd); - mca_memory_linux_component.ummunotify_fd = -1; - return OPAL_ERR_NOT_SUPPORTED; - } - opal_memory_linux_ummunotify_counter = p; - - /* If everything went well, tell OMPI that we have full support - for the memory hooks and fill in the component function - pointers */ - opal_mem_hooks_set_support(OPAL_MEMORY_FREE_SUPPORT | - OPAL_MEMORY_CHUNK_SUPPORT | - OPAL_MEMORY_MUNMAP_SUPPORT); - mca_memory_linux_component.super.memoryc_process = ummunotify_process; - mca_memory_linux_component.super.memoryc_register = ummunotify_register; - mca_memory_linux_component.super.memoryc_deregister = ummunotify_deregister; - initialized = true; - - return OPAL_SUCCESS; -} - - -/* - * Called during opal_finalize (usually during MPI_FINALIZE) to tear - * down anything that can/should be torn down to disable this - * component. The application may continue for a while after - * MPI_FINALIZE, so we should do as much as possible to disable - * anything we enabled during ummunotify_open(). - */ -int opal_memory_linux_ummunotify_close(void) -{ - if (initialized && mca_memory_linux_component.ummunotify_fd >= 0) { - munmap((void*) opal_memory_linux_ummunotify_counter, - sizeof(*opal_memory_linux_ummunotify_counter)); - close(mca_memory_linux_component.ummunotify_fd); - mca_memory_linux_component.ummunotify_fd = -1; - opal_memory_linux_ummunotify_counter = - &opal_memory_linux_ummunotify_counter_last_value; - initialized = false; - } - - return OPAL_SUCCESS; -} - -/* - * Called when opal_memory_changed() returns 1 - */ -static int ummunotify_process(void) -{ - int n; - unsigned int i; - struct ummunotify_event events[128]; - - /* Loop reading from the ummunot fd until there's nothing left to - read. If we get a LAST event, re-record the counter. */ - while (initialized) { - n = read(mca_memory_linux_component.ummunotify_fd, - &events, sizeof(events)); - if (n <= 0) { - return (EAGAIN == errno) ? OPAL_SUCCESS : OPAL_ERR_IN_ERRNO; - } - - for (i = 0; i < n / sizeof(events[0]); ++i) { - switch (events[i].type) { - case UMMUNOTIFY_EVENT_TYPE_INVAL: - /* 0 => this callback did not come from malloc */ - OPAL_OUTPUT((-1, "ummunot: invalidate start %p, end %p", - (void*) events[i].hint_start, - (void*) events[i].hint_end)); - opal_mem_hooks_release_hook((void *) (uintptr_t) events[i].hint_start, - events[i].hint_end - events[i].hint_start, - 0); - break; - - case UMMUNOTIFY_EVENT_TYPE_LAST: - opal_memory_linux_ummunotify_counter_last_value = - events[i].user_cookie_counter; - /* Are there more events to read? 
*/ - if (opal_memory_linux_ummunotify_counter_last_value == - *opal_memory_linux_ummunotify_counter) { - OPAL_OUTPUT((-1, "ummunot: LAST; done")); - return OPAL_SUCCESS; - } - OPAL_OUTPUT((-1, "ummunot: LAST; but looping around")); - break; - } - } - } - - /* Will only get here if this component has not been - initialized */ - return OPAL_SUCCESS; -} - -static int ummunotify_register(void *start, size_t len, uint64_t cookie) -{ - struct ummunotify_register_ioctl r; - r.reserved = 0; - r.start = (unsigned long) start; - r.end = (unsigned long) start + len; - r.user_cookie = cookie; - - OPAL_OUTPUT((-1, "ummunot: register %p - %p", - start, ((char*) start) + len)); - if (initialized && ioctl(mca_memory_linux_component.ummunotify_fd, - UMMUNOTIFY_REGISTER_REGION, &r)) { - OPAL_OUTPUT((-1, "Error in ioctl register!")); - return OPAL_ERR_IN_ERRNO; - } - - return OPAL_SUCCESS; -} - -static int ummunotify_deregister(void *start, size_t len, uint64_t cookie) -{ - OPAL_OUTPUT((-1, "ummunot: deregister %p - %p", - start, ((char*) start) + len)); - if (initialized && ioctl(mca_memory_linux_component.ummunotify_fd, - UMMUNOTIFY_UNREGISTER_REGION, &cookie)) { - OPAL_OUTPUT((-1, "Error in ioctl unregister!")); - return OPAL_ERR_IN_ERRNO; - } - - return OPAL_SUCCESS; -} diff --git a/opal/mca/memory/linux/owner.txt b/opal/mca/memory/linux/owner.txt deleted file mode 100644 index 2662d53d1d..0000000000 --- a/opal/mca/memory/linux/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: MELLANOX,CISCO -status: maintenance diff --git a/opal/mca/memory/linux/public.h b/opal/mca/memory/linux/public.h deleted file mode 100644 index cc30e8c42a..0000000000 --- a/opal/mca/memory/linux/public.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_MEMORY_LINUX_PUBLIC_H -#define OPAL_MEMORY_LINUX_PUBLIC_H - -#include "opal_config.h" - -#include - -OPAL_DECLSPEC extern volatile uint64_t *opal_memory_linux_ummunotify_counter; -OPAL_DECLSPEC extern uint64_t opal_memory_linux_ummunotify_counter_last_value; - -#define opal_memory_changed() \ - (opal_memory_linux_ummunotify_counter_last_value != \ - *opal_memory_linux_ummunotify_counter) - -#endif diff --git a/opal/mca/memory/linux/rename.h b/opal/mca/memory/linux/rename.h deleted file mode 100644 index 03bd5ce322..0000000000 --- a/opal/mca/memory/linux/rename.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* Name-shift all the internal ptmalloc22 symbols to guarantee to not - conflict / confuse / override the internal glibc symbols. 
*/ - -#define __default_morecore opal_memory_ptmalloc2_default_morecore - -#define _int_malloc opal_memory_ptmalloc2_int_malloc -#define _int_free opal_memory_ptmalloc2_int_free -#define _int_realloc opal_memory_ptmalloc2_int_realloc -#define _int_memalign opal_memory_ptmalloc2_int_memalign -#define _int_valloc opal_memory_ptmalloc2_int_valloc -#define _int_pvalloc opal_memory_ptmalloc2_int_pvalloc -#define _int_icalloc opal_memory_ptmalloc2_int_icalloc -#define _int_icomalloc opal_memory_ptmalloc2_int_icomalloc - -#define mTRIm opal_memory_ptmalloc2_mTRIm -#define mUSABLe opal_memory_ptmalloc2_mUSABLe -#define mALLOPt opal_memory_ptmalloc2_mALLOPt - -#define mem2mem_check opal_memory_ptmalloc2_mem2mem_check -#define top_check opal_memory_ptmalloc2_top_check -#define munmap_chunk opal_memory_ptmalloc2_munmap_chunk -#define mremap_chunk opal_memory_ptmalloc2_mremap_chunk - -#define malloc_check opal_memory_ptmalloc2_malloc_check -#define free_check opal_memory_ptmalloc2_free_check -#define realloc_check opal_memory_ptmalloc2_realloc_check -#define memalign_check opal_memory_ptmalloc2_memalign_check - -#define malloc_starter opal_memory_ptmalloc2_malloc_starter -#define memalign_starter opal_memory_ptmalloc2_memalign_starter -#define free_starter opal_memory_ptmalloc2_free_starter - -#define malloc_atfork opal_memory_ptmalloc2_malloc_atfork -#define free_atfork opal_memory_ptmalloc2_free_atfork - -#define _int_get_arena opal_memory_ptmalloc2_int_get_arena -#define _int_get_arena_info opal_memory_ptmalloc2_int_get_arena_info -#define _int_get_global_info opal_memory_ptmalloc2_int_get_global_info -#define _int_new_arena opal_memory_ptmalloc2_int_new_arena -#define __malloc_check_init opal_memory_ptmalloc2_malloc_check_init -#define malloc_stats opal_memory_ptmalloc2_malloc_stats - -#define posix_memalign opal_memory_ptmalloc2_posix_memalign diff --git a/opal/mca/memory/linux/sysdeps/generic/atomic.h b/opal/mca/memory/linux/sysdeps/generic/atomic.h deleted file mode 100644 index a3aeed13e8..0000000000 --- a/opal/mca/memory/linux/sysdeps/generic/atomic.h +++ /dev/null @@ -1 +0,0 @@ -/* Empty placeholder */ diff --git a/opal/mca/memory/linux/sysdeps/generic/malloc-machine.h b/opal/mca/memory/linux/sysdeps/generic/malloc-machine.h deleted file mode 100644 index 345137060d..0000000000 --- a/opal/mca/memory/linux/sysdeps/generic/malloc-machine.h +++ /dev/null @@ -1,68 +0,0 @@ -/* Basic platform-independent macro definitions for mutexes, - thread-specific data and parameters for malloc. - Copyright (C) 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. 
*/ - -#ifndef _GENERIC_MALLOC_MACHINE_H -#define _GENERIC_MALLOC_MACHINE_H - -#include - -#ifndef mutex_init /* No threads, provide dummy macros */ - -# define NO_THREADS - -/* The mutex functions used to do absolutely nothing, i.e. lock, - trylock and unlock would always just return 0. However, even - without any concurrently active threads, a mutex can be used - legitimately as an `in use' flag. To make the code that is - protected by a mutex async-signal safe, these macros would have to - be based on atomic test-and-set operations, for example. */ -typedef int mutex_t; - -# define mutex_init(m) (*(m) = 0) -# define mutex_lock(m) ((*(m) = 1), 0) -# define mutex_trylock(m) (*(m) ? 1 : ((*(m) = 1), 0)) -# define mutex_unlock(m) (*(m) = 0) - -typedef void *tsd_key_t; -# define tsd_key_create(key, destr) do {} while(0) -# define tsd_setspecific(key, data) ((key) = (data)) -# define tsd_getspecific(key, vptr) (vptr = (key)) - -# define thread_atfork(prepare, parent, child) do {} while(0) - -#endif /* !defined mutex_init */ - -#ifndef atomic_full_barrier -# define atomic_full_barrier() __asm ("" ::: "memory") -#endif - -#ifndef atomic_read_barrier -# define atomic_read_barrier() atomic_full_barrier () -#endif - -#ifndef atomic_write_barrier -# define atomic_write_barrier() atomic_full_barrier () -#endif - -#ifndef DEFAULT_TOP_PAD -# define DEFAULT_TOP_PAD 131072 -#endif - -#endif /* !defined(_GENERIC_MALLOC_MACHINE_H) */ diff --git a/opal/mca/memory/linux/sysdeps/generic/thread-st.h b/opal/mca/memory/linux/sysdeps/generic/thread-st.h deleted file mode 100644 index 0243774b7c..0000000000 --- a/opal/mca/memory/linux/sysdeps/generic/thread-st.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * $Id:$ - * Generic version: no threads. - * by Wolfram Gloger 2004 - */ - -#include - -struct thread_st { - char *sp; /* stack pointer, can be 0 */ - void (*func)(struct thread_st* st); /* must be set by user */ - int id; - int flags; - struct user_data u; -}; - -static void -thread_init(void) -{ - printf("No threads.\n"); -} - -/* Create a thread. */ -static int -thread_create(struct thread_st *st) -{ - st->flags = 0; - st->id = 1; - st->func(st); - return 0; -} - -/* Wait for one of several subthreads to finish. */ -static void -wait_for_thread(struct thread_st st[], int n_thr, - int (*end_thr)(struct thread_st*)) -{ - int i; - for(i=0; i. - -Permission to use, copy, modify, distribute, and sell this software -and its documentation for any purpose is hereby granted without fee, -provided that (i) the above copyright notices and this permission -notice appear in all copies of the software and related documentation, -and (ii) the name of Wolfram Gloger may not be used in any advertising -or publicity relating to the software. - -THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, -EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY -WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, -INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY -DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY -OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. -*/ - -#ifndef _PTHREAD_MALLOC_MACHINE_H -#define _PTHREAD_MALLOC_MACHINE_H - -#include - -#undef thread_atfork_static - -/* Use fast inline spinlocks with gcc. 
*/ -#if (defined __i386__ || defined __x86_64__) && defined __GNUC__ && \ - !defined USE_NO_SPINLOCKS - -#include -#include - -typedef struct { - volatile unsigned int lock; - int pad0_; -} mutex_t; - -#define MUTEX_INITIALIZER { 0 } -#define mutex_init(m) ((m)->lock = 0) -static inline int mutex_lock(mutex_t *m) { - int cnt = 0, r; - struct timespec tm; - - for(;;) { - __asm__ __volatile__ - ("xchgl %0, %1" - : "=r"(r), "=m"(m->lock) - : "0"(1), "m"(m->lock) - : "memory"); - if(!r) - return 0; - if(cnt < 50) { - sched_yield(); - cnt++; - } else { - tm.tv_sec = 0; - tm.tv_nsec = 2000001; - nanosleep(&tm, NULL); - cnt = 0; - } - } -} -static inline int mutex_trylock(mutex_t *m) { - int r; - - __asm__ __volatile__ - ("xchgl %0, %1" - : "=r"(r), "=m"(m->lock) - : "0"(1), "m"(m->lock) - : "memory"); - return r; -} -static inline int mutex_unlock(mutex_t *m) { - m->lock = 0; - __asm __volatile__ ("" : : : "memory"); - return 0; -} - -#else - -/* Normal pthread mutex. */ -typedef pthread_mutex_t mutex_t; - -#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER -#define mutex_init(m) pthread_mutex_init(m, NULL) -#define mutex_lock(m) pthread_mutex_lock(m) -#define mutex_trylock(m) pthread_mutex_trylock(m) -#define mutex_unlock(m) pthread_mutex_unlock(m) - -#endif /* (__i386__ || __x86_64__) && __GNUC__ && !USE_NO_SPINLOCKS */ - -/* thread specific data */ -#if defined(__sgi) || defined(USE_TSD_DATA_HACK) - -/* Hack for thread-specific data, e.g. on Irix 6.x. We can't use - pthread_setspecific because that function calls malloc() itself. - The hack only works when pthread_t can be converted to an integral - type. */ - -typedef void *tsd_key_t[256]; -#define tsd_key_create(key, destr) do { \ - int i; \ - for(i=0; i<256; i++) (*key)[i] = 0; \ -} while(0) -#define tsd_setspecific(key, data) \ - (key[(unsigned)pthread_self() % 256] = (data)) -#define tsd_getspecific(key, vptr) \ - (vptr = key[(unsigned)pthread_self() % 256]) - -#else - -typedef pthread_key_t tsd_key_t; - -#define tsd_key_create(key, destr) pthread_key_create(key, destr) -#define tsd_setspecific(key, data) pthread_setspecific(key, data) -#define tsd_getspecific(key, vptr) (vptr = pthread_getspecific(key)) - -#endif - -/* at fork */ -#define thread_atfork(prepare, parent, child) \ - pthread_atfork(prepare, parent, child) - -#include - -#endif /* !defined(_MALLOC_MACHINE_H) */ diff --git a/opal/mca/memory/linux/sysdeps/pthread/thread-st.h b/opal/mca/memory/linux/sysdeps/pthread/thread-st.h deleted file mode 100644 index f97a0a3552..0000000000 --- a/opal/mca/memory/linux/sysdeps/pthread/thread-st.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * $Id: thread-st.h$ - * pthread version - * by Wolfram Gloger 2004 - */ - -#include -#include - -pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER; -pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER; - -#ifndef USE_PTHREADS_STACKS -#define USE_PTHREADS_STACKS 0 -#endif - -#ifndef STACKSIZE -#define STACKSIZE 32768 -#endif - -struct thread_st { - char *sp; /* stack pointer, can be 0 */ - void (*func)(struct thread_st* st); /* must be set by user */ - pthread_t id; - int flags; - struct user_data u; -}; - -static void -thread_init(void) -{ - printf("Using posix threads.\n"); - pthread_cond_init(&finish_cond, NULL); - pthread_mutex_init(&finish_mutex, NULL); -} - -static void * -thread_wrapper(void *ptr) -{ - struct thread_st *st = (struct thread_st*)ptr; - - /*printf("begin %p\n", st->sp);*/ - st->func(st); - pthread_mutex_lock(&finish_mutex); - st->flags = 1; - pthread_mutex_unlock(&finish_mutex); - 
pthread_cond_signal(&finish_cond); - /*printf("end %p\n", st->sp);*/ - return NULL; -} - -/* Create a thread. */ -static int -thread_create(struct thread_st *st) -{ - st->flags = 0; - { - pthread_attr_t* attr_p = 0; -#if USE_PTHREADS_STACKS - pthread_attr_t attr; - - pthread_attr_init (&attr); - if(!st->sp) - st->sp = malloc(STACKSIZE+16); - if(!st->sp) - return -1; - if(pthread_attr_setstacksize(&attr, STACKSIZE)) - fprintf(stderr, "error setting stacksize"); - else - pthread_attr_setstackaddr(&attr, st->sp + STACKSIZE); - /*printf("create %p\n", st->sp);*/ - attr_p = &attr; -#endif - return pthread_create(&st->id, attr_p, thread_wrapper, st); - } - return 0; -} - -/* Wait for one of several subthreads to finish. */ -static void -wait_for_thread(struct thread_st st[], int n_thr, - int (*end_thr)(struct thread_st*)) -{ - int i; - - pthread_mutex_lock(&finish_mutex); - for(;;) { - int term = 0; - for(i=0; i 0) - break; - pthread_cond_wait(&finish_cond, &finish_mutex); - } - pthread_mutex_unlock(&finish_mutex); -} - -/* - * Local variables: - * tab-width: 4 - * End: - */ diff --git a/opal/mca/memory/linux/sysdeps/solaris/malloc-machine.h b/opal/mca/memory/linux/sysdeps/solaris/malloc-machine.h deleted file mode 100644 index 00e33b0801..0000000000 --- a/opal/mca/memory/linux/sysdeps/solaris/malloc-machine.h +++ /dev/null @@ -1,51 +0,0 @@ -/* Basic platform-independent macro definitions for mutexes, - thread-specific data and parameters for malloc. - Solaris threads version. - Copyright (C) 2004 Wolfram Gloger . - -Permission to use, copy, modify, distribute, and sell this software -and its documentation for any purpose is hereby granted without fee, -provided that (i) the above copyright notices and this permission -notice appear in all copies of the software and related documentation, -and (ii) the name of Wolfram Gloger may not be used in any advertising -or publicity relating to the software. - -THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, -EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY -WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, -INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY -DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY -OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. -*/ - -#ifndef _SOLARIS_MALLOC_MACHINE_H -#define _SOLARIS_MALLOC_MACHINE_H - -#include - -typedef thread_t thread_id; - -#define MUTEX_INITIALIZER { 0 } -#define mutex_init(m) mutex_init(m, USYNC_THREAD, NULL) - -/* - * Hack for thread-specific data on Solaris. We can't use thr_setspecific - * because that function calls malloc() itself. 
- */ -typedef void *tsd_key_t[256]; -#define tsd_key_create(key, destr) do { \ - int i; \ - for(i=0; i<256; i++) (*key)[i] = 0; \ -} while(0) -#define tsd_setspecific(key, data) (key[(unsigned)thr_self() % 256] = (data)) -#define tsd_getspecific(key, vptr) (vptr = key[(unsigned)thr_self() % 256]) - -#define thread_atfork(prepare, parent, child) do {} while(0) - -#include - -#endif /* !defined(_SOLARIS_MALLOC_MACHINE_H) */ diff --git a/opal/mca/memory/linux/sysdeps/solaris/thread-st.h b/opal/mca/memory/linux/sysdeps/solaris/thread-st.h deleted file mode 100644 index dbb4b097e8..0000000000 --- a/opal/mca/memory/linux/sysdeps/solaris/thread-st.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * $Id:$ - * Solaris version - * by Wolfram Gloger 2004 - */ - -#include -#include - -#ifndef STACKSIZE -#define STACKSIZE 32768 -#endif - -struct thread_st { - char *sp; /* stack pointer, can be 0 */ - void (*func)(struct thread_st* st); /* must be set by user */ - thread_id id; - int flags; - struct user_data u; -}; - -static void -thread_init(void) -{ - printf("Using Solaris threads.\n"); -} - -static void * -thread_wrapper(void *ptr) -{ - struct thread_st *st = (struct thread_st*)ptr; - - /*printf("begin %p\n", st->sp);*/ - st->func(st); - /*printf("end %p\n", st->sp);*/ - return NULL; -} - -/* Create a thread. */ -static int -thread_create(struct thread_st *st) -{ - st->flags = 0; - if(!st->sp) - st->sp = malloc(STACKSIZE); - if(!st->sp) return -1; - thr_create(st->sp, STACKSIZE, thread_wrapper, st, THR_NEW_LWP, &st->id); - return 0; -} - -/* Wait for one of several subthreads to finish. */ -static void -wait_for_thread(struct thread_st st[], int n_thr, - int (*end_thr)(struct thread_st*)) -{ - int i; - thread_t id; - - thr_join(0, &id, NULL); - for(i=0; i. - -Permission to use, copy, modify, distribute, and sell this software -and its documentation for any purpose is hereby granted without fee, -provided that (i) the above copyright notices and this permission -notice appear in all copies of the software and related documentation, -and (ii) the name of Wolfram Gloger may not be used in any advertising -or publicity relating to the software. - -THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, -EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY -WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, -INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY -DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY -OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. 
-*/ - -#ifndef _SPROC_MALLOC_MACHINE_H -#define _SPROC_MALLOC_MACHINE_H - -#include -#include -#include -#include - -typedef abilock_t mutex_t; - -#define MUTEX_INITIALIZER { 0 } -#define mutex_init(m) init_lock(m) -#define mutex_lock(m) (spin_lock(m), 0) -#define mutex_trylock(m) acquire_lock(m) -#define mutex_unlock(m) release_lock(m) - -typedef int tsd_key_t; -int tsd_key_next; -#define tsd_key_create(key, destr) ((*key) = tsd_key_next++) -#define tsd_setspecific(key, data) (((void **)(&PRDA->usr_prda))[key] = data) -#define tsd_getspecific(key, vptr) (vptr = ((void **)(&PRDA->usr_prda))[key]) - -#define thread_atfork(prepare, parent, child) do {} while(0) - -#include - -#endif /* !defined(_SPROC_MALLOC_MACHINE_H) */ diff --git a/opal/mca/memory/linux/sysdeps/sproc/thread-st.h b/opal/mca/memory/linux/sysdeps/sproc/thread-st.h deleted file mode 100644 index a512f92130..0000000000 --- a/opal/mca/memory/linux/sysdeps/sproc/thread-st.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * $Id:$ - * sproc version - * by Wolfram Gloger 2001, 2004 - */ - -#include -#include -#include - -#ifndef STACKSIZE -#define STACKSIZE 32768 -#endif - -struct thread_st { - char *sp; /* stack pointer, can be 0 */ - void (*func)(struct thread_st* st); /* must be set by user */ - thread_id id; - int flags; - struct user_data u; -}; - -static void -thread_init(void) -{ - printf("Using sproc() threads.\n"); -} - -static void -thread_wrapper(void *ptr, size_t stack_len) -{ - struct thread_st *st = (struct thread_st*)ptr; - - /*printf("begin %p\n", st->sp);*/ - st->func(st); - /*printf("end %p\n", st->sp);*/ -} - -/* Create a thread. */ -static int -thread_create(struct thread_st *st) -{ - st->flags = 0; - if(!st->sp) - st->sp = malloc(STACKSIZE); - if(!st->sp) return -1; - st->id = sprocsp(thread_wrapper, PR_SALL, st, st->sp+STACKSIZE, STACKSIZE); - if(st->id < 0) { - return -1; - } - return 0; -} - -/* Wait for one of several subthreads to finish. */ -static void -wait_for_thread(struct thread_st st[], int n_thr, - int (*end_thr)(struct thread_st*)) -{ - int i; - int id; - - int status = 0; - id = wait(&status); - if(status != 0) { - if(WIFSIGNALED(status)) - printf("thread %id terminated by signal %d\n", - id, WTERMSIG(status)); - else - printf("thread %id exited with status %d\n", - id, WEXITSTATUS(status)); - } - for(i=0; i 0 - -static void -mem_init(unsigned char *ptr, unsigned long size) -{ - unsigned long i, j; - - if(size == 0) return; - for(i=0; i>8)) & 0xFF); - } - j = (unsigned long)ptr ^ (size-1); - ptr[size-1] = ((j ^ (j>>8)) & 0xFF); -} - -static int -mem_check(unsigned char *ptr, unsigned long size) -{ - unsigned long i, j; - - if(size == 0) return 0; - for(i=0; i>8)) & 0xFF)) return 1; - } - j = (unsigned long)ptr ^ (size-1); - if(ptr[size-1] != ((j ^ (j>>8)) & 0xFF)) return 2; - return 0; -} - -static int -zero_check(unsigned* ptr, unsigned long size) -{ - unsigned char* ptr2; - - while(size >= sizeof(*ptr)) { - if(*ptr++ != 0) - return -1; - size -= sizeof(*ptr); - } - ptr2 = (unsigned char*)ptr; - while(size > 0) { - if(*ptr2++ != 0) - return -1; - --size; - } - return 0; -} - -#endif /* TEST > 0 */ - -/* Allocate a bin with malloc(), realloc() or memalign(). r must be a - random number >= 1024. 
*/ - -static void -bin_alloc(struct bin *m, unsigned long size, int r) -{ -#if TEST > 0 - if(mem_check(m->ptr, m->size)) { - printf("memory corrupt!\n"); - exit(1); - } -#endif - r %= 1024; - /*printf("%d ", r);*/ - if(r < 4) { /* memalign */ - if(m->size > 0) free(m->ptr); - m->ptr = (unsigned char *)memalign(sizeof(int) << r, size); - } else if(r < 20) { /* calloc */ - if(m->size > 0) free(m->ptr); - m->ptr = (unsigned char *)calloc(size, 1); -#if TEST > 0 - if(zero_check((unsigned*)m->ptr, size)) { - long i; - for(i=0; iptr[i] != 0) - break; - printf("calloc'ed memory non-zero (ptr=%p, i=%ld)!\n", m->ptr, i); - exit(1); - } -#endif - } else if(r < 100 && m->size < REALLOC_MAX) { /* realloc */ - if(m->size == 0) m->ptr = NULL; - m->ptr = realloc(m->ptr, size); - } else { /* plain malloc */ - if(m->size > 0) free(m->ptr); - m->ptr = (unsigned char *)malloc(size); - } - if(!m->ptr) { - printf("out of memory (r=%d, size=%ld)!\n", r, (long)size); - exit(1); - } - m->size = size; -#if TEST > 0 - mem_init(m->ptr, m->size); -#endif -} - -/* Free a bin. */ - -static void -bin_free(struct bin *m) -{ - if(m->size == 0) return; -#if TEST > 0 - if(mem_check(m->ptr, m->size)) { - printf("memory corrupt!\n"); - exit(1); - } -#endif - free(m->ptr); - m->size = 0; -} - -/* - * Local variables: - * tab-width: 4 - * End: - */ diff --git a/opal/mca/memory/linux/t-test1.c b/opal/mca/memory/linux/t-test1.c deleted file mode 100644 index 15dc7c6aae..0000000000 --- a/opal/mca/memory/linux/t-test1.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - * $Id: t-test1.c,v 1.2 2004/11/04 14:58:45 wg Exp $ - * by Wolfram Gloger 1996-1999, 2001, 2004 - * A multi-thread test for malloc performance, maintaining one pool of - * allocated bins per thread. - */ - -#if (defined __STDC__ && __STDC__) || defined __cplusplus -# include -#endif -#include -#include -#include -#include -#include -#include -#include - -#if !USE_MALLOC -#include -#else -#include "malloc.h" -#endif - -#include "lran2.h" -#include "t-test.h" - -struct user_data { - int bins, max; - unsigned long size; - long seed; -}; -#include "thread-st.h" - -#define N_TOTAL 10 -#ifndef N_THREADS -#define N_THREADS 2 -#endif -#ifndef N_TOTAL_PRINT -#define N_TOTAL_PRINT 50 -#endif -#ifndef MEMORY -#define MEMORY 8000000l -#endif -#define SIZE 10000 -#define I_MAX 10000 -#define ACTIONS_MAX 30 -#ifndef TEST_FORK -#define TEST_FORK 0 -#endif - -#define RANDOM(d,s) (lran2(d) % (s)) - -struct bin_info { - struct bin *m; - unsigned long size, bins; -}; - -#if TEST > 0 - -void -bin_test(struct bin_info *p) -{ - int b; - - for(b=0; bbins; b++) { - if(mem_check(p->m[b].ptr, p->m[b].size)) { - printf("memory corrupt!\n"); - abort(); - } - } -} - -#endif - -void -malloc_test(struct thread_st *st) -{ - int b, i, j, actions, pid = 1; - struct bin_info p; - struct lran2_st ld; /* data for random number generator */ - - lran2_init(&ld, st->u.seed); -#if TEST_FORK>0 - if(RANDOM(&ld, TEST_FORK) == 0) { - int status; - -#if !USE_THR - pid = fork(); -#else - pid = fork1(); -#endif - if(pid > 0) { - /*printf("forked, waiting for %d...\n", pid);*/ - waitpid(pid, &status, 0); - printf("done with %d...\n", pid); - if(!WIFEXITED(status)) { - printf("child term with signal %d\n", WTERMSIG(status)); - exit(1); - } - return; - } - exit(0); - } -#endif - p.m = (struct bin *)malloc(st->u.bins*sizeof(*p.m)); - p.bins = st->u.bins; - p.size = st->u.size; - for(b=0; bu.max;) { -#if TEST > 1 - bin_test(&p); -#endif - actions = RANDOM(&ld, ACTIONS_MAX); -#if USE_MALLOC && MALLOC_DEBUG - if(actions < 2) { 
mallinfo(); } -#endif - for(j=0; j 2 - bin_test(&p); -#endif - } -#if 0 /* Test illegal free()s while setting MALLOC_CHECK_ */ - for(j=0; j<8; j++) { - b = RANDOM(&ld, p.bins); - if(p.m[b].ptr) { - int offset = (RANDOM(&ld, 11) - 5)*8; - char *rogue = (char*)(p.m[b].ptr) + offset; - /*printf("p=%p rogue=%p\n", p.m[b].ptr, rogue);*/ - free(rogue); - } - } -#endif - i += actions; - } - for(b=0; bid); -#endif - if(n_total >= n_total_max) { - n_running--; - } else if(st->u.seed++, thread_create(st)) { - printf("Creating thread #%d failed.\n", n_total); - } else { - n_total++; - if(n_total%N_TOTAL_PRINT == 0) - printf("n_total = %d\n", n_total); - } - return 0; -} - -#if 0 -/* Protect address space for allocation of n threads by LinuxThreads. */ -static void -protect_stack(int n) -{ - char buf[2048*1024]; - char* guard; - size_t guard_size = 2*2048*1024UL*(n+2); - - buf[0] = '\0'; - guard = (char*)(((unsigned long)buf - 4096)& ~4095UL) - guard_size; - printf("Setting up stack guard at %p\n", guard); - if(mmap(guard, guard_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, - -1, 0) - != guard) - printf("failed!\n"); -} -#endif - -int -main(int argc, char *argv[]) -{ - int i, bins; - int n_thr=N_THREADS; - int i_max=I_MAX; - unsigned long size=SIZE; - struct thread_st *st; - -#if USE_MALLOC && USE_STARTER==2 - ptmalloc_init(); - printf("ptmalloc_init\n"); -#endif - - if(argc > 1) n_total_max = atoi(argv[1]); - if(n_total_max < 1) n_thr = 1; - if(argc > 2) n_thr = atoi(argv[2]); - if(n_thr < 1) n_thr = 1; - if(n_thr > 100) n_thr = 100; - if(argc > 3) i_max = atoi(argv[3]); - - if(argc > 4) size = atol(argv[4]); - if(size < 2) size = 2; - - bins = MEMORY/(size*n_thr); - if(argc > 5) bins = atoi(argv[5]); - if(bins < 4) bins = 4; - - /*protect_stack(n_thr);*/ - - thread_init(); - printf("total=%d threads=%d i_max=%d size=%ld bins=%d\n", - n_total_max, n_thr, i_max, size, bins); - - st = (struct thread_st *)malloc(n_thr*sizeof(*st)); - if(!st) exit(-1); - -#if !defined NO_THREADS && (defined __sun__ || defined sun) - /* I know of no other way to achieve proper concurrency with Solaris. */ - thr_setconcurrency(n_thr); -#endif - - /* Start all n_thr threads. */ - for(i=0; i0;) { - wait_for_thread(st, n_thr, my_end_thread); - } - for(i=0; i -#endif -#include -#include -#include -#include -#include -#include - -#if !USE_MALLOC -#include -#else -#include "malloc.h" -#endif - -#include "lran2.h" -#include "t-test.h" - -struct user_data { - int max; - unsigned long size; - long seed; -}; -#include "thread-st.h" -#include "malloc-machine.h" /* for mutex */ - -#define N_TOTAL 10 -#ifndef N_THREADS -#define N_THREADS 2 -#endif -#ifndef N_TOTAL_PRINT -#define N_TOTAL_PRINT 50 -#endif -#define STACKSIZE 32768 -#ifndef MEMORY -#define MEMORY 8000000l -#endif -#define SIZE 10000 -#define I_MAX 10000 -#define BINS_PER_BLOCK 20 - -#define RANDOM(d,s) (lran2(d) % (s)) - -struct block { - struct bin b[BINS_PER_BLOCK]; - mutex_t mutex; -} *blocks; - -int n_blocks; - -#if TEST > 0 - -void -bin_test(void) -{ - int b, i; - - for(b=0; bu.seed); - for(i=0; i<=st->u.max;) { -#if TEST > 1 - bin_test(); -#endif - bl = &blocks[RANDOM(&ld, n_blocks)]; - r = RANDOM(&ld, 1024); - if(r < 200) { /* free only */ - mutex_lock(&bl->mutex); - for(b=0; bb[b]); - mutex_unlock(&bl->mutex); - i += BINS_PER_BLOCK; - } else { /* alloc/realloc */ - /* Generate random numbers in advance. 
*/ - for(b=0; bu.size) + 1; - rnum[b] = lran2(&ld); - } - mutex_lock(&bl->mutex); - for(b=0; bb[b], rsize[b], rnum[b]); - mutex_unlock(&bl->mutex); - i += BINS_PER_BLOCK; - } -#if TEST > 2 - bin_test(); -#endif - } -} - -int n_total=0, n_total_max=N_TOTAL, n_running; - -int -my_end_thread(struct thread_st *st) -{ - /* Thread st has finished. Start a new one. */ -#if 0 - printf("Thread %lx terminated.\n", (long)st->id); -#endif - if(n_total >= n_total_max) { - n_running--; - } else if(st->u.seed++, thread_create(st)) { - printf("Creating thread #%d failed.\n", n_total); - } else { - n_total++; - if(n_total%N_TOTAL_PRINT == 0) - printf("n_total = %d\n", n_total); - } - return 0; -} - -int -main(int argc, char *argv[]) -{ - int i, j, bins; - int n_thr=N_THREADS; - int i_max=I_MAX; - unsigned long size=SIZE; - struct thread_st *st; - -#if USE_MALLOC && USE_STARTER==2 - ptmalloc_init(); - printf("ptmalloc_init\n"); -#endif - - if(argc > 1) n_total_max = atoi(argv[1]); - if(n_total_max < 1) n_thr = 1; - if(argc > 2) n_thr = atoi(argv[2]); - if(n_thr < 1) n_thr = 1; - if(n_thr > 100) n_thr = 100; - if(argc > 3) i_max = atoi(argv[3]); - - if(argc > 4) size = atol(argv[4]); - if(size < 2) size = 2; - - bins = MEMORY/size; - if(argc > 5) bins = atoi(argv[5]); - if(bins < BINS_PER_BLOCK) bins = BINS_PER_BLOCK; - - n_blocks = bins/BINS_PER_BLOCK; - blocks = (struct block *)malloc(n_blocks*sizeof(*blocks)); - if(!blocks) - exit(1); - - thread_init(); - printf("total=%d threads=%d i_max=%d size=%ld bins=%d\n", - n_total_max, n_thr, i_max, size, n_blocks*BINS_PER_BLOCK); - - for(i=0; i0;) { - wait_for_thread(st, n_thr, my_end_thread); - } - - for(i=0; i, 2001. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include -#include -#include "malloc.h" - -static int errors = 0; - -static void -merror (const char *msg) -{ - ++errors; - printf ("Error: %s\n", msg); -} - -int -main (void) -{ - void *p1, *p2; - void *save_state; - long i; - - errno = 0; - - p1 = malloc (10); - if (p1 == NULL) - merror ("malloc (10) failed."); - - p2 = malloc (20); - if (p2 == NULL) - merror ("malloc (20) failed."); - - free (malloc (10)); - - for (i=0; i<100; ++i) - { - save_state = malloc_get_state (); - if (save_state == NULL) - { - merror ("malloc_get_state () failed."); - break; - } - /*free (malloc (10)); This could change the top chunk! 
*/ - malloc_set_state (save_state); - p1 = realloc (p1, i*4 + 4); - if (p1 == NULL) - merror ("realloc (i*4) failed."); - free (save_state); - } - - p1 = realloc (p1, 40); - free (p2); - p2 = malloc (10); - if (p2 == NULL) - merror ("malloc (10) failed."); - free (p1); - - return errors != 0; -} - -/* - * Local variables: - * c-basic-offset: 2 - * End: - */ diff --git a/opal/mca/memory/linux/tst-mstats.c b/opal/mca/memory/linux/tst-mstats.c deleted file mode 100644 index 5d84312841..0000000000 --- a/opal/mca/memory/linux/tst-mstats.c +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright (C) 2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Wolfram Gloger , 2004. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include -#include -#include "malloc.h" - -static int errors = 0; - -static void -merror (const char *msg) -{ - ++errors; - printf ("Error: %s\n", msg); -} - -int -main (void) -{ - void *p1, *p2; - long i; - mstate a; - struct malloc_arena_info mai; - int nfree; - unsigned long navail; - - errno = 0; - - malloc_stats(); /* check that it works even without initialization */ - a = _int_get_arena(0); - if (!a) { - merror ("Can't get main arena."); - return 1; - } - free (malloc (10)); - _int_get_arena_info(a, &mai); - printf("nfree = %d\navail = %lu\nfastavail = %lu\ntop_size = %lu\n", - mai.nbinblocks + mai.nfastblocks, - (unsigned long)mai.binavail, - (unsigned long)mai.fastavail, - (unsigned long)mai.top_size); - if (mai.nfastblocks+mai.nbinblocks < 1) - merror ("initial _int_get_arena_info() failed."); - nfree = mai.nbinblocks + mai.nfastblocks; - navail = mai.binavail + mai.fastavail; - - p1 = malloc (10); - if (p1 == NULL) - merror ("malloc (10) failed."); - p2 = malloc (30); - if (p2 == NULL) - merror ("malloc (30) failed."); - - free (malloc (10)); - - for (i=0; i<100; ++i) - { - p1 = realloc (p1, i*7 + 3); - if (p1 == NULL) - merror ("realloc (i*7 + 3) failed."); - } - free (p2); - - _int_get_arena_info(a, &mai); - printf("nfree = %d\navail = %lu\nfastavail = %lu\ntop_size = %lu\n", - mai.nbinblocks + mai.nfastblocks, - (unsigned long)mai.binavail, - (unsigned long)mai.fastavail, - (unsigned long)mai.top_size); - /* Assume that no memory is returned to the system from these small - chunks. */ - if (mai.nbinblocks+mai.nfastblocks < nfree || - mai.binavail+mai.fastavail < navail) - merror ("final _int_get_arena_info() failed."); - malloc_stats(); - - return errors != 0; -} - -/* - * Local variables: - * c-basic-offset: 2 - * End: - */
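
Editor's note on the mechanism being removed above: the ummunotify path worked by mmap()ing a read-only event counter from /dev/ummunotify, caching its last observed value, and letting the cheap `opal_memory_changed()` comparison in public.h decide when `ummunotify_process()` needed to drain events and re-sync the cache. The following is a minimal, stand-alone sketch of that check-then-drain pattern only; the names `fake_kernel_counter`, `memory_changed`, and `process_events` are illustrative stand-ins (a local variable replaces the kernel-mapped page), not part of the deleted component's API.

```c
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the page the real component mmap()s from /dev/ummunotify
   and exposes as opal_memory_linux_ummunotify_counter.                  */
static uint64_t fake_kernel_counter = 0;
static volatile uint64_t *live_counter = &fake_kernel_counter;

/* Stand-in for opal_memory_linux_ummunotify_counter_last_value.         */
static uint64_t last_value = 0;

/* Equivalent of the opal_memory_changed() macro: one load and compare,
   cheap enough to run on every registration-cache lookup.               */
static int memory_changed(void)
{
    return last_value != *live_counter;
}

/* Rough analogue of ummunotify_process(): the real code read()s
   struct ummunotify_event records and calls the release hooks here;
   the essential step for the sketch is re-syncing the cached value.     */
static void process_events(void)
{
    last_value = *live_counter;
}

int main(void)
{
    printf("changed? %d\n", memory_changed());   /* 0: nothing happened            */
    fake_kernel_counter++;                       /* simulate a kernel-noticed munmap */
    if (memory_changed()) {
        process_events();
    }
    printf("changed? %d\n", memory_changed());   /* 0 again after draining          */
    return 0;
}
```

The same split shows up in the deleted ptmalloc2 probe earlier in this patch: a cheap inline test (were our malloc/realloc/memalign/free hooks actually invoked?) decides whether the heavier support flags get advertised at all.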