From 2541f15880c5c193a5eb5db9524c85d149a019ad Mon Sep 17 00:00:00 2001
From: Brian Barrett <brbarret@open-mpi.org>
Date: Tue, 9 Aug 2005 19:39:39 +0000
Subject: [PATCH] copy ptmalloc2 release into the main branch

This commit was SVN r6787.
---
 opal/memory/ptmalloc2/COPYRIGHT               |   19 +
 opal/memory/ptmalloc2/ChangeLog               |  181 +
 opal/memory/ptmalloc2/Makefile                |  218 +
 opal/memory/ptmalloc2/README                  |  192 +
 opal/memory/ptmalloc2/arena.c                 |  800 +++
 opal/memory/ptmalloc2/hooks.c                 |  640 ++
 opal/memory/ptmalloc2/lran2.h                 |   51 +
 opal/memory/ptmalloc2/malloc-stats.c          |  161 +
 opal/memory/ptmalloc2/malloc.c                | 5439 +++++++++++++++++
 opal/memory/ptmalloc2/malloc.h                |  291 +
 .../memory/ptmalloc2/sysdeps/generic/atomic.h |    1 +
 .../sysdeps/generic/malloc-machine.h          |   68 +
 .../ptmalloc2/sysdeps/generic/thread-st.h     |   48 +
 .../sysdeps/pthread/malloc-machine.h          |  132 +
 .../ptmalloc2/sysdeps/pthread/thread-st.h     |  111 +
 .../sysdeps/solaris/malloc-machine.h          |   51 +
 .../ptmalloc2/sysdeps/solaris/thread-st.h     |   72 +
 .../ptmalloc2/sysdeps/sproc/malloc-machine.h  |   51 +
 .../ptmalloc2/sysdeps/sproc/thread-st.h       |   84 +
 opal/memory/ptmalloc2/t-test.h                |  143 +
 opal/memory/ptmalloc2/t-test1.c               |  285 +
 opal/memory/ptmalloc2/t-test2.c               |  231 +
 opal/memory/ptmalloc2/tst-mallocstate.c       |   82 +
 opal/memory/ptmalloc2/tst-mstats.c            |  100 +
 24 files changed, 9451 insertions(+)
 create mode 100644 opal/memory/ptmalloc2/COPYRIGHT
 create mode 100644 opal/memory/ptmalloc2/ChangeLog
 create mode 100644 opal/memory/ptmalloc2/Makefile
 create mode 100644 opal/memory/ptmalloc2/README
 create mode 100644 opal/memory/ptmalloc2/arena.c
 create mode 100644 opal/memory/ptmalloc2/hooks.c
 create mode 100644 opal/memory/ptmalloc2/lran2.h
 create mode 100644 opal/memory/ptmalloc2/malloc-stats.c
 create mode 100644 opal/memory/ptmalloc2/malloc.c
 create mode 100644 opal/memory/ptmalloc2/malloc.h
 create mode 100644 opal/memory/ptmalloc2/sysdeps/generic/atomic.h
 create mode 100644 opal/memory/ptmalloc2/sysdeps/generic/malloc-machine.h
 create mode 100644 opal/memory/ptmalloc2/sysdeps/generic/thread-st.h
 create mode 100644 opal/memory/ptmalloc2/sysdeps/pthread/malloc-machine.h
 create mode 100644 opal/memory/ptmalloc2/sysdeps/pthread/thread-st.h
 create mode 100644 opal/memory/ptmalloc2/sysdeps/solaris/malloc-machine.h
 create mode 100644 opal/memory/ptmalloc2/sysdeps/solaris/thread-st.h
 create mode 100644 opal/memory/ptmalloc2/sysdeps/sproc/malloc-machine.h
 create mode 100644 opal/memory/ptmalloc2/sysdeps/sproc/thread-st.h
 create mode 100644 opal/memory/ptmalloc2/t-test.h
 create mode 100644 opal/memory/ptmalloc2/t-test1.c
 create mode 100644 opal/memory/ptmalloc2/t-test2.c
 create mode 100644 opal/memory/ptmalloc2/tst-mallocstate.c
 create mode 100644 opal/memory/ptmalloc2/tst-mstats.c

diff --git a/opal/memory/ptmalloc2/COPYRIGHT b/opal/memory/ptmalloc2/COPYRIGHT
new file mode 100644
index 0000000000..4615c2637c
--- /dev/null
+++ b/opal/memory/ptmalloc2/COPYRIGHT
@@ -0,0 +1,19 @@
+Copyright (c) 2001-2004 Wolfram Gloger
+
+Permission to use, copy, modify, distribute, and sell this software
+and its documentation for any purpose is hereby granted without fee,
+provided that (i) the above copyright notices and this permission
+notice appear in all copies of the software and related documentation,
+and (ii) the name of Wolfram Gloger may not be used in any advertising
+or publicity relating to the software.
+
+THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
+EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
+WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL,
+INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY
+DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY
+OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
diff --git a/opal/memory/ptmalloc2/ChangeLog b/opal/memory/ptmalloc2/ChangeLog
new file mode 100644
index 0000000000..b4db8b65a6
--- /dev/null
+++ b/opal/memory/ptmalloc2/ChangeLog
@@ -0,0 +1,181 @@
+2004-11-05  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/hooks.c (malloc_starter, memalign_starter): Call
+	ptmalloc_init_minimal().
+
+2004-11-04  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/malloc.c (USE_STARTER): New macro.
+	* malloc/hooks.c: Use USE_STARTER.
+	* malloc/arena.c: Use USE_STARTER.
+
+2004-08-13  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c: Use strong_alias instead of weak_alias wherever
+	possible.
+
+2002-12-06  Roland McGrath  <roland@redhat.com>
+
+	* malloc/arena.c (ptmalloc_init_minimal): New function, broken out
+	of ptmalloc_init.
+
+2002-08-23  Roland McGrath  <roland@redhat.com>
+
+	* malloc/hooks.c (__malloc_initialize_hook, __free_hook,
+	__malloc_hook, __realloc_hook, __memalign_hook,
+	__after_morecore_hook): Variable definitions moved to ...
+	* malloc/malloc.c: ... here, so as to be before all references.
+
+2004-10-19  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/hooks.c (mem2chunk_check, top_check): Handle
+	non-contiguous arena.  Reported by Michael Dalton
+	<mwdalton@stanford.edu> [BZ #457].  Add further checks for top
+	chunk.
+
+2004-08-08  Wolfram Gloger  <wg@malloc.de>
+
+	* include/malloc.h (mstate): Move type declaration from here...
+	* malloc/malloc.h: ...to here.
+	(struct malloc_arena_info, struct malloc_global_info): New types.
+	(_int_get_arena, _int_get_arena_info, _int_get_global_info): New
+	functions.
+	* malloc/malloc.c (mSTATS, public_mSTATs, mALLINFo): Remove.
+	(_int_get_arena_info, _int_get_global_info): New functions.
+	* malloc/arena.c (_int_get_arena): New function.
+	* malloc/malloc-stats.c: New file.
+	* malloc/tst-mstats.c: New file.
+	* malloc/Makefile (tests): Add tst-mstats.
+	(distribute): Remove no-longer existing thread-m.h.
+	(dist-routines): Add malloc-stats.
+	* malloc/Versions: Add _int_get_arena, _int_get_arena_info,
+	_int_get_global_info.
+	
+2004-07-25  Wolfram Gloger  <wg@malloc.de>
+
+	* sysdeps/generic/thread-st.h: New file.
+	* sysdeps/pthread/thread-st.h: New file.
+	* sysdeps/sproc/thread-st.h: New file.
+	* sysdeps/solaris/thread-st.h: New file.
+	* thread-st.h: Removed.
+
+2004-03-18  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (__posix_memalign): Correct alignment check.
+	Reported by Don Heller <dheller@cse.psu.edu>.
+
+2003-12-17  Jakub Jelinek  <jakub@redhat.com>
+
+	* malloc/malloc.c (__posix_memalign): If __memalign_hook != NULL,
+	call it directly instead of memalign_internal.
+
+2003-09-27  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/malloc.c: Include <malloc-machine.h> earlier instead of
+	"thread-m.h", so that default parameters can be overridden in a
+	system-specific malloc-machine.h.  Remove extra ; from extern "C"
+	closing brace.
+	* sysdeps/generic/malloc-machine.h: New file.
+	* malloc/thread-m.h: Removed.
+
+2003-09-08  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/malloc.c (sYSMALLOc): Move foreign sbrk accounting into
+	contiguous case.  Bug report from Prem Gopalan
+	<prem@mazunetworks.com>.
+
+2003-08-18  Art Haas  <ahaas@airmail.net>
+
+	* malloc/malloc.h: Remove unneeded ';' where closing the C++
+	extern block.
+
+2003-06-18  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (public_mALLINFo): Initialize malloc if it
+	hasn't happened yet.
+
+2003-05-28  Roland McGrath  <roland@redhat.com>
+
+	* malloc/malloc.h [! __GNUC__] (__const): Define if undefined.
+
+2003-05-04  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* malloc/arena.c (arena_get2): Add atomic_write_barrier.
+	* malloc/thread-m.h: Include <atomic.h>.
+	(atomic_full_barrier): Provide default.
+	(atomic_read_barrier): Likewise.
+	(atomic_write_barrier): Likewise.
+
+2003-05-01  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (mSTATs): Call ptmalloc_init if necessary.
+
+2003-01-27  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/hooks.c (mem2chunk_check): Check alignment of mem
+	pointer, not of the computed chunk.  Bug report from Carlos
+	O'Donell <carlos@baldric.uwo.ca>.
+
+2002-12-27  Jakub Jelinek  <jakub@redhat.com>
+
+	* malloc/arena.c (ptmalloc_init): Don't call next_env_entry if
+	_environ is NULL.
+
+2002-12-17  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (mALLOPt): Make sure malloc is initialized.
+
+2002-12-06  Roland McGrath  <roland@redhat.com>
+
+	* malloc/hooks.c [_LIBC && (USE___THREAD || (USE_TLS && !SHARED))]
+	(malloc_starter, memalign_starter, free_starter): Don't define these.
+
+	* malloc/hooks.c (memalign_starter): New function.
+	* malloc/malloc.c: Declare it.
+	* malloc/arena.c (save_memalign_hook): New variable.
+	(ptmalloc_init): Set __memalign_hook to memalign_starter.
+
+2002-11-18  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/arena.c
+	(ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2): Do
+	nothing if not initialized.  Bug report from Marcus Brinkmann
+	<Marcus.Brinkmann@ruhr-uni-bochum.de>.
+
+2002-10-07  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/malloc.c (sYSMALLOc): Only check for breakage due
+	to foreign sbrk()'s if arena is contiguous.  Bug report from
+	Bruno Haible <bruno@clisp.org>.
+
+2002-07-11  Wolfram Gloger  <wmglo@dent.med.uni-muenchen.de>
+
+	* malloc/hooks.c: typo fix in NO_THREADS case, realloc_check
+	fix in HAVE_MREMAP case.
+
+2002-06-11  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/malloc.c: Fix error path when new_heap() returns NULL.
+	Reported by Michael Meissner <meissner@redhat.com>.
+
+2002-03-29  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/malloc.c: Add short description and prototypes for
+	malloc_get_state, malloc_set_state and posix_memalign, for
+	consistency and to avoid warnings with -Wstrict-prototypes.
+	Reported by Andreas Jaeger <aj@suse.de>.
+
+2002-03-13  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/malloc.c (sYSMALLOc): Don't change brk if mmap
+	failed.
+
+2002-01-18  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/malloc.c: Rewrite, adapted from Doug Lea's malloc-2.7.0.c.
+	* malloc/malloc.h: Likewise.
+	* malloc/arena.c: New file.
+	* malloc/hooks.c: New file.
+	* malloc/tst-mallocstate.c: New file.
+	* malloc/Makefile: Add new testcase tst-mallocstate.
+	Add arena.c and hooks.c to distribute.  Fix commented CPPFLAGS.
diff --git a/opal/memory/ptmalloc2/Makefile b/opal/memory/ptmalloc2/Makefile
new file mode 100644
index 0000000000..5080dd5196
--- /dev/null
+++ b/opal/memory/ptmalloc2/Makefile
@@ -0,0 +1,218 @@
+# Makefile for ptmalloc, version 2
+# by Wolfram Gloger 1996-1999, 2001, 2002, 2003, 2004
+
+DIST_FILES0 = ChangeLog malloc.h malloc.c arena.c hooks.c \
+ malloc-stats.c \
+ sysdeps \
+ tst-mallocstate.c tst-mstats.c
+DIST_FILES1 = COPYRIGHT README Makefile \
+ $(DIST_FILES0) \
+ lran2.h t-test.h t-test1.c t-test2.c \
+ #debian
+DIST_FILES2 = $(DIST_FILES1) \
+ Makefile.glibc glibc-include RCS/*,v
+# malloc-int.h 
+TAR_FLAGS = --numeric-owner --exclude "*~" --exclude "debian/tmp*"
+
+#CC = /pkg/gcc-2.95.2-wg/bin/gcc
+CC = cc
+
+SYS_FLAGS  =
+OPT_FLAGS  = -g -O # -O2
+WARN_FLAGS = #-Wall -Wstrict-prototypes
+SH_FLAGS   = -shared -fpic
+
+INC_FLAGS  = -Isysdeps/generic
+
+# Flags for the test programs
+T_FLAGS   = -DUSE_MALLOC=1 -DTEST=1
+
+# Flags for the compilation of malloc.c
+M_FLAGS   = -DTHREAD_STATS=1 #-DMALLOC_DEBUG=1
+
+# Thread flags.
+# See the platform-specific targets below.
+THR_FLAGS = -DUSE_TSD_DATA_HACK -D_REENTRANT
+THR_LIBS  = -lpthread
+
+RM        = rm -f
+AR        = ar
+RANLIB    = ranlib
+
+MALLOC_OBJ = malloc.o malloc-stats.o
+LIB_MALLOC = libmalloc.a
+
+T_SUF =
+TESTS = t-test1$(T_SUF) t-test2$(T_SUF) \
+        tst-mallocstate$(T_SUF) tst-mstats$(T_SUF)
+
+CFLAGS = $(SYS_FLAGS) $(OPT_FLAGS) $(WARN_FLAGS) $(THR_FLAGS) $(INC_FLAGS)
+
+.c.o:
+	$(CC) -c $(CFLAGS) $<
+
+all: $(LIB_MALLOC) $(TESTS)
+
+malloc.o: malloc.c malloc.h
+	$(CC) -c $(CFLAGS) $(M_FLAGS) $<
+
+malloc-stats.o: malloc-stats.c malloc.h
+	$(CC) -c $(CFLAGS) $(M_FLAGS) $<
+
+libmalloc.a: $(MALLOC_OBJ)
+	$(AR) cr $@ $(MALLOC_OBJ)
+	$(RANLIB) $@
+
+shared: malloc.so
+
+malloc.so: malloc.c malloc-stats.c malloc.h
+	$(CC) $(SH_FLAGS) $(CFLAGS) $(M_FLAGS) malloc.c malloc-stats.c -o $@
+
+again:
+	$(RM) $(TESTS)
+	$(MAKE) $(TESTS)
+
+clean:
+	$(RM) $(MALLOC_OBJ) libmalloc.a malloc.so $(TESTS) core core.[0-9]*
+
+t-test1$(T_SUF): t-test1.c t-test.h $(LIB_MALLOC)
+	$(CC) $(CFLAGS) $(T_FLAGS) t-test1.c $(LIB_MALLOC) $(THR_LIBS) -o $@
+
+t-test2$(T_SUF): t-test2.c t-test.h $(LIB_MALLOC)
+	$(CC) $(CFLAGS) $(T_FLAGS) t-test2.c $(LIB_MALLOC) $(THR_LIBS) -o $@
+
+tst-mallocstate$(T_SUF): tst-mallocstate.c $(LIB_MALLOC)
+	$(CC) $(CFLAGS) $(T_FLAGS) tst-mallocstate.c $(LIB_MALLOC) \
+	 $(THR_LIBS) -o $@
+
+tst-mstats$(T_SUF): tst-mstats.c $(LIB_MALLOC)
+	$(CC) $(CFLAGS) $(T_FLAGS) tst-mstats.c $(LIB_MALLOC) \
+	 $(THR_LIBS) -o $@
+
+############################################################################
+# Platform-specific targets. The ones ending in `-libc' are provided
+# to enable comparison with the standard malloc implementation from
+# the system's native C library.  The option USE_TSD_DATA_HACK is now
+# the default for pthreads systems, as most (Irix 6, Solaris 2) seem
+# to need it.  Try with USE_TSD_DATA_HACK undefined only if you're
+# confident that your systems's thread specific data functions do _not_
+# use malloc themselves.
+
+# posix threads with TSD data hack
+posix:
+	$(MAKE) THR_FLAGS='-DUSE_TSD_DATA_HACK -D_REENTRANT' \
+ OPT_FLAGS='$(OPT_FLAGS)' SYS_FLAGS='$(SYS_FLAGS)' CC='$(CC)' \
+ INC_FLAGS='-Isysdeps/pthread -Isysdeps/generic -I.'
+ THR_LIBS=-lpthread
+
+# posix threads with explicit initialization.  Known to be needed on HPUX.
+posix-explicit:
+	$(MAKE) THR_FLAGS='-D_REENTRANT -DUSE_TSD_DATA_HACK -DUSE_STARTER=2' \
+	 THR_LIBS=-lpthread \
+	 OPT_FLAGS='$(OPT_FLAGS)' SYS_FLAGS='$(SYS_FLAGS)' CC='$(CC)' \
+	 INC_FLAGS='-Isysdeps/pthread -Isysdeps/generic -I.' \
+	 M_FLAGS='$(M_FLAGS)'
+
+# posix threads without TSD data hack -- not known to work
+posix-with-tsd:
+	$(MAKE) THR_FLAGS='-D_REENTRANT' THR_LIBS=-lpthread \
+	INC_FLAGS='-Isysdeps/pthread -Isysdeps/generic -I.' \
+	M_FLAGS='$(M_FLAGS)'
+
+posix-libc:
+	$(MAKE) THR_FLAGS='-D_REENTRANT' THR_LIBS=-lpthread \
+	INC_FLAGS='-Isysdeps/pthread -Isysdeps/generic -I.' \
+	M_FLAGS='$(M_FLAGS)' LIB_MALLOC= T_FLAGS= T_SUF=-libc
+
+linux-pthread:
+	$(MAKE) SYS_FLAGS='-D_GNU_SOURCE=1' \
+ WARN_FLAGS='-Wall -Wstrict-prototypes' \
+ OPT_FLAGS='$(OPT_FLAGS)' THR_FLAGS='-DUSE_TSD_DATA_HACK' \
+ INC_FLAGS='-Isysdeps/pthread -Isysdeps/generic -I.' M_FLAGS='$(M_FLAGS)' \
+ TESTS='$(TESTS)'
+
+linux-malloc.so:
+	$(MAKE) SYS_FLAGS='-D_GNU_SOURCE=1' \
+ WARN_FLAGS='-Wall -Wstrict-prototypes' \
+ OPT_FLAGS='$(OPT_FLAGS)' THR_FLAGS='-DUSE_TSD_DATA_HACK' \
+ INC_FLAGS='-Isysdeps/pthread -Isysdeps/generic -I.' M_FLAGS='$(M_FLAGS)' \
+ malloc.so
+
+sproc:
+	$(MAKE) THR_FLAGS='' THR_LIBS='' OPT_FLAGS='$(OPT_FLAGS)' CC='$(CC)' \
+	INC_FLAGS='-Isysdeps/sproc -Isysdeps/generic -I.' \
+	M_FLAGS='$(M_FLAGS)'
+
+sproc-shared:
+	$(MAKE) THR_FLAGS='' THR_LIBS= \
+	 SH_FLAGS='-shared -check_registry /usr/lib/so_locations' \
+	 INC_FLAGS='-Isysdeps/sproc -Isysdeps/generic -I.' \
+	  LIB_MALLOC=malloc.so M_FLAGS='$(M_FLAGS)'
+
+sproc-libc:
+	$(MAKE) THR_FLAGS='1' THR_LIBS= LIB_MALLOC= T_FLAGS= \
+	 INC_FLAGS='-Isysdeps/sproc -Isysdeps/generic -I.' \
+	 T_SUF=-libc M_FLAGS='$(M_FLAGS)'
+
+solaris:
+	$(MAKE) THR_FLAGS='-D_REENTRANT' OPT_FLAGS='$(OPT_FLAGS)' \
+	 INC_FLAGS='-Isysdeps/solaris -Isysdeps/generic -I.' \
+	THR_LIBS=-lthread M_FLAGS='$(M_FLAGS)'
+
+solaris-libc:
+	$(MAKE) THR_FLAGS='-D_REENTRANT' OPT_FLAGS='$(OPT_FLAGS)' \
+	 INC_FLAGS='-Isysdeps/solaris -Isysdeps/generic -I.' \
+ THR_LIBS=-lthread LIB_MALLOC= T_FLAGS= T_SUF=-libc M_FLAGS='$(M_FLAGS)'
+
+nothreads:
+	$(MAKE) OPT_FLAGS='$(OPT_FLAGS)' SYS_FLAGS='$(SYS_FLAGS)' \
+	 INC_FLAGS='-Isysdeps/generic -I.' \
+ THR_FLAGS='' THR_LIBS='' M_FLAGS='$(M_FLAGS)'
+
+gcc-nothreads:
+	$(MAKE) CC='gcc' WARN_FLAGS='-Wall' OPT_FLAGS='$(OPT_FLAGS)' \
+	 INC_FLAGS='-Isysdeps/generic -I.' \
+ SYS_FLAGS='$(SYS_FLAGS)' THR_FLAGS='' THR_LIBS='' M_FLAGS='$(M_FLAGS)'
+
+linux-nothreads:
+	$(MAKE) CC='gcc' WARN_FLAGS='-Wall' OPT_FLAGS='$(OPT_FLAGS)' \
+	 INC_FLAGS='-Isysdeps/generic -I.' \
+ SYS_FLAGS='-D_GNU_SOURCE' THR_FLAGS='' THR_LIBS='' M_FLAGS='$(M_FLAGS)'
+
+# note: non-ANSI compilers are no longer considered important
+traditional:
+	$(MAKE) THR_FLAGS='' THR_LIBS='' CC='gcc -traditional'
+
+#glibc-test:
+#	$(MAKE) THR_FLAGS='-DUSE_PTHREADS=1 -D_LIBC' \
+# SYS_FLAGS='-D_GNU_SOURCE=1 ' \
+# WARN_FLAGS='-Wall -Wstrict-prototypes -Wbad-function-cast -Wmissing-noreturn -Wmissing-prototypes -Wmissing-declarations -Wcomment -Wcomments -Wtrigraphs -Wmultichar -Wstrict-prototypes -Winline' \
+# INC_FLAGS='-Iglibc-include -include glibc-include/libc-symbols.h' \
+# malloc.o && mv malloc.o malloc-glibc.o
+
+############################################################################
+
+check: $(TESTS)
+	./t-test1
+	./t-test2
+	./tst-mallocstate || echo "Test mallocstate failed!"
+	./tst-mstats || echo "Test mstats failed!"
+
+snap:
+	cd ..; tar $(TAR_FLAGS) -c -f - $(DIST_FILES1:%=ptmalloc2/%) | \
+	 gzip -9 >ptmalloc2-current.tar.gz
+
+dist:
+	cd ..; tar $(TAR_FLAGS) -c -f - $(DIST_FILES2:%=ptmalloc2/%) | \
+	 gzip -9 >ptmalloc2.tar.gz
+
+Makefile.glibc.diff: Makefile.glibc
+	-diff -u /mount/public/export/glibc/cvs/libc/malloc/Makefile \
+ Makefile.glibc >$@
+
+dist-glibc: Makefile.glibc.diff
+	tar cf - $(DIST_FILES0) Makefile.glibc.diff | \
+	 gzip -9 >../libc.malloc.tar.gz
+
+# dependencies
+malloc.o: arena.c hooks.c
diff --git a/opal/memory/ptmalloc2/README b/opal/memory/ptmalloc2/README
new file mode 100644
index 0000000000..fed6995134
--- /dev/null
+++ b/opal/memory/ptmalloc2/README
@@ -0,0 +1,192 @@
+ptmalloc2 - a multi-thread malloc implementation
+================================================
+
+Wolfram Gloger (wg@malloc.de)
+
+Nov 2004
+
+
+Introduction
+============
+
+This package is a modified version of Doug Lea's malloc-2.7.1pre
+implementation (available seperately from ftp://g.oswego.edu/pub/misc)
+that I adapted for multiple threads, while trying to avoid lock
+contention as much as possible.  Many thanks should go to Doug Lea
+(dl@cs.oswego.edu) for the great original malloc implementation.
+
+As part of the GNU C library, the source files are available under the
+GNU Library General Public License (see the comments in the files).
+But as part of this stand-alone package, the code is also available
+under the (probably less restrictive) conditions described in the file
+'COPYRIGHT'.  In any case, there is no warranty whatsoever for this
+package.
+
+The current distribution should be available from:
+
+http://www.malloc.de/malloc/ptmalloc2.tar.gz
+
+
+Compilation
+===========
+
+It should be possible to build ptmalloc2 on any UN*X-like system that
+implements the sbrk(), mmap(), munmap() and mprotect() calls.  If
+mmap() is not available, it is only possible to produce a
+non-threadsafe implementation.  Since there are now several source
+files, a library (libmalloc.a) is generated.  See the Makefile for
+examples of the compile-time options.
+
+Note that support for non-ANSI compilers is no longer a significant
+goal.
+
+Several example targets are provided in the Makefile:
+
+ o Posix threads (pthreads), compile with "make posix"
+
+ o Posix threads with explicit initialization, compile with
+   "make posix-explicit" (known to be required on HPUX)
+
+ o Posix threads without "tsd data hack" (see below), compile with
+   "make posix-with-tsd"
+
+ o Solaris threads, compile with "make solaris"
+
+ o SGI sproc() threads, compile with "make sproc"
+
+ o no threads, compile with "make nothreads"
+
+For Linux:
+
+ o make "linux-pthread" (almost the same as "make posix")
+
+Note that some compilers need special flags for multi-threaded code,
+e.g. with Solaris cc with Posix threads, one should use:
+
+% make posix SYS_FLAGS='-mt'
+
+Some additional targets, ending in `-libc', are also provided in the
+Makefile, to compare performance of the test programs to the case when
+linking with the standard malloc implementation in libc.
+
+A potential problem remains: If any of the system-specific functions
+for getting/setting thread-specific data or for locking a mutex call
+one of the malloc-related functions internally, the implementation
+cannot work at all due to infinite recursion.  One example seems to be
+Solaris 2.4.  I would like to hear if this problem occurs on other
+systems, and whether similar workarounds could be applied.
+
+For Posix threads, too, an optional hack like that has been integrated
+(activated when defining USE_TSD_DATA_HACK) which depends on
+`pthread_t' being convertible to an integral type (which is of course
+not generally guaranteed).  USE_TSD_DATA_HACK is now the default
+because I haven't yet found a non-glibc pthreads system where this
+hack is _not_ needed.
+
+*NEW* and _important_: In (currently) one place in the ptmalloc2
+source, a write memory barrier is needed, named
+atomic_write_barrier().  This macro needs to be defined at the end of
+malloc-machine.h.  For gcc, a fallback in the form of a full memory
+barrier is already defined, but you may need to add another definition
+if you don't use gcc.
+
+Usage
+=====
+
+Just link libmalloc.a into your application.
+
+Some wicked systems (e.g. HPUX apparently) won't let malloc call _any_
+thread-related functions before main().  On these systems,
+USE_STARTER=2 must be defined during compilation (see "make
+posix-explicit" above) and the global initialization function
+ptmalloc_init() must be called explitly, preferably at the start of
+main().
+
+Otherwise, when using ptmalloc2, no special precautions are necessary.
+
+Link order is important
+=======================
+
+On some systems, when overriding malloc and linking against shared
+libraries, the link order becomes very important.  E.g., when linking
+C++ programs on Solaris, don't rely on libC being included by default,
+but instead put `-lthread' behind `-lC' on the command line:
+
+  CC ... libmalloc.a -lC -lthread
+
+This is because there are global constructors in libC that need
+malloc/ptmalloc, which in turn needs to have the thread library to be
+already initialized.
+
+Debugging hooks
+===============
+
+All calls to malloc(), realloc(), free() and memalign() are routed
+through the global function pointers __malloc_hook, __realloc_hook,
+__free_hook and __memalign_hook if they are not NULL (see the malloc.h
+header file for declarations of these pointers).  Therefore the malloc
+implementation can be changed at runtime, if care is taken not to call
+free() or realloc() on pointers obtained with a different
+implementation than the one currently in effect.  (The easiest way to
+guarantee this is to set up the hooks before any malloc call, e.g.
+with a function pointed to by the global variable
+__malloc_initialize_hook).
+
+A useful application of the hooks is built-in into ptmalloc2: The
+implementation is usually very unforgiving with respect to misuse,
+such as free()ing a pointer twice or free()ing a pointer not obtained
+with malloc() (these will typically crash the application
+immediately).  To debug in such situations, you can set the
+environment variable `MALLOC_CHECK_' (note the trailing underscore).
+Performance will suffer somewhat, but you will get more controlled
+behaviour in the case of misuse.  If MALLOC_CHECK_=0, wrong free()s
+will be silently ignored, if MALLOC_CHECK_=1, diagnostics will be
+printed on stderr, and if MALLOC_CHECK_=2, abort() will be called on
+any error.
+
+You can now also tune other malloc parameters (normally adjused via
+mallopt() calls from the application) with environment variables:
+
+    MALLOC_TRIM_THRESHOLD_    for deciding to shrink the heap (in bytes)
+
+    MALLOC_TOP_PAD_           how much extra memory to allocate on
+                              each system call (in bytes)
+
+    MALLOC_MMAP_THRESHOLD_    min. size for chunks allocated via
+                              mmap() (in bytes)
+
+    MALLOC_MMAP_MAX_          max. number of mmapped regions to use
+
+Tests
+=====
+
+Two testing applications, t-test1 and t-test2, are included in this
+source distribution.  Both perform pseudo-random sequences of
+allocations/frees, and can be given numeric arguments (all arguments
+are optional):
+
+% t-test[12] <n-total> <n-parallel> <n-allocs> <size-max> <bins>
+
+    n-total = total number of threads executed (default 10)
+    n-parallel = number of threads running in parallel (2)
+    n-allocs = number of malloc()'s / free()'s per thread (10000)
+    size-max = max. size requested with malloc() in bytes (10000)
+    bins = number of bins to maintain
+
+The first test `t-test1' maintains a completely seperate pool of
+allocated bins for each thread, and should therefore show full
+parallelism.  On the other hand, `t-test2' creates only a single pool
+of bins, and each thread randomly allocates/frees any bin.  Some lock
+contention is to be expected in this case, as the threads frequently
+cross each others arena.
+
+Performance results from t-test1 should be quite repeatable, while the
+behaviour of t-test2 depends on scheduling variations.
+
+Conclusion
+==========
+
+I'm always interested in performance data and feedback, just send mail
+to ptmalloc@malloc.de.
+
+Good luck!
diff --git a/opal/memory/ptmalloc2/arena.c b/opal/memory/ptmalloc2/arena.c
new file mode 100644
index 0000000000..66aa52c27f
--- /dev/null
+++ b/opal/memory/ptmalloc2/arena.c
@@ -0,0 +1,800 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* $Id: arena.c,v 1.9 2004/11/05 14:42:23 wg Exp $ */
+
+/* Compile-time constants.  */
+
+#define HEAP_MIN_SIZE (32*1024)
+#ifndef HEAP_MAX_SIZE
+#define HEAP_MAX_SIZE (1024*1024) /* must be a power of two */
+#endif
+
+/* HEAP_MIN_SIZE and HEAP_MAX_SIZE limit the size of mmap()ed heaps
+   that are dynamically created for multi-threaded programs.  The
+   maximum size must be a power of two, for fast determination of
+   which heap belongs to a chunk.  It should be much larger than the
+   mmap threshold, so that requests with a size just below that
+   threshold can be fulfilled without creating too many heaps.  */
+
+
+#ifndef THREAD_STATS
+#define THREAD_STATS 0
+#endif
+
+/* If THREAD_STATS is non-zero, some statistics on mutex locking are
+   computed.  */
+
+/***************************************************************************/
+
+#define top(ar_ptr) ((ar_ptr)->top)
+
+/* A heap is a single contiguous memory region holding (coalesceable)
+   malloc_chunks.  It is allocated with mmap() and always starts at an
+   address aligned to HEAP_MAX_SIZE.  Not used unless compiling with
+   USE_ARENAS. */
+
+typedef struct _heap_info {
+  mstate ar_ptr; /* Arena for this heap. */
+  struct _heap_info *prev; /* Previous heap. */
+  size_t size;   /* Current size in bytes. */
+  size_t pad;    /* Make sure the following data is properly aligned. */
+} heap_info;
+
+/* Thread specific data */
+
+static tsd_key_t arena_key;
+static mutex_t list_lock;
+
+#if THREAD_STATS
+static int stat_n_heaps;
+#define THREAD_STAT(x) x
+#else
+#define THREAD_STAT(x) do ; while(0)
+#endif
+
+/* Mapped memory in non-main arenas (reliable only for NO_THREADS). */
+static unsigned long arena_mem;
+
+/* Already initialized? */
+int __malloc_initialized = -1;
+
+/**************************************************************************/
+
+#if USE_ARENAS
+
+/* arena_get() acquires an arena and locks the corresponding mutex.
+   First, try the one last locked successfully by this thread.  (This
+   is the common case and handled with a macro for speed.)  Then, loop
+   once over the circularly linked list of arenas.  If no arena is
+   readily available, create a new one.  In this latter case, `size'
+   is just a hint as to how much memory will be required immediately
+   in the new arena. */
+
+#define arena_get(ptr, size) do { \
+  Void_t *vptr = NULL; \
+  ptr = (mstate)tsd_getspecific(arena_key, vptr); \
+  if(ptr && !mutex_trylock(&ptr->mutex)) { \
+    THREAD_STAT(++(ptr->stat_lock_direct)); \
+  } else \
+    ptr = arena_get2(ptr, (size)); \
+} while(0)
+
+/* find the heap and corresponding arena for a given ptr */
+
+#define heap_for_ptr(ptr) \
+ ((heap_info *)((unsigned long)(ptr) & ~(HEAP_MAX_SIZE-1)))
+#define arena_for_chunk(ptr) \
+ (chunk_non_main_arena(ptr) ? heap_for_ptr(ptr)->ar_ptr : &main_arena)
+
+#else /* !USE_ARENAS */
+
+/* There is only one arena, main_arena. */
+
+#if THREAD_STATS
+#define arena_get(ar_ptr, sz) do { \
+  ar_ptr = &main_arena; \
+  if(!mutex_trylock(&ar_ptr->mutex)) \
+    ++(ar_ptr->stat_lock_direct); \
+  else { \
+    (void)mutex_lock(&ar_ptr->mutex); \
+    ++(ar_ptr->stat_lock_wait); \
+  } \
+} while(0)
+#else
+#define arena_get(ar_ptr, sz) do { \
+  ar_ptr = &main_arena; \
+  (void)mutex_lock(&ar_ptr->mutex); \
+} while(0)
+#endif
+#define arena_for_chunk(ptr) (&main_arena)
+
+#endif /* USE_ARENAS */
+
+/**************************************************************************/
+
+#ifndef NO_THREADS
+
+/* atfork support.  */
+
+static __malloc_ptr_t (*save_malloc_hook) __MALLOC_P ((size_t __size,
+						       __const __malloc_ptr_t));
+# if !defined _LIBC || !defined USE_TLS || (defined SHARED && !USE___THREAD)
+static __malloc_ptr_t (*save_memalign_hook) __MALLOC_P ((size_t __align,
+							 size_t __size,
+						       __const __malloc_ptr_t));
+# endif
+static void           (*save_free_hook) __MALLOC_P ((__malloc_ptr_t __ptr,
+						     __const __malloc_ptr_t));
+static Void_t*        save_arena;
+
+/* Magic value for the thread-specific arena pointer when
+   malloc_atfork() is in use.  */
+
+#define ATFORK_ARENA_PTR ((Void_t*)-1)
+
+/* The following hooks are used while the `atfork' handling mechanism
+   is active. */
+
+static Void_t*
+malloc_atfork(size_t sz, const Void_t *caller)
+{
+  Void_t *vptr = NULL;
+  Void_t *victim;
+
+  tsd_getspecific(arena_key, vptr);
+  if(vptr == ATFORK_ARENA_PTR) {
+    /* We are the only thread that may allocate at all.  */
+    if(save_malloc_hook != malloc_check) {
+      return _int_malloc(&main_arena, sz);
+    } else {
+      if(top_check()<0)
+        return 0;
+      victim = _int_malloc(&main_arena, sz+1);
+      return mem2mem_check(victim, sz);
+    }
+  } else {
+    /* Suspend the thread until the `atfork' handlers have completed.
+       By that time, the hooks will have been reset as well, so that
+       mALLOc() can be used again. */
+    (void)mutex_lock(&list_lock);
+    (void)mutex_unlock(&list_lock);
+    return public_mALLOc(sz);
+  }
+}
+
+static void
+free_atfork(Void_t* mem, const Void_t *caller)
+{
+  Void_t *vptr = NULL;
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk(mem);         /* do not bother to replicate free_check here */
+
+#if HAVE_MMAP
+  if (chunk_is_mmapped(p))                       /* release mmapped memory. */
+  {
+    munmap_chunk(p);
+    return;
+  }
+#endif
+
+  ar_ptr = arena_for_chunk(p);
+  tsd_getspecific(arena_key, vptr);
+  if(vptr != ATFORK_ARENA_PTR)
+    (void)mutex_lock(&ar_ptr->mutex);
+  _int_free(ar_ptr, mem);
+  if(vptr != ATFORK_ARENA_PTR)
+    (void)mutex_unlock(&ar_ptr->mutex);
+}
+
+/* The following two functions are registered via thread_atfork() to
+   make sure that the mutexes remain in a consistent state in the
+   fork()ed version of a thread.  Also adapt the malloc and free hooks
+   temporarily, because the `atfork' handler mechanism may use
+   malloc/free internally (e.g. in LinuxThreads). */
+
+static void
+ptmalloc_lock_all __MALLOC_P((void))
+{
+  mstate ar_ptr;
+
+  if(__malloc_initialized < 1)
+    return;
+  (void)mutex_lock(&list_lock);
+  for(ar_ptr = &main_arena;;) {
+    (void)mutex_lock(&ar_ptr->mutex);
+    ar_ptr = ar_ptr->next;
+    if(ar_ptr == &main_arena) break;
+  }
+  save_malloc_hook = __malloc_hook;
+  save_free_hook = __free_hook;
+  __malloc_hook = malloc_atfork;
+  __free_hook = free_atfork;
+  /* Only the current thread may perform malloc/free calls now. */
+  tsd_getspecific(arena_key, save_arena);
+  tsd_setspecific(arena_key, ATFORK_ARENA_PTR);
+}
+
+static void
+ptmalloc_unlock_all __MALLOC_P((void))
+{
+  mstate ar_ptr;
+
+  if(__malloc_initialized < 1)
+    return;
+  tsd_setspecific(arena_key, save_arena);
+  __malloc_hook = save_malloc_hook;
+  __free_hook = save_free_hook;
+  for(ar_ptr = &main_arena;;) {
+    (void)mutex_unlock(&ar_ptr->mutex);
+    ar_ptr = ar_ptr->next;
+    if(ar_ptr == &main_arena) break;
+  }
+  (void)mutex_unlock(&list_lock);
+}
+
+#ifdef __linux__
+
+/* In LinuxThreads, unlocking a mutex in the child process after a
+   fork() is currently unsafe, whereas re-initializing it is safe and
+   does not leak resources.  Therefore, a special atfork handler is
+   installed for the child. */
+
+static void
+ptmalloc_unlock_all2 __MALLOC_P((void))
+{
+  mstate ar_ptr;
+
+  if(__malloc_initialized < 1)
+    return;
+#if defined _LIBC || defined MALLOC_HOOKS
+  tsd_setspecific(arena_key, save_arena);
+  __malloc_hook = save_malloc_hook;
+  __free_hook = save_free_hook;
+#endif
+  for(ar_ptr = &main_arena;;) {
+    (void)mutex_init(&ar_ptr->mutex);
+    ar_ptr = ar_ptr->next;
+    if(ar_ptr == &main_arena) break;
+  }
+  (void)mutex_init(&list_lock);
+}
+
+#else
+
+#define ptmalloc_unlock_all2 ptmalloc_unlock_all
+
+#endif
+
+#endif /* !defined NO_THREADS */
+
+/* Initialization routine. */
+#ifdef _LIBC
+#include <string.h>
+extern char **_environ;
+
+static char *
+internal_function
+next_env_entry (char ***position)
+{
+  char **current = *position;
+  char *result = NULL;
+
+  while (*current != NULL)
+    {
+      if (__builtin_expect ((*current)[0] == 'M', 0)
+	  && (*current)[1] == 'A'
+	  && (*current)[2] == 'L'
+	  && (*current)[3] == 'L'
+	  && (*current)[4] == 'O'
+	  && (*current)[5] == 'C'
+	  && (*current)[6] == '_')
+	{
+	  result = &(*current)[7];
+
+	  /* Save current position for next visit.  */
+	  *position = ++current;
+
+	  break;
+	}
+
+      ++current;
+    }
+
+  return result;
+}
+#endif /* _LIBC */
+
+/* Set up basic state so that _int_malloc et al can work.  */
+static void
+ptmalloc_init_minimal __MALLOC_P((void))
+{
+#if DEFAULT_TOP_PAD != 0
+  mp_.top_pad        = DEFAULT_TOP_PAD;
+#endif
+  mp_.n_mmaps_max    = DEFAULT_MMAP_MAX;
+  mp_.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
+  mp_.trim_threshold = DEFAULT_TRIM_THRESHOLD;
+  mp_.pagesize       = malloc_getpagesize;
+}
+
+
+#if !(USE_STARTER & 2)
+static
+#endif
+void
+ptmalloc_init __MALLOC_P((void))
+{
+#if __STD_C
+  const char* s;
+#else
+  char* s;
+#endif
+  int secure = 0;
+
+  if(__malloc_initialized >= 0) return;
+  __malloc_initialized = 0;
+
+  if (mp_.pagesize == 0)
+    ptmalloc_init_minimal();
+
+#ifndef NO_THREADS
+# if USE_STARTER & 1
+  /* With some threads implementations, creating thread-specific data
+     or initializing a mutex may call malloc() itself.  Provide a
+     simple starter version (realloc() won't work). */
+  save_malloc_hook = __malloc_hook;
+  save_memalign_hook = __memalign_hook;
+  save_free_hook = __free_hook;
+  __malloc_hook = malloc_starter;
+  __memalign_hook = memalign_starter;
+  __free_hook = free_starter;
+#  ifdef _LIBC
+  /* Initialize the pthreads interface. */
+  if (__pthread_initialize != NULL)
+    __pthread_initialize();
+#  endif /* !defined _LIBC */
+# endif /* USE_STARTER & 1 */
+#endif /* !defined NO_THREADS */
+  mutex_init(&main_arena.mutex);
+  main_arena.next = &main_arena;
+
+  mutex_init(&list_lock);
+  tsd_key_create(&arena_key, NULL);
+  tsd_setspecific(arena_key, (Void_t *)&main_arena);
+  thread_atfork(ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2);
+#ifndef NO_THREADS
+# if USE_STARTER & 1
+  __malloc_hook = save_malloc_hook;
+  __memalign_hook = save_memalign_hook;
+  __free_hook = save_free_hook;
+# endif
+# if USE_STARTER & 2
+  __malloc_hook = 0;
+  __memalign_hook = 0;
+  __free_hook = 0;
+# endif
+#endif
+#ifdef _LIBC
+  secure = __libc_enable_secure;
+  s = NULL;
+  if (__builtin_expect (_environ != NULL, 1))
+    {
+      char **runp = _environ;
+      char *envline;
+
+      while (__builtin_expect ((envline = next_env_entry (&runp)) != NULL,
+			       0))
+	{
+	  size_t len = strcspn (envline, "=");
+
+	  if (envline[len] != '=')
+	    /* This is a "MALLOC_" variable at the end of the string
+	       without a '=' character.  Ignore it since otherwise we
+	       will access invalid memory below.  */
+	    continue;
+
+	  switch (len)
+	    {
+	    case 6:
+	      if (memcmp (envline, "CHECK_", 6) == 0)
+		s = &envline[7];
+	      break;
+	    case 8:
+	      if (! secure && memcmp (envline, "TOP_PAD_", 8) == 0)
+		mALLOPt(M_TOP_PAD, atoi(&envline[9]));
+	      break;
+	    case 9:
+	      if (! secure && memcmp (envline, "MMAP_MAX_", 9) == 0)
+		mALLOPt(M_MMAP_MAX, atoi(&envline[10]));
+	      break;
+	    case 15:
+	      if (! secure)
+		{
+		  if (memcmp (envline, "TRIM_THRESHOLD_", 15) == 0)
+		    mALLOPt(M_TRIM_THRESHOLD, atoi(&envline[16]));
+		  else if (memcmp (envline, "MMAP_THRESHOLD_", 15) == 0)
+		    mALLOPt(M_MMAP_THRESHOLD, atoi(&envline[16]));
+		}
+	      break;
+	    default:
+	      break;
+	    }
+	}
+    }
+#else
+  if (! secure)
+    {
+      if((s = getenv("MALLOC_TRIM_THRESHOLD_")))
+	mALLOPt(M_TRIM_THRESHOLD, atoi(s));
+      if((s = getenv("MALLOC_TOP_PAD_")))
+	mALLOPt(M_TOP_PAD, atoi(s));
+      if((s = getenv("MALLOC_MMAP_THRESHOLD_")))
+	mALLOPt(M_MMAP_THRESHOLD, atoi(s));
+      if((s = getenv("MALLOC_MMAP_MAX_")))
+	mALLOPt(M_MMAP_MAX, atoi(s));
+    }
+  s = getenv("MALLOC_CHECK_");
+#endif
+  if(s) {
+    if(s[0]) mALLOPt(M_CHECK_ACTION, (int)(s[0] - '0'));
+    __malloc_check_init();
+  }
+  if(__malloc_initialize_hook != NULL)
+    (*__malloc_initialize_hook)();
+  __malloc_initialized = 1;
+}
+
+/* There are platforms (e.g. Hurd) with a link-time hook mechanism. */
+#ifdef thread_atfork_static
+thread_atfork_static(ptmalloc_lock_all, ptmalloc_unlock_all, \
+                     ptmalloc_unlock_all2)
+#endif
+
+
+
+/* Managing heaps and arenas (for concurrent threads) */
+
+#if USE_ARENAS
+
+#if MALLOC_DEBUG > 1
+
+/* Print the complete contents of a single heap to stderr. */
+
+static void
+#if __STD_C
+dump_heap(heap_info *heap)
+#else
+dump_heap(heap) heap_info *heap;
+#endif
+{
+  char *ptr;
+  mchunkptr p;
+
+  fprintf(stderr, "Heap %p, size %10lx:\n", heap, (long)heap->size);
+  ptr = (heap->ar_ptr != (mstate)(heap+1)) ?
+    (char*)(heap + 1) : (char*)(heap + 1) + sizeof(struct malloc_state);
+  p = (mchunkptr)(((unsigned long)ptr + MALLOC_ALIGN_MASK) &
+                  ~MALLOC_ALIGN_MASK);
+  for(;;) {
+    fprintf(stderr, "chunk %p size %10lx", p, (long)p->size);
+    if(p == top(heap->ar_ptr)) {
+      fprintf(stderr, " (top)\n");
+      break;
+    } else if(p->size == (0|PREV_INUSE)) {
+      fprintf(stderr, " (fence)\n");
+      break;
+    }
+    fprintf(stderr, "\n");
+    p = next_chunk(p);
+  }
+}
+
+#endif /* MALLOC_DEBUG > 1 */
+
+/* Create a new heap.  size is automatically rounded up to a multiple
+   of the page size. */
+
+static heap_info *
+internal_function
+#if __STD_C
+new_heap(size_t size, size_t top_pad)
+#else
+new_heap(size, top_pad) size_t size, top_pad;
+#endif
+{
+  size_t page_mask = malloc_getpagesize - 1;
+  char *p1, *p2;
+  unsigned long ul;
+  heap_info *h;
+
+  if(size+top_pad < HEAP_MIN_SIZE)
+    size = HEAP_MIN_SIZE;
+  else if(size+top_pad <= HEAP_MAX_SIZE)
+    size += top_pad;
+  else if(size > HEAP_MAX_SIZE)
+    return 0;
+  else
+    size = HEAP_MAX_SIZE;
+  size = (size + page_mask) & ~page_mask;
+
+  /* A memory region aligned to a multiple of HEAP_MAX_SIZE is needed.
+     No swap space needs to be reserved for the following large
+     mapping (on Linux, this is the case for all non-writable mappings
+     anyway). */
+  p1 = (char *)MMAP(0, HEAP_MAX_SIZE<<1, PROT_NONE, MAP_PRIVATE|MAP_NORESERVE);
+  if(p1 != MAP_FAILED) {
+    p2 = (char *)(((unsigned long)p1 + (HEAP_MAX_SIZE-1)) & ~(HEAP_MAX_SIZE-1));
+    ul = p2 - p1;
+    munmap(p1, ul);
+    munmap(p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul);
+  } else {
+    /* Try to take the chance that an allocation of only HEAP_MAX_SIZE
+       is already aligned. */
+    p2 = (char *)MMAP(0, HEAP_MAX_SIZE, PROT_NONE, MAP_PRIVATE|MAP_NORESERVE);
+    if(p2 == MAP_FAILED)
+      return 0;
+    if((unsigned long)p2 & (HEAP_MAX_SIZE-1)) {
+      munmap(p2, HEAP_MAX_SIZE);
+      return 0;
+    }
+  }
+  if(mprotect(p2, size, PROT_READ|PROT_WRITE) != 0) {
+    munmap(p2, HEAP_MAX_SIZE);
+    return 0;
+  }
+  h = (heap_info *)p2;
+  h->size = size;
+  THREAD_STAT(stat_n_heaps++);
+  return h;
+}
+
+/* Grow or shrink a heap.  size is automatically rounded up to a
+   multiple of the page size if it is positive. */
+
+static int
+#if __STD_C
+grow_heap(heap_info *h, long diff)
+#else
+grow_heap(h, diff) heap_info *h; long diff;
+#endif
+{
+  size_t page_mask = malloc_getpagesize - 1;
+  long new_size;
+
+  if(diff >= 0) {
+    diff = (diff + page_mask) & ~page_mask;
+    new_size = (long)h->size + diff;
+    if(new_size > HEAP_MAX_SIZE)
+      return -1;
+    if(mprotect((char *)h + h->size, diff, PROT_READ|PROT_WRITE) != 0)
+      return -2;
+  } else {
+    new_size = (long)h->size + diff;
+    if(new_size < (long)sizeof(*h))
+      return -1;
+    /* Try to re-map the extra heap space freshly to save memory, and
+       make it inaccessible. */
+    if((char *)MMAP((char *)h + new_size, -diff, PROT_NONE,
+                    MAP_PRIVATE|MAP_FIXED) == (char *) MAP_FAILED)
+      return -2;
+    /*fprintf(stderr, "shrink %p %08lx\n", h, new_size);*/
+  }
+  h->size = new_size;
+  return 0;
+}
+
+/* Delete a heap. */
+
+#define delete_heap(heap) munmap((char*)(heap), HEAP_MAX_SIZE)
+
+static int
+internal_function
+#if __STD_C
+heap_trim(heap_info *heap, size_t pad)
+#else
+heap_trim(heap, pad) heap_info *heap; size_t pad;
+#endif
+{
+  mstate ar_ptr = heap->ar_ptr;
+  unsigned long pagesz = mp_.pagesize;
+  mchunkptr top_chunk = top(ar_ptr), p, bck, fwd;
+  heap_info *prev_heap;
+  long new_size, top_size, extra;
+
+  /* Can this heap go away completely? */
+  while(top_chunk == chunk_at_offset(heap, sizeof(*heap))) {
+    prev_heap = heap->prev;
+    p = chunk_at_offset(prev_heap, prev_heap->size - (MINSIZE-2*SIZE_SZ));
+    assert(p->size == (0|PREV_INUSE)); /* must be fencepost */
+    p = prev_chunk(p);
+    new_size = chunksize(p) + (MINSIZE-2*SIZE_SZ);
+    assert(new_size>0 && new_size<(long)(2*MINSIZE));
+    if(!prev_inuse(p))
+      new_size += p->prev_size;
+    assert(new_size>0 && new_size<HEAP_MAX_SIZE);
+    if(new_size + (HEAP_MAX_SIZE - prev_heap->size) < pad + MINSIZE + pagesz)
+      break;
+    ar_ptr->system_mem -= heap->size;
+    arena_mem -= heap->size;
+    delete_heap(heap);
+    heap = prev_heap;
+    if(!prev_inuse(p)) { /* consolidate backward */
+      p = prev_chunk(p);
+      unlink(p, bck, fwd);
+    }
+    assert(((unsigned long)((char*)p + new_size) & (pagesz-1)) == 0);
+    assert( ((char*)p + new_size) == ((char*)heap + heap->size) );
+    top(ar_ptr) = top_chunk = p;
+    set_head(top_chunk, new_size | PREV_INUSE);
+    /*check_chunk(ar_ptr, top_chunk);*/
+  }
+  top_size = chunksize(top_chunk);
+  extra = ((top_size - pad - MINSIZE + (pagesz-1))/pagesz - 1) * pagesz;
+  if(extra < (long)pagesz)
+    return 0;
+  /* Try to shrink. */
+  if(grow_heap(heap, -extra) != 0)
+    return 0;
+  ar_ptr->system_mem -= extra;
+  arena_mem -= extra;
+
+  /* Success. Adjust top accordingly. */
+  set_head(top_chunk, (top_size - extra) | PREV_INUSE);
+  /*check_chunk(ar_ptr, top_chunk);*/
+  return 1;
+}
+
+static mstate
+internal_function
+#if __STD_C
+arena_get2(mstate a_tsd, size_t size)
+#else
+arena_get2(a_tsd, size) mstate a_tsd; size_t size;
+#endif
+{
+  mstate a;
+  int err;
+
+  if(!a_tsd)
+    a = a_tsd = &main_arena;
+  else {
+    a = a_tsd->next;
+    if(!a) {
+      /* This can only happen while initializing the new arena. */
+      (void)mutex_lock(&main_arena.mutex);
+      THREAD_STAT(++(main_arena.stat_lock_wait));
+      return &main_arena;
+    }
+  }
+
+  /* Check the global, circularly linked list for available arenas. */
+ repeat:
+  do {
+    if(!mutex_trylock(&a->mutex)) {
+      THREAD_STAT(++(a->stat_lock_loop));
+      tsd_setspecific(arena_key, (Void_t *)a);
+      return a;
+    }
+    a = a->next;
+  } while(a != a_tsd);
+
+  /* If not even the list_lock can be obtained, try again.  This can
+     happen during `atfork', or for example on systems where thread
+     creation makes it temporarily impossible to obtain _any_
+     locks. */
+  if(mutex_trylock(&list_lock)) {
+    a = a_tsd;
+    goto repeat;
+  }
+  (void)mutex_unlock(&list_lock);
+
+  /* Nothing immediately available, so generate a new arena.  */
+  a = _int_new_arena(size);
+  if(!a)
+    return 0;
+
+  tsd_setspecific(arena_key, (Void_t *)a);
+  mutex_init(&a->mutex);
+  err = mutex_lock(&a->mutex); /* remember result */
+
+  /* Add the new arena to the global list.  */
+  (void)mutex_lock(&list_lock);
+  a->next = main_arena.next;
+  atomic_write_barrier ();
+  main_arena.next = a;
+  (void)mutex_unlock(&list_lock);
+
+  if(err) /* locking failed; keep arena for further attempts later */
+    return 0;
+
+  THREAD_STAT(++(a->stat_lock_loop));
+  return a;
+}
+
+/* Create a new arena with initial size "size".  */
+
+mstate
+_int_new_arena(size_t size)
+{
+  mstate a;
+  heap_info *h;
+  char *ptr;
+  unsigned long misalign;
+
+  h = new_heap(size + (sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT),
+	       mp_.top_pad);
+  if(!h) {
+    /* Maybe size is too large to fit in a single heap.  So, just try
+       to create a minimally-sized arena and let _int_malloc() attempt
+       to deal with the large request via mmap_chunk().  */
+    h = new_heap(sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT, mp_.top_pad);
+    if(!h)
+      return 0;
+  }
+  a = h->ar_ptr = (mstate)(h+1);
+  malloc_init_state(a);
+  /*a->next = NULL;*/
+  a->system_mem = a->max_system_mem = h->size;
+  arena_mem += h->size;
+#ifdef NO_THREADS
+  if((unsigned long)(mp_.mmapped_mem + arena_mem + main_arena.system_mem) >
+     mp_.max_total_mem)
+    mp_.max_total_mem = mp_.mmapped_mem + arena_mem + main_arena.system_mem;
+#endif
+
+  /* Set up the top chunk, with proper alignment. */
+  ptr = (char *)(a + 1);
+  misalign = (unsigned long)chunk2mem(ptr) & MALLOC_ALIGN_MASK;
+  if (misalign > 0)
+    ptr += MALLOC_ALIGNMENT - misalign;
+  top(a) = (mchunkptr)ptr;
+  set_head(top(a), (((char*)h + h->size) - ptr) | PREV_INUSE);
+
+  return a;
+}
+
+/* Obtain the arena number n.  Needed in malloc_stats.  */
+
+mstate
+_int_get_arena (int n)
+{
+  mstate a = &main_arena;
+
+  while (n-- != 0) {
+    a = a->next;
+    if (a == &main_arena)
+      return 0;
+  }
+  return a;
+}
+
+#endif /* USE_ARENAS */
+
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/opal/memory/ptmalloc2/hooks.c b/opal/memory/ptmalloc2/hooks.c
new file mode 100644
index 0000000000..769fbbdbcb
--- /dev/null
+++ b/opal/memory/ptmalloc2/hooks.c
@@ -0,0 +1,640 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* $Id: hooks.c,v 1.12 2004/11/05 14:42:32 wg Exp $ */
+
+#ifndef DEFAULT_CHECK_ACTION
+#define DEFAULT_CHECK_ACTION 1
+#endif
+
+/* What to do if the standard debugging hooks are in place and a
+   corrupt pointer is detected: do nothing (0), print an error message
+   (1), or call abort() (2). */
+
+/* Hooks for debugging versions.  The initial hooks just call the
+   initialization routine, then do the normal work. */
+
+#if !(USE_STARTER & 2)
+
+static Void_t*
+#if __STD_C
+malloc_hook_ini(size_t sz, const __malloc_ptr_t caller)
+#else
+malloc_hook_ini(sz, caller)
+     size_t sz; const __malloc_ptr_t caller;
+#endif
+{
+  __malloc_hook = NULL;
+  ptmalloc_init();
+  return public_mALLOc(sz);
+}
+
+static Void_t*
+#if __STD_C
+realloc_hook_ini(Void_t* ptr, size_t sz, const __malloc_ptr_t caller)
+#else
+realloc_hook_ini(ptr, sz, caller)
+     Void_t* ptr; size_t sz; const __malloc_ptr_t caller;
+#endif
+{
+  __malloc_hook = NULL;
+  __realloc_hook = NULL;
+  ptmalloc_init();
+  return public_rEALLOc(ptr, sz);
+}
+
+static Void_t*
+#if __STD_C
+memalign_hook_ini(size_t alignment, size_t sz, const __malloc_ptr_t caller)
+#else
+memalign_hook_ini(alignment, sz, caller)
+     size_t alignment; size_t sz; const __malloc_ptr_t caller;
+#endif
+{
+  __memalign_hook = NULL;
+  ptmalloc_init();
+  return public_mEMALIGn(alignment, sz);
+}
+
+#endif /* !(USE_STARTER & 2) */
+
+static int check_action = DEFAULT_CHECK_ACTION;
+
+/* Whether we are using malloc checking.  */
+static int using_malloc_checking;
+
+/* A flag that is set by malloc_set_state, to signal that malloc checking
+   must not be enabled on the request from the user (via the MALLOC_CHECK_
+   environment variable).  It is reset by __malloc_check_init to tell
+   malloc_set_state that the user has requested malloc checking.
+
+   The purpose of this flag is to make sure that malloc checking is not
+   enabled when the heap to be restored was constructed without malloc
+   checking, and thus does not contain the required magic bytes.
+   Otherwise the heap would be corrupted by calls to free and realloc.  If
+   it turns out that the heap was created with malloc checking and the
+   user has requested it malloc_set_state just calls __malloc_check_init
+   again to enable it.  On the other hand, reusing such a heap without
+   further malloc checking is safe.  */
+static int disallow_malloc_check;
+
+/* Activate a standard set of debugging hooks. */
+void
+__malloc_check_init()
+{
+  if (disallow_malloc_check) {
+    disallow_malloc_check = 0;
+    return;
+  }
+  using_malloc_checking = 1;
+  __malloc_hook = malloc_check;
+  __free_hook = free_check;
+  __realloc_hook = realloc_check;
+  __memalign_hook = memalign_check;
+  if(check_action & 1)
+    fprintf(stderr, "malloc: using debugging hooks\n");
+}
+
+/* A simple, standard set of debugging hooks.  Overhead is `only' one
+   byte per chunk; still this will catch most cases of double frees or
+   overruns.  The goal here is to avoid obscure crashes due to invalid
+   usage, unlike in the MALLOC_DEBUG code. */
+
+#define MAGICBYTE(p) ( ( ((size_t)p >> 3) ^ ((size_t)p >> 11)) & 0xFF )
+
+/* Instrument a chunk with overrun detector byte(s) and convert it
+   into a user pointer with requested size sz. */
+
+static Void_t*
+internal_function
+#if __STD_C
+mem2mem_check(Void_t *ptr, size_t sz)
+#else
+mem2mem_check(ptr, sz) Void_t *ptr; size_t sz;
+#endif
+{
+  mchunkptr p;
+  unsigned char* m_ptr = (unsigned char*)BOUNDED_N(ptr, sz);
+  size_t i;
+
+  if (!ptr)
+    return ptr;
+  p = mem2chunk(ptr);
+  for(i = chunksize(p) - (chunk_is_mmapped(p) ? 2*SIZE_SZ+1 : SIZE_SZ+1);
+      i > sz;
+      i -= 0xFF) {
+    if(i-sz < 0x100) {
+      m_ptr[i] = (unsigned char)(i-sz);
+      break;
+    }
+    m_ptr[i] = 0xFF;
+  }
+  m_ptr[sz] = MAGICBYTE(p);
+  return (Void_t*)m_ptr;
+}
+
+/* Convert a pointer to be free()d or realloc()ed to a valid chunk
+   pointer.  If the provided pointer is not valid, return NULL. */
+
+static mchunkptr
+internal_function
+#if __STD_C
+mem2chunk_check(Void_t* mem)
+#else
+mem2chunk_check(mem) Void_t* mem;
+#endif
+{
+  mchunkptr p;
+  INTERNAL_SIZE_T sz, c;
+  unsigned char magic;
+
+  if(!aligned_OK(mem)) return NULL;
+  p = mem2chunk(mem);
+  if (!chunk_is_mmapped(p)) {
+    /* Must be a chunk in conventional heap memory. */
+    int contig = contiguous(&main_arena);
+    sz = chunksize(p);
+    if((contig &&
+	((char*)p<mp_.sbrk_base ||
+	 ((char*)p + sz)>=(mp_.sbrk_base+main_arena.system_mem) )) ||
+       sz<MINSIZE || sz&MALLOC_ALIGN_MASK || !inuse(p) ||
+       ( !prev_inuse(p) && (p->prev_size&MALLOC_ALIGN_MASK ||
+                            (contig && (char*)prev_chunk(p)<mp_.sbrk_base) ||
+                            next_chunk(prev_chunk(p))!=p) ))
+      return NULL;
+    magic = MAGICBYTE(p);
+    for(sz += SIZE_SZ-1; (c = ((unsigned char*)p)[sz]) != magic; sz -= c) {
+      if(c<=0 || sz<(c+2*SIZE_SZ)) return NULL;
+    }
+    ((unsigned char*)p)[sz] ^= 0xFF;
+  } else {
+    unsigned long offset, page_mask = malloc_getpagesize-1;
+
+    /* mmap()ed chunks have MALLOC_ALIGNMENT or higher power-of-two
+       alignment relative to the beginning of a page.  Check this
+       first. */
+    offset = (unsigned long)mem & page_mask;
+    if((offset!=MALLOC_ALIGNMENT && offset!=0 && offset!=0x10 &&
+        offset!=0x20 && offset!=0x40 && offset!=0x80 && offset!=0x100 &&
+        offset!=0x200 && offset!=0x400 && offset!=0x800 && offset!=0x1000 &&
+        offset<0x2000) ||
+       !chunk_is_mmapped(p) || (p->size & PREV_INUSE) ||
+       ( (((unsigned long)p - p->prev_size) & page_mask) != 0 ) ||
+       ( (sz = chunksize(p)), ((p->prev_size + sz) & page_mask) != 0 ) )
+      return NULL;
+    magic = MAGICBYTE(p);
+    for(sz -= 1; (c = ((unsigned char*)p)[sz]) != magic; sz -= c) {
+      if(c<=0 || sz<(c+2*SIZE_SZ)) return NULL;
+    }
+    ((unsigned char*)p)[sz] ^= 0xFF;
+  }
+  return p;
+}
+
+/* Check for corruption of the top chunk, and try to recover if
+   necessary. */
+
+static int
+internal_function
+#if __STD_C
+top_check(void)
+#else
+top_check()
+#endif
+{
+  mchunkptr t = top(&main_arena);
+  char* brk, * new_brk;
+  INTERNAL_SIZE_T front_misalign, sbrk_size;
+  unsigned long pagesz = malloc_getpagesize;
+
+  if (t == initial_top(&main_arena) ||
+      (!chunk_is_mmapped(t) &&
+       chunksize(t)>=MINSIZE &&
+       prev_inuse(t) &&
+       (!contiguous(&main_arena) ||
+	(char*)t + chunksize(t) == mp_.sbrk_base + main_arena.system_mem)))
+    return 0;
+
+  if(check_action & 1)
+    fprintf(stderr, "malloc: top chunk is corrupt\n");
+  if(check_action & 2)
+    abort();
+
+  /* Try to set up a new top chunk. */
+  brk = MORECORE(0);
+  front_misalign = (unsigned long)chunk2mem(brk) & MALLOC_ALIGN_MASK;
+  if (front_misalign > 0)
+    front_misalign = MALLOC_ALIGNMENT - front_misalign;
+  sbrk_size = front_misalign + mp_.top_pad + MINSIZE;
+  sbrk_size += pagesz - ((unsigned long)(brk + sbrk_size) & (pagesz - 1));
+  new_brk = (char*)(MORECORE (sbrk_size));
+  if (new_brk == (char*)(MORECORE_FAILURE)) return -1;
+  /* Call the `morecore' hook if necessary.  */
+  if (__after_morecore_hook)
+    (*__after_morecore_hook) ();
+  main_arena.system_mem = (new_brk - mp_.sbrk_base) + sbrk_size;
+
+  top(&main_arena) = (mchunkptr)(brk + front_misalign);
+  set_head(top(&main_arena), (sbrk_size - front_misalign) | PREV_INUSE);
+
+  return 0;
+}
+
+static Void_t*
+#if __STD_C
+malloc_check(size_t sz, const Void_t *caller)
+#else
+malloc_check(sz, caller) size_t sz; const Void_t *caller;
+#endif
+{
+  Void_t *victim;
+
+  (void)mutex_lock(&main_arena.mutex);
+  victim = (top_check() >= 0) ? _int_malloc(&main_arena, sz+1) : NULL;
+  (void)mutex_unlock(&main_arena.mutex);
+  return mem2mem_check(victim, sz);
+}
+
+static void
+#if __STD_C
+free_check(Void_t* mem, const Void_t *caller)
+#else
+free_check(mem, caller) Void_t* mem; const Void_t *caller;
+#endif
+{
+  mchunkptr p;
+
+  if(!mem) return;
+  (void)mutex_lock(&main_arena.mutex);
+  p = mem2chunk_check(mem);
+  if(!p) {
+    (void)mutex_unlock(&main_arena.mutex);
+    if(check_action & 1)
+      fprintf(stderr, "free(): invalid pointer %p!\n", mem);
+    if(check_action & 2)
+      abort();
+    return;
+  }
+#if HAVE_MMAP
+  if (chunk_is_mmapped(p)) {
+    (void)mutex_unlock(&main_arena.mutex);
+    munmap_chunk(p);
+    return;
+  }
+#endif
+#if 0 /* Erase freed memory. */
+  memset(mem, 0, chunksize(p) - (SIZE_SZ+1));
+#endif
+  _int_free(&main_arena, mem);
+  (void)mutex_unlock(&main_arena.mutex);
+}
+
+static Void_t*
+#if __STD_C
+realloc_check(Void_t* oldmem, size_t bytes, const Void_t *caller)
+#else
+realloc_check(oldmem, bytes, caller)
+     Void_t* oldmem; size_t bytes; const Void_t *caller;
+#endif
+{
+  mchunkptr oldp;
+  INTERNAL_SIZE_T nb, oldsize;
+  Void_t* newmem = 0;
+
+  if (oldmem == 0) return malloc_check(bytes, NULL);
+  (void)mutex_lock(&main_arena.mutex);
+  oldp = mem2chunk_check(oldmem);
+  (void)mutex_unlock(&main_arena.mutex);
+  if(!oldp) {
+    if(check_action & 1)
+      fprintf(stderr, "realloc(): invalid pointer %p!\n", oldmem);
+    if(check_action & 2)
+      abort();
+    return malloc_check(bytes, NULL);
+  }
+  oldsize = chunksize(oldp);
+
+  checked_request2size(bytes+1, nb);
+  (void)mutex_lock(&main_arena.mutex);
+
+#if HAVE_MMAP
+  if (chunk_is_mmapped(oldp)) {
+#if HAVE_MREMAP
+    mchunkptr newp = mremap_chunk(oldp, nb);
+    if(newp)
+      newmem = chunk2mem(newp);
+    else
+#endif
+    {
+      /* Note the extra SIZE_SZ overhead. */
+      if(oldsize - SIZE_SZ >= nb)
+	newmem = oldmem; /* do nothing */
+      else {
+        /* Must alloc, copy, free. */
+        if (top_check() >= 0)
+	  newmem = _int_malloc(&main_arena, bytes+1);
+        if (newmem) {
+          MALLOC_COPY(BOUNDED_N(newmem, bytes+1), oldmem, oldsize - 2*SIZE_SZ);
+          munmap_chunk(oldp);
+        }
+      }
+    }
+  } else {
+#endif /* HAVE_MMAP */
+    if (top_check() >= 0)
+      newmem = _int_realloc(&main_arena, oldmem, bytes+1);
+#if 0 /* Erase freed memory. */
+    if(newmem)
+      newp = mem2chunk(newmem);
+    nb = chunksize(newp);
+    if(oldp<newp || oldp>=chunk_at_offset(newp, nb)) {
+      memset((char*)oldmem + 2*sizeof(mbinptr), 0,
+             oldsize - (2*sizeof(mbinptr)+2*SIZE_SZ+1));
+    } else if(nb > oldsize+SIZE_SZ) {
+      memset((char*)BOUNDED_N(chunk2mem(newp), bytes) + oldsize,
+	     0, nb - (oldsize+SIZE_SZ));
+    }
+#endif
+#if HAVE_MMAP
+  }
+#endif
+  (void)mutex_unlock(&main_arena.mutex);
+
+  return mem2mem_check(newmem, bytes);
+}
+
+static Void_t*
+#if __STD_C
+memalign_check(size_t alignment, size_t bytes, const Void_t *caller)
+#else
+memalign_check(alignment, bytes, caller)
+     size_t alignment; size_t bytes; const Void_t *caller;
+#endif
+{
+  INTERNAL_SIZE_T nb;
+  Void_t* mem;
+
+  if (alignment <= MALLOC_ALIGNMENT) return malloc_check(bytes, NULL);
+  if (alignment <  MINSIZE) alignment = MINSIZE;
+
+  checked_request2size(bytes+1, nb);
+  (void)mutex_lock(&main_arena.mutex);
+  mem = (top_check() >= 0) ? _int_memalign(&main_arena, alignment, bytes+1) :
+    NULL;
+  (void)mutex_unlock(&main_arena.mutex);
+  return mem2mem_check(mem, bytes);
+}
+
+#if !defined NO_THREADS && USE_STARTER
+
+/* The following hooks are used when the global initialization in
+   ptmalloc_init() hasn't completed yet. */
+
+static Void_t*
+#if __STD_C
+malloc_starter(size_t sz, const Void_t *caller)
+#else
+malloc_starter(sz, caller) size_t sz; const Void_t *caller;
+#endif
+{
+  Void_t* victim;
+
+  ptmalloc_init_minimal();
+  victim = _int_malloc(&main_arena, sz);
+
+  return victim ? BOUNDED_N(victim, sz) : 0;
+}
+
+static Void_t*
+#if __STD_C
+memalign_starter(size_t align, size_t sz, const Void_t *caller)
+#else
+memalign_starter(align, sz, caller) size_t align, sz; const Void_t *caller;
+#endif
+{
+  Void_t* victim;
+
+  ptmalloc_init_minimal();
+  victim = _int_memalign(&main_arena, align, sz);
+
+  return victim ? BOUNDED_N(victim, sz) : 0;
+}
+
+static void
+#if __STD_C
+free_starter(Void_t* mem, const Void_t *caller)
+#else
+free_starter(mem, caller) Void_t* mem; const Void_t *caller;
+#endif
+{
+  mchunkptr p;
+
+  if(!mem) return;
+  p = mem2chunk(mem);
+#if HAVE_MMAP
+  if (chunk_is_mmapped(p)) {
+    munmap_chunk(p);
+    return;
+  }
+#endif
+  _int_free(&main_arena, mem);
+}
+
+#endif /* !defined NO_THREADS && USE_STARTER */
+
+
+/* Get/set state: malloc_get_state() records the current state of all
+   malloc variables (_except_ for the actual heap contents and `hook'
+   function pointers) in a system dependent, opaque data structure.
+   This data structure is dynamically allocated and can be free()d
+   after use.  malloc_set_state() restores the state of all malloc
+   variables to the previously obtained state.  This is especially
+   useful when using this malloc as part of a shared library, and when
+   the heap contents are saved/restored via some other method.  The
+   primary example for this is GNU Emacs with its `dumping' procedure.
+   `Hook' function pointers are never saved or restored by these
+   functions, with two exceptions: If malloc checking was in use when
+   malloc_get_state() was called, then malloc_set_state() calls
+   __malloc_check_init() if possible; if malloc checking was not in
+   use in the recorded state but the user requested malloc checking,
+   then the hooks are reset to 0.  */
+
+#define MALLOC_STATE_MAGIC   0x444c4541l
+#define MALLOC_STATE_VERSION (0*0x100l + 2l) /* major*0x100 + minor */
+
+struct malloc_save_state {
+  long          magic;
+  long          version;
+  mbinptr       av[NBINS * 2 + 2];
+  char*         sbrk_base;
+  int           sbrked_mem_bytes;
+  unsigned long trim_threshold;
+  unsigned long top_pad;
+  unsigned int  n_mmaps_max;
+  unsigned long mmap_threshold;
+  int           check_action;
+  unsigned long max_sbrked_mem;
+  unsigned long max_total_mem;
+  unsigned int  n_mmaps;
+  unsigned int  max_n_mmaps;
+  unsigned long mmapped_mem;
+  unsigned long max_mmapped_mem;
+  int           using_malloc_checking;
+};
+
+Void_t*
+public_gET_STATe(void)
+{
+  struct malloc_save_state* ms;
+  int i;
+  mbinptr b;
+
+  ms = (struct malloc_save_state*)public_mALLOc(sizeof(*ms));
+  if (!ms)
+    return 0;
+  (void)mutex_lock(&main_arena.mutex);
+  malloc_consolidate(&main_arena);
+  ms->magic = MALLOC_STATE_MAGIC;
+  ms->version = MALLOC_STATE_VERSION;
+  ms->av[0] = 0;
+  ms->av[1] = 0; /* used to be binblocks, now no longer used */
+  ms->av[2] = top(&main_arena);
+  ms->av[3] = 0; /* used to be undefined */
+  for(i=1; i<NBINS; i++) {
+    b = bin_at(&main_arena, i);
+    if(first(b) == b)
+      ms->av[2*i+2] = ms->av[2*i+3] = 0; /* empty bin */
+    else {
+      ms->av[2*i+2] = first(b);
+      ms->av[2*i+3] = last(b);
+    }
+  }
+  ms->sbrk_base = mp_.sbrk_base;
+  ms->sbrked_mem_bytes = main_arena.system_mem;
+  ms->trim_threshold = mp_.trim_threshold;
+  ms->top_pad = mp_.top_pad;
+  ms->n_mmaps_max = mp_.n_mmaps_max;
+  ms->mmap_threshold = mp_.mmap_threshold;
+  ms->check_action = check_action;
+  ms->max_sbrked_mem = main_arena.max_system_mem;
+#ifdef NO_THREADS
+  ms->max_total_mem = mp_.max_total_mem;
+#else
+  ms->max_total_mem = 0;
+#endif
+  ms->n_mmaps = mp_.n_mmaps;
+  ms->max_n_mmaps = mp_.max_n_mmaps;
+  ms->mmapped_mem = mp_.mmapped_mem;
+  ms->max_mmapped_mem = mp_.max_mmapped_mem;
+  ms->using_malloc_checking = using_malloc_checking;
+  (void)mutex_unlock(&main_arena.mutex);
+  return (Void_t*)ms;
+}
+
+int
+public_sET_STATe(Void_t* msptr)
+{
+  struct malloc_save_state* ms = (struct malloc_save_state*)msptr;
+  int i;
+  mbinptr b;
+
+  disallow_malloc_check = 1;
+  ptmalloc_init();
+  if(ms->magic != MALLOC_STATE_MAGIC) return -1;
+  /* Must fail if the major version is too high. */
+  if((ms->version & ~0xffl) > (MALLOC_STATE_VERSION & ~0xffl)) return -2;
+  (void)mutex_lock(&main_arena.mutex);
+  /* There are no fastchunks.  */
+  clear_fastchunks(&main_arena);
+  set_max_fast(&main_arena, DEFAULT_MXFAST);
+  for (i=0; i<NFASTBINS; ++i)
+    main_arena.fastbins[i] = 0;
+  for (i=0; i<BINMAPSIZE; ++i)
+    main_arena.binmap[i] = 0;
+  top(&main_arena) = ms->av[2];
+  main_arena.last_remainder = 0;
+  for(i=1; i<NBINS; i++) {
+    b = bin_at(&main_arena, i);
+    if(ms->av[2*i+2] == 0) {
+      assert(ms->av[2*i+3] == 0);
+      first(b) = last(b) = b;
+    } else {
+      if(i<NSMALLBINS || (largebin_index(chunksize(ms->av[2*i+2]))==i &&
+			  largebin_index(chunksize(ms->av[2*i+3]))==i)) {
+	first(b) = ms->av[2*i+2];
+	last(b) = ms->av[2*i+3];
+	/* Make sure the links to the bins within the heap are correct.  */
+	first(b)->bk = b;
+	last(b)->fd = b;
+	/* Set bit in binblocks.  */
+	mark_bin(&main_arena, i);
+      } else {
+	/* Oops, index computation from chunksize must have changed.
+           Link the whole list into unsorted_chunks.  */
+	first(b) = last(b) = b;
+	b = unsorted_chunks(&main_arena);
+	ms->av[2*i+2]->bk = b;
+	ms->av[2*i+3]->fd = b->fd;
+	b->fd->bk = ms->av[2*i+3];
+	b->fd = ms->av[2*i+2];
+      }
+    }
+  }
+  mp_.sbrk_base = ms->sbrk_base;
+  main_arena.system_mem = ms->sbrked_mem_bytes;
+  mp_.trim_threshold = ms->trim_threshold;
+  mp_.top_pad = ms->top_pad;
+  mp_.n_mmaps_max = ms->n_mmaps_max;
+  mp_.mmap_threshold = ms->mmap_threshold;
+  check_action = ms->check_action;
+  main_arena.max_system_mem = ms->max_sbrked_mem;
+#ifdef NO_THREADS
+  mp_.max_total_mem = ms->max_total_mem;
+#endif
+  mp_.n_mmaps = ms->n_mmaps;
+  mp_.max_n_mmaps = ms->max_n_mmaps;
+  mp_.mmapped_mem = ms->mmapped_mem;
+  mp_.max_mmapped_mem = ms->max_mmapped_mem;
+  /* add version-dependent code here */
+  if (ms->version >= 1) {
+    /* Check whether it is safe to enable malloc checking, or whether
+       it is necessary to disable it.  */
+    if (ms->using_malloc_checking && !using_malloc_checking &&
+        !disallow_malloc_check)
+      __malloc_check_init ();
+    else if (!ms->using_malloc_checking && using_malloc_checking) {
+      __malloc_hook = 0;
+      __free_hook = 0;
+      __realloc_hook = 0;
+      __memalign_hook = 0;
+      using_malloc_checking = 0;
+    }
+  }
+  check_malloc_state(&main_arena);
+
+  (void)mutex_unlock(&main_arena.mutex);
+  return 0;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/opal/memory/ptmalloc2/lran2.h b/opal/memory/ptmalloc2/lran2.h
new file mode 100644
index 0000000000..cea9920282
--- /dev/null
+++ b/opal/memory/ptmalloc2/lran2.h
@@ -0,0 +1,51 @@
+/* lran2.h
+ * by Wolfram Gloger 1996.
+ *
+ * A small, portable pseudo-random number generator.
+ */
+
+#ifndef _LRAN2_H
+#define _LRAN2_H
+
+#define LRAN2_MAX 714025l /* constants for portable */
+#define IA	  1366l	  /* random number generator */
+#define IC	  150889l /* (see e.g. `Numerical Recipes') */
+
+struct lran2_st {
+    long x, y, v[97];
+};
+
+static void
+lran2_init(struct lran2_st* d, long seed)
+{
+    long x;
+    int j;
+
+    x = (IC - seed) % LRAN2_MAX;
+    if(x < 0) x = -x;
+    for(j=0; j<97; j++) {
+	x = (IA*x + IC) % LRAN2_MAX;
+	d->v[j] = x;
+    }
+    d->x = (IA*x + IC) % LRAN2_MAX;
+    d->y = d->x;
+}
+
+#ifdef __GNUC__
+__inline__
+#endif
+static long
+lran2(struct lran2_st* d)
+{
+    int j = (d->y % 97);
+
+    d->y = d->v[j];
+    d->x = (IA*d->x + IC) % LRAN2_MAX;
+    d->v[j] = d->x;
+    return d->y;
+}
+
+#undef IA
+#undef IC
+
+#endif
diff --git a/opal/memory/ptmalloc2/malloc-stats.c b/opal/memory/ptmalloc2/malloc-stats.c
new file mode 100644
index 0000000000..617d8e8e6b
--- /dev/null
+++ b/opal/memory/ptmalloc2/malloc-stats.c
@@ -0,0 +1,161 @@
+/* Malloc implementation for multiple threads; statistics printing.
+   Copyright (C) 2004 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>, 2004.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* $Id: $ */
+
+#include <stdio.h>    /* needed for malloc_stats */
+
+#include <malloc-machine.h>
+
+#include "malloc.h"
+
+/*
+  Define HAVE_MMAP as true to optionally make malloc() use mmap() to
+  allocate very large blocks.  These will be returned to the
+  operating system immediately after a free(). Also, if mmap
+  is available, it is used as a backup strategy in cases where
+  MORECORE fails to provide space from system.
+
+  This malloc is best tuned to work with mmap for large requests.
+  If you do not have mmap, operations involving very large chunks (1MB
+  or so) may be slower than you'd like.
+*/
+
+#ifndef HAVE_MMAP
+#define HAVE_MMAP 1
+#endif
+
+#ifdef USE_DL_PREFIX
+
+#define public_mSTATs    dlmalloc_stats
+
+#else /* USE_DL_PREFIX */
+#ifdef _LIBC
+
+#define public_mSTATs    __malloc_stats
+
+#else /* !_LIBC */
+
+#define public_mSTATs    malloc_stats
+
+#endif /* _LIBC */
+#endif /* USE_DL_PREFIX */
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+void     public_mSTATs __MALLOC_P((void));
+
+/*
+  ------------------------------ malloc_stats ------------------------------
+*/
+
+void public_mSTATs()
+{
+  int i;
+  mstate ar_ptr;
+  struct malloc_global_info mgi;
+  struct malloc_arena_info mai;
+  unsigned long in_use_b, system_b, avail_b;
+#if THREAD_STATS
+  long stat_lock_direct = 0, stat_lock_loop = 0, stat_lock_wait = 0;
+#endif
+
+#if 0
+  if(__malloc_initialized < 0)
+    ptmalloc_init ();
+#endif
+  _int_get_global_info(&mgi);
+  system_b = in_use_b = mgi.mmapped_mem;
+#ifdef _LIBC
+  _IO_flockfile (stderr);
+  int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+  ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+#endif
+  for (i=0; (ar_ptr = _int_get_arena(i)); i++) {
+    _int_get_arena_info(ar_ptr, &mai);
+    avail_b = mai.fastavail + mai.binavail + mai.top_size;
+    fprintf(stderr, "Arena %d:\n", i);
+    fprintf(stderr, "system bytes     = %10lu\n",
+	    (unsigned long)mai.system_mem);
+    fprintf(stderr, "in use bytes     = %10lu\n",
+	    (unsigned long)(mai.system_mem - avail_b));
+#if MALLOC_DEBUG > 1
+    if (i > 0)
+      dump_heap(heap_for_ptr(top(ar_ptr)));
+#endif
+    system_b += mai.system_mem;
+    in_use_b += mai.system_mem - avail_b;
+#if THREAD_STATS
+    stat_lock_direct += mai.stat_lock_direct;
+    stat_lock_loop += mai.stat_lock_loop;
+    stat_lock_wait += mai.stat_lock_wait;
+#endif
+  }
+#if HAVE_MMAP
+  fprintf(stderr, "Total (incl. mmap):\n");
+#else
+  fprintf(stderr, "Total:\n");
+#endif
+  fprintf(stderr, "system bytes     = %10lu\n", system_b);
+  fprintf(stderr, "in use bytes     = %10lu\n", in_use_b);
+#ifdef NO_THREADS
+  fprintf(stderr, "max system bytes = %10lu\n",
+	  (unsigned long)mgi.max_total_mem);
+#endif
+#if HAVE_MMAP
+  fprintf(stderr, "max mmap regions = %10u\n", (unsigned int)mgi.max_n_mmaps);
+  fprintf(stderr, "max mmap bytes   = %10lu\n",
+	  (unsigned long)mgi.max_mmapped_mem);
+#endif
+#if THREAD_STATS
+  fprintf(stderr, "heaps created    = %10d\n",  mgi.stat_n_heaps);
+  fprintf(stderr, "locked directly  = %10ld\n", stat_lock_direct);
+  fprintf(stderr, "locked in loop   = %10ld\n", stat_lock_loop);
+  fprintf(stderr, "locked waiting   = %10ld\n", stat_lock_wait);
+  fprintf(stderr, "locked total     = %10ld\n",
+          stat_lock_direct + stat_lock_loop + stat_lock_wait);
+#endif
+#ifdef _LIBC
+  ((_IO_FILE *) stderr)->_flags2 |= old_flags2;
+  _IO_funlockfile (stderr);
+#endif
+}
+
+#ifdef _LIBC
+weak_alias (__malloc_stats, malloc_stats)
+#endif
diff --git a/opal/memory/ptmalloc2/malloc.c b/opal/memory/ptmalloc2/malloc.c
new file mode 100644
index 0000000000..ce91eecf75
--- /dev/null
+++ b/opal/memory/ptmalloc2/malloc.c
@@ -0,0 +1,5439 @@
+/* Malloc implementation for multiple threads without lock contention.
+   Copyright (C) 1996-2002, 2003, 2004 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>
+   and Doug Lea <dl@cs.oswego.edu>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/*
+  This is a version (aka ptmalloc2) of malloc/free/realloc written by
+  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
+
+* Version ptmalloc2-20011215
+  $Id: malloc.c,v 1.20 2004/11/04 17:31:04 wg Exp $
+  based on:
+  VERSION 2.7.1pre1 Sat May 12 07:41:21 2001  Doug Lea  (dl at gee)
+
+   Note: There may be an updated version of this malloc obtainable at
+           http://www.malloc.de/malloc/ptmalloc2.tar.gz
+         Check before installing!
+
+* Quickstart
+
+  In order to compile this implementation, a Makefile is provided with
+  the ptmalloc2 distribution, which has pre-defined targets for some
+  popular systems (e.g. "make posix" for Posix threads).  All that is
+  typically required with regard to compiler flags is the selection of
+  an appropriate malloc-machine.h include file via -I directives.
+  Many/most systems will additionally require USE_TSD_DATA_HACK to be
+  defined, so this is the default for "make posix".
+
+* Why use this malloc?
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and tunable.
+  Consistent balance across these factors results in a good general-purpose
+  allocator for malloc-intensive programs.
+
+  The main properties of the algorithms are:
+  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
+    with ties normally decided via FIFO (i.e. least recently used).
+  * For small (<= 64 bytes by default) requests, it is a caching
+    allocator, that maintains pools of quickly recycled chunks.
+  * In between, and for combinations of large and small requests, it does
+    the best it can trying to meet both goals at once.
+  * For very large requests (>= 128KB by default), it relies on system
+    memory mapping facilities, if supported.
+
+  For a longer but slightly out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+  You may already by default be using a C library containing a malloc
+  that is  based on some version of this malloc (for example in
+  linux). You might still want to use the one in this file in order to
+  customize settings or to avoid overheads associated with library
+  versions.
+
+* Contents, described in more detail in "description of public routines" below.
+
+  Standard (ANSI/SVID/...)  functions:
+    malloc(size_t n);
+    calloc(size_t n_elements, size_t element_size);
+    free(Void_t* p);
+    realloc(Void_t* p, size_t n);
+    memalign(size_t alignment, size_t n);
+    valloc(size_t n);
+    mallinfo()
+    mallopt(int parameter_number, int parameter_value)
+
+  Additional functions:
+    independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]);
+    independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
+    pvalloc(size_t n);
+    cfree(Void_t* p);
+    malloc_trim(size_t pad);
+    malloc_usable_size(Void_t* p);
+    malloc_stats();
+
+* Vital statistics:
+
+  Supported pointer representation:       4 or 8 bytes
+  Supported size_t  representation:       4 or 8 bytes
+       Note that size_t is allowed to be 4 bytes even if pointers are 8.
+       You can adjust this by defining INTERNAL_SIZE_T
+
+  Alignment:                              2 * sizeof(size_t) (default)
+       (i.e., 8 byte alignment with 4byte size_t). This suffices for
+       nearly all current machines and C compilers. However, you can
+       define MALLOC_ALIGNMENT to be wider than this if necessary.
+
+  Minimum overhead per allocated chunk:   4 or 8 bytes
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
+                          8-byte ptrs:  24/32 bytes (including, 4/8 overhead)
+
+       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
+       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
+       needed; 4 (8) for a trailing size field and 8 (16) bytes for
+       free list pointers. Thus, the minimum allocatable size is
+       16/24/32 bytes.
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is 2 *
+       sizeof(size_t) bytes plus the remainder from a system page (the
+       minimal mmap unit); typically 4096 or 8192 bytes.
+
+  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
+                           8-byte size_t: 2^64 minus about two pages
+
+       It is assumed that (possibly signed) size_t values suffice to
+       represent chunk sizes. `Possibly signed' is due to the fact
+       that `size_t' may be defined on a system as either a signed or
+       an unsigned type. The ISO C standard says that it must be
+       unsigned, but a few systems are known not to adhere to this.
+       Additionally, even when size_t is unsigned, sbrk (which is by
+       default used to obtain memory from system) accepts signed
+       arguments, and may not be able to handle size_t-wide arguments
+       with negative sign bit.  Generally, values that would
+       appear as negative after accounting for overhead and alignment
+       are supported only via mmap(), which does not have this
+       limitation.
+
+       Requests for sizes outside the allowed range will perform an optional
+       failure action and then return null. (Requests may also
+       also fail because a system is out of memory.)
+
+  Thread-safety: thread-safe unless NO_THREADS is defined
+
+  Compliance: I believe it is compliant with the 1997 Single Unix Specification
+       (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably
+       others as well.
+
+* Synopsis of compile-time options:
+
+    People have reported using previous versions of this malloc on all
+    versions of Unix, sometimes by tweaking some of the defines
+    below. It has been tested most extensively on Solaris and
+    Linux. It is also reported to work on WIN32 platforms.
+    People also report using it in stand-alone embedded systems.
+
+    The implementation is in straight, hand-tuned ANSI C.  It is not
+    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
+    usable, this code should be compiled using an optimizing compiler
+    (for example gcc -O3) that can simplify expressions and control
+    paths. (FAQ: some macros import variables as arguments rather than
+    declare locals because people reported that some debuggers
+    otherwise get confused.)
+
+    OPTION                     DEFAULT VALUE
+
+    Compilation Environment options:
+
+    __STD_C                    derived from C compiler defines
+    WIN32                      NOT defined
+    HAVE_MEMCPY                defined
+    USE_MEMCPY                 1 if HAVE_MEMCPY is defined
+    HAVE_MMAP                  defined as 1
+    MMAP_CLEARS                1
+    HAVE_MREMAP                0 unless linux defined
+    USE_ARENAS                 the same as HAVE_MMAP
+    USE_STARTER                1
+    malloc_getpagesize         derived from system #includes, or 4096 if not
+    HAVE_USR_INCLUDE_MALLOC_H  NOT defined
+    LACKS_UNISTD_H             NOT defined unless WIN32
+    LACKS_SYS_PARAM_H          NOT defined unless WIN32
+    LACKS_SYS_MMAN_H           NOT defined unless WIN32
+
+    Changing default word sizes:
+
+    INTERNAL_SIZE_T            size_t
+    MALLOC_ALIGNMENT           2 * sizeof(INTERNAL_SIZE_T)
+
+    Configuration and functionality options:
+
+    USE_DL_PREFIX              NOT defined
+    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
+    USE_MALLOC_LOCK            NOT defined
+    MALLOC_DEBUG               NOT defined
+    REALLOC_ZERO_BYTES_FREES   1
+    MALLOC_FAILURE_ACTION      errno = ENOMEM, if __STD_C defined, else no-op
+    TRIM_FASTBINS              0
+    FIRST_SORTED_BIN_SIZE      512
+
+    Options for customizing MORECORE:
+
+    MORECORE                   sbrk
+    MORECORE_FAILURE           -1
+    MORECORE_CONTIGUOUS        1
+    MORECORE_CANNOT_TRIM       NOT defined
+    MORECORE_CLEARS            1
+    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
+
+    Tuning options that are also dynamically changeable via mallopt:
+
+    DEFAULT_MXFAST             64
+    DEFAULT_TRIM_THRESHOLD     128 * 1024
+    DEFAULT_TOP_PAD            0
+    DEFAULT_MMAP_THRESHOLD     128 * 1024
+    DEFAULT_MMAP_MAX           65536
+
+    There are several other #defined constants and macros that you
+    probably don't want to touch unless you are extending or adapting malloc.  */
+
+/*
+  __STD_C should be nonzero if using ANSI-standard C compiler, a C++
+  compiler, or a C compiler sufficiently close to ANSI to get away
+  with it.
+*/
+
+#ifndef __STD_C
+#if defined(__STDC__) || defined(__cplusplus)
+#define __STD_C     1
+#else
+#define __STD_C     0
+#endif
+#endif /*__STD_C*/
+
+
+/*
+  Void_t* is the pointer type that malloc should say it returns
+*/
+
+#ifndef Void_t
+#if (__STD_C || defined(WIN32))
+#define Void_t      void
+#else
+#define Void_t      char
+#endif
+#endif /*Void_t*/
+
+#if __STD_C
+#include <stddef.h>   /* for size_t */
+#include <stdlib.h>   /* for getenv(), abort() */
+#else
+#include <sys/types.h>
+#endif
+
+#include <malloc-machine.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* define LACKS_UNISTD_H if your system does not have a <unistd.h>. */
+
+/* #define  LACKS_UNISTD_H */
+
+#ifndef LACKS_UNISTD_H
+#include <unistd.h>
+#endif
+
+/* define LACKS_SYS_PARAM_H if your system does not have a <sys/param.h>. */
+
+/* #define  LACKS_SYS_PARAM_H */
+
+
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>    /* needed for optional MALLOC_FAILURE_ACTION */
+
+
+/*
+  Debugging:
+
+  Because freed chunks may be overwritten with bookkeeping fields, this
+  malloc will often die when freed memory is overwritten by user
+  programs.  This can be very effective (albeit in an annoying way)
+  in helping track down dangling pointers.
+
+  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
+  enabled that will catch more memory errors. You probably won't be
+  able to make much sense of the actual assertion errors, but they
+  should help you locate incorrectly overwritten memory.  The checking
+  is fairly extensive, and will slow down execution
+  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
+  will attempt to check every non-mmapped allocated and free chunk in
+  the course of computing the summmaries. (By nature, mmapped regions
+  cannot be checked very much automatically.)
+
+  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
+  this code. The assertions in the check routines spell out in more
+  detail the assumptions and invariants underlying the algorithms.
+
+  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
+  checking that all accesses to malloced memory stay within their
+  bounds. However, there are several add-ons and adaptations of this
+  or other mallocs available that do this.
+*/
+
+#if MALLOC_DEBUG
+#include <assert.h>
+#else
+#undef assert
+#define assert(x) ((void)0)
+#endif
+
+
+/*
+  INTERNAL_SIZE_T is the word-size used for internal bookkeeping
+  of chunk sizes.
+
+  The default version is the same as size_t.
+
+  While not strictly necessary, it is best to define this as an
+  unsigned type, even if size_t is a signed type. This may avoid some
+  artificial size limitations on some systems.
+
+  On a 64-bit machine, you may be able to reduce malloc overhead by
+  defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
+  expense of not being able to handle more than 2^32 of malloced
+  space. If this limitation is acceptable, you are encouraged to set
+  this unless you are on a platform requiring 16byte alignments. In
+  this case the alignment requirements turn out to negate any
+  potential advantages of decreasing size_t word size.
+
+  Implementors: Beware of the possible combinations of:
+     - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
+       and might be the same width as int or as long
+     - size_t might have different width and signedness as INTERNAL_SIZE_T
+     - int and long might be 32 or 64 bits, and might be the same width
+  To deal with this, most comparisons and difference computations
+  among INTERNAL_SIZE_Ts should cast them to unsigned long, being
+  aware of the fact that casting an unsigned int to a wider long does
+  not sign-extend. (This also makes checking for negative numbers
+  awkward.) Some of these casts result in harmless compiler warnings
+  on some systems.
+*/
+
+#ifndef INTERNAL_SIZE_T
+#define INTERNAL_SIZE_T size_t
+#endif
+
+/* The corresponding word size */
+#define SIZE_SZ                (sizeof(INTERNAL_SIZE_T))
+
+
+/*
+  MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
+  It must be a power of two at least 2 * SIZE_SZ, even on machines
+  for which smaller alignments would suffice. It may be defined as
+  larger than this though. Note however that code and data structures
+  are optimized for the case of 8-byte alignment.
+*/
+
+
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT       (2 * SIZE_SZ)
+#endif
+
+/* The corresponding bit mask value */
+#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1)
+
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  This is required by the C standard. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#ifndef REALLOC_ZERO_BYTES_FREES
+#define REALLOC_ZERO_BYTES_FREES 1
+#endif
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/*
+  USE_DL_PREFIX will prefix all public routines with the string 'dl'.
+  This is necessary when you only want to use this malloc in one part
+  of a program, using your regular system malloc elsewhere.
+*/
+
+/* #define USE_DL_PREFIX */
+
+
+/*
+   Two-phase name translation.
+   All of the actual routines are given mangled names.
+   When wrappers are used, they become the public callable versions.
+   When DL_PREFIX is used, the callable names are prefixed.
+*/
+
+#ifdef USE_DL_PREFIX
+#define public_cALLOc    dlcalloc
+#define public_fREe      dlfree
+#define public_cFREe     dlcfree
+#define public_mALLOc    dlmalloc
+#define public_mEMALIGn  dlmemalign
+#define public_rEALLOc   dlrealloc
+#define public_vALLOc    dlvalloc
+#define public_pVALLOc   dlpvalloc
+#define public_mALLINFo  dlmallinfo
+#define public_mALLOPt   dlmallopt
+#define public_mTRIm     dlmalloc_trim
+#define public_mSTATs    dlmalloc_stats
+#define public_mUSABLe   dlmalloc_usable_size
+#define public_iCALLOc   dlindependent_calloc
+#define public_iCOMALLOc dlindependent_comalloc
+#define public_gET_STATe dlget_state
+#define public_sET_STATe dlset_state
+#else /* USE_DL_PREFIX */
+#ifdef _LIBC
+
+/* Special defines for the GNU C library.  */
+#define public_cALLOc    __libc_calloc
+#define public_fREe      __libc_free
+#define public_cFREe     __libc_cfree
+#define public_mALLOc    __libc_malloc
+#define public_mEMALIGn  __libc_memalign
+#define public_rEALLOc   __libc_realloc
+#define public_vALLOc    __libc_valloc
+#define public_pVALLOc   __libc_pvalloc
+#define public_mALLINFo  __libc_mallinfo
+#define public_mALLOPt   __libc_mallopt
+#define public_mTRIm     __malloc_trim
+#define public_mSTATs    __malloc_stats
+#define public_mUSABLe   __malloc_usable_size
+#define public_iCALLOc   __libc_independent_calloc
+#define public_iCOMALLOc __libc_independent_comalloc
+#define public_gET_STATe __malloc_get_state
+#define public_sET_STATe __malloc_set_state
+#define malloc_getpagesize __getpagesize()
+#define open             __open
+#define mmap             __mmap
+#define munmap           __munmap
+#define mremap           __mremap
+#define mprotect         __mprotect
+#define MORECORE         (*__morecore)
+#define MORECORE_FAILURE 0
+
+Void_t * __default_morecore (ptrdiff_t);
+Void_t *(*__morecore)(ptrdiff_t) = __default_morecore;
+
+#else /* !_LIBC */
+#define public_cALLOc    calloc
+#define public_fREe      free
+#define public_cFREe     cfree
+#define public_mALLOc    malloc
+#define public_mEMALIGn  memalign
+#define public_rEALLOc   realloc
+#define public_vALLOc    valloc
+#define public_pVALLOc   pvalloc
+#define public_mALLINFo  mallinfo
+#define public_mALLOPt   mallopt
+#define public_mTRIm     malloc_trim
+#define public_mSTATs    malloc_stats
+#define public_mUSABLe   malloc_usable_size
+#define public_iCALLOc   independent_calloc
+#define public_iCOMALLOc independent_comalloc
+#define public_gET_STATe malloc_get_state
+#define public_sET_STATe malloc_set_state
+#endif /* _LIBC */
+#endif /* USE_DL_PREFIX */
+
+
+#if !defined _LIBC && (!defined __GNUC__ || __GNUC__<3)
+#define __builtin_expect(expr, val) (expr)
+#endif
+
+/*
+  HAVE_MEMCPY should be defined if you are not otherwise using
+  ANSI STD C, but still have memcpy and memset in your C library
+  and want to use them in calloc and realloc. Otherwise simple
+  macro versions are defined below.
+
+  USE_MEMCPY should be defined as 1 if you actually want to
+  have memset and memcpy called. People report that the macro
+  versions are faster than libc versions on some systems.
+
+  Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks
+  (of <= 36 bytes) are manually unrolled in realloc and calloc.
+*/
+
+#define HAVE_MEMCPY
+
+#ifndef USE_MEMCPY
+#ifdef HAVE_MEMCPY
+#define USE_MEMCPY 1
+#else
+#define USE_MEMCPY 0
+#endif
+#endif
+
+
+#if (__STD_C || defined(HAVE_MEMCPY))
+
+#ifdef _LIBC
+# include <string.h>
+#else
+#ifdef WIN32
+/* On Win32 memset and memcpy are already declared in windows.h */
+#else
+#if __STD_C
+void* memset(void*, int, size_t);
+void* memcpy(void*, const void*, size_t);
+#else
+Void_t* memset();
+Void_t* memcpy();
+#endif
+#endif
+#endif
+#endif
+
+/*
+  MALLOC_FAILURE_ACTION is the action to take before "return 0" when
+  malloc fails to be able to return memory, either because memory is
+  exhausted or because of illegal arguments.
+
+  By default, sets errno if running on STD_C platform, else does nothing.
+*/
+
+#ifndef MALLOC_FAILURE_ACTION
+#if __STD_C
+#define MALLOC_FAILURE_ACTION \
+   errno = ENOMEM;
+
+#else
+#define MALLOC_FAILURE_ACTION
+#endif
+#endif
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+#ifdef LACKS_UNISTD_H
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+#if __STD_C
+extern Void_t*     sbrk(ptrdiff_t);
+#else
+extern Void_t*     sbrk();
+#endif
+#endif
+#endif
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies.
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+/*  MORECORE_CLEARS           (default 1)
+     The degree to which the routine mapped to MORECORE zeroes out
+     memory: never (0), only for newly allocated space (1) or always
+     (2).  The distinction between (1) and (2) is necessary because on
+     some systems, if the application first decrements and then
+     increments the break value, the contents of the reallocated space
+     are unspecified.
+*/
+
+#ifndef MORECORE_CLEARS
+#define MORECORE_CLEARS 1
+#endif
+
+
+/*
+  Define HAVE_MMAP as true to optionally make malloc() use mmap() to
+  allocate very large blocks.  These will be returned to the
+  operating system immediately after a free(). Also, if mmap
+  is available, it is used as a backup strategy in cases where
+  MORECORE fails to provide space from system.
+
+  This malloc is best tuned to work with mmap for large requests.
+  If you do not have mmap, operations involving very large chunks (1MB
+  or so) may be slower than you'd like.
+*/
+
+#ifndef HAVE_MMAP
+#define HAVE_MMAP 1
+
+/*
+   Standard unix mmap using /dev/zero clears memory so calloc doesn't
+   need to.
+*/
+
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 1
+#endif
+
+#else /* no mmap */
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 0
+#endif
+#endif
+
+
+/*
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup (which is done only if
+   HAVE_MMAP).  The value must be a multiple of page size.  This
+   backup strategy generally applies only when systems have "holes" in
+   address space, so sbrk cannot perform contiguous expansion, but
+   there is still space available on system.  On systems for which
+   this is known to be useful (i.e. most linux kernels), this occurs
+   only when programs allocate huge amounts of memory.  Between this,
+   and the fact that mmap regions tend to be limited, the size should
+   be large, to avoid too many mmap calls and thus avoid running out
+   of kernel resources.
+*/
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.  This is currently only possible on Linux with
+  kernel versions newer than 1.3.77.
+*/
+
+#ifndef HAVE_MREMAP
+#ifdef linux
+#define HAVE_MREMAP 1
+#else
+#define HAVE_MREMAP 0
+#endif
+
+#endif /* HAVE_MMAP */
+
+/* Define USE_ARENAS to enable support for multiple `arenas'.  These
+   are allocated using mmap(), are necessary for threads and
+   occasionally useful to overcome address space limitations affecting
+   sbrk(). */
+
+#ifndef USE_ARENAS
+#define USE_ARENAS HAVE_MMAP
+#endif
+
+/* USE_STARTER determines if and when the special "starter" hook
+   functions are used: not at all (0), during ptmalloc_init (first bit
+   set), or from the beginning until an explicit call to ptmalloc_init
+   (second bit set).  This is necessary if thread-related
+   initialization functions (e.g.  pthread_key_create) require
+   malloc() calls (set USE_STARTER=1), or if those functions initially
+   cannot be used at all (set USE_STARTER=2 and perform an explicit
+   ptmalloc_init() when the thread library is ready, typically at the
+   start of main()). */
+
+#ifndef USE_STARTER
+# ifndef _LIBC
+#  define USE_STARTER 1
+# else
+#  if USE___THREAD || (defined USE_TLS && !defined SHARED)
+    /* These routines are never needed in this configuration.  */
+#   define USE_STARTER 0
+#  else
+#   define USE_STARTER (USE_TLS ? 4 : 1)
+#  endif
+# endif
+#endif
+
+
+/*
+  The system page size. To the extent possible, this malloc manages
+  memory from the system in page-size units.  Note that this value is
+  cached during initialization into a field of malloc_state. So even
+  if malloc_getpagesize is a function, it is only called once.
+
+  The following mechanics for getpagesize were adapted from bsd/gnu
+  getpagesize.h. If none of the system-probes here apply, a value of
+  4096 is used, which should be OK: If they don't apply, then using
+  the actual value probably doesn't impact performance.
+*/
+
+
+#ifndef malloc_getpagesize
+
+#ifndef LACKS_UNISTD_H
+#  include <unistd.h>
+#endif
+
+#  ifdef _SC_PAGESIZE         /* some SVR4 systems omit an underscore */
+#    ifndef _SC_PAGE_SIZE
+#      define _SC_PAGE_SIZE _SC_PAGESIZE
+#    endif
+#  endif
+
+#  ifdef _SC_PAGE_SIZE
+#    define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+#  else
+#    if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+       extern size_t getpagesize();
+#      define malloc_getpagesize getpagesize()
+#    else
+#      ifdef WIN32 /* use supplied emulation of getpagesize */
+#        define malloc_getpagesize getpagesize()
+#      else
+#        ifndef LACKS_SYS_PARAM_H
+#          include <sys/param.h>
+#        endif
+#        ifdef EXEC_PAGESIZE
+#          define malloc_getpagesize EXEC_PAGESIZE
+#        else
+#          ifdef NBPG
+#            ifndef CLSIZE
+#              define malloc_getpagesize NBPG
+#            else
+#              define malloc_getpagesize (NBPG * CLSIZE)
+#            endif
+#          else
+#            ifdef NBPC
+#              define malloc_getpagesize NBPC
+#            else
+#              ifdef PAGESIZE
+#                define malloc_getpagesize PAGESIZE
+#              else /* just guess */
+#                define malloc_getpagesize (4096)
+#              endif
+#            endif
+#          endif
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG malloinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are are instead filled by mallinfo() with
+  other numbers that might be of interest.
+
+  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+  /usr/include/malloc.h file that includes a declaration of struct
+  mallinfo.  If so, it is included; else an SVID2/XPG2 compliant
+  version is declared below.  These must be precisely the same for
+  mallinfo() to work.  The original SVID version of this struct,
+  defined on most systems with mallinfo, declares all fields as
+  ints. But some others define as unsigned long. If your system
+  defines the fields using a type of different width than listed here,
+  you must #include your system version and #define
+  HAVE_USR_INCLUDE_MALLOC_H.
+*/
+
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
+
+#ifdef HAVE_USR_INCLUDE_MALLOC_H
+#include "/usr/include/malloc.h"
+#endif
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minumum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+#if __STD_C
+Void_t*  public_mALLOc(size_t);
+#else
+Void_t*  public_mALLOc();
+#endif
+#ifdef libc_hidden_proto
+libc_hidden_proto (public_mALLOc)
+#endif
+
+/*
+  free(Void_t* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+#if __STD_C
+void     public_fREe(Void_t*);
+#else
+void     public_fREe();
+#endif
+#ifdef libc_hidden_proto
+libc_hidden_proto (public_fREe)
+#endif
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+#if __STD_C
+Void_t*  public_cALLOc(size_t, size_t);
+#else
+Void_t*  public_cALLOc();
+#endif
+
+/*
+  realloc(Void_t* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  if n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always
+  be reallocated using malloc-copy-free sequences unless
+  the system supports MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+#if __STD_C
+Void_t*  public_rEALLOc(Void_t*, size_t);
+#else
+Void_t*  public_rEALLOc();
+#endif
+#ifdef libc_hidden_proto
+libc_hidden_proto (public_rEALLOc)
+#endif
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+#if __STD_C
+Void_t*  public_mEMALIGn(size_t, size_t);
+#else
+Void_t*  public_mEMALIGn();
+#endif
+#ifdef libc_hidden_proto
+libc_hidden_proto (public_mEMALIGn)
+#endif
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+#if __STD_C
+Void_t*  public_vALLOc(size_t);
+#else
+Void_t*  public_vALLOc();
+#endif
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any
+  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+#if __STD_C
+int      public_mALLOPt(int, int);
+#else
+int      public_mALLOPt();
+#endif
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+               have been freed but not use resused or consolidated)
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   the maximum total allocated space. This will be greater
+                than current total if trimming has occurred.
+  fsmblks:   total bytes held in fastbin blocks
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+               back to system via malloc_trim. ("ideally" means that
+               it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+#if __STD_C
+struct mallinfo public_mALLINFo(void);
+#else
+struct mallinfo public_mALLINFo();
+#endif
+
+/*
+  independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]);
+
+  independent_calloc is similar to calloc, but instead of returning a
+  single cleared space, it returns an array of pointers to n_elements
+  independent elements that can hold contents of size elem_size, each
+  of which starts out cleared, and can be independently freed,
+  realloc'ed etc. The elements are guaranteed to be adjacently
+  allocated (this is not guaranteed to occur with multiple callocs or
+  mallocs), which may also improve cache locality in some
+  applications.
+
+  The "chunks" argument is optional (i.e., may be null, which is
+  probably the most typical usage). If it is null, the returned array
+  is itself dynamically allocated and should also be freed when it is
+  no longer needed. Otherwise, the chunks array must be of at least
+  n_elements in length. It is filled in with the pointers to the
+  chunks.
+
+  In either case, independent_calloc returns this pointer array, or
+  null if the allocation failed.  If n_elements is zero and "chunks"
+  is null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be individually freed when it is no longer
+  needed. If you'd like to instead be able to free all at once, you
+  should instead use regular calloc and assign pointers into this
+  space to represent elements.  (In this case though, you cannot
+  independently free elements.)
+
+  independent_calloc simplifies and speeds up implementations of many
+  kinds of pools.  It may also be useful when constructing large data
+  structures that initially have a fixed number of fixed-sized nodes,
+  but the number is not known at compile time, and some of the nodes
+  may later need to be freed. For example:
+
+  struct Node { int item; struct Node* next; };
+
+  struct Node* build_list() {
+    struct Node** pool;
+    int n = read_number_of_nodes_needed();
+    if (n <= 0) return 0;
+    pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0);
+    if (pool == 0) die();
+    // organize into a linked list...
+    struct Node* first = pool[0];
+    for (i = 0; i < n-1; ++i)
+      pool[i]->next = pool[i+1];
+    free(pool);     // Can now free the array (or not, if it is needed later)
+    return first;
+  }
+*/
+#if __STD_C
+Void_t** public_iCALLOc(size_t, size_t, Void_t**);
+#else
+Void_t** public_iCALLOc();
+#endif
+
+/*
+  independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
+
+  independent_comalloc allocates, all at once, a set of n_elements
+  chunks with sizes indicated in the "sizes" array.    It returns
+  an array of pointers to these elements, each of which can be
+  independently freed, realloc'ed etc. The elements are guaranteed to
+  be adjacently allocated (this is not guaranteed to occur with
+  multiple callocs or mallocs), which may also improve cache locality
+  in some applications.
+
+  The "chunks" argument is optional (i.e., may be null). If it is null
+  the returned array is itself dynamically allocated and should also
+  be freed when it is no longer needed. Otherwise, the chunks array
+  must be of at least n_elements in length. It is filled in with the
+  pointers to the chunks.
+
+  In either case, independent_comalloc returns this pointer array, or
+  null if the allocation failed.  If n_elements is zero and chunks is
+  null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be individually freed when it is no longer
+  needed. If you'd like to instead be able to free all at once, you
+  should instead use a single regular malloc, and assign pointers at
+  particular offsets in the aggregate space. (In this case though, you
+  cannot independently free elements.)
+
+  independent_comallac differs from independent_calloc in that each
+  element may have a different size, and also that it does not
+  automatically clear elements.
+
+  independent_comalloc can be used to speed up allocation in cases
+  where several structs or objects must always be allocated at the
+  same time.  For example:
+
+  struct Head { ... }
+  struct Foot { ... }
+
+  void send_message(char* msg) {
+    int msglen = strlen(msg);
+    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
+    void* chunks[3];
+    if (independent_comalloc(3, sizes, chunks) == 0)
+      die();
+    struct Head* head = (struct Head*)(chunks[0]);
+    char*        body = (char*)(chunks[1]);
+    struct Foot* foot = (struct Foot*)(chunks[2]);
+    // ...
+  }
+
+  In general though, independent_comalloc is worth using only for
+  larger values of n_elements. For small values, you probably won't
+  detect enough difference from series of malloc calls to bother.
+
+  Overuse of independent_comalloc can increase overall memory usage,
+  since it cannot reuse existing noncontiguous small chunks that
+  might be available for some of the elements.
+*/
+#if __STD_C
+Void_t** public_iCOMALLOc(size_t, size_t*, Void_t**);
+#else
+Void_t** public_iCOMALLOc();
+#endif
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+#if __STD_C
+Void_t*  public_pVALLOc(size_t);
+#else
+Void_t*  public_pVALLOc();
+#endif
+
+/*
+  cfree(Void_t* p);
+  Equivalent to free(p).
+
+  cfree is needed/defined on some systems that pair it with calloc,
+  for odd historical reasons (such as: cfree is used in example
+  code in the first edition of K&R).
+*/
+#if __STD_C
+void     public_cFREe(Void_t*);
+#else
+void     public_cFREe();
+#endif
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  rreturn 0.
+*/
+#if __STD_C
+int      public_mTRIm(size_t);
+#else
+int      public_mTRIm();
+#endif
+
+/*
+  malloc_usable_size(Void_t* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+#if __STD_C
+size_t   public_mUSABLe(Void_t*);
+#else
+size_t   public_mUSABLe();
+#endif
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+#if __STD_C
+void     public_mSTATs(void);
+#else
+void     public_mSTATs();
+#endif
+
+/*
+  malloc_get_state(void);
+
+  Returns the state of all malloc variables in an opaque data
+  structure.
+*/
+#if __STD_C
+Void_t*  public_gET_STATe(void);
+#else
+Void_t*  public_gET_STATe();
+#endif
+
+/*
+  malloc_set_state(Void_t* state);
+
+  Restore the state of all malloc variables from data obtained with
+  malloc_get_state().
+*/
+#if __STD_C
+int      public_sET_STATe(Void_t*);
+#else
+int      public_sET_STATe();
+#endif
+
+#ifdef _LIBC
+/*
+  posix_memalign(void **memptr, size_t alignment, size_t size);
+
+  POSIX wrapper like memalign(), checking for validity of size.
+*/
+int      __posix_memalign(void **, size_t, size_t);
+#endif
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 8 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     64
+#endif
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather tham memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value It must be greater than page size to have any useful
+  effect.  To disable trimming completely, you can set to
+  (unsigned long)(-1)
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back
+      to the system, which helps keep the system level memory
+      demands of a long-lived program low.
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD (128 * 1024)
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because
+  some systems have a limited number of internal tables for
+  use by mmap, and using more than a few of them may degrade
+  performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.  If
+  HAVE_MMAP is not set, the default value is 0, and attempts to set it
+  to non-zero values in mallopt will fail.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#if HAVE_MMAP
+#define DEFAULT_MMAP_MAX       (65536)
+#else
+#define DEFAULT_MMAP_MAX       (0)
+#endif
+#endif
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#include "malloc.h"
+
+#ifndef BOUNDED_N
+#define BOUNDED_N(ptr, sz) (ptr)
+#endif
+#ifndef RETURN_ADDRESS
+#define RETURN_ADDRESS(X_) (NULL)
+#endif
+
+/* On some platforms we can compile internal, not exported functions better.
+   Let the environment provide a macro and define it to be empty if it
+   is not available.  */
+#ifndef internal_function
+# define internal_function
+#endif
+
+/* Forward declarations.  */
+struct malloc_chunk;
+typedef struct malloc_chunk* mchunkptr;
+
+/* Internal routines.  */
+
+#if __STD_C
+
+Void_t*         _int_malloc(mstate, size_t);
+void            _int_free(mstate, Void_t*);
+Void_t*         _int_realloc(mstate, Void_t*, size_t);
+Void_t*         _int_memalign(mstate, size_t, size_t);
+Void_t*         _int_valloc(mstate, size_t);
+static Void_t*  _int_pvalloc(mstate, size_t);
+/*static Void_t*  cALLOc(size_t, size_t);*/
+static Void_t** _int_icalloc(mstate, size_t, size_t, Void_t**);
+static Void_t** _int_icomalloc(mstate, size_t, size_t*, Void_t**);
+static int      mTRIm(size_t);
+static size_t   mUSABLe(Void_t*);
+static int      mALLOPt(int, int);
+
+static Void_t* internal_function mem2mem_check(Void_t *p, size_t sz);
+static int internal_function top_check(void);
+static void internal_function munmap_chunk(mchunkptr p);
+#if HAVE_MREMAP
+static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size);
+#endif
+
+static Void_t*   malloc_check(size_t sz, const Void_t *caller);
+static void      free_check(Void_t* mem, const Void_t *caller);
+static Void_t*   realloc_check(Void_t* oldmem, size_t bytes,
+			       const Void_t *caller);
+static Void_t*   memalign_check(size_t alignment, size_t bytes,
+				const Void_t *caller);
+#ifndef NO_THREADS
+# if USE_STARTER
+static Void_t*   malloc_starter(size_t sz, const Void_t *caller);
+static Void_t*   memalign_starter(size_t aln, size_t sz, const Void_t *caller);
+static void      free_starter(Void_t* mem, const Void_t *caller);
+# endif
+static Void_t*   malloc_atfork(size_t sz, const Void_t *caller);
+static void      free_atfork(Void_t* mem, const Void_t *caller);
+#endif
+
+#else
+
+Void_t*         _int_malloc();
+void            _int_free();
+Void_t*         _int_realloc();
+Void_t*         _int_memalign();
+Void_t*         _int_valloc();
+Void_t*         _int_pvalloc();
+/*static Void_t*  cALLOc();*/
+static Void_t** _int_icalloc();
+static Void_t** _int_icomalloc();
+static int      mTRIm();
+static size_t   mUSABLe();
+static int      mALLOPt();
+
+#endif
+
+
+
+
+/* ------------- Optional versions of memcopy ---------------- */
+
+
+#if USE_MEMCPY
+
+/*
+  Note: memcpy is ONLY invoked with non-overlapping regions,
+  so the (usually slower) memmove is not needed.
+*/
+
+#define MALLOC_COPY(dest, src, nbytes)  memcpy(dest, src, nbytes)
+#define MALLOC_ZERO(dest, nbytes)       memset(dest, 0,   nbytes)
+
+#else /* !USE_MEMCPY */
+
+/* Use Duff's device for good zeroing/copying performance. */
+
+#define MALLOC_ZERO(charp, nbytes)                                            \
+do {                                                                          \
+  INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp);                           \
+  unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T);                     \
+  long mcn;                                                                   \
+  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
+  switch (mctmp) {                                                            \
+    case 0: for(;;) { *mzp++ = 0;                                             \
+    case 7:           *mzp++ = 0;                                             \
+    case 6:           *mzp++ = 0;                                             \
+    case 5:           *mzp++ = 0;                                             \
+    case 4:           *mzp++ = 0;                                             \
+    case 3:           *mzp++ = 0;                                             \
+    case 2:           *mzp++ = 0;                                             \
+    case 1:           *mzp++ = 0; if(mcn <= 0) break; mcn--; }                \
+  }                                                                           \
+} while(0)
+
+#define MALLOC_COPY(dest,src,nbytes)                                          \
+do {                                                                          \
+  INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src;                            \
+  INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest;                           \
+  unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T);                     \
+  long mcn;                                                                   \
+  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
+  switch (mctmp) {                                                            \
+    case 0: for(;;) { *mcdst++ = *mcsrc++;                                    \
+    case 7:           *mcdst++ = *mcsrc++;                                    \
+    case 6:           *mcdst++ = *mcsrc++;                                    \
+    case 5:           *mcdst++ = *mcsrc++;                                    \
+    case 4:           *mcdst++ = *mcsrc++;                                    \
+    case 3:           *mcdst++ = *mcsrc++;                                    \
+    case 2:           *mcdst++ = *mcsrc++;                                    \
+    case 1:           *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; }       \
+  }                                                                           \
+} while(0)
+
+#endif
+
+/* ------------------ MMAP support ------------------  */
+
+
+#if HAVE_MMAP
+
+#include <fcntl.h>
+#ifndef LACKS_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+#if !defined(MAP_FAILED)
+# define MAP_FAILED ((char*)-1)
+#endif
+
+#ifndef MAP_NORESERVE
+# ifdef MAP_AUTORESRV
+#  define MAP_NORESERVE MAP_AUTORESRV
+# else
+#  define MAP_NORESERVE 0
+# endif
+#endif
+
+/*
+   Nearly all versions of mmap support MAP_ANONYMOUS,
+   so the following is unlikely to be needed, but is
+   supplied just in case.
+*/
+
+#ifndef MAP_ANONYMOUS
+
+static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
+
+#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? \
+ (dev_zero_fd = open("/dev/zero", O_RDWR), \
+  mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) : \
+   mmap((addr), (size), (prot), (flags), dev_zero_fd, 0))
+
+#else
+
+#define MMAP(addr, size, prot, flags) \
+ (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0))
+
+#endif
+
+
+#endif /* HAVE_MMAP */
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+
+/*
+  This struct declaration is misleading (but accurate and necessary).
+  It declares a "view" into memory allowing access to necessary
+  fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+  INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */
+  INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */
+
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+};
+
+
+/*
+   malloc_chunk details:
+
+    (The following includes lightly edited explanations by Colin Plumb.)
+
+    Chunks of memory are maintained using a `boundary tag' method as
+    described in e.g., Knuth or Standish.  (See the paper by Paul
+    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+    survey of such techniques.)  Sizes of free chunks are stored both
+    in the front of each chunk and at the end.  This makes
+    consolidating fragmented chunks into bigger chunks very fast.  The
+    size fields also hold bits representing whether chunks are free or
+    in use.
+
+    An allocated chunk looks like this:
+
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of previous chunk, if allocated            | |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             User data starts here...                          .
+            .                                                               .
+            .             (malloc_usable_space() bytes)                     .
+            .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of chunk                                     |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+    Where "chunk" is the front of the chunk for the purpose of most of
+    the malloc code, but "mem" is the pointer that is returned to the
+    user.  "Nextchunk" is the beginning of the next contiguous chunk.
+
+    Chunks always begin on even word boundries, so the mem portion
+    (which is returned to the user) is also on an even word boundary, and
+    thus at least double-word aligned.
+
+    Free chunks are stored in circular doubly-linked lists, and look like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of previous chunk                            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Forward pointer to next chunk in list             |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Back pointer to previous chunk in list            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Unused space (may be 0 bytes long)                .
+            .                                                               .
+            .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+    chunk size (which is always a multiple of two words), is an in-use
+    bit for the *previous* chunk.  If that bit is *clear*, then the
+    word before the current chunk size contains the previous chunk
+    size, and can be used to find the front of the previous chunk.
+    The very first chunk allocated always has this bit set,
+    preventing access to non-existent (or non-owned) memory. If
+    prev_inuse is set for any given chunk, then you CANNOT determine
+    the size of the previous chunk, and might even get a memory
+    addressing fault when trying to do so.
+
+    Note that the `foot' of the current chunk is actually represented
+    as the prev_size of the NEXT chunk. This makes it easier to
+    deal with alignments etc but can be very confusing when trying
+    to extend or adapt this code.
+
+    The two exceptions to all this are
+
+     1. The special chunk `top' doesn't bother using the
+        trailing size field since there is no next contiguous chunk
+        that would have to index off it. After initialization, `top'
+        is forced to always exist.  If it would become less than
+        MINSIZE bytes long, it is replenished.
+
+     2. Chunks allocated via mmap, which have the second-lowest-order
+        bit (IS_MMAPPED) set in their size fields.  Because they are
+        allocated one-by-one, each must contain its own trailing size field.
+
+*/
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p)   ((Void_t*)((char*)(p) + 2*SIZE_SZ))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        (sizeof(struct malloc_chunk))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((unsigned long)((m)) & (MALLOC_ALIGN_MASK)) == 0)
+
+
+/*
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+*/
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((unsigned long)(req) >=                                        \
+   (unsigned long)(INTERNAL_SIZE_T)(-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
+   MINSIZE :                                                      \
+   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/*  Same, except also perform argument check */
+
+#define checked_request2size(req, sz)                             \
+  if (REQUEST_OUT_OF_RANGE(req)) {                                \
+    MALLOC_FAILURE_ACTION;                                        \
+    return 0;                                                     \
+  }                                                               \
+  (sz) = request2size(req);
+
+/*
+  --------------- Physical chunk operations ---------------
+*/
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)
+
+
+/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
+   from a non-main arena.  This is only set immediately before handing
+   the chunk to the user, if necessary.  */
+#define NON_MAIN_ARENA 0x4
+
+/* check for chunk from non-main arena */
+#define chunk_non_main_arena(p) ((p)->size & NON_MAIN_ARENA)
+
+
+/*
+  Bits to mask off when extracting size
+
+  Note: IS_MMAPPED is intentionally not masked off from size field in
+  macros for which mmapped chunks should never be seen. This should
+  cause helpful core dumps to occur if it is tried by accident by
+  people extending or adapting this malloc.
+*/
+#define SIZE_BITS (PREV_INUSE|IS_MMAPPED|NON_MAIN_ARENA)
+
+/* Get size, ignoring use bits */
+#define chunksize(p)         ((p)->size & ~(SIZE_BITS))
+
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~SIZE_BITS) ))
+
+/* Ptr to previous physical malloc_chunk */
+#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr)(((char*)(p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)\
+((((mchunkptr)(((char*)(p))+((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)\
+((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE
+
+#define clear_inuse(p)\
+((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)\
+ (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)\
+ (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)\
+ (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->size = (((p)->size & SIZE_BITS) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s)       (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))
+
+
+/*
+  -------------------- Internal data structures --------------------
+
+   All internal state is held in an instance of malloc_state defined
+   below. There are no other static variables, except in two optional
+   cases:
+   * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
+   * If HAVE_MMAP is true, but mmap doesn't support
+     MAP_ANONYMOUS, a dummy file descriptor for mmap.
+
+   Beware of lots of tricks that minimize the total bookkeeping space
+   requirements. The result is a little over 1K bytes (for 4byte
+   pointers and size_t.)
+*/
+
+/*
+  Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (128). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceeded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in double-linked lists, each bin header acts
+    as a malloc_chunk. This avoids special-casing for headers.
+    But to conserve space and improve locality, we allocate
+    only the fd/bk pointers of bins, and then use repositioning tricks
+    to treat these as the fields of a malloc_chunk*.
+*/
+
+typedef struct malloc_chunk* mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - (SIZE_SZ<<1)))
+
+/* analog of ++bin */
+#define next_bin(b)  ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1)))
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/* Take a chunk off a bin list */
+#define unlink(P, BK, FD) {                                            \
+  FD = P->fd;                                                          \
+  BK = P->bk;                                                          \
+  FD->bk = BK;                                                         \
+  BK->fd = FD;                                                         \
+}
+
+/*
+  Indexing
+
+    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+    8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+    64 bins of size       8
+    32 bins of size      64
+    16 bins of size     512
+     8 bins of size    4096
+     4 bins of size   32768
+     2 bins of size  262144
+     1 bin  of size what's left
+
+    There is actually a little bit of slop in the numbers in bin_index
+    for the sake of speed. This makes no difference elsewhere.
+
+    The bins top out around 1MB because we expect to service large
+    requests via mmap.
+*/
+
+#define NBINS             128
+#define NSMALLBINS         64
+#define SMALLBIN_WIDTH      8
+#define MIN_LARGE_SIZE    512
+
+#define in_smallbin_range(sz)  \
+  ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE)
+
+#define smallbin_index(sz)     (((unsigned)(sz)) >> 3)
+
+#define largebin_index(sz)                                                   \
+(((((unsigned long)(sz)) >>  6) <= 32)?  56 + (((unsigned long)(sz)) >>  6): \
+ ((((unsigned long)(sz)) >>  9) <= 20)?  91 + (((unsigned long)(sz)) >>  9): \
+ ((((unsigned long)(sz)) >> 12) <= 10)? 110 + (((unsigned long)(sz)) >> 12): \
+ ((((unsigned long)(sz)) >> 15) <=  4)? 119 + (((unsigned long)(sz)) >> 15): \
+ ((((unsigned long)(sz)) >> 18) <=  2)? 124 + (((unsigned long)(sz)) >> 18): \
+                                        126)
+
+#define bin_index(sz) \
+ ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz))
+
+/*
+  FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the
+  first bin that is maintained in sorted order. This must
+  be the smallest size corresponding to a given bin.
+
+  Normally, this should be MIN_LARGE_SIZE. But you can weaken
+  best fit guarantees to sometimes speed up malloc by increasing value.
+  Doing this means that malloc may choose a chunk that is 
+  non-best-fitting by up to the width of the bin.
+
+  Some useful cutoff values:
+      512 - all bins sorted
+     2560 - leaves bins <=     64 bytes wide unsorted  
+    12288 - leaves bins <=    512 bytes wide unsorted
+    65536 - leaves bins <=   4096 bytes wide unsorted
+   262144 - leaves bins <=  32768 bytes wide unsorted
+       -1 - no bins sorted (not recommended!)
+*/
+
+#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE 
+/* #define FIRST_SORTED_BIN_SIZE 65536 */
+
+/*
+  Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+
+    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
+    does not have to be taken into account in size comparisons.
+*/
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at(M, 1))
+
+/*
+  Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sYSMALLOc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+*/
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks(M))
+
+/*
+  Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+*/
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT)-1))))
+
+#define mark_bin(m,i)    ((m)->binmap[idx2block(i)] |=  idx2bit(i))
+#define unmark_bin(m,i)  ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
+#define get_binmap(m,i)  ((m)->binmap[idx2block(i)] &   idx2bit(i))
+
+/*
+  Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks.
+*/
+
+typedef struct malloc_chunk* mfastbinptr;
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz)        ((((unsigned int)(sz)) >> 3) - 2)
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     80
+
+#define NFASTBINS  (fastbin_index(request2size(MAX_FAST_SIZE))+1)
+
+/*
+  FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+  that triggers automatic consolidation of possibly-surrounding
+  fastbin chunks. This is a heuristic, so the exact value should not
+  matter too much. It is defined at half the default trim threshold as a
+  compromise heuristic to only attempt consolidation if it is likely
+  to lead to trimming. However, it is not dynamically tunable, since
+  consolidation reduces fragmentation surrounding large chunks even
+  if trimming is not used.
+*/
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
+
+/*
+  Since the lowest 2 bits in max_fast don't matter in size comparisons,
+  they are used as flags.
+*/
+
+/*
+  FASTCHUNKS_BIT held in max_fast indicates that there are probably
+  some fastbin chunks. It is set true on entering a chunk into any
+  fastbin, and cleared only in malloc_consolidate.
+
+  The truth value is inverted so that have_fastchunks will be true
+  upon startup (since statics are zero-filled), simplifying
+  initialization checks.
+*/
+
+#define FASTCHUNKS_BIT        (1U)
+
+#define have_fastchunks(M)     (((M)->max_fast &  FASTCHUNKS_BIT) == 0)
+#define clear_fastchunks(M)    ((M)->max_fast |=  FASTCHUNKS_BIT)
+#define set_fastchunks(M)      ((M)->max_fast &= ~FASTCHUNKS_BIT)
+
+/*
+  NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
+  regions.  Otherwise, contiguity is exploited in merging together,
+  when possible, results from consecutive MORECORE calls.
+
+  The initial value comes from MORECORE_CONTIGUOUS, but is
+  changed dynamically if mmap is ever used as an sbrk substitute.
+*/
+
+#define NONCONTIGUOUS_BIT     (2U)
+
+#define contiguous(M)          (((M)->max_fast &  NONCONTIGUOUS_BIT) == 0)
+#define noncontiguous(M)       (((M)->max_fast &  NONCONTIGUOUS_BIT) != 0)
+#define set_noncontiguous(M)   ((M)->max_fast |=  NONCONTIGUOUS_BIT)
+#define set_contiguous(M)      ((M)->max_fast &= ~NONCONTIGUOUS_BIT)
+
+/*
+   Set value of max_fast.
+   Use impossibly small value if 0.
+   Precondition: there are no existing fastbin chunks.
+   Setting the value clears fastchunk bit but preserves noncontiguous bit.
+*/
+
+#define set_max_fast(M, s) \
+  (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \
+  FASTCHUNKS_BIT | \
+  ((M)->max_fast &  NONCONTIGUOUS_BIT)
+
+
+/*
+   ----------- Internal state representation and initialization -----------
+*/
+
+struct malloc_state {
+  /* Serialize access.  */
+  mutex_t mutex;
+
+  /* Statistics for locking.  Only used if THREAD_STATS is defined.  */
+  long stat_lock_direct, stat_lock_loop, stat_lock_wait;
+  long pad0_[1]; /* try to give the mutex its own cacheline */
+
+  /* The maximum chunk size to be eligible for fastbin */
+  INTERNAL_SIZE_T  max_fast;   /* low 2 bits used as flags */
+
+  /* Fastbins */
+  mfastbinptr      fastbins[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  mchunkptr        top;
+
+  /* The remainder from the most recent split of a small request */
+  mchunkptr        last_remainder;
+
+  /* Normal bins packed as described above */
+  mchunkptr        bins[NBINS * 2];
+
+  /* Bitmap of bins */
+  unsigned int     binmap[BINMAPSIZE];
+
+  /* Linked list */
+  struct malloc_state *next;
+
+  /* Memory allocated from the system in this arena.  */
+  INTERNAL_SIZE_T system_mem;
+  INTERNAL_SIZE_T max_system_mem;
+};
+
+struct malloc_par {
+  /* Tunable parameters */
+  unsigned long    trim_threshold;
+  INTERNAL_SIZE_T  top_pad;
+  INTERNAL_SIZE_T  mmap_threshold;
+
+  /* Memory map support */
+  int              n_mmaps;
+  int              n_mmaps_max;
+  int              max_n_mmaps;
+
+  /* Cache malloc_getpagesize */
+  unsigned int     pagesize;
+
+  /* Statistics */
+  INTERNAL_SIZE_T  mmapped_mem;
+  /*INTERNAL_SIZE_T  sbrked_mem;*/
+  /*INTERNAL_SIZE_T  max_sbrked_mem;*/
+  INTERNAL_SIZE_T  max_mmapped_mem;
+  INTERNAL_SIZE_T  max_total_mem; /* only kept for NO_THREADS */
+
+  /* First address handed out by MORECORE/sbrk.  */
+  char*            sbrk_base;
+};
+
+/* There are several instances of this struct ("arenas") in this
+   malloc.  If you are adapting this malloc in a way that does NOT use
+   a static or mmapped malloc_state, you MUST explicitly zero-fill it
+   before using. This malloc relies on the property that malloc_state
+   is initialized to all zeroes (as is true of C statics).  */
+
+static struct malloc_state main_arena;
+
+/* There is only one instance of the malloc parameters.  */
+
+static struct malloc_par mp_;
+
+/*
+  Initialize a malloc_state struct.
+
+  This is called only from within malloc_consolidate, which needs
+  be called in the same contexts anyway.  It is never called directly
+  outside of malloc_consolidate because some optimizing compilers try
+  to inline it at all call points, which turns out not to be an
+  optimization at all. (Inlining it in malloc_consolidate is fine though.)
+*/
+
+#if __STD_C
+static void malloc_init_state(mstate av)
+#else
+static void malloc_init_state(av) mstate av;
+#endif
+{
+  int     i;
+  mbinptr bin;
+
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i) {
+    bin = bin_at(av,i);
+    bin->fd = bin->bk = bin;
+  }
+
+#if MORECORE_CONTIGUOUS
+  if (av != &main_arena)
+#endif
+    set_noncontiguous(av);
+
+  set_max_fast(av, DEFAULT_MXFAST);
+
+  av->top            = initial_top(av);
+}
+
+/*
+   Other internal utilities operating on mstates
+*/
+
+#if __STD_C
+static Void_t*  sYSMALLOc(INTERNAL_SIZE_T, mstate);
+static int      sYSTRIm(size_t, mstate);
+static void     malloc_consolidate(mstate);
+static Void_t** iALLOc(mstate, size_t, size_t*, int, Void_t**);
+#else
+static Void_t*  sYSMALLOc();
+static int      sYSTRIm();
+static void     malloc_consolidate();
+static Void_t** iALLOc();
+#endif
+
+
+/* -------------- Early definitions for debugging hooks ---------------- */
+
+/* Define and initialize the hook variables.  These weak definitions must
+   appear before any use of the variables in a function (arena.c uses one).  */
+#ifndef weak_variable
+#ifndef _LIBC
+#define weak_variable /**/
+#else
+/* In GNU libc we want the hook variables to be weak definitions to
+   avoid a problem with Emacs.  */
+#define weak_variable weak_function
+#endif
+#endif
+
+#if !(USE_STARTER & 2)
+# define free_hook_ini     NULL
+/* Forward declarations.  */
+static Void_t* malloc_hook_ini __MALLOC_P ((size_t sz,
+					    const __malloc_ptr_t caller));
+static Void_t* realloc_hook_ini __MALLOC_P ((Void_t* ptr, size_t sz,
+					     const __malloc_ptr_t caller));
+static Void_t* memalign_hook_ini __MALLOC_P ((size_t alignment, size_t sz,
+					      const __malloc_ptr_t caller));
+#else
+# define free_hook_ini     free_starter
+# define malloc_hook_ini   malloc_starter
+# define realloc_hook_ini  NULL
+# define memalign_hook_ini memalign_starter
+#endif
+
+void weak_variable (*__malloc_initialize_hook) __MALLOC_PMT ((void)) = NULL;
+void weak_variable (*__free_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr,
+                                                 const __malloc_ptr_t))
+     = free_hook_ini;
+__malloc_ptr_t weak_variable (*__malloc_hook)
+ __MALLOC_PMT ((size_t __size, const __malloc_ptr_t)) = malloc_hook_ini;
+__malloc_ptr_t weak_variable (*__realloc_hook)
+ __MALLOC_PMT ((__malloc_ptr_t __ptr, size_t __size, const __malloc_ptr_t))
+     = realloc_hook_ini;
+__malloc_ptr_t weak_variable (*__memalign_hook)
+ __MALLOC_PMT ((size_t __alignment, size_t __size, const __malloc_ptr_t))
+     = memalign_hook_ini;
+void weak_variable (*__after_morecore_hook) __MALLOC_P ((void)) = NULL;
+
+
+/* ------------------- Support for multiple arenas -------------------- */
+#include "arena.c"
+
+/*
+  Debugging support
+
+  These routines make a number of assertions about the states
+  of data structures that should be true at all times. If any
+  are not true, it's very likely that a user program has somehow
+  trashed memory. (It's also possible that there is a coding error
+  in malloc. In which case, please report it!)
+*/
+
+#if ! MALLOC_DEBUG
+
+#define check_chunk(A,P)
+#define check_free_chunk(A,P)
+#define check_inuse_chunk(A,P)
+#define check_remalloced_chunk(A,P,N)
+#define check_malloced_chunk(A,P,N)
+#define check_malloc_state(A)
+
+#else
+
+#define check_chunk(A,P)              do_check_chunk(A,P)
+#define check_free_chunk(A,P)         do_check_free_chunk(A,P)
+#define check_inuse_chunk(A,P)        do_check_inuse_chunk(A,P)
+#define check_remalloced_chunk(A,P,N) do_check_remalloced_chunk(A,P,N)
+#define check_malloced_chunk(A,P,N)   do_check_malloced_chunk(A,P,N)
+#define check_malloc_state(A)         do_check_malloc_state(A)
+
+/*
+  Properties of all chunks
+*/
+
+#if __STD_C
+static void do_check_chunk(mstate av, mchunkptr p)
+#else
+static void do_check_chunk(av, p) mstate av; mchunkptr p;
+#endif
+{
+  unsigned long sz = chunksize(p);
+  /* min and max possible addresses assuming contiguous allocation */
+  char* max_address = (char*)(av->top) + chunksize(av->top);
+  char* min_address = max_address - av->system_mem;
+
+  if (!chunk_is_mmapped(p)) {
+
+    /* Has legal address ... */
+    if (p != av->top) {
+      if (contiguous(av)) {
+        assert(((char*)p) >= min_address);
+        assert(((char*)p + sz) <= ((char*)(av->top)));
+      }
+    }
+    else {
+      /* top size is always at least MINSIZE */
+      assert((unsigned long)(sz) >= MINSIZE);
+      /* top predecessor always marked inuse */
+      assert(prev_inuse(p));
+    }
+
+  }
+  else {
+#if HAVE_MMAP
+    /* address is outside main heap  */
+    if (contiguous(av) && av->top != initial_top(av)) {
+      assert(((char*)p) < min_address || ((char*)p) > max_address);
+    }
+    /* chunk is page-aligned */
+    assert(((p->prev_size + sz) & (mp_.pagesize-1)) == 0);
+    /* mem is aligned */
+    assert(aligned_OK(chunk2mem(p)));
+#else
+    /* force an appropriate assert violation if debug set */
+    assert(!chunk_is_mmapped(p));
+#endif
+  }
+}
+
+/*
+  Properties of free chunks
+*/
+
+#if __STD_C
+static void do_check_free_chunk(mstate av, mchunkptr p)
+#else
+static void do_check_free_chunk(av, p) mstate av; mchunkptr p;
+#endif
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
+  mchunkptr next = chunk_at_offset(p, sz);
+
+  do_check_chunk(av, p);
+
+  /* Chunk must claim to be free ... */
+  assert(!inuse(p));
+  assert (!chunk_is_mmapped(p));
+
+  /* Unless a special marker, must have OK fields */
+  if ((unsigned long)(sz) >= MINSIZE)
+  {
+    assert((sz & MALLOC_ALIGN_MASK) == 0);
+    assert(aligned_OK(chunk2mem(p)));
+    /* ... matching footer field */
+    assert(next->prev_size == sz);
+    /* ... and is fully consolidated */
+    assert(prev_inuse(p));
+    assert (next == av->top || inuse(next));
+
+    /* ... and has minimally sane links */
+    assert(p->fd->bk == p);
+    assert(p->bk->fd == p);
+  }
+  else /* markers are always of size SIZE_SZ */
+    assert(sz == SIZE_SZ);
+}
+
+/*
+  Properties of inuse chunks
+*/
+
+#if __STD_C
+static void do_check_inuse_chunk(mstate av, mchunkptr p)
+#else
+static void do_check_inuse_chunk(av, p) mstate av; mchunkptr p;
+#endif
+{
+  mchunkptr next;
+
+  do_check_chunk(av, p);
+
+  if (chunk_is_mmapped(p))
+    return; /* mmapped chunks have no next/prev */
+
+  /* Check whether it claims to be in use ... */
+  assert(inuse(p));
+
+  next = next_chunk(p);
+
+  /* ... and is surrounded by OK chunks.
+    Since more things can be checked with free chunks than inuse ones,
+    if an inuse chunk borders them and debug is on, it's worth doing them.
+  */
+  if (!prev_inuse(p))  {
+    /* Note that we cannot even look at prev unless it is not inuse */
+    mchunkptr prv = prev_chunk(p);
+    assert(next_chunk(prv) == p);
+    do_check_free_chunk(av, prv);
+  }
+
+  if (next == av->top) {
+    assert(prev_inuse(next));
+    assert(chunksize(next) >= MINSIZE);
+  }
+  else if (!inuse(next))
+    do_check_free_chunk(av, next);
+}
+
+/*
+  Properties of chunks recycled from fastbins
+*/
+
+#if __STD_C
+static void do_check_remalloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+#else
+static void do_check_remalloced_chunk(av, p, s)
+mstate av; mchunkptr p; INTERNAL_SIZE_T s;
+#endif
+{
+  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
+
+  if (!chunk_is_mmapped(p)) {
+    assert(av == arena_for_chunk(p));
+    if (chunk_non_main_arena(p))
+      assert(av != &main_arena);
+    else
+      assert(av == &main_arena);
+  }
+
+  do_check_inuse_chunk(av, p);
+
+  /* Legal size ... */
+  assert((sz & MALLOC_ALIGN_MASK) == 0);
+  assert((unsigned long)(sz) >= MINSIZE);
+  /* ... and alignment */
+  assert(aligned_OK(chunk2mem(p)));
+  /* chunk is less than MINSIZE more than request */
+  assert((long)(sz) - (long)(s) >= 0);
+  assert((long)(sz) - (long)(s + MINSIZE) < 0);
+}
+
+/*
+  Properties of nonrecycled chunks at the point they are malloced
+*/
+
+#if __STD_C
+static void do_check_malloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s)
+#else
+static void do_check_malloced_chunk(av, p, s)
+mstate av; mchunkptr p; INTERNAL_SIZE_T s;
+#endif
+{
+  /* same as recycled case ... */
+  do_check_remalloced_chunk(av, p, s);
+
+  /*
+    ... plus,  must obey implementation invariant that prev_inuse is
+    always true of any allocated chunk; i.e., that each allocated
+    chunk borders either a previously allocated and still in-use
+    chunk, or the base of its memory arena. This is ensured
+    by making all allocations from the the `lowest' part of any found
+    chunk.  This does not necessarily hold however for chunks
+    recycled via fastbins.
+  */
+
+  assert(prev_inuse(p));
+}
+
+
+/*
+  Properties of malloc_state.
+
+  This may be useful for debugging malloc, as well as detecting user
+  programmer errors that somehow write into malloc_state.
+
+  If you are extending or experimenting with this malloc, you can
+  probably figure out how to hack this routine to print out or
+  display chunk addresses, sizes, bins, and other instrumentation.
+*/
+
+static void do_check_malloc_state(mstate av)
+{
+  int i;
+  mchunkptr p;
+  mchunkptr q;
+  mbinptr b;
+  unsigned int binbit;
+  int empty;
+  unsigned int idx;
+  INTERNAL_SIZE_T size;
+  unsigned long total = 0;
+  int max_fast_bin;
+
+  /* internal size_t must be no wider than pointer type */
+  assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char*));
+
+  /* alignment is a power of 2 */
+  assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-1)) == 0);
+
+  /* cannot run remaining checks until fully initialized */
+  if (av->top == 0 || av->top == initial_top(av))
+    return;
+
+  /* pagesize is a power of 2 */
+  assert((mp_.pagesize & (mp_.pagesize-1)) == 0);
+
+  /* A contiguous main_arena is consistent with sbrk_base.  */
+  if (av == &main_arena && contiguous(av))
+    assert((char*)mp_.sbrk_base + av->system_mem ==
+	   (char*)av->top + chunksize(av->top));
+
+  /* properties of fastbins */
+
+  /* max_fast is in allowed range */
+  assert((av->max_fast & ~1) <= request2size(MAX_FAST_SIZE));
+
+  max_fast_bin = fastbin_index(av->max_fast);
+
+  for (i = 0; i < NFASTBINS; ++i) {
+    p = av->fastbins[i];
+
+    /* all bins past max_fast are empty */
+    if (i > max_fast_bin)
+      assert(p == 0);
+
+    while (p != 0) {
+      /* each chunk claims to be inuse */
+      do_check_inuse_chunk(av, p);
+      total += chunksize(p);
+      /* chunk belongs in this bin */
+      assert(fastbin_index(chunksize(p)) == i);
+      p = p->fd;
+    }
+  }
+
+  if (total != 0)
+    assert(have_fastchunks(av));
+  else if (!have_fastchunks(av))
+    assert(total == 0);
+
+  /* check normal bins */
+  for (i = 1; i < NBINS; ++i) {
+    b = bin_at(av,i);
+
+    /* binmap is accurate (except for bin 1 == unsorted_chunks) */
+    if (i >= 2) {
+      binbit = get_binmap(av,i);
+      empty = last(b) == b;
+      if (!binbit)
+        assert(empty);
+      else if (!empty)
+        assert(binbit);
+    }
+
+    for (p = last(b); p != b; p = p->bk) {
+      /* each chunk claims to be free */
+      do_check_free_chunk(av, p);
+      size = chunksize(p);
+      total += size;
+      if (i >= 2) {
+        /* chunk belongs in bin */
+        idx = bin_index(size);
+        assert(idx == i);
+        /* lists are sorted */
+        if ((unsigned long) size >= (unsigned long)(FIRST_SORTED_BIN_SIZE)) {
+	  assert(p->bk == b ||
+		 (unsigned long)chunksize(p->bk) >=
+		 (unsigned long)chunksize(p));
+	}
+      }
+      /* chunk is followed by a legal chain of inuse chunks */
+      for (q = next_chunk(p);
+           (q != av->top && inuse(q) &&
+             (unsigned long)(chunksize(q)) >= MINSIZE);
+           q = next_chunk(q))
+        do_check_inuse_chunk(av, q);
+    }
+  }
+
+  /* top chunk is OK */
+  check_chunk(av, av->top);
+
+  /* sanity checks for statistics */
+
+#ifdef NO_THREADS
+  assert(total <= (unsigned long)(mp_.max_total_mem));
+  assert(mp_.n_mmaps >= 0);
+#endif
+  assert(mp_.n_mmaps <= mp_.n_mmaps_max);
+  assert(mp_.n_mmaps <= mp_.max_n_mmaps);
+
+  assert((unsigned long)(av->system_mem) <=
+         (unsigned long)(av->max_system_mem));
+
+  assert((unsigned long)(mp_.mmapped_mem) <=
+         (unsigned long)(mp_.max_mmapped_mem));
+
+#ifdef NO_THREADS
+  assert((unsigned long)(mp_.max_total_mem) >=
+         (unsigned long)(mp_.mmapped_mem) + (unsigned long)(av->system_mem));
+#endif
+}
+#endif
+
+
+/* ----------------- Support for debugging hooks -------------------- */
+#include "hooks.c"
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+  sysmalloc handles malloc cases requiring more memory from the system.
+  On entry, it is assumed that av->top does not have enough
+  space to service request for nb bytes, thus requiring that av->top
+  be extended or replaced.
+*/
+
+#if __STD_C
+static Void_t* sYSMALLOc(INTERNAL_SIZE_T nb, mstate av)
+#else
+static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av;
+#endif
+{
+  mchunkptr       old_top;        /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char*           old_end;        /* its end address */
+
+  long            size;           /* arg to first MORECORE or mmap call */
+  char*           brk;            /* return value from MORECORE */
+
+  long            correction;     /* arg to 2nd MORECORE call */
+  char*           snd_brk;        /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char*           aligned_brk;    /* aligned offset into brk */
+
+  mchunkptr       p;              /* the allocated/returned chunk */
+  mchunkptr       remainder;      /* remainder from allocation */
+  unsigned long   remainder_size; /* its size */
+
+  unsigned long   sum;            /* for updating stats */
+
+  size_t          pagemask  = mp_.pagesize - 1;
+
+
+#if HAVE_MMAP
+
+  /*
+    If have mmap, and the request size meets the mmap threshold, and
+    the system supports mmap, and there are few enough currently
+    allocated mmapped regions, try to directly map this request
+    rather than expanding top.
+  */
+
+  if ((unsigned long)(nb) >= (unsigned long)(mp_.mmap_threshold) &&
+      (mp_.n_mmaps < mp_.n_mmaps_max)) {
+
+    char* mm;             /* return value from mmap call*/
+
+    /*
+      Round up size to nearest page.  For mmapped chunks, the overhead
+      is one SIZE_SZ unit larger than for normal chunks, because there
+      is no following chunk whose prev_size field could be used.
+    */
+    size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask;
+
+    /* Don't try if size wraps around 0 */
+    if ((unsigned long)(size) > (unsigned long)(nb)) {
+
+      mm = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE));
+
+      if (mm != MAP_FAILED) {
+
+        /*
+          The offset to the start of the mmapped region is stored
+          in the prev_size field of the chunk. This allows us to adjust
+          returned start address to meet alignment requirements here
+          and in memalign(), and still be able to compute proper
+          address argument for later munmap in free() and realloc().
+        */
+
+        front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK;
+        if (front_misalign > 0) {
+          correction = MALLOC_ALIGNMENT - front_misalign;
+          p = (mchunkptr)(mm + correction);
+          p->prev_size = correction;
+          set_head(p, (size - correction) |IS_MMAPPED);
+        }
+        else {
+          p = (mchunkptr)mm;
+          set_head(p, size|IS_MMAPPED);
+        }
+
+        /* update statistics */
+
+        if (++mp_.n_mmaps > mp_.max_n_mmaps)
+          mp_.max_n_mmaps = mp_.n_mmaps;
+
+        sum = mp_.mmapped_mem += size;
+        if (sum > (unsigned long)(mp_.max_mmapped_mem))
+          mp_.max_mmapped_mem = sum;
+#ifdef NO_THREADS
+        sum += av->system_mem;
+        if (sum > (unsigned long)(mp_.max_total_mem))
+          mp_.max_total_mem = sum;
+#endif
+
+        check_chunk(av, p);
+
+        return chunk2mem(p);
+      }
+    }
+  }
+#endif
+
+  /* Record incoming configuration of top */
+
+  old_top  = av->top;
+  old_size = chunksize(old_top);
+  old_end  = (char*)(chunk_at_offset(old_top, old_size));
+
+  brk = snd_brk = (char*)(MORECORE_FAILURE);
+
+  /*
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+  */
+
+  assert((old_top == initial_top(av) && old_size == 0) ||
+         ((unsigned long) (old_size) >= MINSIZE &&
+          prev_inuse(old_top) &&
+	  ((unsigned long)old_end & pagemask) == 0));
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert((unsigned long)(old_size) < (unsigned long)(nb + MINSIZE));
+
+  /* Precondition: all fastbins are consolidated */
+  assert(!have_fastchunks(av));
+
+
+  if (av != &main_arena) {
+
+    heap_info *old_heap, *heap;
+    size_t old_heap_size;
+
+    /* First try to extend the current heap. */
+    old_heap = heap_for_ptr(old_top);
+    old_heap_size = old_heap->size;
+    if (grow_heap(old_heap, MINSIZE + nb - old_size) == 0) {
+      av->system_mem += old_heap->size - old_heap_size;
+      arena_mem += old_heap->size - old_heap_size;
+#if 0
+      if(mmapped_mem + arena_mem + sbrked_mem > max_total_mem)
+        max_total_mem = mmapped_mem + arena_mem + sbrked_mem;
+#endif
+      set_head(old_top, (((char *)old_heap + old_heap->size) - (char *)old_top)
+	       | PREV_INUSE);
+    }
+    else if ((heap = new_heap(nb + (MINSIZE + sizeof(*heap)), mp_.top_pad))) {
+      /* Use a newly allocated heap.  */
+      heap->ar_ptr = av;
+      heap->prev = old_heap;
+      av->system_mem += heap->size;
+      arena_mem += heap->size;
+#if 0
+      if((unsigned long)(mmapped_mem + arena_mem + sbrked_mem) > max_total_mem)
+	max_total_mem = mmapped_mem + arena_mem + sbrked_mem;
+#endif
+      /* Set up the new top.  */
+      top(av) = chunk_at_offset(heap, sizeof(*heap));
+      set_head(top(av), (heap->size - sizeof(*heap)) | PREV_INUSE);
+
+      /* Setup fencepost and free the old top chunk. */
+      /* The fencepost takes at least MINSIZE bytes, because it might
+	 become the top chunk again later.  Note that a footer is set
+	 up, too, although the chunk is marked in use. */
+      old_size -= MINSIZE;
+      set_head(chunk_at_offset(old_top, old_size + 2*SIZE_SZ), 0|PREV_INUSE);
+      if (old_size >= MINSIZE) {
+	set_head(chunk_at_offset(old_top, old_size), (2*SIZE_SZ)|PREV_INUSE);
+	set_foot(chunk_at_offset(old_top, old_size), (2*SIZE_SZ));
+	set_head(old_top, old_size|PREV_INUSE|NON_MAIN_ARENA);
+	_int_free(av, chunk2mem(old_top));
+      } else {
+	set_head(old_top, (old_size + 2*SIZE_SZ)|PREV_INUSE);
+	set_foot(old_top, (old_size + 2*SIZE_SZ));
+      }
+    }
+
+  } else { /* av == main_arena */
+
+
+  /* Request enough space for nb + pad + overhead */
+
+  size = nb + mp_.top_pad + MINSIZE;
+
+  /*
+    If contiguous, we can subtract out existing space that we hope to
+    combine with new space. We add it back later only if
+    we don't actually get contiguous space.
+  */
+
+  if (contiguous(av))
+    size -= old_size;
+
+  /*
+    Round to a multiple of page size.
+    If MORECORE is not contiguous, this ensures that we only call it
+    with whole-page arguments.  And if MORECORE is contiguous and
+    this is not first time through, this preserves page-alignment of
+    previous calls. Otherwise, we correct to page-align below.
+  */
+
+  size = (size + pagemask) & ~pagemask;
+
+  /*
+    Don't try to call MORECORE if argument is so big as to appear
+    negative. Note that since mmap takes size_t arg, it may succeed
+    below even if we cannot call MORECORE.
+  */
+
+  if (size > 0)
+    brk = (char*)(MORECORE(size));
+
+  if (brk != (char*)(MORECORE_FAILURE)) {
+    /* Call the `morecore' hook if necessary.  */
+    if (__after_morecore_hook)
+      (*__after_morecore_hook) ();
+  } else {
+  /*
+    If have mmap, try using it as a backup when MORECORE fails or
+    cannot be used. This is worth doing on systems that have "holes" in
+    address space, so sbrk cannot extend to give contiguous space, but
+    space is available elsewhere.  Note that we ignore mmap max count
+    and threshold limits, since the space will not be used as a
+    segregated mmap region.
+  */
+
+#if HAVE_MMAP
+    /* Cannot merge with old top, so add its size back in */
+    if (contiguous(av))
+      size = (size + old_size + pagemask) & ~pagemask;
+
+    /* If we are relying on mmap as backup, then use larger units */
+    if ((unsigned long)(size) < (unsigned long)(MMAP_AS_MORECORE_SIZE))
+      size = MMAP_AS_MORECORE_SIZE;
+
+    /* Don't try if size wraps around 0 */
+    if ((unsigned long)(size) > (unsigned long)(nb)) {
+
+      char *mbrk = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE));
+
+      if (mbrk != MAP_FAILED) {
+
+        /* We do not need, and cannot use, another sbrk call to find end */
+        brk = mbrk;
+        snd_brk = brk + size;
+
+        /*
+           Record that we no longer have a contiguous sbrk region.
+           After the first time mmap is used as backup, we do not
+           ever rely on contiguous space since this could incorrectly
+           bridge regions.
+        */
+        set_noncontiguous(av);
+      }
+    }
+#endif
+  }
+
+  if (brk != (char*)(MORECORE_FAILURE)) {
+    if (mp_.sbrk_base == 0)
+      mp_.sbrk_base = brk;
+    av->system_mem += size;
+
+    /*
+      If MORECORE extends previous space, we can likewise extend top size.
+    */
+
+    if (brk == old_end && snd_brk == (char*)(MORECORE_FAILURE))
+      set_head(old_top, (size + old_size) | PREV_INUSE);
+
+    else if (contiguous(av) && old_size && brk < old_end) {
+      /* Oops!  Someone else killed our space..  Can't touch anything.  */
+      assert(0);
+    }
+
+    /*
+      Otherwise, make adjustments:
+
+      * If the first time through or noncontiguous, we need to call sbrk
+        just to find out where the end of memory lies.
+
+      * We need to ensure that all returned chunks from malloc will meet
+        MALLOC_ALIGNMENT
+
+      * If there was an intervening foreign sbrk, we need to adjust sbrk
+        request size to account for fact that we will not be able to
+        combine new space with existing space in old_top.
+
+      * Almost all systems internally allocate whole pages at a time, in
+        which case we might as well use the whole last page of request.
+        So we allocate enough more memory to hit a page boundary now,
+        which in turn causes future contiguous calls to page-align.
+    */
+
+    else {
+      front_misalign = 0;
+      end_misalign = 0;
+      correction = 0;
+      aligned_brk = brk;
+
+      /* handle contiguous cases */
+      if (contiguous(av)) {
+
+	/* Count foreign sbrk as system_mem.  */
+	if (old_size)
+	  av->system_mem += brk - old_end;
+
+        /* Guarantee alignment of first new chunk made from this space */
+
+        front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK;
+        if (front_misalign > 0) {
+
+          /*
+            Skip over some bytes to arrive at an aligned position.
+            We don't need to specially mark these wasted front bytes.
+            They will never be accessed anyway because
+            prev_inuse of av->top (and any chunk created from its start)
+            is always true after initialization.
+          */
+
+          correction = MALLOC_ALIGNMENT - front_misalign;
+          aligned_brk += correction;
+        }
+
+        /*
+          If this isn't adjacent to existing space, then we will not
+          be able to merge with old_top space, so must add to 2nd request.
+        */
+
+        correction += old_size;
+
+        /* Extend the end address to hit a page boundary */
+        end_misalign = (INTERNAL_SIZE_T)(brk + size + correction);
+        correction += ((end_misalign + pagemask) & ~pagemask) - end_misalign;
+
+        assert(correction >= 0);
+        snd_brk = (char*)(MORECORE(correction));
+
+        /*
+          If can't allocate correction, try to at least find out current
+          brk.  It might be enough to proceed without failing.
+
+          Note that if second sbrk did NOT fail, we assume that space
+          is contiguous with first sbrk. This is a safe assumption unless
+          program is multithreaded but doesn't use locks and a foreign sbrk
+          occurred between our first and second calls.
+        */
+
+        if (snd_brk == (char*)(MORECORE_FAILURE)) {
+          correction = 0;
+          snd_brk = (char*)(MORECORE(0));
+        } else
+	  /* Call the `morecore' hook if necessary.  */
+	  if (__after_morecore_hook)
+	    (*__after_morecore_hook) ();
+      }
+
+      /* handle non-contiguous cases */
+      else {
+        /* MORECORE/mmap must correctly align */
+        assert(((unsigned long)chunk2mem(brk) & MALLOC_ALIGN_MASK) == 0);
+
+        /* Find out current end of memory */
+        if (snd_brk == (char*)(MORECORE_FAILURE)) {
+          snd_brk = (char*)(MORECORE(0));
+        }
+      }
+
+      /* Adjust top based on results of second sbrk */
+      if (snd_brk != (char*)(MORECORE_FAILURE)) {
+        av->top = (mchunkptr)aligned_brk;
+        set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+        av->system_mem += correction;
+
+        /*
+          If not the first time through, we either have a
+          gap due to foreign sbrk or a non-contiguous region.  Insert a
+          double fencepost at old_top to prevent consolidation with space
+          we don't own. These fenceposts are artificial chunks that are
+          marked as inuse and are in any case too small to use.  We need
+          two to make sizes and alignments work out.
+        */
+
+        if (old_size != 0) {
+          /*
+             Shrink old_top to insert fenceposts, keeping size a
+             multiple of MALLOC_ALIGNMENT. We know there is at least
+             enough space in old_top to do this.
+          */
+          old_size = (old_size - 4*SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+          set_head(old_top, old_size | PREV_INUSE);
+
+          /*
+            Note that the following assignments completely overwrite
+            old_top when old_size was previously MINSIZE.  This is
+            intentional. We need the fencepost, even if old_top otherwise gets
+            lost.
+          */
+          chunk_at_offset(old_top, old_size            )->size =
+            (2*SIZE_SZ)|PREV_INUSE;
+
+          chunk_at_offset(old_top, old_size + 2*SIZE_SZ)->size =
+            (2*SIZE_SZ)|PREV_INUSE;
+
+          /* If possible, release the rest. */
+          if (old_size >= MINSIZE) {
+            _int_free(av, chunk2mem(old_top));
+          }
+
+        }
+      }
+    }
+
+    /* Update statistics */
+#ifdef NO_THREADS
+    sum = av->system_mem + mp_.mmapped_mem;
+    if (sum > (unsigned long)(mp_.max_total_mem))
+      mp_.max_total_mem = sum;
+#endif
+
+  }
+
+  } /* if (av !=  &main_arena) */
+
+  if ((unsigned long)av->system_mem > (unsigned long)(av->max_system_mem))
+    av->max_system_mem = av->system_mem;
+  check_malloc_state(av);
+
+  /* finally, do the allocation */
+  p = av->top;
+  size = chunksize(p);
+
+  /* check that one of the above allocation paths succeeded */
+  if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) {
+    remainder_size = size - nb;
+    remainder = chunk_at_offset(p, nb);
+    av->top = remainder;
+    set_head(p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
+    set_head(remainder, remainder_size | PREV_INUSE);
+    check_malloced_chunk(av, p, nb);
+    return chunk2mem(p);
+  }
+
+  /* catch all failure paths */
+  MALLOC_FAILURE_ACTION;
+  return 0;
+}
+
+
+/*
+  sYSTRIm is an inverse of sorts to sYSMALLOc.  It gives memory back
+  to the system (via negative arguments to sbrk) if there is unused
+  memory at the `high' end of the malloc pool. It is called
+  automatically by free() when top space exceeds the trim
+  threshold. It is also called by the public malloc_trim routine.  It
+  returns 1 if it actually released any memory, else 0.
+*/
+
+#if __STD_C
+static int sYSTRIm(size_t pad, mstate av)
+#else
+static int sYSTRIm(pad, av) size_t pad; mstate av;
+#endif
+{
+  long  top_size;        /* Amount of top-most memory */
+  long  extra;           /* Amount to release */
+  long  released;        /* Amount actually released */
+  char* current_brk;     /* address returned by pre-check sbrk call */
+  char* new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesz;
+
+  pagesz = mp_.pagesize;
+  top_size = chunksize(av->top);
+
+  /* Release in pagesize units, keeping at least one page */
+  extra = ((top_size - pad - MINSIZE + (pagesz-1)) / pagesz - 1) * pagesz;
+
+  if (extra > 0) {
+
+    /*
+      Only proceed if end of memory is where we last set it.
+      This avoids problems if there were foreign sbrk calls.
+    */
+    current_brk = (char*)(MORECORE(0));
+    if (current_brk == (char*)(av->top) + top_size) {
+
+      /*
+        Attempt to release memory. We ignore MORECORE return value,
+        and instead call again to find out where new end of memory is.
+        This avoids problems if first call releases less than we asked,
+        of if failure somehow altered brk value. (We could still
+        encounter problems if it altered brk in some very bad way,
+        but the only thing we can do is adjust anyway, which will cause
+        some downstream failure.)
+      */
+
+      MORECORE(-extra);
+      /* Call the `morecore' hook if necessary.  */
+      if (__after_morecore_hook)
+	(*__after_morecore_hook) ();
+      new_brk = (char*)(MORECORE(0));
+
+      if (new_brk != (char*)MORECORE_FAILURE) {
+        released = (long)(current_brk - new_brk);
+
+        if (released != 0) {
+          /* Success. Adjust top. */
+          av->system_mem -= released;
+          set_head(av->top, (top_size - released) | PREV_INUSE);
+          check_malloc_state(av);
+          return 1;
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+#ifdef HAVE_MMAP
+
+static void
+internal_function
+#if __STD_C
+munmap_chunk(mchunkptr p)
+#else
+munmap_chunk(p) mchunkptr p;
+#endif
+{
+  INTERNAL_SIZE_T size = chunksize(p);
+  int ret;
+
+  assert (chunk_is_mmapped(p));
+#if 0
+  assert(! ((char*)p >= mp_.sbrk_base && (char*)p < mp_.sbrk_base + mp_.sbrked_mem));
+  assert((mp_.n_mmaps > 0));
+#endif
+  assert(((p->prev_size + size) & (mp_.pagesize-1)) == 0);
+
+  mp_.n_mmaps--;
+  mp_.mmapped_mem -= (size + p->prev_size);
+
+  ret = munmap((char *)p - p->prev_size, size + p->prev_size);
+
+  /* munmap returns non-zero on failure */
+  assert(ret == 0);
+}
+
+#if HAVE_MREMAP
+
+static mchunkptr
+internal_function
+#if __STD_C
+mremap_chunk(mchunkptr p, size_t new_size)
+#else
+mremap_chunk(p, new_size) mchunkptr p; size_t new_size;
+#endif
+{
+  size_t page_mask = mp_.pagesize - 1;
+  INTERNAL_SIZE_T offset = p->prev_size;
+  INTERNAL_SIZE_T size = chunksize(p);
+  char *cp;
+
+  assert (chunk_is_mmapped(p));
+#if 0
+  assert(! ((char*)p >= mp_.sbrk_base && (char*)p < mp_.sbrk_base + mp_.sbrked_mem));
+  assert((mp_.n_mmaps > 0));
+#endif
+  assert(((size + offset) & (mp_.pagesize-1)) == 0);
+
+  /* Note the extra SIZE_SZ overhead as in mmap_chunk(). */
+  new_size = (new_size + offset + SIZE_SZ + page_mask) & ~page_mask;
+
+  cp = (char *)mremap((char *)p - offset, size + offset, new_size,
+                      MREMAP_MAYMOVE);
+
+  if (cp == MAP_FAILED) return 0;
+
+  p = (mchunkptr)(cp + offset);
+
+  assert(aligned_OK(chunk2mem(p)));
+
+  assert((p->prev_size == offset));
+  set_head(p, (new_size - offset)|IS_MMAPPED);
+
+  mp_.mmapped_mem -= size + offset;
+  mp_.mmapped_mem += new_size;
+  if ((unsigned long)mp_.mmapped_mem > (unsigned long)mp_.max_mmapped_mem)
+    mp_.max_mmapped_mem = mp_.mmapped_mem;
+#ifdef NO_THREADS
+  if ((unsigned long)(mp_.mmapped_mem + arena_mem + main_arena.system_mem) >
+      mp_.max_total_mem)
+    mp_.max_total_mem = mp_.mmapped_mem + arena_mem + main_arena.system_mem;
+#endif
+  return p;
+}
+
+#endif /* HAVE_MREMAP */
+
+#endif /* HAVE_MMAP */
+
+/*------------------------ Public wrappers. --------------------------------*/
+
+Void_t*
+public_mALLOc(size_t bytes)
+{
+  mstate ar_ptr;
+  Void_t *victim;
+
+  __malloc_ptr_t (*hook) __MALLOC_P ((size_t, __const __malloc_ptr_t)) =
+    __malloc_hook;
+  if (hook != NULL)
+    return (*hook)(bytes, RETURN_ADDRESS (0));
+
+  arena_get(ar_ptr, bytes);
+  if(!ar_ptr)
+    return 0;
+  victim = _int_malloc(ar_ptr, bytes);
+  if(!victim) {
+    /* Maybe the failure is due to running out of mmapped areas. */
+    if(ar_ptr != &main_arena) {
+      (void)mutex_unlock(&ar_ptr->mutex);
+      (void)mutex_lock(&main_arena.mutex);
+      victim = _int_malloc(&main_arena, bytes);
+      (void)mutex_unlock(&main_arena.mutex);
+    } else {
+#if USE_ARENAS
+      /* ... or sbrk() has failed and there is still a chance to mmap() */
+      ar_ptr = arena_get2(ar_ptr->next ? ar_ptr : 0, bytes);
+      (void)mutex_unlock(&main_arena.mutex);
+      if(ar_ptr) {
+        victim = _int_malloc(ar_ptr, bytes);
+        (void)mutex_unlock(&ar_ptr->mutex);
+      }
+#endif
+    }
+  } else
+    (void)mutex_unlock(&ar_ptr->mutex);
+  assert(!victim || chunk_is_mmapped(mem2chunk(victim)) ||
+	 ar_ptr == arena_for_chunk(mem2chunk(victim)));
+  return victim;
+}
+#ifdef libc_hidden_def
+libc_hidden_def(public_mALLOc)
+#endif
+
+void
+public_fREe(Void_t* mem)
+{
+  mstate ar_ptr;
+  mchunkptr p;                          /* chunk corresponding to mem */
+
+  void (*hook) __MALLOC_P ((__malloc_ptr_t, __const __malloc_ptr_t)) =
+    __free_hook;
+  if (hook != NULL) {
+    (*hook)(mem, RETURN_ADDRESS (0));
+    return;
+  }
+
+  if (mem == 0)                              /* free(0) has no effect */
+    return;
+
+  p = mem2chunk(mem);
+
+#if HAVE_MMAP
+  if (chunk_is_mmapped(p))                       /* release mmapped memory. */
+  {
+    munmap_chunk(p);
+    return;
+  }
+#endif
+
+  ar_ptr = arena_for_chunk(p);
+#if THREAD_STATS
+  if(!mutex_trylock(&ar_ptr->mutex))
+    ++(ar_ptr->stat_lock_direct);
+  else {
+    (void)mutex_lock(&ar_ptr->mutex);
+    ++(ar_ptr->stat_lock_wait);
+  }
+#else
+  (void)mutex_lock(&ar_ptr->mutex);
+#endif
+  _int_free(ar_ptr, mem);
+  (void)mutex_unlock(&ar_ptr->mutex);
+}
+#ifdef libc_hidden_def
+libc_hidden_def (public_fREe)
+#endif
+
+Void_t*
+public_rEALLOc(Void_t* oldmem, size_t bytes)
+{
+  mstate ar_ptr;
+  INTERNAL_SIZE_T    nb;      /* padded request size */
+
+  mchunkptr oldp;             /* chunk corresponding to oldmem */
+  INTERNAL_SIZE_T    oldsize; /* its size */
+
+  Void_t* newp;             /* chunk to return */
+
+  __malloc_ptr_t (*hook) __MALLOC_P ((__malloc_ptr_t, size_t,
+				      __const __malloc_ptr_t)) =
+    __realloc_hook;
+  if (hook != NULL)
+    return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0 && oldmem != NULL) { public_fREe(oldmem); return 0; }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0) return public_mALLOc(bytes);
+
+  oldp    = mem2chunk(oldmem);
+  oldsize = chunksize(oldp);
+
+  checked_request2size(bytes, nb);
+
+#if HAVE_MMAP
+  if (chunk_is_mmapped(oldp))
+  {
+    Void_t* newmem;
+
+#if HAVE_MREMAP
+    newp = mremap_chunk(oldp, nb);
+    if(newp) return chunk2mem(newp);
+#endif
+    /* Note the extra SIZE_SZ overhead. */
+    if(oldsize - SIZE_SZ >= nb) return oldmem; /* do nothing */
+    /* Must alloc, copy, free. */
+    newmem = public_mALLOc(bytes);
+    if (newmem == 0) return 0; /* propagate failure */
+    MALLOC_COPY(newmem, oldmem, oldsize - 2*SIZE_SZ);
+    munmap_chunk(oldp);
+    return newmem;
+  }
+#endif
+
+  ar_ptr = arena_for_chunk(oldp);
+#if THREAD_STATS
+  if(!mutex_trylock(&ar_ptr->mutex))
+    ++(ar_ptr->stat_lock_direct);
+  else {
+    (void)mutex_lock(&ar_ptr->mutex);
+    ++(ar_ptr->stat_lock_wait);
+  }
+#else
+  (void)mutex_lock(&ar_ptr->mutex);
+#endif
+
+#ifndef NO_THREADS
+  /* As in malloc(), remember this arena for the next allocation. */
+  tsd_setspecific(arena_key, (Void_t *)ar_ptr);
+#endif
+
+  newp = _int_realloc(ar_ptr, oldmem, bytes);
+
+  (void)mutex_unlock(&ar_ptr->mutex);
+  assert(!newp || chunk_is_mmapped(mem2chunk(newp)) ||
+	 ar_ptr == arena_for_chunk(mem2chunk(newp)));
+  return newp;
+}
+#ifdef libc_hidden_def
+libc_hidden_def (public_rEALLOc)
+#endif
+
+Void_t*
+public_mEMALIGn(size_t alignment, size_t bytes)
+{
+  mstate ar_ptr;
+  Void_t *p;
+
+  __malloc_ptr_t (*hook) __MALLOC_PMT ((size_t, size_t,
+					__const __malloc_ptr_t)) =
+    __memalign_hook;
+  if (hook != NULL)
+    return (*hook)(alignment, bytes, RETURN_ADDRESS (0));
+
+  /* If need less alignment than we give anyway, just relay to malloc */
+  if (alignment <= MALLOC_ALIGNMENT) return public_mALLOc(bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+  if (alignment <  MINSIZE) alignment = MINSIZE;
+
+  arena_get(ar_ptr, bytes + alignment + MINSIZE);
+  if(!ar_ptr)
+    return 0;
+  p = _int_memalign(ar_ptr, alignment, bytes);
+  (void)mutex_unlock(&ar_ptr->mutex);
+  if(!p) {
+    /* Maybe the failure is due to running out of mmapped areas. */
+    if(ar_ptr != &main_arena) {
+      (void)mutex_lock(&main_arena.mutex);
+      p = _int_memalign(&main_arena, alignment, bytes);
+      (void)mutex_unlock(&main_arena.mutex);
+    } else {
+#if USE_ARENAS
+      /* ... or sbrk() has failed and there is still a chance to mmap() */
+      ar_ptr = arena_get2(ar_ptr->next ? ar_ptr : 0, bytes);
+      if(ar_ptr) {
+        p = _int_memalign(ar_ptr, alignment, bytes);
+        (void)mutex_unlock(&ar_ptr->mutex);
+      }
+#endif
+    }
+  }
+  assert(!p || chunk_is_mmapped(mem2chunk(p)) ||
+	 ar_ptr == arena_for_chunk(mem2chunk(p)));
+  return p;
+}
+#ifdef libc_hidden_def
+libc_hidden_def (public_mEMALIGn)
+#endif
+
+Void_t*
+public_vALLOc(size_t bytes)
+{
+  mstate ar_ptr;
+  Void_t *p;
+
+  if(__malloc_initialized < 0)
+    ptmalloc_init ();
+  arena_get(ar_ptr, bytes + mp_.pagesize + MINSIZE);
+  if(!ar_ptr)
+    return 0;
+  p = _int_valloc(ar_ptr, bytes);
+  (void)mutex_unlock(&ar_ptr->mutex);
+  return p;
+}
+
+Void_t*
+public_pVALLOc(size_t bytes)
+{
+  mstate ar_ptr;
+  Void_t *p;
+
+  if(__malloc_initialized < 0)
+    ptmalloc_init ();
+  arena_get(ar_ptr, bytes + 2*mp_.pagesize + MINSIZE);
+  p = _int_pvalloc(ar_ptr, bytes);
+  (void)mutex_unlock(&ar_ptr->mutex);
+  return p;
+}
+
+Void_t*
+public_cALLOc(size_t n, size_t elem_size)
+{
+  mstate av;
+  mchunkptr oldtop, p;
+  INTERNAL_SIZE_T bytes, sz, csz, oldtopsize;
+  Void_t* mem;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T* d;
+  __malloc_ptr_t (*hook) __MALLOC_PMT ((size_t, __const __malloc_ptr_t)) =
+    __malloc_hook;
+
+  /* size_t is unsigned so the behavior on overflow is defined.  */
+  bytes = n * elem_size;
+#define HALF_INTERNAL_SIZE_T \
+  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
+  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0)) {
+    if (elem_size != 0 && bytes / elem_size != n) {
+      MALLOC_FAILURE_ACTION;
+      return 0;
+    }
+  }
+
+  if (hook != NULL) {
+    sz = bytes;
+    mem = (*hook)(sz, RETURN_ADDRESS (0));
+    if(mem == 0)
+      return 0;
+#ifdef HAVE_MEMCPY
+    return memset(mem, 0, sz);
+#else
+    while(sz > 0) ((char*)mem)[--sz] = 0; /* rather inefficient */
+    return mem;
+#endif
+  }
+
+  sz = bytes;
+
+  arena_get(av, sz);
+  if(!av)
+    return 0;
+
+  /* Check if we hand out the top chunk, in which case there may be no
+     need to clear. */
+#if MORECORE_CLEARS
+  oldtop = top(av);
+  oldtopsize = chunksize(top(av));
+#if MORECORE_CLEARS < 2
+  /* Only newly allocated memory is guaranteed to be cleared.  */
+  if (av == &main_arena &&
+      oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *)oldtop)
+    oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *)oldtop);
+#endif
+#endif
+  mem = _int_malloc(av, sz);
+
+  /* Only clearing follows, so we can unlock early. */
+  (void)mutex_unlock(&av->mutex);
+
+  assert(!mem || chunk_is_mmapped(mem2chunk(mem)) ||
+	 av == arena_for_chunk(mem2chunk(mem)));
+
+  if (mem == 0) {
+    /* Maybe the failure is due to running out of mmapped areas. */
+    if(av != &main_arena) {
+      (void)mutex_lock(&main_arena.mutex);
+      mem = _int_malloc(&main_arena, sz);
+      (void)mutex_unlock(&main_arena.mutex);
+    } else {
+#if USE_ARENAS
+      /* ... or sbrk() has failed and there is still a chance to mmap() */
+      (void)mutex_lock(&main_arena.mutex);
+      av = arena_get2(av->next ? av : 0, sz);
+      (void)mutex_unlock(&main_arena.mutex);
+      if(av) {
+        mem = _int_malloc(av, sz);
+        (void)mutex_unlock(&av->mutex);
+      }
+#endif
+    }
+    if (mem == 0) return 0;
+  }
+  p = mem2chunk(mem);
+
+  /* Two optional cases in which clearing not necessary */
+#if HAVE_MMAP
+  if (chunk_is_mmapped(p))
+    return mem;
+#endif
+
+  csz = chunksize(p);
+
+#if MORECORE_CLEARS
+  if (p == oldtop && csz > oldtopsize) {
+    /* clear only the bytes from non-freshly-sbrked memory */
+    csz = oldtopsize;
+  }
+#endif
+
+  /* Unroll clear of <= 36 bytes (72 if 8byte sizes).  We know that
+     contents have an odd number of INTERNAL_SIZE_T-sized words;
+     minimally 3.  */
+  d = (INTERNAL_SIZE_T*)mem;
+  clearsize = csz - SIZE_SZ;
+  nclears = clearsize / sizeof(INTERNAL_SIZE_T);
+  assert(nclears >= 3);
+
+  if (nclears > 9)
+    MALLOC_ZERO(d, clearsize);
+
+  else {
+    *(d+0) = 0;
+    *(d+1) = 0;
+    *(d+2) = 0;
+    if (nclears > 4) {
+      *(d+3) = 0;
+      *(d+4) = 0;
+      if (nclears > 6) {
+	*(d+5) = 0;
+	*(d+6) = 0;
+	if (nclears > 8) {
+	  *(d+7) = 0;
+	  *(d+8) = 0;
+	}
+      }
+    }
+  }
+
+  return mem;
+}
+
+Void_t**
+public_iCALLOc(size_t n, size_t elem_size, Void_t** chunks)
+{
+  mstate ar_ptr;
+  Void_t** m;
+
+  arena_get(ar_ptr, n*elem_size);
+  if(!ar_ptr)
+    return 0;
+
+  m = _int_icalloc(ar_ptr, n, elem_size, chunks);
+  (void)mutex_unlock(&ar_ptr->mutex);
+  return m;
+}
+
+Void_t**
+public_iCOMALLOc(size_t n, size_t sizes[], Void_t** chunks)
+{
+  mstate ar_ptr;
+  Void_t** m;
+
+  arena_get(ar_ptr, 0);
+  if(!ar_ptr)
+    return 0;
+
+  m = _int_icomalloc(ar_ptr, n, sizes, chunks);
+  (void)mutex_unlock(&ar_ptr->mutex);
+  return m;
+}
+
+#ifndef _LIBC
+
+void
+public_cFREe(Void_t* m)
+{
+  public_fREe(m);
+}
+
+#endif /* _LIBC */
+
+int
+public_mTRIm(size_t s)
+{
+  int result;
+
+  (void)mutex_lock(&main_arena.mutex);
+  result = mTRIm(s);
+  (void)mutex_unlock(&main_arena.mutex);
+  return result;
+}
+
+size_t
+public_mUSABLe(Void_t* m)
+{
+  size_t result;
+
+  result = mUSABLe(m);
+  return result;
+}
+
+/* This exists mainly for backward compatibility.  Calling
+   _int_get_arena_info() directly is more useful.  */
+struct mallinfo
+public_mALLINFo()
+{
+  struct malloc_arena_info mai;
+  struct mallinfo m;
+  size_t avail;
+
+  if(__malloc_initialized < 0)
+    ptmalloc_init ();
+  _int_get_arena_info(&main_arena, &mai);
+  /* Account for top */
+  avail = mai.fastavail + mai.binavail + mai.top_size;
+  m.smblks = mai.nfastblocks;
+  m.ordblks = mai.nbinblocks + 1;
+  m.fordblks = avail;
+  m.uordblks = mai.system_mem - avail;
+  m.arena = mai.system_mem;
+  m.hblks = mp_.n_mmaps;
+  m.hblkhd = mp_.mmapped_mem;
+  m.fsmblks = mai.fastavail;
+  m.keepcost = mai.top_size;
+  m.usmblks = mp_.max_total_mem;
+  return m;
+}
+
+int
+public_mALLOPt(int p, int v)
+{
+  int result;
+  result = mALLOPt(p, v);
+  return result;
+}
+
+/*
+  ------------------------------ malloc ------------------------------
+*/
+
+Void_t*
+_int_malloc(mstate av, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int    idx;              /* associated bin index */
+  mbinptr         bin;              /* associated bin */
+  mfastbinptr*    fb;               /* associated fastbin */
+
+  mchunkptr       victim;           /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int             victim_index;     /* its bin index */
+
+  mchunkptr       remainder;        /* remainder from a split */
+  unsigned long   remainder_size;   /* its size */
+
+  unsigned int    block;            /* bit map traverser */
+  unsigned int    bit;              /* bit map traverser */
+  unsigned int    map;              /* current word of binmap */
+
+  mchunkptr       fwd;              /* misc temp for linking */
+  mchunkptr       bck;              /* misc temp for linking */
+
+  /*
+    Convert request size to internal form by adding SIZE_SZ bytes
+    overhead plus possibly more to obtain necessary alignment and/or
+    to obtain a size of at least MINSIZE, the smallest allocatable
+    size. Also, checked_request2size traps (returning 0) request sizes
+    that are so large that they wrap around zero when padded and
+    aligned.
+  */
+
+  checked_request2size(bytes, nb);
+
+  /*
+    If the size qualifies as a fastbin, first check corresponding bin.
+    This code is safe to execute even if av is not yet initialized, so we
+    can try it without checking, which saves some time on this fast path.
+  */
+
+  if ((unsigned long)(nb) <= (unsigned long)(av->max_fast)) {
+    fb = &(av->fastbins[(fastbin_index(nb))]);
+    if ( (victim = *fb) != 0) {
+      *fb = victim->fd;
+      check_remalloced_chunk(av, victim, nb);
+      return chunk2mem(victim);
+    }
+  }
+
+  /*
+    If a small request, check regular bin.  Since these "smallbins"
+    hold one size each, no searching within bins is necessary.
+    (For a large request, we need to wait until unsorted chunks are
+    processed to find best fit. But for small ones, fits are exact
+    anyway, so we can check now, which is faster.)
+  */
+
+  if (in_smallbin_range(nb)) {
+    idx = smallbin_index(nb);
+    bin = bin_at(av,idx);
+
+    if ( (victim = last(bin)) != bin) {
+      if (victim == 0) /* initialization check */
+        malloc_consolidate(av);
+      else {
+        bck = victim->bk;
+        set_inuse_bit_at_offset(victim, nb);
+        bin->bk = bck;
+        bck->fd = bin;
+
+        if (av != &main_arena)
+	  victim->size |= NON_MAIN_ARENA;
+        check_malloced_chunk(av, victim, nb);
+        return chunk2mem(victim);
+      }
+    }
+  }
+
+  /*
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs that
+     it is called frequently in otherwise tend to fragment.
+  */
+
+  else {
+    idx = largebin_index(nb);
+    if (have_fastchunks(av))
+      malloc_consolidate(av);
+  }
+
+  /*
+    Process recently freed or remaindered chunks, taking one only if
+    it is exact fit, or, if this a small request, the chunk is remainder from
+    the most recent non-exact fit.  Place other traversed chunks in
+    bins.  Note that this step is the only place in any routine where
+    chunks are placed in bins.
+
+    The outer loop here is needed because we might not realize until
+    near the end of malloc that we should have consolidated, so must
+    do so and retry. This happens at most once, and only when we would
+    otherwise need to expand memory to service a "small" request.
+  */
+
+  for(;;) {
+
+    while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) {
+      bck = victim->bk;
+      size = chunksize(victim);
+
+      /*
+         If a small request, try to use last remainder if it is the
+         only chunk in unsorted bin.  This helps promote locality for
+         runs of consecutive small requests. This is the only
+         exception to best-fit, and applies only when there is
+         no exact fit for a small chunk.
+      */
+
+      if (in_smallbin_range(nb) &&
+          bck == unsorted_chunks(av) &&
+          victim == av->last_remainder &&
+          (unsigned long)(size) > (unsigned long)(nb + MINSIZE)) {
+
+        /* split and reattach remainder */
+        remainder_size = size - nb;
+        remainder = chunk_at_offset(victim, nb);
+        unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
+        av->last_remainder = remainder;
+        remainder->bk = remainder->fd = unsorted_chunks(av);
+
+        set_head(victim, nb | PREV_INUSE |
+		 (av != &main_arena ? NON_MAIN_ARENA : 0));
+        set_head(remainder, remainder_size | PREV_INUSE);
+        set_foot(remainder, remainder_size);
+
+        check_malloced_chunk(av, victim, nb);
+        return chunk2mem(victim);
+      }
+
+      /* remove from unsorted list */
+      unsorted_chunks(av)->bk = bck;
+      bck->fd = unsorted_chunks(av);
+
+      /* Take now instead of binning if exact fit */
+
+      if (size == nb) {
+        set_inuse_bit_at_offset(victim, size);
+	if (av != &main_arena)
+	  victim->size |= NON_MAIN_ARENA;
+        check_malloced_chunk(av, victim, nb);
+        return chunk2mem(victim);
+      }
+
+      /* place chunk in bin */
+
+      if (in_smallbin_range(size)) {
+        victim_index = smallbin_index(size);
+        bck = bin_at(av, victim_index);
+        fwd = bck->fd;
+      }
+      else {
+        victim_index = largebin_index(size);
+        bck = bin_at(av, victim_index);
+        fwd = bck->fd;
+
+        if (fwd != bck) {
+          /* if smaller than smallest, place first */
+	  assert((bck->bk->size & NON_MAIN_ARENA) == 0);
+          if ((unsigned long)(size) < (unsigned long)(bck->bk->size)) {
+            fwd = bck;
+            bck = bck->bk;
+          }
+          else if ((unsigned long)(size) >= 
+                   (unsigned long)(FIRST_SORTED_BIN_SIZE)) {
+
+            /* maintain large bins in sorted order */
+            size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */
+	    assert((fwd->size & NON_MAIN_ARENA) == 0);
+            while ((unsigned long)(size) < (unsigned long)(fwd->size)) {
+              fwd = fwd->fd;
+	      assert((fwd->size & NON_MAIN_ARENA) == 0);
+	    }
+            bck = fwd->bk;
+          }
+        }
+      }
+
+      mark_bin(av, victim_index);
+      victim->bk = bck;
+      victim->fd = fwd;
+      fwd->bk = victim;
+      bck->fd = victim;
+    }
+
+    /*
+      If a large request, scan through the chunks of current bin in
+      sorted order to find smallest that fits.  This is the only step
+      where an unbounded number of chunks might be scanned without doing
+      anything useful with them. However the lists tend to be short.
+    */
+
+    if (!in_smallbin_range(nb)) {
+      bin = bin_at(av, idx);
+
+      for (victim = last(bin); victim != bin; victim = victim->bk) {
+	size = chunksize(victim);
+
+	if ((unsigned long)(size) >= (unsigned long)(nb)) {
+	  remainder_size = size - nb;
+	  unlink(victim, bck, fwd);
+
+	  /* Exhaust */
+	  if (remainder_size < MINSIZE)  {
+	    set_inuse_bit_at_offset(victim, size);
+	    if (av != &main_arena)
+	      victim->size |= NON_MAIN_ARENA;
+	    check_malloced_chunk(av, victim, nb);
+	    return chunk2mem(victim);
+	  }
+	  /* Split */
+	  else {
+	    remainder = chunk_at_offset(victim, nb);
+	    unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
+	    remainder->bk = remainder->fd = unsorted_chunks(av);
+	    set_head(victim, nb | PREV_INUSE |
+		     (av != &main_arena ? NON_MAIN_ARENA : 0));
+	    set_head(remainder, remainder_size | PREV_INUSE);
+	    set_foot(remainder, remainder_size);
+	    check_malloced_chunk(av, victim, nb);
+	    return chunk2mem(victim);
+	  }
+	}
+      }
+    }
+
+    /*
+      Search for a chunk by scanning bins, starting with next largest
+      bin. This search is strictly by best-fit; i.e., the smallest
+      (with ties going to approximately the least recently used) chunk
+      that fits is selected.
+
+      The bitmap avoids needing to check that most blocks are nonempty.
+      The particular case of skipping all bins during warm-up phases
+      when no chunks have been returned yet is faster than it might look.
+    */
+
+    ++idx;
+    bin = bin_at(av,idx);
+    block = idx2block(idx);
+    map = av->binmap[block];
+    bit = idx2bit(idx);
+
+    for (;;) {
+
+      /* Skip rest of block if there are no more set bits in this block.  */
+      if (bit > map || bit == 0) {
+        do {
+          if (++block >= BINMAPSIZE)  /* out of bins */
+            goto use_top;
+        } while ( (map = av->binmap[block]) == 0);
+
+        bin = bin_at(av, (block << BINMAPSHIFT));
+        bit = 1;
+      }
+
+      /* Advance to bin with set bit. There must be one. */
+      while ((bit & map) == 0) {
+        bin = next_bin(bin);
+        bit <<= 1;
+        assert(bit != 0);
+      }
+
+      /* Inspect the bin. It is likely to be non-empty */
+      victim = last(bin);
+
+      /*  If a false alarm (empty bin), clear the bit. */
+      if (victim == bin) {
+        av->binmap[block] = map &= ~bit; /* Write through */
+        bin = next_bin(bin);
+        bit <<= 1;
+      }
+
+      else {
+        size = chunksize(victim);
+
+        /*  We know the first chunk in this bin is big enough to use. */
+        assert((unsigned long)(size) >= (unsigned long)(nb));
+
+        remainder_size = size - nb;
+
+        /* unlink */
+        bck = victim->bk;
+        bin->bk = bck;
+        bck->fd = bin;
+
+        /* Exhaust */
+        if (remainder_size < MINSIZE) {
+          set_inuse_bit_at_offset(victim, size);
+	  if (av != &main_arena)
+	    victim->size |= NON_MAIN_ARENA;
+          check_malloced_chunk(av, victim, nb);
+          return chunk2mem(victim);
+        }
+
+        /* Split */
+        else {
+          remainder = chunk_at_offset(victim, nb);
+
+          unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
+          remainder->bk = remainder->fd = unsorted_chunks(av);
+          /* advertise as last remainder */
+          if (in_smallbin_range(nb))
+            av->last_remainder = remainder;
+
+          set_head(victim, nb | PREV_INUSE |
+		   (av != &main_arena ? NON_MAIN_ARENA : 0));
+          set_head(remainder, remainder_size | PREV_INUSE);
+          set_foot(remainder, remainder_size);
+          check_malloced_chunk(av, victim, nb);
+          return chunk2mem(victim);
+        }
+      }
+    }
+
+  use_top:
+    /*
+      If large enough, split off the chunk bordering the end of memory
+      (held in av->top). Note that this is in accord with the best-fit
+      search rule.  In effect, av->top is treated as larger (and thus
+      less well fitting) than any other available chunk since it can
+      be extended to be as large as necessary (up to system
+      limitations).
+
+      We require that av->top always exists (i.e., has size >=
+      MINSIZE) after initialization, so if it would otherwise be
+      exhuasted by current request, it is replenished. (The main
+      reason for ensuring it exists is that we may need MINSIZE space
+      to put in fenceposts in sysmalloc.)
+    */
+
+    victim = av->top;
+    size = chunksize(victim);
+
+    if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset(victim, nb);
+      av->top = remainder;
+      set_head(victim, nb | PREV_INUSE |
+	       (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head(remainder, remainder_size | PREV_INUSE);
+
+      check_malloced_chunk(av, victim, nb);
+      return chunk2mem(victim);
+    }
+
+    /*
+      If there is space available in fastbins, consolidate and retry,
+      to possibly avoid expanding memory. This can occur only if nb is
+      in smallbin range so we didn't consolidate upon entry.
+    */
+
+    else if (have_fastchunks(av)) {
+      assert(in_smallbin_range(nb));
+      malloc_consolidate(av);
+      idx = smallbin_index(nb); /* restore original bin index */
+    }
+
+    /*
+       Otherwise, relay to handle system-dependent cases
+    */
+    else
+      return sYSMALLOc(nb, av);
+  }
+}
+
+/*
+  ------------------------------ free ------------------------------
+*/
+
+void
+_int_free(mstate av, Void_t* mem)
+{
+  mchunkptr       p;           /* chunk corresponding to mem */
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr*    fb;          /* associated fastbin */
+  mchunkptr       nextchunk;   /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  int             nextinuse;   /* true if nextchunk is used */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  mchunkptr       bck;         /* misc temp for linking */
+  mchunkptr       fwd;         /* misc temp for linking */
+
+
+  /* free(0) has no effect */
+  if (mem != 0) {
+    p = mem2chunk(mem);
+    size = chunksize(p);
+
+    check_inuse_chunk(av, p);
+
+    /*
+      If eligible, place chunk on a fastbin so it can be found
+      and used quickly in malloc.
+    */
+
+    if ((unsigned long)(size) <= (unsigned long)(av->max_fast)
+
+#if TRIM_FASTBINS
+        /*
+           If TRIM_FASTBINS set, don't place chunks
+           bordering top into fastbins
+        */
+        && (chunk_at_offset(p, size) != av->top)
+#endif
+        ) {
+
+      set_fastchunks(av);
+      fb = &(av->fastbins[fastbin_index(size)]);
+      p->fd = *fb;
+      *fb = p;
+    }
+
+    /*
+       Consolidate other non-mmapped chunks as they arrive.
+    */
+
+    else if (!chunk_is_mmapped(p)) {
+      nextchunk = chunk_at_offset(p, size);
+      nextsize = chunksize(nextchunk);
+      assert(nextsize > 0);
+
+      /* consolidate backward */
+      if (!prev_inuse(p)) {
+        prevsize = p->prev_size;
+        size += prevsize;
+        p = chunk_at_offset(p, -((long) prevsize));
+        unlink(p, bck, fwd);
+      }
+
+      if (nextchunk != av->top) {
+        /* get and clear inuse bit */
+        nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+        /* consolidate forward */
+        if (!nextinuse) {
+          unlink(nextchunk, bck, fwd);
+          size += nextsize;
+        } else
+	  clear_inuse_bit_at_offset(nextchunk, 0);
+
+        /*
+          Place the chunk in unsorted chunk list. Chunks are
+          not placed into regular bins until after they have
+          been given one chance to be used in malloc.
+        */
+
+        bck = unsorted_chunks(av);
+        fwd = bck->fd;
+        p->bk = bck;
+        p->fd = fwd;
+        bck->fd = p;
+        fwd->bk = p;
+
+        set_head(p, size | PREV_INUSE);
+        set_foot(p, size);
+
+        check_free_chunk(av, p);
+      }
+
+      /*
+         If the chunk borders the current high end of memory,
+         consolidate into top
+      */
+
+      else {
+        size += nextsize;
+        set_head(p, size | PREV_INUSE);
+        av->top = p;
+        check_chunk(av, p);
+      }
+
+      /*
+        If freeing a large space, consolidate possibly-surrounding
+        chunks. Then, if the total unused topmost memory exceeds trim
+        threshold, ask malloc_trim to reduce top.
+
+        Unless max_fast is 0, we don't know if there are fastbins
+        bordering top, so we cannot tell for sure whether threshold
+        has been reached unless fastbins are consolidated.  But we
+        don't want to consolidate on each free.  As a compromise,
+        consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+        is reached.
+      */
+
+      if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+        if (have_fastchunks(av))
+          malloc_consolidate(av);
+
+	if (av == &main_arena) {
+#ifndef MORECORE_CANNOT_TRIM
+	  if ((unsigned long)(chunksize(av->top)) >=
+	      (unsigned long)(mp_.trim_threshold))
+	    sYSTRIm(mp_.top_pad, av);
+#endif
+	} else {
+	  /* Always try heap_trim(), even if the top chunk is not
+             large, because the corresponding heap might go away.  */
+	  heap_info *heap = heap_for_ptr(top(av));
+
+	  assert(heap->ar_ptr == av);
+	  heap_trim(heap, mp_.top_pad);
+	}
+      }
+
+    }
+    /*
+      If the chunk was allocated via mmap, release via munmap(). Note
+      that if HAVE_MMAP is false but chunk_is_mmapped is true, then
+      user must have overwritten memory. There's nothing we can do to
+      catch this error unless MALLOC_DEBUG is set, in which case
+      check_inuse_chunk (above) will have triggered error.
+    */
+
+    else {
+#if HAVE_MMAP
+      int ret;
+      INTERNAL_SIZE_T offset = p->prev_size;
+      mp_.n_mmaps--;
+      mp_.mmapped_mem -= (size + offset);
+      ret = munmap((char*)p - offset, size + offset);
+      /* munmap returns non-zero on failure */
+      assert(ret == 0);
+#endif
+    }
+  }
+}
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+
+  Also, because this routine needs to be called the first time through
+  malloc anyway, it turns out to be the perfect place to trigger
+  initialization code.
+*/
+
+#if __STD_C
+static void malloc_consolidate(mstate av)
+#else
+static void malloc_consolidate(av) mstate av;
+#endif
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  mchunkptr       p;                  /* current chunk being consolidated */
+  mchunkptr       nextp;              /* next chunk to consolidate */
+  mchunkptr       unsorted_bin;       /* bin header */
+  mchunkptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  mchunkptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  int             nextinuse;
+  mchunkptr       bck;
+  mchunkptr       fwd;
+
+  /*
+    If max_fast is 0, we know that av hasn't
+    yet been initialized, in which case do so below
+  */
+
+  if (av->max_fast != 0) {
+    clear_fastchunks(av);
+
+    unsorted_bin = unsorted_chunks(av);
+
+    /*
+      Remove each chunk from fast bin and consolidate it, placing it
+      then in unsorted bin. Among other reasons for doing this,
+      placing in unsorted bin avoids needing to calculate actual bins
+      until malloc is sure that chunks aren't immediately going to be
+      reused anyway.
+    */
+
+    maxfb = &(av->fastbins[fastbin_index(av->max_fast)]);
+    fb = &(av->fastbins[0]);
+    do {
+      if ( (p = *fb) != 0) {
+        *fb = 0;
+
+        do {
+          check_inuse_chunk(av, p);
+          nextp = p->fd;
+
+          /* Slightly streamlined version of consolidation code in free() */
+          size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
+          nextchunk = chunk_at_offset(p, size);
+          nextsize = chunksize(nextchunk);
+
+          if (!prev_inuse(p)) {
+            prevsize = p->prev_size;
+            size += prevsize;
+            p = chunk_at_offset(p, -((long) prevsize));
+            unlink(p, bck, fwd);
+          }
+
+          if (nextchunk != av->top) {
+            nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+
+            if (!nextinuse) {
+              size += nextsize;
+              unlink(nextchunk, bck, fwd);
+            } else
+	      clear_inuse_bit_at_offset(nextchunk, 0);
+
+            first_unsorted = unsorted_bin->fd;
+            unsorted_bin->fd = p;
+            first_unsorted->bk = p;
+
+            set_head(p, size | PREV_INUSE);
+            p->bk = unsorted_bin;
+            p->fd = first_unsorted;
+            set_foot(p, size);
+          }
+
+          else {
+            size += nextsize;
+            set_head(p, size | PREV_INUSE);
+            av->top = p;
+          }
+
+        } while ( (p = nextp) != 0);
+
+      }
+    } while (fb++ != maxfb);
+  }
+  else {
+    malloc_init_state(av);
+    check_malloc_state(av);
+  }
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+Void_t*
+_int_realloc(mstate av, Void_t* oldmem, size_t bytes)
+{
+  INTERNAL_SIZE_T  nb;              /* padded request size */
+
+  mchunkptr        oldp;            /* chunk corresponding to oldmem */
+  INTERNAL_SIZE_T  oldsize;         /* its size */
+
+  mchunkptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  Void_t*          newmem;          /* corresponding user mem */
+
+  mchunkptr        next;            /* next contiguous chunk after oldp */
+
+  mchunkptr        remainder;       /* extra space at end of newp */
+  unsigned long    remainder_size;  /* its size */
+
+  mchunkptr        bck;             /* misc temp for linking */
+  mchunkptr        fwd;             /* misc temp for linking */
+
+  unsigned long    copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+
+#if REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0) {
+    _int_free(av, oldmem);
+    return 0;
+  }
+#endif
+
+  /* realloc of null is supposed to be same as malloc */
+  if (oldmem == 0) return _int_malloc(av, bytes);
+
+  checked_request2size(bytes, nb);
+
+  oldp    = mem2chunk(oldmem);
+  oldsize = chunksize(oldp);
+
+  check_inuse_chunk(av, oldp);
+
+  if (!chunk_is_mmapped(oldp)) {
+
+    if ((unsigned long)(oldsize) >= (unsigned long)(nb)) {
+      /* already big enough; split below */
+      newp = oldp;
+      newsize = oldsize;
+    }
+
+    else {
+      next = chunk_at_offset(oldp, oldsize);
+
+      /* Try to expand forward into top */
+      if (next == av->top &&
+          (unsigned long)(newsize = oldsize + chunksize(next)) >=
+          (unsigned long)(nb + MINSIZE)) {
+        set_head_size(oldp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+        av->top = chunk_at_offset(oldp, nb);
+        set_head(av->top, (newsize - nb) | PREV_INUSE);
+	check_inuse_chunk(av, oldp);
+        return chunk2mem(oldp);
+      }
+
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next != av->top &&
+               !inuse(next) &&
+               (unsigned long)(newsize = oldsize + chunksize(next)) >=
+               (unsigned long)(nb)) {
+        newp = oldp;
+        unlink(next, bck, fwd);
+      }
+
+      /* allocate, copy, free */
+      else {
+        newmem = _int_malloc(av, nb - MALLOC_ALIGN_MASK);
+        if (newmem == 0)
+          return 0; /* propagate failure */
+
+        newp = mem2chunk(newmem);
+        newsize = chunksize(newp);
+
+        /*
+          Avoid copy if newp is next chunk after oldp.
+        */
+        if (newp == next) {
+          newsize += oldsize;
+          newp = oldp;
+        }
+        else {
+          /*
+            Unroll copy of <= 36 bytes (72 if 8byte sizes)
+            We know that contents have an odd number of
+            INTERNAL_SIZE_T-sized words; minimally 3.
+          */
+
+          copysize = oldsize - SIZE_SZ;
+          s = (INTERNAL_SIZE_T*)(oldmem);
+          d = (INTERNAL_SIZE_T*)(newmem);
+          ncopies = copysize / sizeof(INTERNAL_SIZE_T);
+          assert(ncopies >= 3);
+
+          if (ncopies > 9)
+            MALLOC_COPY(d, s, copysize);
+
+          else {
+            *(d+0) = *(s+0);
+            *(d+1) = *(s+1);
+            *(d+2) = *(s+2);
+            if (ncopies > 4) {
+              *(d+3) = *(s+3);
+              *(d+4) = *(s+4);
+              if (ncopies > 6) {
+                *(d+5) = *(s+5);
+                *(d+6) = *(s+6);
+                if (ncopies > 8) {
+                  *(d+7) = *(s+7);
+                  *(d+8) = *(s+8);
+                }
+              }
+            }
+          }
+
+          _int_free(av, oldmem);
+          check_inuse_chunk(av, newp);
+          return chunk2mem(newp);
+        }
+      }
+    }
+
+    /* If possible, free extra space in old or extended chunk */
+
+    assert((unsigned long)(newsize) >= (unsigned long)(nb));
+
+    remainder_size = newsize - nb;
+
+    if (remainder_size < MINSIZE) { /* not enough extra to split off */
+      set_head_size(newp, newsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_inuse_bit_at_offset(newp, newsize);
+    }
+    else { /* split remainder */
+      remainder = chunk_at_offset(newp, nb);
+      set_head_size(newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head(remainder, remainder_size | PREV_INUSE |
+	       (av != &main_arena ? NON_MAIN_ARENA : 0));
+      /* Mark remainder as inuse so free() won't complain */
+      set_inuse_bit_at_offset(remainder, remainder_size);
+      _int_free(av, chunk2mem(remainder));
+    }
+
+    check_inuse_chunk(av, newp);
+    return chunk2mem(newp);
+  }
+
+  /*
+    Handle mmap cases
+  */
+
+  else {
+#if HAVE_MMAP
+
+#if HAVE_MREMAP
+    INTERNAL_SIZE_T offset = oldp->prev_size;
+    size_t pagemask = mp_.pagesize - 1;
+    char *cp;
+    unsigned long sum;
+
+    /* Note the extra SIZE_SZ overhead */
+    newsize = (nb + offset + SIZE_SZ + pagemask) & ~pagemask;
+
+    /* don't need to remap if still within same page */
+    if (oldsize == newsize - offset)
+      return oldmem;
+
+    cp = (char*)mremap((char*)oldp - offset, oldsize + offset, newsize, 1);
+
+    if (cp != MAP_FAILED) {
+
+      newp = (mchunkptr)(cp + offset);
+      set_head(newp, (newsize - offset)|IS_MMAPPED);
+
+      assert(aligned_OK(chunk2mem(newp)));
+      assert((newp->prev_size == offset));
+
+      /* update statistics */
+      sum = mp_.mmapped_mem += newsize - oldsize;
+      if (sum > (unsigned long)(mp_.max_mmapped_mem))
+        mp_.max_mmapped_mem = sum;
+#ifdef NO_THREADS
+      sum += main_arena.system_mem;
+      if (sum > (unsigned long)(mp_.max_total_mem))
+        mp_.max_total_mem = sum;
+#endif
+
+      return chunk2mem(newp);
+    }
+#endif
+
+    /* Note the extra SIZE_SZ overhead. */
+    if ((unsigned long)(oldsize) >= (unsigned long)(nb + SIZE_SZ))
+      newmem = oldmem; /* do nothing */
+    else {
+      /* Must alloc, copy, free. */
+      newmem = _int_malloc(av, nb - MALLOC_ALIGN_MASK);
+      if (newmem != 0) {
+        MALLOC_COPY(newmem, oldmem, oldsize - 2*SIZE_SZ);
+        _int_free(av, oldmem);
+      }
+    }
+    return newmem;
+
+#else
+    /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */
+    check_malloc_state(av);
+    MALLOC_FAILURE_ACTION;
+    return 0;
+#endif
+  }
+}
+
+/*
+  ------------------------------ memalign ------------------------------
+*/
+
+Void_t*
+_int_memalign(mstate av, size_t alignment, size_t bytes)
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char*           m;              /* memory returned by malloc call */
+  mchunkptr       p;              /* corresponding chunk */
+  char*           brk;            /* alignment point within p */
+  mchunkptr       newp;           /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  mchunkptr       remainder;      /* spare room at end to split off */
+  unsigned long   remainder_size; /* its size */
+  INTERNAL_SIZE_T size;
+
+  /* If need less alignment than we give anyway, just relay to malloc */
+
+  if (alignment <= MALLOC_ALIGNMENT) return _int_malloc(av, bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+
+  if (alignment <  MINSIZE) alignment = MINSIZE;
+
+  /* Make sure alignment is power of 2 (in case MINSIZE is not).  */
+  if ((alignment & (alignment - 1)) != 0) {
+    size_t a = MALLOC_ALIGNMENT * 2;
+    while ((unsigned long)a < (unsigned long)alignment) a <<= 1;
+    alignment = a;
+  }
+
+  checked_request2size(bytes, nb);
+
+  /*
+    Strategy: find a spot within that chunk that meets the alignment
+    request, and then possibly free the leading and trailing space.
+  */
+
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m  = (char*)(_int_malloc(av, nb + alignment + MINSIZE));
+
+  if (m == 0) return 0; /* propagate failure */
+
+  p = mem2chunk(m);
+
+  if ((((unsigned long)(m)) % alignment) != 0) { /* misaligned */
+
+    /*
+      Find an aligned spot inside chunk.  Since we need to give back
+      leading space in a chunk of at least MINSIZE, if the first
+      calculation places us at a spot with less than MINSIZE leader,
+      we can move to the next aligned spot -- we've allocated enough
+      total room so that this is always possible.
+    */
+
+    brk = (char*)mem2chunk(((unsigned long)(m + alignment - 1)) &
+                           -((signed long) alignment));
+    if ((unsigned long)(brk - (char*)(p)) < MINSIZE)
+      brk += alignment;
+
+    newp = (mchunkptr)brk;
+    leadsize = brk - (char*)(p);
+    newsize = chunksize(p) - leadsize;
+
+    /* For mmapped chunks, just adjust offset */
+    if (chunk_is_mmapped(p)) {
+      newp->prev_size = p->prev_size + leadsize;
+      set_head(newp, newsize|IS_MMAPPED);
+      return chunk2mem(newp);
+    }
+
+    /* Otherwise, give back leader, use the rest */
+    set_head(newp, newsize | PREV_INUSE |
+	     (av != &main_arena ? NON_MAIN_ARENA : 0));
+    set_inuse_bit_at_offset(newp, newsize);
+    set_head_size(p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
+    _int_free(av, chunk2mem(p));
+    p = newp;
+
+    assert (newsize >= nb &&
+            (((unsigned long)(chunk2mem(p))) % alignment) == 0);
+  }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped(p)) {
+    size = chunksize(p);
+    if ((unsigned long)(size) > (unsigned long)(nb + MINSIZE)) {
+      remainder_size = size - nb;
+      remainder = chunk_at_offset(p, nb);
+      set_head(remainder, remainder_size | PREV_INUSE |
+	       (av != &main_arena ? NON_MAIN_ARENA : 0));
+      set_head_size(p, nb);
+      _int_free(av, chunk2mem(remainder));
+    }
+  }
+
+  check_inuse_chunk(av, p);
+  return chunk2mem(p);
+}
+
+#if 0
+/*
+  ------------------------------ calloc ------------------------------
+*/
+
+#if __STD_C
+Void_t* cALLOc(size_t n_elements, size_t elem_size)
+#else
+Void_t* cALLOc(n_elements, elem_size) size_t n_elements; size_t elem_size;
+#endif
+{
+  mchunkptr p;
+  unsigned long clearsize;
+  unsigned long nclears;
+  INTERNAL_SIZE_T* d;
+
+  Void_t* mem = mALLOc(n_elements * elem_size);
+
+  if (mem != 0) {
+    p = mem2chunk(mem);
+
+#if MMAP_CLEARS
+    if (!chunk_is_mmapped(p)) /* don't need to clear mmapped space */
+#endif
+    {
+      /*
+        Unroll clear of <= 36 bytes (72 if 8byte sizes)
+        We know that contents have an odd number of
+        INTERNAL_SIZE_T-sized words; minimally 3.
+      */
+
+      d = (INTERNAL_SIZE_T*)mem;
+      clearsize = chunksize(p) - SIZE_SZ;
+      nclears = clearsize / sizeof(INTERNAL_SIZE_T);
+      assert(nclears >= 3);
+
+      if (nclears > 9)
+        MALLOC_ZERO(d, clearsize);
+
+      else {
+        *(d+0) = 0;
+        *(d+1) = 0;
+        *(d+2) = 0;
+        if (nclears > 4) {
+          *(d+3) = 0;
+          *(d+4) = 0;
+          if (nclears > 6) {
+            *(d+5) = 0;
+            *(d+6) = 0;
+            if (nclears > 8) {
+              *(d+7) = 0;
+              *(d+8) = 0;
+            }
+          }
+        }
+      }
+    }
+  }
+  return mem;
+}
+#endif /* 0 */
+
+/*
+  ------------------------- independent_calloc -------------------------
+*/
+
+Void_t**
+#if __STD_C
+_int_icalloc(mstate av, size_t n_elements, size_t elem_size, Void_t* chunks[])
+#else
+_int_icalloc(av, n_elements, elem_size, chunks)
+mstate av; size_t n_elements; size_t elem_size; Void_t* chunks[];
+#endif
+{
+  size_t sz = elem_size; /* serves as 1-element array */
+  /* opts arg of 3 means all elements are same size, and should be cleared */
+  return iALLOc(av, n_elements, &sz, 3, chunks);
+}
+
+/*
+  ------------------------- independent_comalloc -------------------------
+*/
+
+Void_t**
+#if __STD_C
+_int_icomalloc(mstate av, size_t n_elements, size_t sizes[], Void_t* chunks[])
+#else
+_int_icomalloc(av, n_elements, sizes, chunks)
+mstate av; size_t n_elements; size_t sizes[]; Void_t* chunks[];
+#endif
+{
+  return iALLOc(av, n_elements, sizes, 0, chunks);
+}
+
+
+/*
+  ------------------------------ ialloc ------------------------------
+  ialloc provides common support for independent_X routines, handling all of
+  the combinations that can result.
+
+  The opts arg has:
+    bit 0 set if all elements are same size (using sizes[0])
+    bit 1 set if elements should be zeroed
+*/
+
+
+static Void_t**
+#if __STD_C
+iALLOc(mstate av, size_t n_elements, size_t* sizes, int opts, Void_t* chunks[])
+#else
+iALLOc(av, n_elements, sizes, opts, chunks)
+mstate av; size_t n_elements; size_t* sizes; int opts; Void_t* chunks[];
+#endif
+{
+  INTERNAL_SIZE_T element_size;   /* chunksize of each element, if all same */
+  INTERNAL_SIZE_T contents_size;  /* total size of elements */
+  INTERNAL_SIZE_T array_size;     /* request size of pointer array */
+  Void_t*         mem;            /* malloced aggregate space */
+  mchunkptr       p;              /* corresponding chunk */
+  INTERNAL_SIZE_T remainder_size; /* remaining bytes while splitting */
+  Void_t**        marray;         /* either "chunks" or malloced ptr array */
+  mchunkptr       array_chunk;    /* chunk for malloced ptr array */
+  int             mmx;            /* to disable mmap */
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T size_flags;
+  size_t          i;
+
+  /* Ensure initialization/consolidation */
+  if (have_fastchunks(av)) malloc_consolidate(av);
+
+  /* compute array length, if needed */
+  if (chunks != 0) {
+    if (n_elements == 0)
+      return chunks; /* nothing to do */
+    marray = chunks;
+    array_size = 0;
+  }
+  else {
+    /* if empty req, must still return chunk representing empty array */
+    if (n_elements == 0)
+      return (Void_t**) _int_malloc(av, 0);
+    marray = 0;
+    array_size = request2size(n_elements * (sizeof(Void_t*)));
+  }
+
+  /* compute total element size */
+  if (opts & 0x1) { /* all-same-size */
+    element_size = request2size(*sizes);
+    contents_size = n_elements * element_size;
+  }
+  else { /* add up all the sizes */
+    element_size = 0;
+    contents_size = 0;
+    for (i = 0; i != n_elements; ++i)
+      contents_size += request2size(sizes[i]);
+  }
+
+  /* subtract out alignment bytes from total to minimize overallocation */
+  size = contents_size + array_size - MALLOC_ALIGN_MASK;
+
+  /*
+     Allocate the aggregate chunk.
+     But first disable mmap so malloc won't use it, since
+     we would not be able to later free/realloc space internal
+     to a segregated mmap region.
+  */
+  mmx = mp_.n_mmaps_max;   /* disable mmap */
+  mp_.n_mmaps_max = 0;
+  mem = _int_malloc(av, size);
+  mp_.n_mmaps_max = mmx;   /* reset mmap */
+  if (mem == 0)
+    return 0;
+
+  p = mem2chunk(mem);
+  assert(!chunk_is_mmapped(p));
+  remainder_size = chunksize(p);
+
+  if (opts & 0x2) {       /* optionally clear the elements */
+    MALLOC_ZERO(mem, remainder_size - SIZE_SZ - array_size);
+  }
+
+  size_flags = PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0);
+
+  /* If not provided, allocate the pointer array as final part of chunk */
+  if (marray == 0) {
+    array_chunk = chunk_at_offset(p, contents_size);
+    marray = (Void_t**) (chunk2mem(array_chunk));
+    set_head(array_chunk, (remainder_size - contents_size) | size_flags);
+    remainder_size = contents_size;
+  }
+
+  /* split out elements */
+  for (i = 0; ; ++i) {
+    marray[i] = chunk2mem(p);
+    if (i != n_elements-1) {
+      if (element_size != 0)
+        size = element_size;
+      else
+        size = request2size(sizes[i]);
+      remainder_size -= size;
+      set_head(p, size | size_flags);
+      p = chunk_at_offset(p, size);
+    }
+    else { /* the final element absorbs any overallocation slop */
+      set_head(p, remainder_size | size_flags);
+      break;
+    }
+  }
+
+#if MALLOC_DEBUG
+  if (marray != chunks) {
+    /* final element must have exactly exhausted chunk */
+    if (element_size != 0)
+      assert(remainder_size == element_size);
+    else
+      assert(remainder_size == request2size(sizes[i]));
+    check_inuse_chunk(av, mem2chunk(marray));
+  }
+
+  for (i = 0; i != n_elements; ++i)
+    check_inuse_chunk(av, mem2chunk(marray[i]));
+#endif
+
+  return marray;
+}
+
+
+/*
+  ------------------------------ valloc ------------------------------
+*/
+
+Void_t*
+#if __STD_C
+_int_valloc(mstate av, size_t bytes)
+#else
+_int_valloc(av, bytes) mstate av; size_t bytes;
+#endif
+{
+  /* Ensure initialization/consolidation */
+  if (have_fastchunks(av)) malloc_consolidate(av);
+  return _int_memalign(av, mp_.pagesize, bytes);
+}
+
+/*
+  ------------------------------ pvalloc ------------------------------
+*/
+
+
+Void_t*
+#if __STD_C
+_int_pvalloc(mstate av, size_t bytes)
+#else
+_int_pvalloc(av, bytes) mstate av, size_t bytes;
+#endif
+{
+  size_t pagesz;
+
+  /* Ensure initialization/consolidation */
+  if (have_fastchunks(av)) malloc_consolidate(av);
+  pagesz = mp_.pagesize;
+  return _int_memalign(av, pagesz, (bytes + pagesz - 1) & ~(pagesz - 1));
+}
+
+
+/*
+  ------------------------------ malloc_trim ------------------------------
+*/
+
+#if __STD_C
+int mTRIm(size_t pad)
+#else
+int mTRIm(pad) size_t pad;
+#endif
+{
+  mstate av = &main_arena; /* already locked */
+
+  /* Ensure initialization/consolidation */
+  malloc_consolidate(av);
+
+#ifndef MORECORE_CANNOT_TRIM
+  return sYSTRIm(pad, av);
+#else
+  return 0;
+#endif
+}
+
+
+/*
+  ------------------------- malloc_usable_size -------------------------
+*/
+
+#if __STD_C
+size_t mUSABLe(Void_t* mem)
+#else
+size_t mUSABLe(mem) Void_t* mem;
+#endif
+{
+  mchunkptr p;
+  if (mem != 0) {
+    p = mem2chunk(mem);
+    if (chunk_is_mmapped(p))
+      return chunksize(p) - 2*SIZE_SZ;
+    else if (inuse(p))
+      return chunksize(p) - SIZE_SZ;
+  }
+  return 0;
+}
+
+/*
+  ---------------------- internal mallinfo -----------------------------
+*/
+
+void _int_get_arena_info(mstate av, struct malloc_arena_info *mai)
+{
+  size_t i;
+  mbinptr b;
+  mchunkptr p;
+  size_t binavail = 0;
+  size_t fastavail = 0;
+  int nbinblocks = 0;
+  int nfastblocks = 0;
+
+  (void)mutex_lock(&av->mutex);
+
+  /* Ensure initialization */
+  if (av->top == 0)  malloc_consolidate(av);
+
+  check_malloc_state(av);
+
+  /* traverse fastbins */
+  for (i = 0; i < NFASTBINS; ++i) {
+    for (p = av->fastbins[i]; p != 0; p = p->fd) {
+      ++nfastblocks;
+      fastavail += chunksize(p);
+    }
+  }
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i) {
+    b = bin_at(av, i);
+    for (p = last(b); p != b; p = p->bk) {
+      ++nbinblocks;
+      binavail += chunksize(p);
+    }
+  }
+
+  mai->nfastblocks = nfastblocks;
+  mai->nbinblocks = nbinblocks;
+  mai->fastavail = fastavail;
+  mai->binavail = binavail;
+  mai->top_size = chunksize(av->top);
+  mai->system_mem = av->system_mem;
+  mai->max_system_mem = av->max_system_mem;
+  mai->stat_lock_direct = av->stat_lock_direct;
+  mai->stat_lock_loop   = av->stat_lock_loop;
+  mai->stat_lock_wait   = av->stat_lock_wait;
+
+  (void)mutex_unlock(&av->mutex);
+}
+
+void
+_int_get_global_info (struct malloc_global_info *mgi)
+{
+  mgi->n_mmaps = mp_.n_mmaps;
+  mgi->max_n_mmaps = mp_.max_n_mmaps;
+  mgi->mmapped_mem = mp_.mmapped_mem;
+  mgi->max_mmapped_mem = mp_.max_mmapped_mem;
+  mgi->max_total_mem = mp_.max_total_mem;
+#if THREAD_STATS
+  mgi->stat_n_heaps = stat_n_heaps;
+#else
+  mgi->stat_n_heaps = 0;
+#endif
+}
+
+/*
+  ------------------------------ malloc_stats ------------------------------
+*/
+
+/* Now in separate file, malloc-stats.c. */
+
+/*
+  ------------------------------ mallopt ------------------------------
+*/
+
+#if __STD_C
+int mALLOPt(int param_number, int value)
+#else
+int mALLOPt(param_number, value) int param_number; int value;
+#endif
+{
+  mstate av = &main_arena;
+  int res = 1;
+
+  if(__malloc_initialized < 0)
+    ptmalloc_init ();
+  (void)mutex_lock(&av->mutex);
+  /* Ensure initialization/consolidation */
+  malloc_consolidate(av);
+
+  switch(param_number) {
+  case M_MXFAST:
+    if (value >= 0 && value <= MAX_FAST_SIZE) {
+      set_max_fast(av, value);
+    }
+    else
+      res = 0;
+    break;
+
+  case M_TRIM_THRESHOLD:
+    mp_.trim_threshold = value;
+    break;
+
+  case M_TOP_PAD:
+    mp_.top_pad = value;
+    break;
+
+  case M_MMAP_THRESHOLD:
+#if USE_ARENAS
+    /* Forbid setting the threshold too high. */
+    if((unsigned long)value > HEAP_MAX_SIZE/2)
+      res = 0;
+    else
+#endif
+      mp_.mmap_threshold = value;
+    break;
+
+  case M_MMAP_MAX:
+#if !HAVE_MMAP
+    if (value != 0)
+      res = 0;
+    else
+#endif
+      mp_.n_mmaps_max = value;
+    break;
+
+  case M_CHECK_ACTION:
+    check_action = value;
+    break;
+  }
+  (void)mutex_unlock(&av->mutex);
+  return res;
+}
+
+
+/*
+  -------------------- Alternative MORECORE functions --------------------
+*/
+
+
+/*
+  General Requirements for MORECORE.
+
+  The MORECORE function must have the following properties:
+
+  If MORECORE_CONTIGUOUS is false:
+
+    * MORECORE must allocate in multiples of pagesize. It will
+      only be called with arguments that are multiples of pagesize.
+
+    * MORECORE(0) must return an address that is at least
+      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
+
+  else (i.e. If MORECORE_CONTIGUOUS is true):
+
+    * Consecutive calls to MORECORE with positive arguments
+      return increasing addresses, indicating that space has been
+      contiguously extended.
+
+    * MORECORE need not allocate in multiples of pagesize.
+      Calls to MORECORE need not have args of multiples of pagesize.
+
+    * MORECORE need not page-align.
+
+  In either case:
+
+    * MORECORE may allocate more memory than requested. (Or even less,
+      but this will generally result in a malloc failure.)
+
+    * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call. This malloc does NOT call MORECORE(0)
+      until at least one call with positive arguments is made, so
+      the initial value returned is not important.
+
+    * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+
+    * MORECORE need not handle negative arguments -- it may instead
+      just return MORECORE_FAILURE when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM,
+
+  There is some variation across systems about the type of the
+  argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
+  actually be size_t, because sbrk supports negative args, so it is
+  normally the signed type of the same width as size_t (sometimes
+  declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
+  matter though. Internally, we use "long" as arguments, which should
+  work across all reasonable possibilities.
+
+  Additionally, if MORECORE ever returns failure for a positive
+  request, and HAVE_MMAP is true, then mmap is used as a noncontiguous
+  system allocator. This is a useful backup strategy for systems with
+  holes in address spaces -- in this case sbrk cannot contiguously
+  expand the heap, but mmap may be able to map noncontiguous space.
+
+  If you'd like mmap to ALWAYS be used, you can define MORECORE to be
+  a function that always returns MORECORE_FAILURE.
+
+  If you are using this malloc with something other than sbrk (or its
+  emulation) to supply memory regions, you probably want to set
+  MORECORE_CONTIGUOUS as false.  As an example, here is a custom
+  allocator kindly contributed for pre-OSX macOS.  It uses virtually
+  but not necessarily physically contiguous non-paged memory (locked
+  in, present and won't get swapped out).  You can use it by
+  uncommenting this section, adding some #includes, and setting up the
+  appropriate defines above:
+
+      #define MORECORE osMoreCore
+      #define MORECORE_CONTIGUOUS 0
+
+  There is also a shutdown routine that should somehow be called for
+  cleanup upon program exit.
+
+  #define MAX_POOL_ENTRIES 100
+  #define MINIMUM_MORECORE_SIZE  (64 * 1024)
+  static int next_os_pool;
+  void *our_os_pools[MAX_POOL_ENTRIES];
+
+  void *osMoreCore(int size)
+  {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MORECORE_FAILURE;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MORECORE_FAILURE;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+  }
+
+  // cleanup any allocated memory pools
+  // called as last thing before shutting down driver
+
+  void osCleanupMem(void)
+  {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+  }
+
+*/
+
+
+#ifdef _LIBC
+# include <sys/param.h>
+
+/* We need a wrapper function for one of the additions of POSIX.  */
+int
+__posix_memalign (void **memptr, size_t alignment, size_t size)
+{
+  void *mem;
+  __malloc_ptr_t (*hook) __MALLOC_PMT ((size_t, size_t,
+					__const __malloc_ptr_t)) =
+    __memalign_hook;
+
+  /* Test whether the SIZE argument is valid.  It must be a power of
+     two multiple of sizeof (void *).  */
+  if (alignment % sizeof (void *) != 0
+      || !powerof2 (alignment / sizeof (void *)) != 0
+      || alignment == 0)
+    return EINVAL;
+
+  /* Call the hook here, so that caller is posix_memalign's caller
+     and not posix_memalign itself.  */
+  if (hook != NULL)
+    mem = (*hook)(alignment, size, RETURN_ADDRESS (0));
+  else
+    mem = public_mEMALIGn (alignment, size);
+
+  if (mem != NULL) {
+    *memptr = mem;
+    return 0;
+  }
+
+  return ENOMEM;
+}
+weak_alias (__posix_memalign, posix_memalign)
+
+strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
+strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree)
+strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
+strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)
+strong_alias (__libc_memalign, __memalign)
+weak_alias (__libc_memalign, memalign)
+strong_alias (__libc_realloc, __realloc) strong_alias (__libc_realloc, realloc)
+strong_alias (__libc_valloc, __valloc) weak_alias (__libc_valloc, valloc)
+strong_alias (__libc_pvalloc, __pvalloc) weak_alias (__libc_pvalloc, pvalloc)
+strong_alias (__libc_mallinfo, __mallinfo)
+weak_alias (__libc_mallinfo, mallinfo)
+strong_alias (__libc_mallopt, __mallopt) weak_alias (__libc_mallopt, mallopt)
+
+weak_alias (__malloc_usable_size, malloc_usable_size)
+weak_alias (__malloc_trim, malloc_trim)
+weak_alias (__malloc_get_state, malloc_get_state)
+weak_alias (__malloc_set_state, malloc_set_state)
+
+#endif /* _LIBC */
+
+/* ------------------------------------------------------------
+History:
+
+[see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
+
+*/
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/opal/memory/ptmalloc2/malloc.h b/opal/memory/ptmalloc2/malloc.h
new file mode 100644
index 0000000000..1d486dc7b8
--- /dev/null
+++ b/opal/memory/ptmalloc2/malloc.h
@@ -0,0 +1,291 @@
+/* Prototypes and definition for malloc implementation.
+   Copyright (C) 1996,97,99,2000,2002,2003,2004 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _MALLOC_H
+#define _MALLOC_H 1
+
+#ifdef _LIBC
+#include <features.h>
+#endif
+
+/*
+  $Id: malloc.h,v 1.7 2004/08/08 12:34:57 wg Exp $
+  `ptmalloc2', a malloc implementation for multiple threads without
+  lock contention, by Wolfram Gloger <wg@malloc.de>.
+
+  VERSION 2.7.0
+
+  This work is mainly derived from malloc-2.7.0 by Doug Lea
+  <dl@cs.oswego.edu>, which is available from:
+
+                 ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+
+  This trimmed-down header file only provides function prototypes and
+  the exported data structures.  For more detailed function
+  descriptions and compile-time options, see the source file
+  `malloc.c'.
+*/
+
+#if defined(__STDC__) || defined (__cplusplus)
+# include <stddef.h>
+# define __malloc_ptr_t  void *
+#else
+# undef  size_t
+# define size_t          unsigned int
+# undef  ptrdiff_t
+# define ptrdiff_t       int
+# define __malloc_ptr_t  char *
+#endif
+
+#ifdef _LIBC
+/* Used by GNU libc internals. */
+# define __malloc_size_t size_t
+# define __malloc_ptrdiff_t ptrdiff_t
+#elif !defined __attribute_malloc__
+# define __attribute_malloc__
+#endif
+
+#ifdef __GNUC__
+
+/* GCC can always grok prototypes.  For C++ programs we add throw()
+   to help it optimize the function calls.  But this works only with
+   gcc 2.8.x and egcs.  */
+# if defined __cplusplus && (__GNUC__ >= 3 || __GNUC_MINOR__ >= 8)
+#  define __THROW	throw ()
+# else
+#  define __THROW
+# endif
+# define __MALLOC_P(args)	args __THROW
+/* This macro will be used for functions which might take C++ callback
+   functions.  */
+# define __MALLOC_PMT(args)	args
+
+#else	/* Not GCC.  */
+
+# define __THROW
+
+# if (defined __STDC__ && __STDC__) || defined __cplusplus
+
+#  define __MALLOC_P(args)	args
+#  define __MALLOC_PMT(args)	args
+
+#  ifndef __const
+#   define __const	 const
+#  endif
+
+# else	/* Not ANSI C or C++.  */
+
+#  define __MALLOC_P(args)	()	/* No prototypes.  */
+#  define __MALLOC_PMT(args)	()
+
+#  ifndef __const
+#   define __const
+#  endif
+
+# endif	/* ANSI C or C++.  */
+
+#endif	/* GCC.  */
+
+#ifndef NULL
+# ifdef __cplusplus
+#  define NULL	0
+# else
+#  define NULL	((__malloc_ptr_t) 0)
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Nonzero if the malloc is already initialized.  */
+#ifdef _LIBC
+/* In the GNU libc we rename the global variable
+   `__malloc_initialized' to `__libc_malloc_initialized'.  */
+# define __malloc_initialized __libc_malloc_initialized
+#endif
+extern int __malloc_initialized;
+
+/* Allocate SIZE bytes of memory.  */
+extern __malloc_ptr_t malloc __MALLOC_P ((size_t __size)) __attribute_malloc__;
+
+/* Allocate NMEMB elements of SIZE bytes each, all initialized to 0.  */
+extern __malloc_ptr_t calloc __MALLOC_P ((size_t __nmemb, size_t __size))
+       __attribute_malloc__;
+
+/* Re-allocate the previously allocated block in __ptr, making the new
+   block SIZE bytes long.  */
+extern __malloc_ptr_t realloc __MALLOC_P ((__malloc_ptr_t __ptr,
+					   size_t __size))
+       __attribute_malloc__;
+
+/* Free a block allocated by `malloc', `realloc' or `calloc'.  */
+extern void free __MALLOC_P ((__malloc_ptr_t __ptr));
+
+/* Free a block allocated by `calloc'. */
+extern void cfree __MALLOC_P ((__malloc_ptr_t __ptr));
+
+/* Allocate SIZE bytes allocated to ALIGNMENT bytes.  */
+extern __malloc_ptr_t memalign __MALLOC_P ((size_t __alignment, size_t __size));
+
+/* Allocate SIZE bytes on a page boundary.  */
+extern __malloc_ptr_t valloc __MALLOC_P ((size_t __size)) __attribute_malloc__;
+
+/* Equivalent to valloc(minimum-page-that-holds(n)), that is, round up
+   __size to nearest pagesize. */
+extern __malloc_ptr_t  pvalloc __MALLOC_P ((size_t __size))
+       __attribute_malloc__;
+
+/* Underlying allocation function; successive calls should return
+   contiguous pieces of memory.  */
+extern __malloc_ptr_t (*__morecore) __MALLOC_PMT ((ptrdiff_t __size));
+
+/* Default value of `__morecore'.  */
+extern __malloc_ptr_t __default_morecore __MALLOC_P ((ptrdiff_t __size))
+       __attribute_malloc__;
+
+/* SVID2/XPG mallinfo structure */
+
+struct mallinfo {
+  int arena;    /* non-mmapped space allocated from system */
+  int ordblks;  /* number of free chunks */
+  int smblks;   /* number of fastbin blocks */
+  int hblks;    /* number of mmapped regions */
+  int hblkhd;   /* space in mmapped regions */
+  int usmblks;  /* maximum total allocated space */
+  int fsmblks;  /* space available in freed fastbin blocks */
+  int uordblks; /* total allocated space */
+  int fordblks; /* total free space */
+  int keepcost; /* top-most, releasable (via malloc_trim) space */
+};
+
+/* Returns a copy of the updated current mallinfo. */
+extern struct mallinfo mallinfo __MALLOC_P ((void));
+
+/* SVID2/XPG mallopt options */
+#ifndef M_MXFAST
+# define M_MXFAST  1	/* maximum request size for "fastbins" */
+#endif
+#ifndef M_NLBLKS
+# define M_NLBLKS  2	/* UNUSED in this malloc */
+#endif
+#ifndef M_GRAIN
+# define M_GRAIN   3	/* UNUSED in this malloc */
+#endif
+#ifndef M_KEEP
+# define M_KEEP    4	/* UNUSED in this malloc */
+#endif
+
+/* mallopt options that actually do something */
+#define M_TRIM_THRESHOLD    -1
+#define M_TOP_PAD           -2
+#define M_MMAP_THRESHOLD    -3
+#define M_MMAP_MAX          -4
+#define M_CHECK_ACTION      -5
+
+/* General SVID/XPG interface to tunable parameters. */
+extern int mallopt __MALLOC_P ((int __param, int __val));
+
+/* Release all but __pad bytes of freed top-most memory back to the
+   system. Return 1 if successful, else 0. */
+extern int malloc_trim __MALLOC_P ((size_t __pad));
+
+/* Report the number of usable allocated bytes associated with allocated
+   chunk __ptr. */
+extern size_t malloc_usable_size __MALLOC_P ((__malloc_ptr_t __ptr));
+
+/* Prints brief summary statistics on stderr. */
+extern void malloc_stats __MALLOC_P ((void));
+
+/* Record the state of all malloc variables in an opaque data structure. */
+extern __malloc_ptr_t malloc_get_state __MALLOC_P ((void));
+
+/* Restore the state of all malloc variables from data obtained with
+   malloc_get_state(). */
+extern int malloc_set_state __MALLOC_P ((__malloc_ptr_t __ptr));
+
+/* Called once when malloc is initialized; redefining this variable in
+   the application provides the preferred way to set up the hook
+   pointers. */
+extern void (*__malloc_initialize_hook) __MALLOC_PMT ((void));
+/* Hooks for debugging and user-defined versions. */
+extern void (*__free_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr,
+					__const __malloc_ptr_t));
+extern __malloc_ptr_t (*__malloc_hook) __MALLOC_PMT ((size_t __size,
+						    __const __malloc_ptr_t));
+extern __malloc_ptr_t (*__realloc_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr,
+						     size_t __size,
+						     __const __malloc_ptr_t));
+extern __malloc_ptr_t (*__memalign_hook) __MALLOC_PMT ((size_t __alignment,
+						      size_t __size,
+						      __const __malloc_ptr_t));
+extern void (*__after_morecore_hook) __MALLOC_PMT ((void));
+
+/* Activate a standard set of debugging hooks. */
+extern void __malloc_check_init __MALLOC_P ((void));
+
+/* Internal routines, operating on "arenas".  */
+struct malloc_state;
+typedef struct malloc_state *mstate;
+
+extern mstate         _int_new_arena __MALLOC_P ((size_t __ini_size));
+extern __malloc_ptr_t _int_malloc __MALLOC_P ((mstate __m, size_t __size));
+extern void           _int_free __MALLOC_P ((mstate __m, __malloc_ptr_t __ptr));
+extern __malloc_ptr_t _int_realloc __MALLOC_P ((mstate __m,
+						__malloc_ptr_t __ptr,
+						size_t __size));
+extern __malloc_ptr_t _int_memalign __MALLOC_P ((mstate __m, size_t __alignment,
+						 size_t __size));
+/* Return arena number __n, or 0 if out of bounds.  Arena 0 is the
+   main arena.  */
+extern mstate         _int_get_arena __MALLOC_P ((int __n));
+
+/* Implementation-specific mallinfo.  More detailed than mallinfo, and
+   also works for size_t wider than int.  */
+struct malloc_arena_info {
+    int    nfastblocks;    /* number of freed "fastchunks" */
+    int    nbinblocks;     /* number of available chunks in bins */
+    size_t fastavail;      /* total space in freed "fastchunks" */
+    size_t binavail;       /* total space in binned chunks */
+    size_t top_size;       /* size of top chunk */
+    size_t system_mem;     /* bytes allocated from system in this arena */
+    size_t max_system_mem; /* max. bytes allocated from system */
+    /* Statistics for locking.  Only kept if THREAD_STATS is defined
+       at compile time.  */
+    long   stat_lock_direct, stat_lock_loop, stat_lock_wait;
+};
+
+struct malloc_global_info {
+    int    n_mmaps;         /* number of mmap'ed chunks */
+    int    max_n_mmaps;     /* max. number of mmap'ed chunks reached */
+    size_t mmapped_mem;     /* total bytes allocated in mmap'ed chunks */
+    size_t max_mmapped_mem; /* max. bytes allocated in mmap'ed chunks */
+    size_t max_total_mem;   /* only kept for NO_THREADS */
+    int    stat_n_heaps;    /* only kept if THREAD_STATS is defined */
+};
+
+extern void _int_get_arena_info __MALLOC_P ((mstate __m,
+					     struct malloc_arena_info *__ma));
+extern void _int_get_global_info __MALLOC_P ((struct malloc_global_info *__m));
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* malloc.h */
diff --git a/opal/memory/ptmalloc2/sysdeps/generic/atomic.h b/opal/memory/ptmalloc2/sysdeps/generic/atomic.h
new file mode 100644
index 0000000000..a3aeed13e8
--- /dev/null
+++ b/opal/memory/ptmalloc2/sysdeps/generic/atomic.h
@@ -0,0 +1 @@
+/* Empty placeholder */
diff --git a/opal/memory/ptmalloc2/sysdeps/generic/malloc-machine.h b/opal/memory/ptmalloc2/sysdeps/generic/malloc-machine.h
new file mode 100644
index 0000000000..345137060d
--- /dev/null
+++ b/opal/memory/ptmalloc2/sysdeps/generic/malloc-machine.h
@@ -0,0 +1,68 @@
+/* Basic platform-independent macro definitions for mutexes,
+   thread-specific data and parameters for malloc.
+   Copyright (C) 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _GENERIC_MALLOC_MACHINE_H
+#define _GENERIC_MALLOC_MACHINE_H
+
+#include <atomic.h>
+
+#ifndef mutex_init /* No threads, provide dummy macros */
+
+# define NO_THREADS
+
+/* The mutex functions used to do absolutely nothing, i.e. lock,
+   trylock and unlock would always just return 0.  However, even
+   without any concurrently active threads, a mutex can be used
+   legitimately as an `in use' flag.  To make the code that is
+   protected by a mutex async-signal safe, these macros would have to
+   be based on atomic test-and-set operations, for example. */
+typedef int mutex_t;
+
+# define mutex_init(m)              (*(m) = 0)
+# define mutex_lock(m)              ((*(m) = 1), 0)
+# define mutex_trylock(m)           (*(m) ? 1 : ((*(m) = 1), 0))
+# define mutex_unlock(m)            (*(m) = 0)
+
+typedef void *tsd_key_t;
+# define tsd_key_create(key, destr) do {} while(0)
+# define tsd_setspecific(key, data) ((key) = (data))
+# define tsd_getspecific(key, vptr) (vptr = (key))
+
+# define thread_atfork(prepare, parent, child) do {} while(0)
+
+#endif /* !defined mutex_init */
+
+#ifndef atomic_full_barrier
+# define atomic_full_barrier() __asm ("" ::: "memory")
+#endif
+
+#ifndef atomic_read_barrier
+# define atomic_read_barrier() atomic_full_barrier ()
+#endif
+
+#ifndef atomic_write_barrier
+# define atomic_write_barrier() atomic_full_barrier ()
+#endif
+
+#ifndef DEFAULT_TOP_PAD
+# define DEFAULT_TOP_PAD 131072
+#endif
+
+#endif /* !defined(_GENERIC_MALLOC_MACHINE_H) */
diff --git a/opal/memory/ptmalloc2/sysdeps/generic/thread-st.h b/opal/memory/ptmalloc2/sysdeps/generic/thread-st.h
new file mode 100644
index 0000000000..0243774b7c
--- /dev/null
+++ b/opal/memory/ptmalloc2/sysdeps/generic/thread-st.h
@@ -0,0 +1,48 @@
+/*
+ * $Id:$
+ * Generic version: no threads.
+ * by Wolfram Gloger 2004
+ */
+
+#include <stdio.h>
+
+struct thread_st {
+	char *sp;							/* stack pointer, can be 0 */
+	void (*func)(struct thread_st* st);	/* must be set by user */
+	int id;
+	int flags;
+	struct user_data u;
+};
+
+static void
+thread_init(void)
+{
+	printf("No threads.\n");
+}
+
+/* Create a thread. */
+static int
+thread_create(struct thread_st *st)
+{
+	st->flags = 0;
+	st->id = 1;
+	st->func(st);
+	return 0;
+}
+
+/* Wait for one of several subthreads to finish. */
+static void
+wait_for_thread(struct thread_st st[], int n_thr,
+				int (*end_thr)(struct thread_st*))
+{
+	int i;
+	for(i=0; i<n_thr; i++)
+		if(end_thr)
+			end_thr(&st[i]);
+}
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * End:
+ */
diff --git a/opal/memory/ptmalloc2/sysdeps/pthread/malloc-machine.h b/opal/memory/ptmalloc2/sysdeps/pthread/malloc-machine.h
new file mode 100644
index 0000000000..3940a7d6f9
--- /dev/null
+++ b/opal/memory/ptmalloc2/sysdeps/pthread/malloc-machine.h
@@ -0,0 +1,132 @@
+/* Basic platform-independent macro definitions for mutexes,
+   thread-specific data and parameters for malloc.
+   Posix threads (pthreads) version.
+   Copyright (C) 2004 Wolfram Gloger <wg@malloc.de>.
+
+Permission to use, copy, modify, distribute, and sell this software
+and its documentation for any purpose is hereby granted without fee,
+provided that (i) the above copyright notices and this permission
+notice appear in all copies of the software and related documentation,
+and (ii) the name of Wolfram Gloger may not be used in any advertising
+or publicity relating to the software.
+
+THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
+EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
+WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL,
+INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY
+DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY
+OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef _PTHREAD_MALLOC_MACHINE_H
+#define _PTHREAD_MALLOC_MACHINE_H
+
+#include <pthread.h>
+
+#undef thread_atfork_static
+
+/* Use fast inline spinlocks with gcc.  */
+#if (defined __i386__ || defined __x86_64__) && defined __GNUC__ && \
+    !defined USE_NO_SPINLOCKS
+
+#include <time.h>
+#include <sched.h>
+
+typedef struct {
+  volatile unsigned int lock;
+  int pad0_;
+} mutex_t;
+
+#define MUTEX_INITIALIZER          { 0 }
+#define mutex_init(m)              ((m)->lock = 0)
+static inline int mutex_lock(mutex_t *m) {
+  int cnt = 0, r;
+  struct timespec tm;
+
+  for(;;) {
+    __asm__ __volatile__
+      ("xchgl %0, %1"
+       : "=r"(r), "=m"(m->lock)
+       : "0"(1), "m"(m->lock)
+       : "memory");
+    if(!r)
+      return 0;
+    if(cnt < 50) {
+      sched_yield();
+      cnt++;
+    } else {
+      tm.tv_sec = 0;
+      tm.tv_nsec = 2000001;
+      nanosleep(&tm, NULL);
+      cnt = 0;
+    }
+  }
+}
+static inline int mutex_trylock(mutex_t *m) {
+  int r;
+
+  __asm__ __volatile__
+    ("xchgl %0, %1"
+     : "=r"(r), "=m"(m->lock)
+     : "0"(1), "m"(m->lock)
+     : "memory");
+  return r;
+}
+static inline int mutex_unlock(mutex_t *m) {
+  m->lock = 0;
+  __asm __volatile ("" : "=m" (m->lock) : "0" (m->lock));
+  return 0;
+}
+
+#else
+
+/* Normal pthread mutex.  */
+typedef pthread_mutex_t mutex_t;
+
+#define MUTEX_INITIALIZER          PTHREAD_MUTEX_INITIALIZER
+#define mutex_init(m)              pthread_mutex_init(m, NULL)
+#define mutex_lock(m)              pthread_mutex_lock(m)
+#define mutex_trylock(m)           pthread_mutex_trylock(m)
+#define mutex_unlock(m)            pthread_mutex_unlock(m)
+
+#endif /* (__i386__ || __x86_64__) && __GNUC__ && !USE_NO_SPINLOCKS */
+
+/* thread specific data */
+#if defined(__sgi) || defined(USE_TSD_DATA_HACK)
+
+/* Hack for thread-specific data, e.g. on Irix 6.x.  We can't use
+   pthread_setspecific because that function calls malloc() itself.
+   The hack only works when pthread_t can be converted to an integral
+   type. */
+
+typedef void *tsd_key_t[256];
+#define tsd_key_create(key, destr) do { \
+  int i; \
+  for(i=0; i<256; i++) (*key)[i] = 0; \
+} while(0)
+#define tsd_setspecific(key, data) \
+ (key[(unsigned)pthread_self() % 256] = (data))
+#define tsd_getspecific(key, vptr) \
+ (vptr = key[(unsigned)pthread_self() % 256])
+
+#else
+
+typedef pthread_key_t tsd_key_t;
+
+#define tsd_key_create(key, destr) pthread_key_create(key, destr)
+#define tsd_setspecific(key, data) pthread_setspecific(key, data)
+#define tsd_getspecific(key, vptr) (vptr = pthread_getspecific(key))
+
+#endif
+
+/* at fork */
+#define thread_atfork(prepare, parent, child) \
+                                   pthread_atfork(prepare, parent, child)
+
+#include <sysdeps/generic/malloc-machine.h>
+
+#endif /* !defined(_MALLOC_MACHINE_H) */
diff --git a/opal/memory/ptmalloc2/sysdeps/pthread/thread-st.h b/opal/memory/ptmalloc2/sysdeps/pthread/thread-st.h
new file mode 100644
index 0000000000..f97a0a3552
--- /dev/null
+++ b/opal/memory/ptmalloc2/sysdeps/pthread/thread-st.h
@@ -0,0 +1,111 @@
+/*
+ * $Id: thread-st.h$
+ * pthread version
+ * by Wolfram Gloger 2004
+ */
+
+#include <pthread.h>
+#include <stdio.h>
+
+pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+#ifndef USE_PTHREADS_STACKS
+#define USE_PTHREADS_STACKS 0
+#endif
+
+#ifndef STACKSIZE
+#define STACKSIZE	32768
+#endif
+
+struct thread_st {
+	char *sp;							/* stack pointer, can be 0 */
+	void (*func)(struct thread_st* st);	/* must be set by user */
+	pthread_t id;
+	int flags;
+	struct user_data u;
+};
+
+static void
+thread_init(void)
+{
+	printf("Using posix threads.\n");
+	pthread_cond_init(&finish_cond, NULL);
+	pthread_mutex_init(&finish_mutex, NULL);
+}
+
+static void *
+thread_wrapper(void *ptr)
+{
+	struct thread_st *st = (struct thread_st*)ptr;
+
+	/*printf("begin %p\n", st->sp);*/
+	st->func(st);
+	pthread_mutex_lock(&finish_mutex);
+	st->flags = 1;
+	pthread_mutex_unlock(&finish_mutex);
+	pthread_cond_signal(&finish_cond);
+	/*printf("end %p\n", st->sp);*/
+	return NULL;
+}
+
+/* Create a thread. */
+static int
+thread_create(struct thread_st *st)
+{
+	st->flags = 0;
+	{
+		pthread_attr_t* attr_p = 0;
+#if USE_PTHREADS_STACKS
+		pthread_attr_t attr;
+
+		pthread_attr_init (&attr);
+		if(!st->sp)
+			st->sp = malloc(STACKSIZE+16);
+		if(!st->sp)
+			return -1;
+		if(pthread_attr_setstacksize(&attr, STACKSIZE))
+			fprintf(stderr, "error setting stacksize");
+		else
+			pthread_attr_setstackaddr(&attr, st->sp + STACKSIZE);
+		/*printf("create %p\n", st->sp);*/
+		attr_p = &attr;
+#endif
+		return pthread_create(&st->id, attr_p, thread_wrapper, st);
+	}
+	return 0;
+}
+
+/* Wait for one of several subthreads to finish. */
+static void
+wait_for_thread(struct thread_st st[], int n_thr,
+				int (*end_thr)(struct thread_st*))
+{
+	int i;
+
+	pthread_mutex_lock(&finish_mutex);
+	for(;;) {
+		int term = 0;
+		for(i=0; i<n_thr; i++)
+			if(st[i].flags) {
+				/*printf("joining %p\n", st[i].sp);*/
+				if(pthread_join(st[i].id, NULL) == 0) {
+					st[i].flags = 0;
+					if(end_thr)
+						end_thr(&st[i]);
+				} else
+					fprintf(stderr, "can't join\n");
+				++term;
+			}
+		if(term > 0)
+			break;
+		pthread_cond_wait(&finish_cond, &finish_mutex);
+	}
+	pthread_mutex_unlock(&finish_mutex);
+}
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * End:
+ */
diff --git a/opal/memory/ptmalloc2/sysdeps/solaris/malloc-machine.h b/opal/memory/ptmalloc2/sysdeps/solaris/malloc-machine.h
new file mode 100644
index 0000000000..00e33b0801
--- /dev/null
+++ b/opal/memory/ptmalloc2/sysdeps/solaris/malloc-machine.h
@@ -0,0 +1,51 @@
+/* Basic platform-independent macro definitions for mutexes,
+   thread-specific data and parameters for malloc.
+   Solaris threads version.
+   Copyright (C) 2004 Wolfram Gloger <wg@malloc.de>.
+
+Permission to use, copy, modify, distribute, and sell this software
+and its documentation for any purpose is hereby granted without fee,
+provided that (i) the above copyright notices and this permission
+notice appear in all copies of the software and related documentation,
+and (ii) the name of Wolfram Gloger may not be used in any advertising
+or publicity relating to the software.
+
+THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
+EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
+WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL,
+INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY
+DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY
+OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef _SOLARIS_MALLOC_MACHINE_H
+#define _SOLARIS_MALLOC_MACHINE_H
+
+#include <thread.h>
+
+typedef thread_t thread_id;
+
+#define MUTEX_INITIALIZER          { 0 }
+#define mutex_init(m)              mutex_init(m, USYNC_THREAD, NULL)
+
+/*
+ * Hack for thread-specific data on Solaris.  We can't use thr_setspecific
+ * because that function calls malloc() itself.
+ */
+typedef void *tsd_key_t[256];
+#define tsd_key_create(key, destr) do { \
+  int i; \
+  for(i=0; i<256; i++) (*key)[i] = 0; \
+} while(0)
+#define tsd_setspecific(key, data) (key[(unsigned)thr_self() % 256] = (data))
+#define tsd_getspecific(key, vptr) (vptr = key[(unsigned)thr_self() % 256])
+
+#define thread_atfork(prepare, parent, child) do {} while(0)
+
+#include <sysdeps/generic/malloc-machine.h>
+
+#endif /* !defined(_SOLARIS_MALLOC_MACHINE_H) */
diff --git a/opal/memory/ptmalloc2/sysdeps/solaris/thread-st.h b/opal/memory/ptmalloc2/sysdeps/solaris/thread-st.h
new file mode 100644
index 0000000000..dbb4b097e8
--- /dev/null
+++ b/opal/memory/ptmalloc2/sysdeps/solaris/thread-st.h
@@ -0,0 +1,72 @@
+/*
+ * $Id:$
+ * Solaris version
+ * by Wolfram Gloger 2004
+ */
+
+#include <thread.h>
+#include <stdio.h>
+
+#ifndef STACKSIZE
+#define STACKSIZE	32768
+#endif
+
+struct thread_st {
+	char *sp;							/* stack pointer, can be 0 */
+	void (*func)(struct thread_st* st);	/* must be set by user */
+	thread_id id;
+	int flags;
+	struct user_data u;
+};
+
+static void
+thread_init(void)
+{
+	printf("Using Solaris threads.\n");
+}
+
+static void *
+thread_wrapper(void *ptr)
+{
+	struct thread_st *st = (struct thread_st*)ptr;
+
+	/*printf("begin %p\n", st->sp);*/
+	st->func(st);
+	/*printf("end %p\n", st->sp);*/
+	return NULL;
+}
+
+/* Create a thread. */
+static int
+thread_create(struct thread_st *st)
+{
+	st->flags = 0;
+	if(!st->sp)
+		st->sp = malloc(STACKSIZE);
+	if(!st->sp) return -1;
+	thr_create(st->sp, STACKSIZE, thread_wrapper, st, THR_NEW_LWP, &st->id);
+	return 0;
+}
+
+/* Wait for one of several subthreads to finish. */
+static void
+wait_for_thread(struct thread_st st[], int n_thr,
+				int (*end_thr)(struct thread_st*))
+{
+	int i;
+	thread_t id;
+
+	thr_join(0, &id, NULL);
+	for(i=0; i<n_thr; i++)
+		if(id == st[i].id) {
+			if(end_thr)
+				end_thr(&st[i]);
+			break;
+		}
+}
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * End:
+ */
diff --git a/opal/memory/ptmalloc2/sysdeps/sproc/malloc-machine.h b/opal/memory/ptmalloc2/sysdeps/sproc/malloc-machine.h
new file mode 100644
index 0000000000..dcccea3193
--- /dev/null
+++ b/opal/memory/ptmalloc2/sysdeps/sproc/malloc-machine.h
@@ -0,0 +1,51 @@
+/* Basic platform-independent macro definitions for mutexes,
+   thread-specific data and parameters for malloc.
+   SGI threads (sprocs) version.
+   Copyright (C) 2004 Wolfram Gloger <wg@malloc.de>.
+
+Permission to use, copy, modify, distribute, and sell this software
+and its documentation for any purpose is hereby granted without fee,
+provided that (i) the above copyright notices and this permission
+notice appear in all copies of the software and related documentation,
+and (ii) the name of Wolfram Gloger may not be used in any advertising
+or publicity relating to the software.
+
+THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
+EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
+WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL,
+INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY
+DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY
+OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef _SPROC_MALLOC_MACHINE_H
+#define _SPROC_MALLOC_MACHINE_H
+
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/prctl.h>
+#include <abi_mutex.h>
+
+typedef abilock_t mutex_t;
+
+#define MUTEX_INITIALIZER          { 0 }
+#define mutex_init(m)              init_lock(m)
+#define mutex_lock(m)              (spin_lock(m), 0)
+#define mutex_trylock(m)           acquire_lock(m)
+#define mutex_unlock(m)            release_lock(m)
+
+typedef int tsd_key_t;
+int tsd_key_next;
+#define tsd_key_create(key, destr) ((*key) = tsd_key_next++)
+#define tsd_setspecific(key, data) (((void **)(&PRDA->usr_prda))[key] = data)
+#define tsd_getspecific(key, vptr) (vptr = ((void **)(&PRDA->usr_prda))[key])
+
+#define thread_atfork(prepare, parent, child) do {} while(0)
+
+#include <sysdeps/generic/malloc-machine.h>
+
+#endif /* !defined(_SPROC_MALLOC_MACHINE_H) */
diff --git a/opal/memory/ptmalloc2/sysdeps/sproc/thread-st.h b/opal/memory/ptmalloc2/sysdeps/sproc/thread-st.h
new file mode 100644
index 0000000000..a512f92130
--- /dev/null
+++ b/opal/memory/ptmalloc2/sysdeps/sproc/thread-st.h
@@ -0,0 +1,84 @@
+/*
+ * $Id:$
+ * sproc version
+ * by Wolfram Gloger 2001, 2004
+ */
+
+#include <stdio.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+
+#ifndef STACKSIZE
+#define STACKSIZE	32768
+#endif
+
+struct thread_st {
+	char *sp;							/* stack pointer, can be 0 */
+	void (*func)(struct thread_st* st);	/* must be set by user */
+	thread_id id;
+	int flags;
+	struct user_data u;
+};
+
+static void
+thread_init(void)
+{
+	printf("Using sproc() threads.\n");
+}
+
+static void
+thread_wrapper(void *ptr, size_t stack_len)
+{
+	struct thread_st *st = (struct thread_st*)ptr;
+
+	/*printf("begin %p\n", st->sp);*/
+	st->func(st);
+	/*printf("end %p\n", st->sp);*/
+}
+
+/* Create a thread. */
+static int
+thread_create(struct thread_st *st)
+{
+	st->flags = 0;
+	if(!st->sp)
+		st->sp = malloc(STACKSIZE);
+	if(!st->sp) return -1;
+	st->id = sprocsp(thread_wrapper, PR_SALL, st, st->sp+STACKSIZE, STACKSIZE);
+	if(st->id < 0) {
+		return -1;
+	}
+	return 0;
+}
+
+/* Wait for one of several subthreads to finish. */
+static void
+wait_for_thread(struct thread_st st[], int n_thr,
+				int (*end_thr)(struct thread_st*))
+{
+	int i;
+	int id;
+
+		int status = 0;
+		id = wait(&status);
+		if(status != 0) {
+			if(WIFSIGNALED(status))
+				printf("thread %id terminated by signal %d\n",
+					   id, WTERMSIG(status));
+			else
+				printf("thread %id exited with status %d\n",
+					   id, WEXITSTATUS(status));
+		}
+		for(i=0; i<n_thr; i++)
+			if(id == st[i].id) {
+				if(end_thr)
+					end_thr(&st[i]);
+				break;
+			}
+}
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * End:
+ */
diff --git a/opal/memory/ptmalloc2/t-test.h b/opal/memory/ptmalloc2/t-test.h
new file mode 100644
index 0000000000..a52829a41d
--- /dev/null
+++ b/opal/memory/ptmalloc2/t-test.h
@@ -0,0 +1,143 @@
+/*
+ * $Id: t-test.h,v 1.1 2004/11/04 14:32:21 wg Exp $
+ * by Wolfram Gloger 1996.
+ * Common data structures and functions for testing malloc performance.
+ */
+
+/* Testing level */
+#ifndef TEST
+#define TEST 0
+#endif
+
+/* For large allocation sizes, the time required by copying in
+   realloc() can dwarf all other execution times.  Avoid this with a
+   size threshold. */
+#ifndef REALLOC_MAX
+#define REALLOC_MAX	2000
+#endif
+
+struct bin {
+	unsigned char *ptr;
+	unsigned long size;
+};
+
+#if TEST > 0
+
+static void
+mem_init(unsigned char *ptr, unsigned long size)
+{
+	unsigned long i, j;
+
+	if(size == 0) return;
+	for(i=0; i<size; i+=2047) {
+		j = (unsigned long)ptr ^ i;
+		ptr[i] = ((j ^ (j>>8)) & 0xFF);
+	}
+	j = (unsigned long)ptr ^ (size-1);
+	ptr[size-1] = ((j ^ (j>>8)) & 0xFF);
+}
+
+static int
+mem_check(unsigned char *ptr, unsigned long size)
+{
+	unsigned long i, j;
+
+	if(size == 0) return 0;
+	for(i=0; i<size; i+=2047) {
+		j = (unsigned long)ptr ^ i;
+		if(ptr[i] != ((j ^ (j>>8)) & 0xFF)) return 1;
+	}
+	j = (unsigned long)ptr ^ (size-1);
+	if(ptr[size-1] != ((j ^ (j>>8)) & 0xFF)) return 2;
+	return 0;
+}
+
+static int
+zero_check(unsigned* ptr, unsigned long size)
+{
+	unsigned char* ptr2;
+
+	while(size >= sizeof(*ptr)) {
+		if(*ptr++ != 0)
+			return -1;
+		size -= sizeof(*ptr);
+	}
+	ptr2 = (unsigned char*)ptr;
+	while(size > 0) {
+		if(*ptr2++ != 0)
+			return -1;
+		--size;
+	}
+	return 0;
+}
+
+#endif /* TEST > 0 */
+
+/* Allocate a bin with malloc(), realloc() or memalign().  r must be a
+   random number >= 1024. */
+
+static void
+bin_alloc(struct bin *m, unsigned long size, int r)
+{
+#if TEST > 0
+	if(mem_check(m->ptr, m->size)) {
+		printf("memory corrupt!\n");
+		exit(1);
+	}
+#endif
+	r %= 1024;
+	/*printf("%d ", r);*/
+	if(r < 4) { /* memalign */
+		if(m->size > 0) free(m->ptr);
+		m->ptr = (unsigned char *)memalign(sizeof(int) << r, size);
+	} else if(r < 20) { /* calloc */
+		if(m->size > 0) free(m->ptr);
+		m->ptr = (unsigned char *)calloc(size, 1);
+#if TEST > 0
+		if(zero_check((unsigned*)m->ptr, size)) {
+			long i;
+			for(i=0; i<size; i++)
+				if(m->ptr[i] != 0)
+					break;
+			printf("calloc'ed memory non-zero (ptr=%p, i=%ld)!\n", m->ptr, i);
+			exit(1);
+		}
+#endif
+	} else if(r < 100 && m->size < REALLOC_MAX) { /* realloc */
+		if(m->size == 0) m->ptr = NULL;
+		m->ptr = realloc(m->ptr, size);
+	} else { /* plain malloc */
+		if(m->size > 0) free(m->ptr);
+		m->ptr = (unsigned char *)malloc(size);
+	}
+	if(!m->ptr) {
+		printf("out of memory (r=%d, size=%ld)!\n", r, (long)size);
+		exit(1);
+	}
+	m->size = size;
+#if TEST > 0
+	mem_init(m->ptr, m->size);
+#endif
+}
+
+/* Free a bin. */
+
+static void
+bin_free(struct bin *m)
+{
+	if(m->size == 0) return;
+#if TEST > 0
+	if(mem_check(m->ptr, m->size)) {
+		printf("memory corrupt!\n");
+		exit(1);
+	}
+#endif
+	free(m->ptr);
+	m->size = 0;
+}
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * End:
+ */
diff --git a/opal/memory/ptmalloc2/t-test1.c b/opal/memory/ptmalloc2/t-test1.c
new file mode 100644
index 0000000000..15dc7c6aae
--- /dev/null
+++ b/opal/memory/ptmalloc2/t-test1.c
@@ -0,0 +1,285 @@
+/*
+ * $Id: t-test1.c,v 1.2 2004/11/04 14:58:45 wg Exp $
+ * by Wolfram Gloger 1996-1999, 2001, 2004
+ * A multi-thread test for malloc performance, maintaining one pool of
+ * allocated bins per thread.
+ */
+
+#if (defined __STDC__ && __STDC__) || defined __cplusplus
+# include <stdlib.h>
+#endif
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#if !USE_MALLOC
+#include <malloc.h>
+#else
+#include "malloc.h"
+#endif
+
+#include "lran2.h"
+#include "t-test.h"
+
+struct user_data {
+	int bins, max;
+	unsigned long size;
+	long seed;
+};
+#include "thread-st.h"
+
+#define N_TOTAL		10
+#ifndef N_THREADS
+#define N_THREADS	2
+#endif
+#ifndef N_TOTAL_PRINT
+#define N_TOTAL_PRINT 50
+#endif
+#ifndef MEMORY
+#define MEMORY		8000000l
+#endif
+#define SIZE		10000
+#define I_MAX		10000
+#define ACTIONS_MAX	30
+#ifndef TEST_FORK
+#define TEST_FORK 0
+#endif
+
+#define RANDOM(d,s)	(lran2(d) % (s))
+
+struct bin_info {
+	struct bin *m;
+	unsigned long size, bins;
+};
+
+#if TEST > 0
+
+void
+bin_test(struct bin_info *p)
+{
+	int b;
+
+	for(b=0; b<p->bins; b++) {
+		if(mem_check(p->m[b].ptr, p->m[b].size)) {
+			printf("memory corrupt!\n");
+			abort();
+		}
+	}
+}
+
+#endif
+
+void
+malloc_test(struct thread_st *st)
+{
+	int b, i, j, actions, pid = 1;
+	struct bin_info p;
+	struct lran2_st ld; /* data for random number generator */
+
+	lran2_init(&ld, st->u.seed);
+#if TEST_FORK>0
+	if(RANDOM(&ld, TEST_FORK) == 0) {
+		int status;
+
+#if !USE_THR
+		pid = fork();
+#else
+		pid = fork1();
+#endif
+		if(pid > 0) {
+		    /*printf("forked, waiting for %d...\n", pid);*/
+			waitpid(pid, &status, 0);
+			printf("done with %d...\n", pid);
+			if(!WIFEXITED(status)) {
+				printf("child term with signal %d\n", WTERMSIG(status));
+				exit(1);
+			}
+			return;
+		}
+		exit(0);
+	}
+#endif
+	p.m = (struct bin *)malloc(st->u.bins*sizeof(*p.m));
+	p.bins = st->u.bins;
+	p.size = st->u.size;
+	for(b=0; b<p.bins; b++) {
+		p.m[b].size = 0;
+		p.m[b].ptr = NULL;
+		if(RANDOM(&ld, 2) == 0)
+			bin_alloc(&p.m[b], RANDOM(&ld, p.size) + 1, lran2(&ld));
+	}
+	for(i=0; i<=st->u.max;) {
+#if TEST > 1
+		bin_test(&p);
+#endif
+		actions = RANDOM(&ld, ACTIONS_MAX);
+#if USE_MALLOC && MALLOC_DEBUG
+		if(actions < 2) { mallinfo(); }
+#endif
+		for(j=0; j<actions; j++) {
+			b = RANDOM(&ld, p.bins);
+			bin_free(&p.m[b]);
+		}
+		i += actions;
+		actions = RANDOM(&ld, ACTIONS_MAX);
+		for(j=0; j<actions; j++) {
+			b = RANDOM(&ld, p.bins);
+			bin_alloc(&p.m[b], RANDOM(&ld, p.size) + 1, lran2(&ld));
+#if TEST > 2
+			bin_test(&p);
+#endif
+		}
+#if 0 /* Test illegal free()s while setting MALLOC_CHECK_ */
+		for(j=0; j<8; j++) {
+			b = RANDOM(&ld, p.bins);
+			if(p.m[b].ptr) {
+			  int offset = (RANDOM(&ld, 11) - 5)*8;
+			  char *rogue = (char*)(p.m[b].ptr) + offset;
+			  /*printf("p=%p rogue=%p\n", p.m[b].ptr, rogue);*/
+			  free(rogue);
+			}
+		}
+#endif
+		i += actions;
+	}
+	for(b=0; b<p.bins; b++)
+		bin_free(&p.m[b]);
+	free(p.m);
+	if(pid == 0)
+		exit(0);
+}
+
+int n_total=0, n_total_max=N_TOTAL, n_running;
+
+int
+my_end_thread(struct thread_st *st)
+{
+	/* Thread st has finished.  Start a new one. */
+#if 0
+	printf("Thread %lx terminated.\n", (long)st->id);
+#endif
+	if(n_total >= n_total_max) {
+		n_running--;
+	} else if(st->u.seed++, thread_create(st)) {
+		printf("Creating thread #%d failed.\n", n_total);
+	} else {
+		n_total++;
+		if(n_total%N_TOTAL_PRINT == 0)
+			printf("n_total = %d\n", n_total);
+	}
+	return 0;
+}
+
+#if 0
+/* Protect address space for allocation of n threads by LinuxThreads.  */
+static void
+protect_stack(int n)
+{
+	char buf[2048*1024];
+	char* guard;
+	size_t guard_size = 2*2048*1024UL*(n+2);
+
+	buf[0] = '\0';
+	guard = (char*)(((unsigned long)buf - 4096)& ~4095UL) - guard_size;
+	printf("Setting up stack guard at %p\n", guard);
+	if(mmap(guard, guard_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED,
+			-1, 0)
+	   != guard)
+		printf("failed!\n");
+}
+#endif
+
+int
+main(int argc, char *argv[])
+{
+	int i, bins;
+	int n_thr=N_THREADS;
+	int i_max=I_MAX;
+	unsigned long size=SIZE;
+	struct thread_st *st;
+
+#if USE_MALLOC && USE_STARTER==2
+	ptmalloc_init();
+	printf("ptmalloc_init\n");
+#endif
+
+	if(argc > 1) n_total_max = atoi(argv[1]);
+	if(n_total_max < 1) n_thr = 1;
+	if(argc > 2) n_thr = atoi(argv[2]);
+	if(n_thr < 1) n_thr = 1;
+	if(n_thr > 100) n_thr = 100;
+	if(argc > 3) i_max = atoi(argv[3]);
+
+	if(argc > 4) size = atol(argv[4]);
+	if(size < 2) size = 2;
+
+	bins = MEMORY/(size*n_thr);
+	if(argc > 5) bins = atoi(argv[5]);
+	if(bins < 4) bins = 4;
+
+	/*protect_stack(n_thr);*/
+
+	thread_init();
+	printf("total=%d threads=%d i_max=%d size=%ld bins=%d\n",
+		   n_total_max, n_thr, i_max, size, bins);
+
+	st = (struct thread_st *)malloc(n_thr*sizeof(*st));
+	if(!st) exit(-1);
+
+#if !defined NO_THREADS && (defined __sun__ || defined sun)
+	/* I know of no other way to achieve proper concurrency with Solaris. */
+	thr_setconcurrency(n_thr);
+#endif
+
+	/* Start all n_thr threads. */
+	for(i=0; i<n_thr; i++) {
+		st[i].u.bins = bins;
+		st[i].u.max = i_max;
+		st[i].u.size = size;
+		st[i].u.seed = ((long)i_max*size + i) ^ bins;
+		st[i].sp = 0;
+		st[i].func = malloc_test;
+		if(thread_create(&st[i])) {
+			printf("Creating thread #%d failed.\n", i);
+			n_thr = i;
+			break;
+		}
+		printf("Created thread %lx.\n", (long)st[i].id);
+	}
+
+	/* Start an extra thread so we don't run out of stacks. */
+	if(0) {
+		struct thread_st lst;
+		lst.u.bins = 10; lst.u.max = 20; lst.u.size = 8000; lst.u.seed = 8999;
+		lst.sp = 0;
+		lst.func = malloc_test;
+		if(thread_create(&lst)) {
+			printf("Creating thread #%d failed.\n", i);
+		} else {
+			wait_for_thread(&lst, 1, NULL);
+		}
+	}
+
+	for(n_running=n_total=n_thr; n_running>0;) {
+		wait_for_thread(st, n_thr, my_end_thread);
+	}
+	for(i=0; i<n_thr; i++) {
+		free(st[i].sp);
+	}
+	free(st);
+#if USE_MALLOC
+	malloc_stats();
+#endif
+	printf("Done.\n");
+	return 0;
+}
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * End:
+ */
diff --git a/opal/memory/ptmalloc2/t-test2.c b/opal/memory/ptmalloc2/t-test2.c
new file mode 100644
index 0000000000..9620563b3e
--- /dev/null
+++ b/opal/memory/ptmalloc2/t-test2.c
@@ -0,0 +1,231 @@
+/*
+ * $Id: t-test2.c,v 1.3 2004/11/04 15:01:05 wg Exp $
+ * by Wolfram Gloger 1996-1999, 2001, 2004
+ * A multi-thread test for malloc performance, maintaining a single
+ * global pool of allocated bins.
+ */
+
+#if (defined __STDC__ && __STDC__) || defined __cplusplus
+# include <stdlib.h>
+#endif
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#if !USE_MALLOC
+#include <malloc.h>
+#else
+#include "malloc.h"
+#endif
+
+#include "lran2.h"
+#include "t-test.h"
+
+struct user_data {
+	int max;
+	unsigned long size;
+	long seed;
+};
+#include "thread-st.h"
+#include "malloc-machine.h" /* for mutex */
+
+#define N_TOTAL		10
+#ifndef N_THREADS
+#define N_THREADS	2
+#endif
+#ifndef N_TOTAL_PRINT
+#define N_TOTAL_PRINT 50
+#endif
+#define STACKSIZE	32768
+#ifndef MEMORY
+#define MEMORY		8000000l
+#endif
+#define SIZE		10000
+#define I_MAX		10000
+#define BINS_PER_BLOCK 20
+
+#define RANDOM(d,s)	(lran2(d) % (s))
+
+struct block {
+	struct bin b[BINS_PER_BLOCK];
+	mutex_t mutex;
+} *blocks;
+
+int n_blocks;
+
+#if TEST > 0
+
+void
+bin_test(void)
+{
+	int b, i;
+
+	for(b=0; b<n_blocks; b++) {
+		mutex_lock(&blocks[b].mutex);
+		for(i=0; i<BINS_PER_BLOCK; i++) {
+			if(mem_check(blocks[b].b[i].ptr, blocks[b].b[i].size)) {
+				printf("memory corrupt!\n");
+				exit(1);
+			}
+		}
+		mutex_unlock(&blocks[b].mutex);
+	}
+}
+
+#endif
+
+void
+malloc_test(struct thread_st *st)
+{
+	struct block *bl;
+	int i, b, r;
+	struct lran2_st ld; /* data for random number generator */
+	unsigned long rsize[BINS_PER_BLOCK];
+	int rnum[BINS_PER_BLOCK];
+
+	lran2_init(&ld, st->u.seed);
+	for(i=0; i<=st->u.max;) {
+#if TEST > 1
+		bin_test();
+#endif
+		bl = &blocks[RANDOM(&ld, n_blocks)];
+		r = RANDOM(&ld, 1024);
+		if(r < 200) { /* free only */
+			mutex_lock(&bl->mutex);
+			for(b=0; b<BINS_PER_BLOCK; b++)
+				bin_free(&bl->b[b]);
+			mutex_unlock(&bl->mutex);
+			i += BINS_PER_BLOCK;
+		} else { /* alloc/realloc */
+			/* Generate random numbers in advance. */
+			for(b=0; b<BINS_PER_BLOCK; b++) {
+				rsize[b] = RANDOM(&ld, st->u.size) + 1;
+				rnum[b] = lran2(&ld);
+			}
+			mutex_lock(&bl->mutex);
+			for(b=0; b<BINS_PER_BLOCK; b++)
+				bin_alloc(&bl->b[b], rsize[b], rnum[b]);
+			mutex_unlock(&bl->mutex);
+			i += BINS_PER_BLOCK;
+		}
+#if TEST > 2
+		bin_test();
+#endif
+	}
+}
+
+int n_total=0, n_total_max=N_TOTAL, n_running;
+
+int
+my_end_thread(struct thread_st *st)
+{
+	/* Thread st has finished.  Start a new one. */
+#if 0
+	printf("Thread %lx terminated.\n", (long)st->id);
+#endif
+	if(n_total >= n_total_max) {
+		n_running--;
+	} else if(st->u.seed++, thread_create(st)) {
+		printf("Creating thread #%d failed.\n", n_total);
+	} else {
+		n_total++;
+		if(n_total%N_TOTAL_PRINT == 0)
+			printf("n_total = %d\n", n_total);
+	}
+	return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+	int i, j, bins;
+	int n_thr=N_THREADS;
+	int i_max=I_MAX;
+	unsigned long size=SIZE;
+	struct thread_st *st;
+
+#if USE_MALLOC && USE_STARTER==2
+	ptmalloc_init();
+	printf("ptmalloc_init\n");
+#endif
+
+	if(argc > 1) n_total_max = atoi(argv[1]);
+	if(n_total_max < 1) n_thr = 1;
+	if(argc > 2) n_thr = atoi(argv[2]);
+	if(n_thr < 1) n_thr = 1;
+	if(n_thr > 100) n_thr = 100;
+	if(argc > 3) i_max = atoi(argv[3]);
+
+	if(argc > 4) size = atol(argv[4]);
+	if(size < 2) size = 2;
+
+	bins = MEMORY/size;
+	if(argc > 5) bins = atoi(argv[5]);
+	if(bins < BINS_PER_BLOCK) bins = BINS_PER_BLOCK;
+
+	n_blocks = bins/BINS_PER_BLOCK;
+	blocks = (struct block *)malloc(n_blocks*sizeof(*blocks));
+	if(!blocks)
+		exit(1);
+
+	thread_init();
+	printf("total=%d threads=%d i_max=%d size=%ld bins=%d\n",
+		   n_total_max, n_thr, i_max, size, n_blocks*BINS_PER_BLOCK);
+
+	for(i=0; i<n_blocks; i++) {
+		mutex_init(&blocks[i].mutex);
+		for(j=0; j<BINS_PER_BLOCK; j++) blocks[i].b[j].size = 0;
+	}
+
+	st = (struct thread_st *)malloc(n_thr*sizeof(*st));
+	if(!st) exit(-1);
+
+#if !defined NO_THREADS && (defined __sun__ || defined sun)
+	/* I know of no other way to achieve proper concurrency with Solaris. */
+	thr_setconcurrency(n_thr);
+#endif
+
+	/* Start all n_thr threads. */
+	for(i=0; i<n_thr; i++) {
+		st[i].u.max = i_max;
+		st[i].u.size = size;
+		st[i].u.seed = ((long)i_max*size + i) ^ n_blocks;
+		st[i].sp = 0;
+		st[i].func = malloc_test;
+		if(thread_create(&st[i])) {
+			printf("Creating thread #%d failed.\n", i);
+			n_thr = i;
+			break;
+		}
+		printf("Created thread %lx.\n", (long)st[i].id);
+	}
+
+	for(n_running=n_total=n_thr; n_running>0;) {
+		wait_for_thread(st, n_thr, my_end_thread);
+	}
+
+	for(i=0; i<n_blocks; i++) {
+		for(j=0; j<BINS_PER_BLOCK; j++)
+			bin_free(&blocks[i].b[j]);
+	}
+
+	for(i=0; i<n_thr; i++) {
+		free(st[i].sp);
+	}
+	free(st);
+	free(blocks);
+#if USE_MALLOC
+	malloc_stats();
+#endif
+	printf("Done.\n");
+	return 0;
+}
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * End:
+ */
diff --git a/opal/memory/ptmalloc2/tst-mallocstate.c b/opal/memory/ptmalloc2/tst-mallocstate.c
new file mode 100644
index 0000000000..97a10586d3
--- /dev/null
+++ b/opal/memory/ptmalloc2/tst-mallocstate.c
@@ -0,0 +1,82 @@
+/* Copyright (C) 2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>, 2001.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <errno.h>
+#include <stdio.h>
+#include "malloc.h"
+
+static int errors = 0;
+
+static void
+merror (const char *msg)
+{
+  ++errors;
+  printf ("Error: %s\n", msg);
+}
+
+int
+main (void)
+{
+  void *p1, *p2;
+  void *save_state;
+  long i;
+
+  errno = 0;
+
+  p1 = malloc (10);
+  if (p1 == NULL)
+    merror ("malloc (10) failed.");
+
+  p2 = malloc (20);
+  if (p2 == NULL)
+    merror ("malloc (20) failed.");
+
+  free (malloc (10));
+
+  for (i=0; i<100; ++i)
+    {
+      save_state = malloc_get_state ();
+      if (save_state == NULL)
+	{
+	  merror ("malloc_get_state () failed.");
+	  break;
+	}
+      /*free (malloc (10)); This could change the top chunk! */
+      malloc_set_state (save_state);
+      p1 = realloc (p1, i*4 + 4);
+      if (p1 == NULL)
+	merror ("realloc (i*4) failed.");
+      free (save_state);
+    }
+
+  p1 = realloc (p1, 40);
+  free (p2);
+  p2 = malloc (10);
+  if (p2 == NULL)
+    merror ("malloc (10) failed.");
+  free (p1);
+
+  return errors != 0;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */
diff --git a/opal/memory/ptmalloc2/tst-mstats.c b/opal/memory/ptmalloc2/tst-mstats.c
new file mode 100644
index 0000000000..5d84312841
--- /dev/null
+++ b/opal/memory/ptmalloc2/tst-mstats.c
@@ -0,0 +1,100 @@
+/* Copyright (C) 2004 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Wolfram Gloger <wg@malloc.de>, 2004.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <errno.h>
+#include <stdio.h>
+#include "malloc.h"
+
+static int errors = 0;
+
+static void
+merror (const char *msg)
+{
+  ++errors;
+  printf ("Error: %s\n", msg);
+}
+
+int
+main (void)
+{
+  void *p1, *p2;
+  long i;
+  mstate a;
+  struct malloc_arena_info mai;
+  int nfree;
+  unsigned long navail;
+
+  errno = 0;
+
+  malloc_stats(); /* check that it works even without initialization */
+  a = _int_get_arena(0);
+  if (!a) {
+    merror ("Can't get main arena.");
+    return 1;
+  }
+  free (malloc (10));
+  _int_get_arena_info(a, &mai);
+  printf("nfree     = %d\navail     = %lu\nfastavail = %lu\ntop_size  = %lu\n",
+	 mai.nbinblocks + mai.nfastblocks,
+	 (unsigned long)mai.binavail,
+	 (unsigned long)mai.fastavail,
+	 (unsigned long)mai.top_size);
+  if (mai.nfastblocks+mai.nbinblocks < 1)
+    merror ("initial _int_get_arena_info() failed.");
+  nfree = mai.nbinblocks + mai.nfastblocks;
+  navail = mai.binavail + mai.fastavail;
+
+  p1 = malloc (10);
+  if (p1 == NULL)
+    merror ("malloc (10) failed.");
+  p2 = malloc (30);
+  if (p2 == NULL)
+    merror ("malloc (30) failed.");
+
+  free (malloc (10));
+
+  for (i=0; i<100; ++i)
+    {
+      p1 = realloc (p1, i*7 + 3);
+      if (p1 == NULL)
+	merror ("realloc (i*7 + 3) failed.");
+    }
+  free (p2);
+
+  _int_get_arena_info(a, &mai);
+  printf("nfree     = %d\navail     = %lu\nfastavail = %lu\ntop_size  = %lu\n",
+	 mai.nbinblocks + mai.nfastblocks,
+	 (unsigned long)mai.binavail,
+	 (unsigned long)mai.fastavail,
+	 (unsigned long)mai.top_size);
+  /* Assume that no memory is returned to the system from these small
+     chunks.  */
+  if (mai.nbinblocks+mai.nfastblocks < nfree ||
+      mai.binavail+mai.fastavail < navail)
+    merror ("final _int_get_arena_info() failed.");
+  malloc_stats();
+
+  return errors != 0;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * End:
+ */