Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision f69fac7faef547c274fcfbc767d8c423a7f3a207)
+++ libcfa/src/Makefile.am	(revision a182ad5ee947c0c1f30075a88713688a0acd0307)
@@ -72,4 +72,5 @@
 	common.hfa \
 	fstream.hfa \
+	heap.hfa \
 	iostream.hfa \
 	iterator.hfa \
@@ -101,7 +102,5 @@
 	startup.hfa \
 	virtual.c \
-	virtual.h \
-	heap.cc \
-	heap.h
+	virtual.h
 
 # not all platforms support concurrency, add option do disable it
@@ -173,5 +172,4 @@
 
 -include $(libdeps)
--include $(DEPDIR)/heap.Plo
 
 thread_libdeps = $(join \
Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision f69fac7faef547c274fcfbc767d8c423a7f3a207)
+++ libcfa/src/concurrency/coroutine.cfa	(revision a182ad5ee947c0c1f30075a88713688a0acd0307)
@@ -85,9 +85,6 @@
 // minimum feasible stack size in bytes
 static const size_t MinStackSize = 1000;
-
-extern "C" {
-	extern size_t __cfa_page_size;				// architecture pagesize HACK, should go in proper runtime singleton
-	extern int __map_prot;
-}
+extern size_t __page_size;				// architecture pagesize HACK, should go in proper runtime singleton
+extern int __map_prot;
 
 void __stack_prepare( __stack_info_t * this, size_t create_size );
@@ -160,22 +157,22 @@
 [void *, size_t] __stack_alloc( size_t storageSize ) {
 	const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
-	assert(__cfa_page_size != 0l);
+	assert(__page_size != 0l);
 	size_t size = libCeiling( storageSize, 16 ) + stack_data_size;
-	size = ceiling(size, __cfa_page_size);
+	size = ceiling(size, __page_size);
 
 	// If we are running debug, we also need to allocate a guardpage to catch stack overflows.
 	void * storage;
 	#if CFA_COROUTINE_USE_MMAP
-		storage = mmap(0p, size + __cfa_page_size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+		storage = mmap(0p, size + __page_size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
 		if(storage == ((void*)-1)) {
 			abort( "coroutine stack creation : internal error, mmap failure, error(%d) %s.", errno, strerror( errno ) );
 		}
-		if ( mprotect( storage, __cfa_page_size, PROT_NONE ) == -1 ) {
+		if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
 			abort( "coroutine stack creation : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
 		} // if
-		storage = (void *)(((intptr_t)storage) + __cfa_page_size);
+		storage = (void *)(((intptr_t)storage) + __page_size);
 	#else
 		__cfaabi_dbg_debug_do(
-			storage = memalign( __cfa_page_size, size + __cfa_page_size );
+			storage = memalign( __page_size, size + __page_size );
 		);
 		__cfaabi_dbg_no_debug_do(
@@ -184,8 +181,8 @@
 
 		__cfaabi_dbg_debug_do(
-			if ( mprotect( storage, __cfa_page_size, PROT_NONE ) == -1 ) {
+			if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
 				abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
 			}
-			storage = (void *)(((intptr_t)storage) + __cfa_page_size);
+			storage = (void *)(((intptr_t)storage) + __page_size);
 		);
 	#endif
@@ -201,12 +198,12 @@
 	#if CFA_COROUTINE_USE_MMAP
 		size_t size = ((intptr_t)this->storage->base) - ((intptr_t)this->storage->limit) + sizeof(__stack_t);
-		storage = (void *)(((intptr_t)storage) - __cfa_page_size);
-		if(munmap(storage, size + __cfa_page_size) == -1) {
+		storage = (void *)(((intptr_t)storage) - __page_size);
+		if(munmap(storage, size + __page_size) == -1) {
 			abort( "coroutine stack destruction : internal error, munmap failure, error(%d) %s.", errno, strerror( errno ) );
 		}
 	#else
 		__cfaabi_dbg_debug_do(
-			storage = (char*)(storage) - __cfa_page_size;
-			if ( mprotect( storage, __cfa_page_size, __map_prot ) == -1 ) {
+			storage = (char*)(storage) - __page_size;
+			if ( mprotect( storage, __page_size, __map_prot ) == -1 ) {
 				abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
 			}
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision f69fac7faef547c274fcfbc767d8c423a7f3a207)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision a182ad5ee947c0c1f30075a88713688a0acd0307)
@@ -122,7 +122,8 @@
 extern "C" {
 	struct { __dllist_t(cluster) list; __spinlock_t lock; } __cfa_dbg_global_clusters;
-	extern size_t __cfa_page_size;
-	extern int __map_prot;
-}
+}
+
+extern size_t __page_size;
+extern int __map_prot;
 
 //-----------------------------------------------------------------------------
@@ -573,4 +574,5 @@
 }
 
+extern size_t __page_size;
 void ^?{}(processor & this) with( this ){
 	/* paranoid */ verify( !__atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) );
@@ -738,17 +740,17 @@
 	void * stack;
 	#if CFA_PROCESSOR_USE_MMAP
-		stacksize = ceiling( stacksize, __cfa_page_size ) + __cfa_page_size;
+		stacksize = ceiling( stacksize, __page_size ) + __page_size;
 		stack = mmap(0p, stacksize, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
 		if(stack == ((void*)-1)) {
 			abort( "pthread stack creation : internal error, mmap failure, error(%d) %s.", errno, strerror( errno ) );
 		}
-		if ( mprotect( stack, __cfa_page_size, PROT_NONE ) == -1 ) {
+		if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
 			abort( "pthread stack creation : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
 		} // if
 	#else
 		__cfaabi_dbg_debug_do(
-			stack = memalign( __cfa_page_size, stacksize + __cfa_page_size );
+			stack = memalign( __page_size, stacksize + __page_size );
 			// pthread has no mechanism to create the guard page in user supplied stack.
-			if ( mprotect( stack, __cfa_page_size, PROT_NONE ) == -1 ) {
+			if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
 				abort( "mprotect : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
 			} // if
@@ -777,5 +779,5 @@
 		check( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
 		assert( stacksize >= PTHREAD_STACK_MIN );
-		stacksize += __cfa_page_size;
+		stacksize += __page_size;
 
 		if(munmap(stack, stacksize) == -1) {
@@ -785,5 +787,5 @@
 		__cfaabi_dbg_debug_do(
 			// pthread has no mechanism to create the guard page in user supplied stack.
-			if ( mprotect( stack, __cfa_page_size, __map_prot ) == -1 ) {
+			if ( mprotect( stack, __page_size, __map_prot ) == -1 ) {
 				abort( "mprotect : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
 			} // if
Index: libcfa/src/heap.cc
===================================================================
--- libcfa/src/heap.cc	(revision f69fac7faef547c274fcfbc767d8c423a7f3a207)
+++ 	(revision )
@@ -1,1717 +1,0 @@
-#include "heap.h"
-
-#include <algorithm>									// lower_bound, min
-#include <cstring>										// strlen, memset, memcpy
-#include <climits>										// ULONG_MAX
-#include <cstdarg>										// va_start, va_end
-#include <cerrno>										// errno, ENOMEM, EINVAL
-#include <cassert>
-#include <unistd.h>										// STDERR_FILENO, sbrk, sysconf, write
-#include <sys/mman.h>									// mmap, munmap
-#include <cstdint>										// uintptr_t, uint64_t, uint32_t
-
-#define TLS
-#define AWAYSPIN										// toggle spinlock / lockfree stack
-#define FASTLOOKUP										// use O(1) table lookup from allocation size to bucket size
-#define CFA_THREADSAFE_HEAP
-
-#ifdef __CFA_DEBUG__
-#define __DEBUG__
-#endif // __CFA_DEBUG__
-
-#define LIKELY(x) __builtin_expect(!!(x), 1)
-#define UNLIKELY(x) __builtin_expect(!!(x), 0)
-
-#define str(s) #s
-#define xstr(s) str(s)
-#define WARNING( s ) xstr( GCC diagnostic ignored str( -W ## s ) )
-#define NOWARNING( statement, warning ) \
-	_Pragma( "GCC diagnostic push" ) \
-	_Pragma( WARNING( warning ) ) \
-	statement ;	\
-	_Pragma ( "GCC diagnostic pop" )
-
-enum { __ALIGN__ = 16,									// minimum allocation alignment, bytes
-	   __DEFAULT_HEAP_EXPANSION__ = 2 * 1024 * 1024,	// sbrk extension amount when full
-	   __DEFAULT_MMAP_START__ = 512 * 1024 + 1,			// crossover allocation size from sbrk to mmap
-	   __DEFAULT_HEAP_UNFREED__ = 0,					// amount subtracted to adjust for unfreed program storage
-}; // enum
-
-#ifdef TLS
-#define TLSMODEL __attribute__(( tls_model("initial-exec") ))
-#else
-#define TLSMODEL
-#endif // TLS
-
-#ifdef CFA_THREADSAFE_HEAP
-extern "C" {
-	void enable_interrupts();
-	void disable_interrupts();
-}
-#define ENABLE_INTERRUPTS (void)enable_interrupts()
-#define DISABLE_INTERRUPTS (void)disable_interrupts()
-#else
-#define ENABLE_INTERRUPTS
-#define DISABLE_INTERRUPTS
-#endif // CFA_THREADSAFE_HEAP
-
-//######################### Helpers #########################
-
-
-// Called by macro assert in assert.h. Replace to prevent recursive call to malloc.
-/*
-void __assert_fail( const char assertion[], const char file[], unsigned int line, const char function[] ) {
-	extern const char * __progname;						// global name of running executable (argv[0])
-	char helpText[1024];
-	int len = snprintf( helpText, sizeof(helpText), "Internal assertion error \"%s\" from program \"%s\" in \"%s\" at line %d in file \"%s.\n",
-						assertion, __progname, function, line, file );
-	NOWARNING( write( STDERR_FILENO, helpText, len ), unused-result );
-	abort();
-	// CONTROL NEVER REACHES HERE!
-} // __assert_fail
-FIXME */
-
-void abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
-void abort( const char fmt[], ... ) {					// overload real abort
-	va_list args;
-	va_start( args, fmt );
-	vfprintf( stderr, fmt, args );
-	if ( fmt[strlen( fmt ) - 1] != '\n' ) {				// add optional newline if missing at the end of the format text
-		vfprintf( stderr, "\n", args );					// g++-10 does not allow nullptr for va_list
-	} // if
-	va_end( args );
-	abort();											// call the real abort
-	// CONTROL NEVER REACHES HERE!
-} // abort
-
-static inline bool Pow2( unsigned long int value ) {
-	// clears all bits below value, rounding value down to the next lower multiple of value
-	return (value & (value - 1)) == 0;
-} // Pow2
-
-static inline unsigned long int Floor( unsigned long int value, unsigned long int align ) {
-	assert( Pow2( align ) );
-	// clears all bits above or equal to align, getting (value % align), the phase of value with regards to align
-	return value & -align;
-} // Floor
-
-static inline unsigned long int Ceiling( unsigned long int value, unsigned long int align ) {
-	assert( Pow2( align ) );
-	// "negate, round down, negate" is the same as round up
-	return -Floor( -value, align );
-} // Ceiling
-
-template< typename T > static inline T AtomicFetchAdd( volatile T & counter, int increment ) {
-	return __atomic_fetch_add( &counter, increment, __ATOMIC_SEQ_CST );
-} // AtomicFetchAdd
-
-
-//######################### Spin Lock #########################
-
-
-#define CACHE_ALIGN 128									// Intel recommendation
-#define CALIGN __attribute__(( aligned(CACHE_ALIGN) ))
-
-// pause to prevent excess processor bus usage
-#if defined( __i386 ) || defined( __x86_64 )
-	#define Pause() __asm__ __volatile__ ( "pause" : : : )
-#elif defined(__ARM_ARCH)
-	#define Pause() __asm__ __volatile__ ( "YIELD" : : : )
-#else
-	#error unsupported architecture
-#endif
-
-typedef volatile uintptr_t SpinLock_t CALIGN;			// aligned addressable word-size
-
-void spin_acquire( volatile SpinLock_t * lock ) {
-	enum { SPIN_START = 4, SPIN_END = 64 * 1024, };
-	unsigned int spin = SPIN_START;
-
-	for ( unsigned int i = 1;; i += 1 ) {
-	  if ( *lock == 0 && __atomic_test_and_set( lock, __ATOMIC_SEQ_CST ) == 0 ) break; // Fence
-		for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause(); // exponential spin
-		spin += spin;									// powers of 2
-		//if ( i % 64 == 0 ) spin += spin;				// slowly increase by powers of 2
-		if ( spin > SPIN_END ) spin = SPIN_END;			// cap spinning
-	} // for
-} // spin_lock
-
-void spin_release( volatile SpinLock_t * lock ) {
-	__atomic_clear( lock, __ATOMIC_SEQ_CST );			// Fence
-} // spin_unlock
-
-
-//####################### Heap Statistics ####################
-
-
-#ifdef __STATISTICS__
-enum { CntTriples = 12 };								// number of counter triples
-struct HeapStatistics {
-	enum { MALLOC, AALLOC, CALLOC, MEMALIGN, AMEMALIGN, CMEMALIGN, RESIZE, REALLOC };
-	union {
-		struct {
-			unsigned int malloc_calls, malloc_0_calls;
-			unsigned long long int malloc_storage_request, malloc_storage_alloc;
-			unsigned int aalloc_calls, aalloc_0_calls;
-			unsigned long long int aalloc_storage_request, aalloc_storage_alloc;
-			unsigned int calloc_calls, calloc_0_calls;
-			unsigned long long int calloc_storage_request, calloc_storage_alloc;
-			unsigned int memalign_calls, memalign_0_calls;
-			unsigned long long int memalign_storage_request, memalign_storage_alloc;
-			unsigned int amemalign_calls, amemalign_0_calls;
-			unsigned long long int amemalign_storage_request, amemalign_storage_alloc;
-			unsigned int cmemalign_calls, cmemalign_0_calls;
-			unsigned long long int cmemalign_storage_request, cmemalign_storage_alloc;
-			unsigned int resize_calls, resize_0_calls;
-			unsigned long long int resize_storage_request, resize_storage_alloc;
-			unsigned int realloc_calls, realloc_0_calls;
-			unsigned long long int realloc_storage_request, realloc_storage_alloc;
-			unsigned int free_calls, free_null_calls;
-			unsigned long long int free_storage_request, free_storage_alloc;
-			unsigned int away_pulls, away_pushes;
-			unsigned long long int away_storage_request, away_storage_alloc;
-			unsigned int mmap_calls, mmap_0_calls;		// no zero calls
-			unsigned long long int mmap_storage_request, mmap_storage_alloc;
-			unsigned int munmap_calls, munmap_0_calls;	// no zero calls
-			unsigned long long int munmap_storage_request, munmap_storage_alloc;
-		};
-		struct {										// overlay for iteration
-			unsigned int cnt1, cnt2;
-			unsigned long long int cnt3, cnt4;
-		} counters[CntTriples];
-	};
-
-	HeapStatistics() {
-		for ( unsigned int i = 0; i < CntTriples; i += 1 ) {
-			counters[i].cnt1 = counters[i].cnt2 = counters[i].cnt3 = counters[i].cnt4 = 0;
-		} // for
-	} // HeapStatistics::HeapStatistics
-
-	friend HeapStatistics & operator+=( HeapStatistics & lhs, const HeapStatistics & rhs ) {
-		for ( unsigned int i = 0; i < CntTriples; i += 1 ) {
-			lhs.counters[i].cnt1 += rhs.counters[i].cnt1;
-			lhs.counters[i].cnt2 += rhs.counters[i].cnt2;
-			lhs.counters[i].cnt3 += rhs.counters[i].cnt3;
-			lhs.counters[i].cnt4 += rhs.counters[i].cnt4;
-		} // for
-		return lhs;
-	} // HeapStatistics::operator+=
-}; // HeapStatistics
-
-static_assert( sizeof(HeapStatistics) == CntTriples * sizeof(HeapStatistics::counters[0] ),
-			   "Heap statistics counter-triplets does not match with array size" );
-#endif // __STATISTICS__
-
-
-//####################### Heap Structure ####################
-
-
-struct Heap {
-	struct FreeHeader;									// forward declaration
-
-	struct Storage {
-		struct Header {									// header
-			union Kind {
-				struct RealHeader {
-					union {
-						struct {						// 4-byte word => 8-byte header, 8-byte word => 16-byte header
-							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
-							uint64_t padding;			// unused, force home/blocksize to overlay alignment in fake header
-							#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
-
-							union {
-								// 2nd low-order bit => zero filled, 3rd low-order bit => mmapped
-								FreeHeader * home;		// allocated block points back to home locations (must overlay alignment)
-								size_t blockSize;		// size for munmap (must overlay alignment)
-								Storage * next;			// freed block points to next freed block of same size
-							};
-							size_t size;				// allocation size in bytes
-
-							#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
-							uint64_t padding;			// unused, force home/blocksize to overlay alignment in fake header
-							#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
-						};
-					};
-				} real; // RealHeader
-				struct FakeHeader {
-					#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-					uint32_t alignment;					// 1st low-order bit => fake header & alignment
-					#endif // __ORDER_LITTLE_ENDIAN__
-
-					uint32_t offset;
-
-					#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-					uint32_t alignment;					// 1st low-order bit => fake header & alignment
-					#endif // __ORDER_BIG_ENDIAN__
-				} fake; // FakeHeader
-			} kind; // Kind
-		} header; // Header
-
-		char pad[__ALIGN__ - sizeof( Header )];
-		char data[0];									// storage
-	}; // Storage
-
-	static_assert( __ALIGN__ >= sizeof( Storage ), "minimum alignment < sizeof( Storage )" );
-
-	struct FreeHeader {
-		#ifdef AWAYSPIN
-		SpinLock_t awayLock;							// LOCK(S) MUST BE FIRST FIELD(S) FOR ALIGNMENT
-		#endif // AWAYSPIN
-
-		Storage * freeList;								// thread free list
-		Storage * awayList;								// other thread return list
-
-		Heap * homeManager;								// heap owner (free storage to bucket, from bucket to heap)
-		size_t blockSize;								// size of allocations on this list
-
-		bool operator<( const size_t bsize ) const { return blockSize < bsize; }
-	}; // FreeHeader
-
-	// Recursive definitions: HeapManager needs size of bucket array and bucket area needs sizeof HeapManager storage.
-	// Break recursion by hardcoding number of buckets and statically checking number is correct after bucket array defined.
-	enum {
-		#ifdef FASTLOOKUP
-		LookupSizes = 65'536 + sizeof(Storage),			// number of fast lookup sizes '
-		#endif // FASTLOOKUP
-		NoBucketSizes = 91,								// number of bucket sizes
-	}; // enum
-
-	FreeHeader freeLists[NoBucketSizes];				// buckets for different allocation sizes
-	void * heapBuffer;
-	size_t heapReserve;
-
-	Heap * nextHeapManager;								// intrusive link of existing heaps; traversed to collect statistics
-	Heap * nextFreeHeapManager;							// intrusive link of free heaps from terminated threads; reused by new threads
-
-	#ifdef __DEBUG__
-	long long int allocUnfreed;							// running total of allocations minus frees; can be negative
-	#endif // __DEBUG__
-
-	#ifdef __STATISTICS__
-	HeapStatistics stats;								// local statistic table for this heap
-	#endif // __STATISTICS__
-
-	static void heapManagerCtor(
-		#ifdef __DEBUG__
-		size_t size
-		#endif // __DEBUG__
-	);
-	static void heapManagerDtor();
-}; // Heap
-
-
-struct ThreadManager {
-	int dummy;											// used to trigger allocation of storage
-	~ThreadManager() { Heap::heapManagerDtor(); }		// called automagically when thread terminates
-}; // ThreadManager
-
-
-struct HeapMaster {
-	SpinLock_t masterExtLock;							// protects allocation-buffer extension
-	SpinLock_t masterMgrLock;							// protects freeHeapManagersList, heapManagersList, heapManagersStorage, heapManagersStorageEnd
-
-	#ifdef FASTLOOKUP
-	unsigned char lookup[Heap::LookupSizes];			// O(1) lookup for small sizes
-	#endif // FASTLOOKUP
-
-	static const unsigned int bucketSizes[];			// initialized statically, outside constructor
-	void * heapBegin;									// start of heap
-	void * heapEnd;										// logical end of heap
-	size_t heapRemaining;								// amount of storage not allocated in the current chunk
-	size_t pageSize;									// architecture pagesize
-	size_t heapExpand;									// sbrk advance
-	size_t mmapStart;									// cross over point for mmap
-	unsigned int maxBucketsUsed;						// maximum number of buckets in use
-	static const off_t mmapFd;							// fake or actual fd for anonymous file. initialized statically, outside constructor
-
-	Heap * heapManagersList;							// heap-list head
-	Heap * freeHeapManagersList;						// free-list head
-
-	// Heap superblocks are not linked; heaps in superblocks are linked via intrusive links.
-	Heap * heapManagersStorage;							// next heap to use in heap superblock
-	Heap * heapManagersStorageEnd;						// logical heap outside of superblock's end
-
-	#ifdef __STATISTICS__
-	unsigned long int threads_started, threads_exited;  // counts threads that have started and exited
-	unsigned long int reused_heap, new_heap;  			// counts reusability of heaps
-	unsigned int sbrk_calls;
-	unsigned long long int sbrk_storage;
-	int stats_fd;
-	HeapStatistics stats;								// global stats for thread-local heaps to add there counters when exiting
-	#endif // __STATISTICS__
-
-	// Prevents two threads from constructing heapMaster.
-	static volatile bool heapMasterBootFlag;			// trigger for first heap
-
-	#ifdef __DEBUG__
-	long long int allocUnfreed;
-	#endif // __DEBUG__
-
-	static void heapMasterCtor();
-	static void heapMasterDtor();
-}; // HeapMaster
-
-extern "C" {
-	int __map_prot = PROT_READ | PROT_WRITE | PROT_EXEC; 	// common mmap/mprotect protection
-	size_t __cfa_page_size;									// architecture pagesize
-}
-
-volatile bool HeapMaster::heapMasterBootFlag = false;
-static HeapMaster heapMaster;							// program global
-
-// Thread-local storage is allocated lazily when the storage is accessed.
-static thread_local size_t PAD1 CALIGN TLSMODEL __attribute__(( unused )); // protect false sharing
-static thread_local ThreadManager threadManager CALIGN TLSMODEL;
-// Do not put heapManager in ThreadManager because thread-local destructor results in extra access code.
-static thread_local Heap * heapManager CALIGN TLSMODEL;
-static thread_local bool heapManagerBootFlag CALIGN TLSMODEL = false;
-static thread_local size_t PAD2 CALIGN TLSMODEL __attribute__(( unused )); // protect further false sharing
-
-
-#ifdef __DEBUG__
-extern "C" {
-	void heapAppStart( void ) {
-		assert( heapManager );
-		heapManager->allocUnfreed = 0;
-	} // heapAppStart
-
-	void heapAppStop( void ) {
-		long long int allocUnfreed = heapMaster.allocUnfreed;
-		for ( Heap * heap = heapMaster.heapManagersList; heap; heap = heap->nextHeapManager ) {
-			allocUnfreed += heap->allocUnfreed;
-		} // for
-
-		allocUnfreed -= malloc_unfreed();
-		if ( allocUnfreed > 0 ) {
-			// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
-			char helpText[512];
-			int len = snprintf( helpText, sizeof(helpText), "Runtime warning (UNIX pid:%ld) : program terminating with %llu(0x%llx) bytes of storage allocated but not freed.\n"
-								"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
-								(long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid
-			NOWARNING( write( STDERR_FILENO, helpText, len ), unused-result );
-		} // if
-	} // heapAppStop
-} // extern "C"
-#endif // __DEBUG__
-
-
-// declare helper functions for HeapMaster
-void noMemory();										// forward, called by "builtin_new" when malloc returns 0
-
-void HeapMaster::heapMasterCtor() {
-	// Singleton pattern to initialize heap master
-	__cfa_page_size = sysconf( _SC_PAGESIZE );
-
-	assert( heapMaster.mmapFd == -1 );
-	assert( heapMaster.bucketSizes[0] == (16 + sizeof(Heap::Storage)) );
-
-	heapMaster.masterExtLock = 0;
-	heapMaster.masterMgrLock = 0;
-
-	char * end = (char *)sbrk( 0 );
-	heapMaster.heapBegin = heapMaster.heapEnd = sbrk( (char *)Ceiling( (long unsigned int)end, __ALIGN__ ) - end ); // move start of heap to multiple of alignment
-	heapMaster.heapRemaining = 0;
-	heapMaster.heapExpand = malloc_expansion();
-	heapMaster.mmapStart = malloc_mmap_start();
-
-	// find the closest bucket size less than or equal to the mmapStart size
-	heapMaster.maxBucketsUsed = std::lower_bound( heapMaster.bucketSizes, heapMaster.bucketSizes + (Heap::NoBucketSizes - 1), heapMaster.mmapStart ) - heapMaster.bucketSizes; // binary search
-
-	assert( (heapMaster.mmapStart >= __cfa_page_size) && (heapMaster.bucketSizes[Heap::NoBucketSizes - 1] >= heapMaster.mmapStart) );
-	assert( heapMaster.maxBucketsUsed < Heap::NoBucketSizes ); // subscript failure ?
-	assert( heapMaster.mmapStart <= heapMaster.bucketSizes[heapMaster.maxBucketsUsed] ); // search failure ?
-
-	heapMaster.heapManagersList = nullptr;
-	heapMaster.freeHeapManagersList = nullptr;
-
-	heapMaster.heapManagersStorage = nullptr;
-	heapMaster.heapManagersStorageEnd = nullptr;
-
-	#ifdef __STATISTICS__
-	heapMaster.threads_started = heapMaster.threads_exited = 0;
-	heapMaster.reused_heap = heapMaster.new_heap = 0;
-	heapMaster.sbrk_calls = heapMaster.sbrk_storage = 0;
-	heapMaster.stats_fd = STDERR_FILENO;
-	#endif // __STATISTICS__
-
-	#ifdef __DEBUG__
-	heapMaster.allocUnfreed = 0;
-	#endif // __DEBUG__
-
-	#ifdef FASTLOOKUP
-	for ( unsigned int i = 0, idx = 0; i < Heap::LookupSizes; i += 1 ) {
-		if ( i > heapMaster.bucketSizes[idx] ) idx += 1;
-		heapMaster.lookup[i] = idx;
-		assert( i <= heapMaster.bucketSizes[idx] );
-		assert( (i <= 32 && idx == 0) || (i > heapMaster.bucketSizes[idx - 1]) );
-	} // for
-	#endif // FASTLOOKUP
-
-	std::set_new_handler( noMemory );					// do not throw exception as the default
-
-	HeapMaster::heapMasterBootFlag = true;
-} // HeapMaster::heapMasterCtor
-
-
-#define NO_MEMORY_MSG "insufficient heap memory available for allocating %zd new bytes."
-
-void Heap::heapManagerCtor(
-		#ifdef __DEBUG__
-		size_t size
-		#endif // __DEBUG__
-) {
-	if ( UNLIKELY( ! HeapMaster::heapMasterBootFlag ) ) HeapMaster::heapMasterCtor();
-
-	// Trigger thread_local storage implicit allocation (causes recursive call)
-	volatile int dummy __attribute__(( unused )) = threadManager.dummy;
-
-	spin_acquire( &heapMaster.masterMgrLock );			// protect heapMaster counters
-	// The atomic test-and-set instruction is a fence so heapManagerBootFlag is read after the magic recursive call to
-	// initialize thread-local storage. Hence, heapManagerBootFlag is NOT declared as volatile.
-  if ( heapManagerBootFlag ) {							// singleton
-		spin_release( &heapMaster.masterMgrLock );
-		return;											// always return on recursive initiation
-	} // if
-
-	assert( ! heapManagerBootFlag );
-
-	// get storage for heap manager
-
-	if ( heapMaster.freeHeapManagersList ) {			// free heap for reused ?
-		heapManager = heapMaster.freeHeapManagersList;
-		heapMaster.freeHeapManagersList = heapManager->nextFreeHeapManager;
-
-		#ifdef __STATISTICS__
-		heapMaster.reused_heap += 1;
-		#endif // __STATISTICS__
-	} else {											// free heap not found, create new
-		// Heap size is about 12K, FreeHeader (128 bytes because of cache alignment) * NoBucketSizes (91) => 128 heaps * 12K ~= 120K byte superblock.
-		// Where 128-heap superblock handles a medium sized multi-processor server.
-		enum { HeapDim = 128 };							// number of heaps in superblock
-		size_t remaining = heapMaster.heapManagersStorageEnd - heapMaster.heapManagersStorage; // remaining free heaps in superblock
-		if ( ! heapMaster.heapManagersStorage || remaining != 0 ) {
-			size_t size = HeapDim * sizeof( Heap );
-			heapMaster.heapManagersStorage = (Heap *)mmap( 0, size, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, heapMaster.mmapFd, 0 );
-			if ( UNLIKELY( heapMaster.heapManagersStorage == MAP_FAILED ) ) { // failed ?
-				if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, size ); // no memory
-				// Do not call strerror( errno ) as it may call malloc.
-				abort( "heapManagerCtor() : internal error, mmap failure, size:%zu error %d.",
-					   size, errno );
-			} // if
-			heapMaster.heapManagersStorageEnd = &heapMaster.heapManagersStorage[HeapDim]; // outside array
-		} // if
-
-		heapManager = heapMaster.heapManagersStorage;
-		heapMaster.heapManagersStorage = heapMaster.heapManagersStorage + 1; // bump next heap
-
-		heapManager->nextHeapManager = heapMaster.heapManagersList;
-		heapMaster.heapManagersList = heapManager;
-
-		#ifdef __STATISTICS__
-		heapMaster.new_heap += 1;
-		#endif // __STATISTICS__
-	} // if
-
-	#ifdef __STATISTICS__
-	heapMaster.threads_started += 1;
-	#endif // __STATISTICS__
-
-	#ifdef __DEBUG__
-	heapManager->allocUnfreed -= size;
-	#endif // __DEBUG__
-
-	spin_release( &heapMaster.masterMgrLock );
-
-	for ( unsigned int j = 0; j < Heap::NoBucketSizes; j += 1 ) { // initialize free lists
-		heapManager->freeLists[j] = (Heap::FreeHeader){
-			#ifdef AWAYSPIN
-			.awayLock = 0,
-			#endif // AWAYSPIN
-			.freeList = nullptr,
-			.awayList = nullptr,
-			.homeManager = heapManager,
-			.blockSize = heapMaster.bucketSizes[j],
-		};
-	} // for
-
-	heapManager->heapBuffer = nullptr;
-	heapManager->heapReserve = 0;
-	heapManager->nextFreeHeapManager = nullptr;
-	heapManagerBootFlag = true;
-} // Heap::heapManagerCtor
-
-
-void Heap::heapManagerDtor() {
-  if ( UNLIKELY( ! heapManagerBootFlag ) ) return;
-
-	DISABLE_INTERRUPTS;
-
-	spin_acquire( &heapMaster.masterMgrLock );
-
-	// place heap on list of free heaps for reusability
-	heapManager->nextFreeHeapManager = heapMaster.freeHeapManagersList;
-	heapMaster.freeHeapManagersList = heapManager;
-
-	// SKULLDUGGERY: The thread heap ends BEFORE the last free(s) occurs from the thread-local storage allocations for
-	// the thread. This final allocation must be handled in doFree for this thread and its terminated heap. However,
-	// this heap has just been put on the heap freelist, and hence there is a race returning the thread-local storage
-	// and a new thread using this heap. The current thread detects it is executing its last free in doFree via
-	// heapManager being null. The trick is for this thread to placed the last free onto the current heap's away-list as
-	// the free-storage header points are this heap. Now, even if other threads are pushing to the away list, it is safe
-	// because of the locking.
-	heapManager = nullptr;
-
-	#ifdef __STATISTICS__
-	heapMaster.threads_exited += 1;
-	#endif // __STATISTICS__
-
-	spin_release( &heapMaster.masterMgrLock );
-
-	ENABLE_INTERRUPTS;
-} // Heap::heapManagerDtor
-
-
-const off_t HeapMaster::mmapFd = -1;
-// Bucket size must be multiple of 16.
-// Powers of 2 are common allocation sizes, so make powers of 2 generate the minimum required size.
-const unsigned int HeapMaster::bucketSizes[] = {		// different bucket sizes
-	16 + sizeof(Heap::Storage), 32 + sizeof(Heap::Storage), 48 + sizeof(Heap::Storage), 64 + sizeof(Heap::Storage), // 4
-	96 + sizeof(Heap::Storage), 112 + sizeof(Heap::Storage), 128 + sizeof(Heap::Storage), // 3
-	160, 192, 224, 256 + sizeof(Heap::Storage), // 4
-	320, 384, 448, 512 + sizeof(Heap::Storage), // 4
-	640, 768, 896, 1'024 + sizeof(Heap::Storage), // 4
-	1'536, 2'048 + sizeof(Heap::Storage), // 2
-	2'560, 3'072, 3'584, 4'096 + sizeof(Heap::Storage), // 4
-	6'144, 8'192 + sizeof(Heap::Storage), // 2
-	9'216, 10'240, 11'264, 12'288, 13'312, 14'336, 15'360, 16'384 + sizeof(Heap::Storage), // 8
-	18'432, 20'480, 22'528, 24'576, 26'624, 28'672, 30'720, 32'768 + sizeof(Heap::Storage), // 8
-	36'864, 40'960, 45'056, 49'152, 53'248, 57'344, 61'440, 65'536 + sizeof(Heap::Storage), // 8
-	73'728, 81'920, 90'112, 98'304, 106'496, 114'688, 122'880, 131'072 + sizeof(Heap::Storage), // 8
-	147'456, 163'840, 180'224, 196'608, 212'992, 229'376, 245'760, 262'144 + sizeof(Heap::Storage), // 8
-	294'912, 327'680, 360'448, 393'216, 425'984, 458'752, 491'520, 524'288 + sizeof(Heap::Storage), // 8
-	655'360, 786'432, 917'504, 1'048'576 + sizeof(Heap::Storage), // 4
-	1'179'648, 1'310'720, 1'441'792, 1'572'864, 1'703'936, 1'835'008, 1'966'080, 2'097'152 + sizeof(Heap::Storage), // 8
-	2'621'440, 3'145'728, 3'670'016, 4'194'304 + sizeof(Heap::Storage), // 4
-};
-
-static_assert( Heap::NoBucketSizes == sizeof(HeapMaster::bucketSizes) / sizeof(HeapMaster::bucketSizes[0]), "size of bucket array wrong" );
-
-
-//####################### Memory Allocation Routines' Helpers ####################
-
-
-#ifdef __STATISTICS__
-static inline HeapStatistics & collectStats( HeapStatistics & stats ) {
-	spin_acquire( &heapMaster.masterMgrLock );
-
-	stats += heapMaster.stats;
-	for ( Heap * node = heapMaster.heapManagersList; node; node = node->nextHeapManager ) {
-		stats += node->stats;
-	} // for
-
-	spin_release(&heapMaster.masterMgrLock);
-	return stats;
-} // collectStats
-
-// Use "write" because streams may be shutdown when calls are made.
-static void printStats( HeapStatistics & stats ) {
-	char helpText[1024];
-	int len = snprintf( helpText, sizeof(helpText),
-						"\nHeap statistics: (storage request / allocation)\n"
-						"  malloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-						"  aalloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-						"  calloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-						"  memalign  >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-						"  amemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-						"  cmemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-						"  resize    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-						"  realloc   >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-						"  free      !null calls %'u; null calls %'u; storage %'llu / %'llu bytes\n"
-						"  away      pulls %'u; pushes %'u; storage %'llu / %'llu bytes\n"
-						"  sbrk      calls %'u; storage %'llu bytes\n"
-						"  mmap      calls %'u; storage %'llu / %'llu bytes\n"
-						"  munmap    calls %'u; storage %'llu / %'llu bytes\n"
-						"  threads   started %'lu; exited %'lu\n"
-						"  heaps     new %'lu; reused %'lu\n",
-						stats.malloc_calls, stats.malloc_0_calls, stats.malloc_storage_request, stats.malloc_storage_alloc,
-						stats.aalloc_calls, stats.aalloc_0_calls, stats.aalloc_storage_request, stats.aalloc_storage_alloc,
-						stats.calloc_calls, stats.calloc_0_calls, stats.calloc_storage_request, stats.calloc_storage_alloc,
-						stats.memalign_calls, stats.memalign_0_calls, stats.memalign_storage_request, stats.memalign_storage_alloc,
-						stats.amemalign_calls, stats.amemalign_0_calls, stats.amemalign_storage_request, stats.amemalign_storage_alloc,
-						stats.cmemalign_calls, stats.cmemalign_0_calls, stats.cmemalign_storage_request, stats.cmemalign_storage_alloc,
-						stats.resize_calls, stats.resize_0_calls, stats.resize_storage_request, stats.resize_storage_alloc,
-						stats.realloc_calls, stats.realloc_0_calls, stats.realloc_storage_request, stats.realloc_storage_alloc,
-						stats.free_calls, stats.free_null_calls, stats.free_storage_request, stats.free_storage_alloc,
-						stats.away_pulls, stats.away_pushes, stats.away_storage_request, stats.away_storage_alloc,
-						heapMaster.sbrk_calls, heapMaster.sbrk_storage,
-						stats.mmap_calls, stats.mmap_storage_request, stats.mmap_storage_alloc,
-						stats.munmap_calls, stats.munmap_storage_request, stats.munmap_storage_alloc,
-						heapMaster.threads_started, heapMaster.threads_exited,
-						heapMaster.new_heap, heapMaster.reused_heap
-		);
-	NOWARNING( write( heapMaster.stats_fd, helpText, len ), unused-result );
-} // printStats
-
-
-static int printStatsXML( HeapStatistics & stats, FILE * stream ) {
-	char helpText[1024];
-	int len = snprintf( helpText, sizeof(helpText),
-						"<malloc version=\"1\">\n"
-						"<heap nr=\"0\">\n"
-						"<sizes>\n"
-						"</sizes>\n"
-						"<total type=\"malloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"aalloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"calloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"memalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"amemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"cmemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"resize\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"realloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"free\" !null=\"%'u;\" 0 null=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"away\" pulls=\"%'u;\" 0 pushes=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"sbrk\" count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"mmap\" count=\"%'u;\" size=\"%'llu / %'llu\" / > bytes\n"
-						"<total type=\"munmap\" count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"threads\" started=\"%'lu;\" exited=\"%'lu\"/>\n"
-						"</malloc>",
-						stats.malloc_calls, stats.malloc_0_calls, stats.malloc_storage_request, stats.malloc_storage_alloc,
-						stats.aalloc_calls, stats.aalloc_0_calls, stats.aalloc_storage_request, stats.aalloc_storage_alloc,
-						stats.calloc_calls, stats.calloc_0_calls, stats.calloc_storage_request, stats.calloc_storage_alloc,
-						stats.memalign_calls, stats.memalign_0_calls, stats.memalign_storage_request, stats.memalign_storage_alloc,
-						stats.amemalign_calls, stats.amemalign_0_calls, stats.amemalign_storage_request, stats.amemalign_storage_alloc,
-						stats.cmemalign_calls, stats.cmemalign_0_calls, stats.cmemalign_storage_request, stats.cmemalign_storage_alloc,
-						stats.resize_calls, stats.resize_0_calls, stats.resize_storage_request, stats.resize_storage_alloc,
-						stats.realloc_calls, stats.realloc_0_calls, stats.realloc_storage_request, stats.realloc_storage_alloc,
-						stats.free_calls, stats.free_null_calls, stats.free_storage_request, stats.free_storage_alloc,
-						stats.away_pulls, stats.away_pushes, stats.away_storage_request, stats.away_storage_alloc,
-						heapMaster.sbrk_calls, heapMaster.sbrk_storage,
-						stats.mmap_calls, stats.mmap_storage_request, stats.mmap_storage_alloc,
-						stats.munmap_calls, stats.munmap_storage_request, stats.munmap_storage_alloc,
-						heapMaster.threads_started, heapMaster.threads_exited
-		);
-	NOWARNING( write( fileno(stream), helpText, len ), unused-result );
-	return len;
-} // printStatsXML
-#endif // __STATISTICS__
-
-
-inline void noMemory() {
-	abort( "Heap memory exhausted at %zu bytes.\n"
-		   "Possible cause is very large memory allocation and/or large amount of unfreed storage allocated by the program or system/library routines.",
-		   ((char *)(sbrk( 0 )) - (char *)(heapMaster.heapBegin)) );
-} // noMemory
-
-
-static bool setMmapStart( size_t value ) {
-  if ( value < __cfa_page_size || heapMaster.bucketSizes[Heap::NoBucketSizes - 1] < value ) return false;
-	heapMaster.mmapStart = value;						// set global
-
-	// find the closest bucket size less than or equal to the mmapStart size
-	heapMaster.maxBucketsUsed = std::lower_bound( heapMaster.bucketSizes, heapMaster.bucketSizes + (Heap::NoBucketSizes - 1), heapMaster.mmapStart ) - heapMaster.bucketSizes; // binary search
-	assert( heapMaster.maxBucketsUsed < Heap::NoBucketSizes ); // subscript failure ?
-	assert( heapMaster.mmapStart <= heapMaster.bucketSizes[heapMaster.maxBucketsUsed] ); // search failure ?
-	return true;
-} // setMmapStart
-
-// <-------+----------------------------------------------------> bsize (bucket size)
-// |header |addr
-//==================================================================================
-//                   align/offset |
-// <-----------------<------------+-----------------------------> bsize (bucket size)
-//                   |fake-header | addr
-#define headerAddr( addr ) ((Heap::Storage::Header *)( (char *)addr - sizeof(Heap::Storage) ))
-#define realHeader( header ) ((Heap::Storage::Header *)((char *)header - header->kind.fake.offset))
-
-// <-------<<--------------------- dsize ---------------------->> bsize (bucket size)
-// |header |addr
-//==================================================================================
-//                   align/offset |
-// <------------------------------<<---------- dsize --------->>> bsize (bucket size)
-//                   |fake-header |addr
-#define dataStorage( bsize, addr, header ) (bsize - ( (char *)addr - (char *)header ))
-
-
-static inline void checkAlign( size_t alignment ) {
-	if ( UNLIKELY( alignment < __ALIGN__ || ! Pow2( alignment ) ) ) {
-		abort( "Alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, __ALIGN__ );
-	} // if
-} // checkAlign
-
-
-static inline void checkHeader( bool check, const char name[], void * addr ) {
-	if ( UNLIKELY( check ) ) {							// bad address ?
-		abort( "Attempt to %s storage %p with address outside the heap.\n"
-			   "Possible cause is duplicate free on same block or overwriting of memory.",
-			   name, addr );
-	} // if
-} // checkHeader
-
-
-static inline void fakeHeader( Heap::Storage::Header *& header, size_t & alignment ) {
-	if ( UNLIKELY( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
-		alignment = header->kind.fake.alignment & -2;	// remove flag from value
-		#ifdef __DEBUG__
-		checkAlign( alignment );						// check alignment
-		#endif // __DEBUG__
-		header = realHeader( header );					// backup from fake to real header
-	} else {
-		alignment = __ALIGN__;							// => no fake header
-	} // if
-} // fakeHeader
-
-
-static inline bool headers( const char name[] __attribute__(( unused )), void * addr, Heap::Storage::Header *& header, Heap::FreeHeader *& freeHead, size_t & size, size_t & alignment ) {
-	header = headerAddr( addr );
-
-  if ( UNLIKELY( addr < heapMaster.heapBegin || heapMaster.heapEnd < addr ) ) { // mmapped ?
-		fakeHeader( header, alignment );
-		size = header->kind.real.blockSize & -3;		// mmap size
-		return true;
-	} // if
-
-	#ifdef __DEBUG__
-	checkHeader( header < heapMaster.heapBegin, name, addr ); // bad low address ?
-	#endif // __DEBUG__
-
-	// header may be safe to dereference
-	fakeHeader( header, alignment );
-	#ifdef __DEBUG__
-	checkHeader( header < heapMaster.heapBegin || heapMaster.heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
-	#endif // __DEBUG__
-
-	freeHead = (Heap::FreeHeader *)((size_t)header->kind.real.home & -3);
-	#ifdef __DEBUG__
-	Heap * homeManager = freeHead->homeManager;
-	if ( UNLIKELY( freeHead < &homeManager->freeLists[0] || &homeManager->freeLists[Heap::NoBucketSizes - 1] < freeHead ) ) {
-		abort( "Attempt to %s storage %p with corrupted header.\n"
-			   "Possible cause is duplicate free on same block or overwriting of header information.",
-			   name, addr );
-	} // if
-	#endif // __DEBUG__
-	size = freeHead->blockSize;
-	return false;
-} // headers
-
-
-static inline void * master_extend( size_t size ) {
-	spin_acquire( &heapMaster.masterExtLock );
-
-	ptrdiff_t rem = heapMaster.heapRemaining - size;
-	if ( UNLIKELY( rem < 0 ) ) {
-		// If the size requested is bigger than the current remaining storage, increase the size of the heap.
-
-		size_t increase = Ceiling( size > heapMaster.heapExpand ? size : heapMaster.heapExpand, __ALIGN__ );
-		if ( UNLIKELY( sbrk( increase ) == (void *)-1 ) ) {	// failed, no memory ?
-			spin_release( &heapMaster.masterExtLock );
-			abort( NO_MEMORY_MSG, size );				// give up
-		} // if
-		#ifdef __STATISTICS__
-		heapMaster.sbrk_calls += 1;
-		heapMaster.sbrk_storage += increase;
-		#endif // __STATISTICS__
-		rem = heapMaster.heapRemaining + increase - size;
-	} // if
-
-	Heap::Storage * block = (Heap::Storage *)heapMaster.heapEnd;
-	heapMaster.heapRemaining = rem;
-	heapMaster.heapEnd = (char *)heapMaster.heapEnd + size;
-
-	spin_release( &heapMaster.masterExtLock );
-	return block;
-} // master_extend
-
-
-static inline void * manager_extend( size_t size ) {
-	ptrdiff_t rem = heapManager->heapReserve - size;
-
-	if ( UNLIKELY( rem < 0 ) ) {						// negative
-		// If the size requested is bigger than the current remaining reserve, use the current reserve to populate
-		// smaller freeLists, and increase the reserve.
-
-		rem = heapManager->heapReserve;					// positive
-
-		if ( rem >= heapMaster.bucketSizes[0] ) {
-			Heap::FreeHeader * freeHead =
-			#ifdef FASTLOOKUP
-				rem < Heap::LookupSizes ? &(heapManager->freeLists[heapMaster.lookup[rem]]) :
-			#endif // FASTLOOKUP
-			std::lower_bound( heapManager->freeLists, heapManager->freeLists + heapMaster.maxBucketsUsed, rem ); // binary search
-
-			if ( UNLIKELY( freeHead->blockSize > (size_t)rem ) ) freeHead -= 1;
-			Heap::Storage * block = (Heap::Storage *)heapManager->heapBuffer;
-
-			block->header.kind.real.next = freeHead->freeList;	// push on stack
-			freeHead->freeList = block;
-		} // if
-
-		size_t increase = Ceiling( size > ( heapMaster.heapExpand / 10 ) ? size : ( heapMaster.heapExpand / 10 ), __ALIGN__ );
-		heapManager->heapBuffer = master_extend(increase);
-		rem = increase - size;
-	} // if
-
-	Heap::Storage * block = (Heap::Storage *)heapManager->heapBuffer;
-	heapManager->heapReserve = rem;
-	heapManager->heapBuffer = (char *)heapManager->heapBuffer + size;
-
-	return block;
-} // manager_extend
-
-
-static inline void * doMalloc( size_t size
-							  #ifdef __STATISTICS__
-							  , unsigned int counter
-							  #endif // __STATISTICS__
-		) {
-	 Heap::Storage * block;
-
-	// Look up size in the size list.  Make sure the user request includes space for the header that must be allocated
-	// along with the block and is a multiple of the alignment size.
-	size_t tsize = size + sizeof(Heap::Storage);
-
-	#ifdef __STATISTICS__
-	heapManager->stats.counters[counter].cnt1 += 1;
-	heapManager->stats.counters[counter].cnt3 += size;
-	#endif // __STATISTICS__
-
-	if ( LIKELY( tsize < heapMaster.mmapStart ) ) {		// small size => sbrk
-		Heap::FreeHeader * freeHead =
-			#ifdef FASTLOOKUP
-			LIKELY( tsize < Heap::LookupSizes ) ? &(heapManager->freeLists[heapMaster.lookup[tsize]]) :
-			#endif // FASTLOOKUP
-			std::lower_bound( heapManager->freeLists, heapManager->freeLists + heapMaster.maxBucketsUsed, tsize ); // binary search
-
-		assert( freeHead <= &heapManager->freeLists[heapMaster.maxBucketsUsed] ); // subscripting error ?
-		assert( tsize <= freeHead->blockSize );			// search failure ?
-		tsize = freeHead->blockSize;					// total space needed for request
-		#ifdef __STATISTICS__
-		heapManager->stats.counters[counter].cnt4 += tsize;
-		#endif // __STATISTICS__
-
-		block = freeHead->freeList;						// remove node from stack
-		if ( UNLIKELY( block == nullptr ) ) {			// no free block ?
-			// Freelist for that size is empty, so carve it out of the heap, if there is enough left, or get some more
-			// and then carve it off.
-
-			#ifdef AWAYSPIN
-			spin_acquire( &freeHead->awayLock );
-			block = freeHead->awayList;
-			freeHead->awayList = nullptr;
-			spin_release( &freeHead->awayLock );
-			#else
-			block = __atomic_exchange_n( &freeHead->awayList, nullptr, __ATOMIC_SEQ_CST );
-			#endif // AWAYSPIN
-			if ( LIKELY( block == nullptr ) ) {			// away list also empty?
-				block = (Heap::Storage *)manager_extend( tsize ); // mutual exclusion on call
-			} else {									// merge awayList into freeHead
-				#ifdef __STATISTICS__
-				heapManager->stats.away_pulls += 1;
-				#endif // __STATISTICS__
-				freeHead->freeList = block->header.kind.real.next;
-			} // if
-		} else {
-			freeHead->freeList = block->header.kind.real.next;
-		} // if
-
-		block->header.kind.real.home = freeHead;		// pointer back to free list of apropriate size
-	} else {											// large size => mmap
-  if ( UNLIKELY( size > ULONG_MAX - __cfa_page_size ) ) return nullptr; // error check
-		tsize = Ceiling( tsize, __cfa_page_size );	// must be multiple of page size
-		#ifdef __STATISTICS__
-		heapManager->stats.counters[counter].cnt4 += tsize;
-		heapManager->stats.mmap_calls += 1;
-		heapManager->stats.mmap_storage_request += size;
-		heapManager->stats.mmap_storage_alloc += tsize;
-		#endif // __STATISTICS__
-
-		block = (Heap::Storage *)::mmap( 0, tsize, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, heapMaster.mmapFd, 0 );
-		if ( UNLIKELY( block == MAP_FAILED ) ) {		// failed ?
-			if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, tsize ); // no memory
-			// Do not call strerror( errno ) as it may call malloc.
-			abort( "(Heap &)0x%p.doMalloc() : internal error, mmap failure, size:%zu %lu %lu error %d.",
-				   &heapManager, tsize, size, heapMaster.mmapStart, errno );
-		} // if
-		block->header.kind.real.blockSize = tsize;		// storage size for munmap
-	} // if
-
-	block->header.kind.real.size = size;				// store allocation size
-	void * addr = &(block->data);						// adjust off header to user bytes
-	assert( ((uintptr_t)addr & (__ALIGN__ - 1)) == 0 ); // minimum alignment ?
-
-	#ifdef __DEBUG__
-	heapManager->allocUnfreed += size;
-	#endif // __DEBUG__
-
-	return addr;
-} // doMalloc
-
-
-static inline void doFree( void * addr ) {
-	DISABLE_INTERRUPTS;
-
-	if ( UNLIKELY( ! heapManagerBootFlag ) ) Heap::heapManagerCtor( // trigger for first heap
-		#ifdef __DEBUG__
-		0
-		#endif // __DEBUG__
-		);
-
-	Heap::Storage::Header * header;
-	Heap::FreeHeader * freeHead;
-	size_t size, alignment;								// not used (see realloc)
-
-	bool mapped = headers( "free", addr, header, freeHead, size, alignment );
-
-	if ( UNLIKELY( mapped ) ) {						// mmapped ?
-		#ifdef __STATISTICS__
-		heapManager->stats.munmap_calls += 1;
-		heapManager->stats.munmap_storage_request += header->kind.real.size;
-		heapManager->stats.munmap_storage_alloc += size;
-		#endif // __STATISTICS__
-		if ( UNLIKELY( munmap( header, size ) == -1 ) ) {
-			abort( "Attempt to deallocate storage %p not allocated or with corrupt header.\n"
-				   "Possible cause is invalid pointer.",
-				   addr );
-		} // if
-	} else {
-		if ( LIKELY( heapManager == freeHead->homeManager ) ) {	// belongs to this thread
-			header->kind.real.next = freeHead->freeList; // push on stack
-			freeHead->freeList = (Heap::Storage *)header;
-		} else {										// return to thread owner
-			#ifdef AWAYSPIN
-			spin_acquire( &freeHead->awayLock );
-			header->kind.real.next = freeHead->awayList; // push to bucket away list
-			freeHead->awayList = (Heap::Storage *)header;
-			spin_release( &freeHead->awayLock );
-			#else										// lock free
-			header->kind.real.next = freeHead->awayList; // link new node to top node
-			// CAS resets header->kind.real.next = freeHead->awayList on failure
-			while ( ! __atomic_compare_exchange_n( &freeHead->awayList, &header->kind.real.next, header,
-												   false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) );
-			#endif // AWAYSPIN
-
-			// detect free after thread-local storage destruction and use global stats in that case
-			if ( UNLIKELY( heapManager == nullptr ) ) {
-				#ifdef __STATISTICS__
-				AtomicFetchAdd( heapMaster.stats.free_storage_request, header->kind.real.size );
-				AtomicFetchAdd( heapMaster.stats.free_storage_alloc, size );
-				#endif // __STATISTICS__
-				// away push counters are not incremented because this is a self-away push, and there is no
-				// corresponding pull counter that needs to match.
-				ENABLE_INTERRUPTS;
-				return;
-			} // if
-
-			#ifdef __STATISTICS__
-			heapManager->stats.away_pushes += 1;
-			heapManager->stats.away_storage_request += header->kind.real.size;
-			heapManager->stats.away_storage_alloc += size;
-			#endif // __STATISTICS__
-		} // if
-	} // if
-
-	#ifdef __STATISTICS__
-	heapManager->stats.free_storage_request += header->kind.real.size;
-	heapManager->stats.free_storage_alloc += size;
-	#endif // __STATISTICS__
-
-	#ifdef __DEBUG__
-	heapManager->allocUnfreed -= header->kind.real.size;
-	#endif // __DEBUG__
-
-	ENABLE_INTERRUPTS;
-} // doFree
-
-
-static inline void * mallocNoStats( size_t size
-									#ifdef __STATISTICS__
-									, unsigned int counter
-									#endif // __STATISTICS__
-		) {
-	DISABLE_INTERRUPTS;
-
-	if ( UNLIKELY( ! heapManagerBootFlag ) ) Heap::heapManagerCtor( // trigger for first heap
-		#ifdef __DEBUG__
-		size
-		#endif // __DEBUG__
-		);
-
-  if ( UNLIKELY( size ) == 0 ||							// 0 BYTE ALLOCATION RETURNS NULL POINTER
-	   UNLIKELY( size > ULONG_MAX - sizeof(Heap::Storage) ) ) { // error check
-		#ifdef __STATISTICS__
-		heapManager->stats.counters[counter].cnt2 += 1;
-		#endif // __STATISTICS__
-		ENABLE_INTERRUPTS;
-		return nullptr;
-	} // if
-
-	void * addr = doMalloc( size
-							#ifdef __STATISTICS__
-							, counter
-							#endif // __STATISTICS__
-		);
-
-	ENABLE_INTERRUPTS;
-	return addr;
-} // mallocNoStats
-
-
-static inline void * memalignNoStats( size_t alignment, size_t size
-									  #ifdef __STATISTICS__
-									  , unsigned int counter
-									  #endif // __STATISTICS__
-		) {
-	DISABLE_INTERRUPTS;
-
-	if ( UNLIKELY( ! heapManagerBootFlag ) ) Heap::heapManagerCtor( // trigger for first heap
-		#ifdef __DEBUG__
-		size
-		#endif // __DEBUG__
-		);
-
-  if ( UNLIKELY( size ) == 0 ||							// 0 BYTE ALLOCATION RETURNS NULL POINTER
-	   UNLIKELY( size > ULONG_MAX - sizeof(Heap::Storage) ) ) { // error check
-		#ifdef __STATISTICS__
-		heapManager->stats.counters[counter].cnt2 += 1;
-		#endif // __STATISTICS__
-
-		ENABLE_INTERRUPTS;
-		return nullptr;
-	} // if
-
-	#ifdef __DEBUG__
-	checkAlign( alignment );							// check alignment
-	#endif // __DEBUG__
-
-	// if alignment <= default alignment, do normal malloc as two headers are unnecessary
-  if ( UNLIKELY( alignment <= __ALIGN__ ) ) {
-		void * addr = doMalloc( size
-			#ifdef __STATISTICS__
-			, counter
-			#endif // __STATISTICS__
-		);
-
-		ENABLE_INTERRUPTS;
-		return addr;
-  }
-
-
-	// Allocate enough storage to guarantee an address on the alignment boundary, and sufficient space before it for
-	// administrative storage. NOTE, WHILE THERE ARE 2 HEADERS, THE FIRST ONE IS IMPLICITLY CREATED BY DOMALLOC.
-	//      .-------------v-----------------v----------------v----------,
-	//      | Real Header | ... padding ... |   Fake Header  | data ... |
-	//      `-------------^-----------------^-+--------------^----------'
-	//      |<--------------------------------' offset/align |<-- alignment boundary
-
-	// subtract __ALIGN__ because it is already the minimum alignment
-	// add sizeof(Heap::Storage) for fake header
-	char * addr = (char *)doMalloc( size + alignment - __ALIGN__ + sizeof(Heap::Storage)
-									#ifdef __STATISTICS__
-									, counter
-									#endif // __STATISTICS__
-		);
-
-	// address in the block of the "next" alignment address
-	char * user = (char *)Ceiling( (uintptr_t)(addr + sizeof(Heap::Storage)), alignment );
-
-	// address of header from malloc
-	Heap::Storage::Header * realHeader = headerAddr( addr );
-	realHeader->kind.real.size = size;					// correct size to eliminate above alignment offset
-	// address of fake header * before* the alignment location
-	Heap::Storage::Header * fakeHeader = headerAddr( user );
-	// SKULLDUGGERY: insert the offset to the start of the actual storage block and remember alignment
-	fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)realHeader;
-	// SKULLDUGGERY: odd alignment imples fake header
-	fakeHeader->kind.fake.alignment = alignment | 1;
-
-	ENABLE_INTERRUPTS;
-	return user;
-} // memalignNoStats
-
-// Operators new and new [] call malloc; delete calls free
-
-
-//####################### Memory Allocation Routines ####################
-
-
-extern "C" {
-	// Allocates size bytes and returns a pointer to the allocated memory.  The contents are undefined. If size is 0,
-	// then malloc() returns a unique pointer value that can later be successfully passed to free().
-	void * malloc( size_t size ) {
-		return mallocNoStats( size
-							  #ifdef __STATISTICS__
-							  , HeapStatistics::MALLOC
-							  #endif // __STATISTICS__
-			);
-	} // malloc
-
-
-	// Same as malloc() except size bytes is an array of dim elements each of elemSize bytes.
-	void * aalloc( size_t dim, size_t elemSize ) {
-		return mallocNoStats( dim * elemSize
-							  #ifdef __STATISTICS__
-							  , HeapStatistics::AALLOC
-							  #endif // __STATISTICS__
-			);
-	} // aalloc
-
-
-	// Same as aalloc() with memory set to zero.
-	void * calloc( size_t dim, size_t elemSize ) {
-		size_t size = dim * elemSize;
-		char * addr = (char *)mallocNoStats( size
-											 #ifdef __STATISTICS__
-											 , HeapStatistics::CALLOC
-											 #endif // __STATISTICS__
-			);
-
-		if ( UNLIKELY( addr == NULL ) ) return NULL; // stop further processing if 0p is returned
-
-		Heap::Storage::Header * header;
-		Heap::FreeHeader * freeHead;
-		size_t bsize, alignment;
-
-		#ifndef __DEBUG__
-		bool mapped =
-		#endif // __DEBUG__
-			headers( "calloc", addr, header, freeHead, bsize, alignment );
-
-		#ifndef __DEBUG__
-		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
-		if ( LIKELY( ! mapped ) )
-		#endif // __DEBUG__
-			// <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined
-			// `-header`-addr                      `-size
-			memset( addr, '\0', size );					// set to zeros
-
-		header->kind.real.blockSize |= 2;				// mark as zero filled
-		return addr;
-	} // calloc
-
-
-	// Change the size of the memory block pointed to by oaddr to size bytes. The contents are undefined.  If oaddr is
-	// nullptr, then the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and oaddr is
-	// not nullptr, then the call is equivalent to free(oaddr). Unless oaddr is nullptr, it must have been returned by an earlier
-	// call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done.
-	void * resize( void * oaddr, size_t size ) {
-	  if ( UNLIKELY( oaddr == nullptr ) ) {				// special cases
-			return mallocNoStats( size
-								  #ifdef __STATISTICS__
-								  , HeapStatistics::RESIZE
-								  #endif // __STATISTICS__
-				);
-		} // if
-
-		// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-	  if ( UNLIKELY( size == 0 ) ) {					// special cases
-			#ifdef __STATISTICS__
-			heapManager->stats.resize_0_calls += 1;
-			#endif // __STATISTICS__
-			doFree( oaddr );							// free previous storage
-			return nullptr;
-		} // if
-
-		Heap::Storage::Header * header;
-		Heap::FreeHeader * freeHead;
-		size_t bsize, oalign;
-		headers( "resize", oaddr, header, freeHead, bsize, oalign );
-
-		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
-		// same size, DO NOT preserve STICKY PROPERTIES.
-		if ( oalign == __ALIGN__ && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
-			#ifdef __STATISTICS__
-			heapManager->stats.resize_calls += 1;
-			#endif // __STATISTICS__
-			header->kind.real.blockSize &= -2;			// no alignment and turn off 0 fill
-			header->kind.real.size = size;				// reset allocation size
-			return oaddr;
-		} // if
-
-		// change size, DO NOT preserve STICKY PROPERTIES.
-		doFree( oaddr );								// free previous storage
-		return mallocNoStats( size						// create new area
-							  #ifdef __STATISTICS__
-							  , HeapStatistics::RESIZE
-							  #endif // __STATISTICS__
-			);
-	} // resize
-
-
-	// Same as resize() but the contents are unchanged in the range from the start of the region up to the minimum of
-	// the old and new sizes.
-	void * realloc( void * oaddr, size_t size ) {
-	  if ( UNLIKELY( oaddr == nullptr ) ) {				// special cases
-			return mallocNoStats( size
-								  #ifdef __STATISTICS__
-								  , HeapStatistics::REALLOC
-								  #endif // __STATISTICS__
-				);
-		} // if
-
-		// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-	  if ( UNLIKELY( size == 0 ) ) {					// special cases
-			#ifdef __STATISTICS__
-			heapManager->stats.realloc_0_calls += 1;
-			#endif // __STATISTICS__
-			doFree( oaddr );							// free previous storage
-			return nullptr;
-		} // if
-
-		Heap::Storage::Header * header;
-		Heap::FreeHeader * freeHead;
-		size_t bsize, oalign;
-		headers( "realloc", oaddr, header, freeHead, bsize, oalign );
-
-		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
-		size_t osize = header->kind.real.size;			// old allocation size
-		bool ozfill = (header->kind.real.blockSize & 2); // old allocation zero filled
-	  if ( UNLIKELY( size <= odsize ) && odsize <= size * 2 ) { // allow up to 50% wasted storage
-			#ifdef __STATISTICS__
-			heapManager->stats.realloc_calls += 1;
-			heapManager->stats.realloc_storage_request += size;
-			#endif // __STATISTICS__
-
-	  		header->kind.real.size = size;				// reset allocation size
-	  		if ( UNLIKELY( ozfill ) && size > osize ) {	// previous request zero fill and larger ?
-	  			memset( (char *)oaddr + osize, '\0', size - osize ); // initialize added storage
-	  		} // if
-			return oaddr;
-		} // if
-
-		// change size and copy old content to new storage
-
-		void * naddr;
-		if ( UNLIKELY( oalign <= __ALIGN__ ) ) {		// previous request not aligned ?
-			naddr = mallocNoStats( size					// create new area
-								   #ifdef __STATISTICS__
-								   , HeapStatistics::REALLOC
-								   #endif // __STATISTICS__
-				);
-		} else {
-			naddr = memalignNoStats( oalign, size		// create new aligned area
-									 #ifdef __STATISTICS__
-									 , HeapStatistics::REALLOC
-									 #endif // __STATISTICS__
-				);
-		} // if
-
-		headers( "realloc", naddr, header, freeHead, bsize, oalign );
-		// To preserve prior fill, the entire bucket must be copied versus the size.
-		memcpy( naddr, oaddr, std::min( osize, size ) ); // copy bytes
-		doFree( oaddr );								// free previous storage
-
-		if ( UNLIKELY( ozfill ) ) {						// previous request zero fill ?
-			header->kind.real.blockSize |= 2;			// mark new request as zero filled
-			if ( size > osize ) {						// previous request larger ?
-				memset( (char *)naddr + osize, '\0', size - osize ); // initialize added storage
-			} // if
-		} // if
-		return naddr;
-	} // realloc
-
-
-	// Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete)
-	void * memalign( size_t alignment, size_t size ) {
-		return memalignNoStats( alignment, size
-								#ifdef __STATISTICS__
-								, HeapStatistics::MEMALIGN
-								#endif // __STATISTICS__
-			);
-	} // memalign
-
-
-	// Same as aalloc() with memory alignment.
-	void * amemalign( size_t alignment, size_t dim, size_t elemSize ) {
-		return memalignNoStats( alignment, dim * elemSize
-								#ifdef __STATISTICS__
-								, HeapStatistics::AMEMALIGN
-								#endif // __STATISTICS__
-			);
-	} // amemalign
-
-
-	// Same as calloc() with memory alignment.
-	void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) {
-		size_t size = dim * elemSize;
-		char * addr = (char *)memalignNoStats( alignment, size
-											   #ifdef __STATISTICS__
-											   , HeapStatistics::CMEMALIGN
-											   #endif // __STATISTICS__
-			);
-
-		if ( UNLIKELY( addr == NULL ) ) return NULL; // stop further processing if 0p is returned
-
-		Heap::Storage::Header * header;
-		Heap::FreeHeader * freeHead;
-		size_t bsize;
-
-		#ifndef __DEBUG__
-		bool mapped =
-		#endif // __DEBUG__
-			headers( "cmemalign", addr, header, freeHead, bsize, alignment );
-
-		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
-		#ifndef __DEBUG__
-		if ( LIKELY( ! mapped ) )
-		#endif // __DEBUG__
-			// <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined
-			// `-header`-addr                      `-size
-			memset( addr, '\0', size );					// set to zeros
-
-		header->kind.real.blockSize |= 2;				// mark as zero filled
-		return addr;
-	} // cmemalign
-
-
-	// Same as memalign(), but ISO/IEC 2011 C11 Section 7.22.2 states: the value of size shall be an integral multiple
-	// of alignment. This requirement is universally ignored.
-	void * aligned_alloc( size_t alignment, size_t size ) {
-		return memalign( alignment, size );
-	} // aligned_alloc
-
-
-	// Allocates size bytes and places the address of the allocated memory in *memptr. The address of the allocated
-	// memory shall be a multiple of alignment, which must be a power of two and a multiple of sizeof(void *). If size
-	// is 0, then posix_memalign() returns either nullptr, or a unique pointer value that can later be successfully passed to
-	// free(3).
-	int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
-	  if ( UNLIKELY( alignment < __ALIGN__ || ! Pow2( alignment ) ) ) return EINVAL; // check alignment
-		*memptr = memalign( alignment, size );
-		return 0;
-	} // posix_memalign
-
-
-	// Allocates size bytes and returns a pointer to the allocated memory. The memory address shall be a multiple of the
-	// page size.  It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
-	void * valloc( size_t size ) {
-		return memalign( __cfa_page_size, size );
-	} // valloc
-
-
-	// Same as valloc but rounds size to multiple of page size.
-	void * pvalloc( size_t size ) {						// round size to multiple of page size
-		return memalign( __cfa_page_size, Ceiling( size, __cfa_page_size ) );
-	} // pvalloc
-
-
-	// Frees the memory space pointed to by ptr, which must have been returned by a previous call to malloc(), calloc()
-	// or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is
-	// nullptr, no operation is performed.
-	void free( void * addr ) {
-		// detect free after thread-local storage destruction and use global stats in that case
-		#ifdef __STATISTICS__
-		DISABLE_INTERRUPTS;
-		if ( UNLIKELY( ! heapManagerBootFlag ) ) Heap::heapManagerCtor( // trigger for first heap
-			#ifdef __DEBUG__
-			0
-			#endif // __DEBUG__
-			);
-		#endif // __STATISTICS__
-
-		// detect free after thread-local storage destruction and use global stats in that case
-		if ( UNLIKELY( addr == nullptr ) ) {			// special case
-			#ifdef __STATISTICS__
-			if ( LIKELY( heapManager ) ) heapManager->stats.free_null_calls += 1;
-			else AtomicFetchAdd( heapMaster.stats.free_null_calls, 1 );
-			ENABLE_INTERRUPTS;
-			#endif // __STATISTICS__
-			return;
-		} // fi
-
-		#ifdef __STATISTICS__
-		if ( LIKELY( heapManager ) ) heapManager->stats.free_calls += 1;
-		else AtomicFetchAdd( heapMaster.stats.free_calls, 1 );
-		ENABLE_INTERRUPTS;
-		#endif // __STATISTICS__
-
-		doFree( addr );
-	} // free
-
-
-	// Returns the alignment of an allocation.
-	size_t malloc_alignment( void * addr ) {
-	  if ( UNLIKELY( addr == nullptr ) ) return __ALIGN__; // minimum alignment
-		Heap::Storage::Header * header = headerAddr( addr );
-		if ( UNLIKELY( (header->kind.fake.alignment & 1) == 1 ) ) {	// fake header ?
-			return header->kind.fake.alignment & -2;	// remove flag from value
-		} else {
-			return __ALIGN__;							// minimum alignment
-		} // if
-	} // malloc_alignment
-
-
-	// Returns true if the allocation is zero filled, e.g., allocated by calloc().
-	bool malloc_zero_fill( void * addr ) {
-	  if ( UNLIKELY( addr == nullptr ) ) return false;	// null allocation is not zero fill
-		Heap::Storage::Header * header = headerAddr( addr );
-		if ( UNLIKELY( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
-			header = realHeader( header );				// backup from fake to real header
-		} // if
-		return (header->kind.real.blockSize & 2) != 0;	// zero filled ?
-	} // malloc_zero_fill
-
-
-	// Returns original total allocation size (not bucket size) => array size is dimension * sizeof(T).
-	size_t malloc_size( void * addr ) {
-	  if ( UNLIKELY( addr == nullptr ) ) return 0;		// null allocation is not zero fill
-		Heap::Storage::Header * header = headerAddr( addr );
-		if ( UNLIKELY( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
-			header = realHeader( header );				// backup from fake to real header
-		} // if
-		return header->kind.real.size;
-	} // malloc_size
-
-
-	// Returns the number of usable bytes in the block pointed to by ptr, a pointer to a block of memory allocated by
-	// malloc or a related function.
-	size_t malloc_usable_size( void * addr ) {
-	  if ( UNLIKELY( addr == nullptr ) ) return 0;		// null allocation has 0 size
-		Heap::Storage::Header * header;
-		Heap::FreeHeader * freeHead;
-		size_t bsize, alignment;
-
-		headers( "malloc_usable_size", addr, header, freeHead, bsize, alignment );
-		return dataStorage( bsize, addr, header );		// data storage in bucket
-	} // malloc_usable_size
-
-
-	// Prints (on default standard error) statistics about memory allocated by malloc and related functions.
-	void malloc_stats() {
-		#ifdef __STATISTICS__
-		HeapStatistics stats;
-		printStats( collectStats( stats ) );
-		#else
-		#define MALLOC_STATS_MSG "malloc_stats statistics disabled.\n"
-		NOWARNING( write( STDERR_FILENO, MALLOC_STATS_MSG, sizeof( MALLOC_STATS_MSG ) - 1 /* size includes '\0' */ ), unused-result );
-		#endif // __STATISTICS__
-	} // malloc_stats
-
-
-	// Changes the file descriptor where malloc_stats() writes statistics.
-	int malloc_stats_fd( int fd __attribute__(( unused )) ) {
-		#ifdef __STATISTICS__
-		int temp = heapMaster.stats_fd;
-		heapMaster.stats_fd = fd;
-		return temp;
-		#else
-		return -1;										// unsupported
-		#endif // __STATISTICS__
-	} // malloc_stats_fd
-
-
-	// Prints an XML string that describes the current state of the memory-allocation implementation in the caller.
-	// The string is printed on the file stream stream.  The exported string includes information about all arenas (see
-	// malloc).
-	int malloc_info( int options, FILE * stream __attribute__(( unused )) ) {
-	  if ( options != 0 ) { errno = EINVAL; return -1; }
-		#ifdef __STATISTICS__
-		HeapStatistics stats;
-		return printStatsXML( collectStats( stats ), stream );
-		#else
-		return 0;										// unsupported
-		#endif // __STATISTICS__
-	} // malloc_info
-
-
-	// Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument
-	// specifies the parameter to be modified, and value specifies the new value for that parameter.
-	int mallopt( int option, int value ) {
-	  if ( value < 0 ) return 0;
-		switch( option ) {
-		  case M_TOP_PAD:
-			heapMaster.heapExpand = Ceiling( value, __cfa_page_size );
-			return 1;
-		  case M_MMAP_THRESHOLD:
-			if ( setMmapStart( value ) ) return 1;
-			break;
-		} // switch
-		return 0;										// error, unsupported
-	} // mallopt
-
-
-	// Attempt to release free memory at the top of the heap (by calling sbrk with a suitable argument).
-	int malloc_trim( size_t ) {
-		return 0;										// => impossible to release memory
-	} // malloc_trim
-
-
-	// Records the current state of all malloc internal bookkeeping variables (but not the actual contents of the heap
-	// or the state of malloc_hook functions pointers).  The state is recorded in a system-dependent opaque data
-	// structure dynamically allocated via malloc, and a pointer to that data structure is returned as the function
-	// result.  (The caller must free this memory.)
-	void * malloc_get_state( void ) {
-		return nullptr;									// unsupported
-	} // malloc_get_state
-
-
-	// Restores the state of all malloc internal bookkeeping variables to the values recorded in the opaque data
-	// structure pointed to by state.
-	int malloc_set_state( void * ) {
-		return 0;										// unsupported
-	} // malloc_set_state
-
-	// Set the amount (bytes) to extend the heap size once all the current storage in the heap is allocated.
-	size_t malloc_expansion() { return __DEFAULT_HEAP_EXPANSION__; }
-
-	// Set the crossover point between allocations occuring in the sbrk area or separately mmapped.
-	size_t malloc_mmap_start() { return __DEFAULT_MMAP_START__; }
-
-	// Amount subtracted to adjust for unfreed program storage (debug only).
-	size_t malloc_unfreed() { return __DEFAULT_HEAP_UNFREED__; }
-} // extern "C"
-
-
-#ifdef __cforall
-void * resize( void * oaddr, size_t nalign, size_t size )
-#else
-extern "C" {
-void * _X6resizeFPv_Pvmm__1( void * oaddr, size_t nalign, size_t size )
-#endif
-{
-  if ( UNLIKELY( oaddr == nullptr ) ) {
-		return memalignNoStats( nalign, size
-								#ifdef __STATISTICS__
-								, HeapStatistics::RESIZE
-								#endif // __STATISTICS__
-			);
-	} // if
-
-	// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-  if ( UNLIKELY( size == 0 ) ) {						// special cases
-		#ifdef __STATISTICS__
-		heapManager->stats.resize_0_calls += 1;
-		#endif // __STATISTICS__
-		doFree( oaddr );								// free previous storage
-		return nullptr;
-	} // if
-
-	#ifdef __DEBUG__
-	checkAlign( nalign );								// check alignment
-	#endif // __DEBUG__
-
-	// Attempt to reuse existing alignment.
-	Heap::Storage::Header * header = headerAddr( oaddr );
-	bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
-	size_t oalign;
-	if ( UNLIKELY( isFakeHeader ) ) {
-		oalign = header->kind.fake.alignment & -2;		// old alignment
-		if ( UNLIKELY( (uintptr_t)oaddr % nalign == 0	// lucky match ?
-			 && ( oalign <= nalign						// going down
-				  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
-			) ) {
-			headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
-			Heap::FreeHeader * freeHead;
-			size_t bsize, oalign;
-			headers( "resize", oaddr, header, freeHead, bsize, oalign );
-			size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
-
-			if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage
-				headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
-
-				header->kind.real.blockSize &= -2;		// turn off 0 fill
-				header->kind.real.size = size;			// reset allocation size
-				return oaddr;
-			} // if
-		} // if
-	} else if ( ! isFakeHeader							// old real header (aligned on libAlign) ?
-				&& nalign == __ALIGN__ ) {				// new alignment also on libAlign => no fake header needed
-		return resize( oaddr, size );					// duplicate special case checks
-	} // if
-
-	// change size, DO NOT preserve STICKY PROPERTIES.
-	doFree( oaddr );									// free previous storage
-	return memalignNoStats( nalign, size				// create new aligned area
-							#ifdef __STATISTICS__
-							, HeapStatistics::RESIZE
-							#endif // __STATISTICS__
-		);
-} // resize
-#ifndef __cforall
-}
-#endif
-
-
-#ifdef __cforall
-void * realloc( void * oaddr, size_t nalign, size_t size )
-#else
-extern "C" {
-void * _X7reallocFPv_Pvmm__1( void * oaddr, size_t nalign, size_t size )
-#endif
-{
-  if ( UNLIKELY( oaddr == nullptr ) ) {
-		return memalignNoStats( nalign, size
-								#ifdef __STATISTICS__
-								, HeapStatistics::REALLOC
-								#endif // __STATISTICS__
-			);
-	} // if
-
-	// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-  if ( UNLIKELY( size == 0 ) ) {						// special cases
-		#ifdef __STATISTICS__
-		heapManager->stats.realloc_0_calls += 1;
-		#endif // __STATISTICS__
-		doFree( oaddr );								// free previous storage
-		return nullptr;
-	} // if
-
-	#ifdef __DEBUG__
-	checkAlign( nalign );								// check alignment
-	#endif // __DEBUG__
-
-	// Attempt to reuse existing alignment.
-	Heap::Storage::Header * header = headerAddr( oaddr );
-	bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
-	size_t oalign;
-	if ( UNLIKELY( isFakeHeader ) ) {
-		oalign = header->kind.fake.alignment & -2;		// old alignment
-		if ( UNLIKELY( (uintptr_t)oaddr % nalign == 0	// lucky match ?
-			 && ( oalign <= nalign						// going down
-				  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
-			) ) {
-			headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
-			return realloc( oaddr, size );				// duplicate special case checks
-		} // if
-	} else if ( ! isFakeHeader							// old real header (aligned on libAlign) ?
-				&& nalign == __ALIGN__ ) {				// new alignment also on libAlign => no fake header needed
-		return realloc( oaddr, size );					// duplicate special case checks
-	} // if
-
-	Heap::FreeHeader * freeHead;
-	size_t bsize;
-	headers( "realloc", oaddr, header, freeHead, bsize, oalign );
-
-	// change size and copy old content to new storage
-
-	size_t osize = header->kind.real.size;				// old allocation size
-	bool ozfill = (header->kind.real.blockSize & 2);	// old allocation zero filled
-
-	void * naddr = memalignNoStats( nalign, size		// create new aligned area
-									#ifdef __STATISTICS__
-									, HeapStatistics::REALLOC
-									#endif // __STATISTICS__
-		);
-
-	headers( "realloc", naddr, header, freeHead, bsize, oalign );
-	memcpy( naddr, oaddr, std::min( osize, size ) );	// copy bytes
-	doFree( oaddr );									// free previous storage
-
-	if ( UNLIKELY( ozfill ) ) {							// previous request zero fill ?
-		header->kind.real.blockSize |= 2;				// mark new request as zero filled
-		if ( size > osize ) {							// previous request larger ?
-			memset( (char *)naddr + osize, '\0', size - osize ); // initialize added storage
-		} // if
-	} // if
-	return naddr;
-} // realloc
-#ifndef __cforall
-}
-#endif
-
-// zip -r HeapPerThread.zip heap/HeapPerThread.h heap/HeapPerThread.cc heap/Makefile heap/affinity.h heap/test.cc heap/away.cc
-
-// g++-10 -Wall -Wextra -g -O3 -DNDEBUG -D__STATISTICS__ -DTLS HeapPerThread.cc -fPIC -shared -o HeapPerThread.so
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "g++-10 -Wall -Wextra -g -O3 -DNDEBUG -D__STATISTICS__ HeapPerThread.cc -c" //
-// End: //
Index: libcfa/src/heap.cfa
===================================================================
--- libcfa/src/heap.cfa	(revision a182ad5ee947c0c1f30075a88713688a0acd0307)
+++ libcfa/src/heap.cfa	(revision a182ad5ee947c0c1f30075a88713688a0acd0307)
@@ -0,0 +1,1416 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// heap.cfa --
+//
+// Author           : Peter A. Buhr
+// Created On       : Tue Dec 19 21:58:35 2017
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Sun Jan  2 23:29:41 2022
+// Update Count     : 1058
+//
+
+#include <unistd.h>										// sbrk, sysconf
+#include <stdlib.h>										// EXIT_FAILURE
+#include <stdbool.h>									// true, false
+#include <stdio.h>										// snprintf, fileno
+#include <errno.h>										// errno
+#include <string.h>										// memset, memcpy
+#include <limits.h>										// ULONG_MAX
+#include <malloc.h>										// memalign, malloc_usable_size
+#include <sys/mman.h>									// mmap, munmap
+
+#include "bits/align.hfa"								// libAlign
+#include "bits/defs.hfa"								// likely, unlikely
+#include "bits/locks.hfa"								// __spinlock_t
+#include "startup.hfa"									// STARTUP_PRIORITY_MEMORY
+#include "math.hfa"										// min
+#include "bitmanip.hfa"									// is_pow2, ceiling2
+
+static bool traceHeap = false;
+
+inline bool traceHeap() { return traceHeap; }
+
+bool traceHeapOn() {
+	bool temp = traceHeap;
+	traceHeap = true;
+	return temp;
+} // traceHeapOn
+
+bool traceHeapOff() {
+	bool temp = traceHeap;
+	traceHeap = false;
+	return temp;
+} // traceHeapOff
+
+bool traceHeapTerm() { return false; }
+
+
+static bool prtFree = false;
+
+bool prtFree() {
+	return prtFree;
+} // prtFree
+
+bool prtFreeOn() {
+	bool temp = prtFree;
+	prtFree = true;
+	return temp;
+} // prtFreeOn
+
+bool prtFreeOff() {
+	bool temp = prtFree;
+	prtFree = false;
+	return temp;
+} // prtFreeOff
+
+
+enum {
+	// Define the default extension heap amount in units of bytes. When the uC++ supplied heap reaches the brk address,
+	// the brk address is extended by the extension amount.
+	__CFA_DEFAULT_HEAP_EXPANSION__ = (10 * 1024 * 1024),
+
+	// Define the mmap crossover point during allocation. Allocations less than this amount are allocated from buckets;
+	// values greater than or equal to this value are mmap from the operating system.
+	__CFA_DEFAULT_MMAP_START__ = (512 * 1024 + 1),
+};
+
+size_t default_mmap_start() __attribute__(( weak )) {
+	return __CFA_DEFAULT_MMAP_START__;
+} // default_mmap_start
+
+size_t default_heap_expansion() __attribute__(( weak )) {
+	return __CFA_DEFAULT_HEAP_EXPANSION__;
+} // default_heap_expansion
+
+
+#ifdef __CFA_DEBUG__
+static size_t allocUnfreed;								// running total of allocations minus frees
+
+static void prtUnfreed() {
+	if ( allocUnfreed != 0 ) {
+		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
+		char helpText[512];
+		int len = snprintf( helpText, sizeof(helpText), "CFA warning (UNIX pid:%ld) : program terminating with %zu(0x%zx) bytes of storage allocated but not freed.\n"
+							"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
+							(long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid
+		__cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
+	} // if
+} // prtUnfreed
+
+extern int cfa_main_returned;							// from interpose.cfa
+extern "C" {
+	void heapAppStart() {								// called by __cfaabi_appready_startup
+		allocUnfreed = 0;
+	} // heapAppStart
+
+	void heapAppStop() {								// called by __cfaabi_appready_startdown
+		fclose( stdin ); fclose( stdout );
+		if ( cfa_main_returned ) prtUnfreed();			// do not check unfreed storage if exit called
+	} // heapAppStop
+} // extern "C"
+#endif // __CFA_DEBUG__
+
+
+// statically allocated variables => zero filled.
+size_t __page_size;										// architecture pagesize
+int __map_prot;											// common mmap/mprotect protection
+static size_t heapExpand;								// sbrk advance
+static size_t mmapStart;								// cross over point for mmap
+static unsigned int maxBucketsUsed;						// maximum number of buckets in use
+
+
+#define SPINLOCK 0
+#define LOCKFREE 1
+#define BUCKETLOCK SPINLOCK
+#if BUCKETLOCK == SPINLOCK
+#elif BUCKETLOCK == LOCKFREE
+#include <stackLockFree.hfa>
+#else
+	#error undefined lock type for bucket lock
+#endif // LOCKFREE
+
+// Recursive definitions: HeapManager needs size of bucket array and bucket area needs sizeof HeapManager storage.
+// Break recusion by hardcoding number of buckets and statically checking number is correct after bucket array defined.
+enum { NoBucketSizes = 91 };							// number of buckets sizes
+
+struct HeapManager {
+	struct Storage {
+		struct Header {									// header
+			union Kind {
+				struct RealHeader {
+					union {
+						struct {						// 4-byte word => 8-byte header, 8-byte word => 16-byte header
+							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
+							uint64_t padding;			// unused, force home/blocksize to overlay alignment in fake header
+							#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
+
+							union {
+								// FreeHeader * home;		// allocated block points back to home locations (must overlay alignment)
+								// 2nd low-order bit => zero filled
+								void * home;			// allocated block points back to home locations (must overlay alignment)
+								size_t blockSize;		// size for munmap (must overlay alignment)
+								#if BUCKETLOCK == SPINLOCK
+								Storage * next;			// freed block points next freed block of same size
+								#endif // SPINLOCK
+							};
+							size_t size;				// allocation size in bytes
+
+							#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
+							uint64_t padding;			// unused, force home/blocksize to overlay alignment in fake header
+							#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
+						};
+						#if BUCKETLOCK == LOCKFREE
+						Link(Storage) next;				// freed block points next freed block of same size (double-wide)
+						#endif // LOCKFREE
+					};
+				} real; // RealHeader
+
+				struct FakeHeader {
+					#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+					uint32_t alignment;					// 1st low-order bit => fake header & alignment
+					#endif // __ORDER_LITTLE_ENDIAN__
+
+					uint32_t offset;
+
+					#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
+					#endif // __ORDER_BIG_ENDIAN__
+				} fake; // FakeHeader
+			} kind; // Kind
+		} header; // Header
+		char pad[libAlign() - sizeof( Header )];
+		char data[0];									// storage
+	}; // Storage
+
+	static_assert( libAlign() >= sizeof( Storage ), "libAlign() < sizeof( Storage )" );
+
+	struct FreeHeader {
+		#if BUCKETLOCK == SPINLOCK
+		__spinlock_t lock;								// must be first field for alignment
+		Storage * freeList;
+		#else
+		StackLF(Storage) freeList;
+		#endif // BUCKETLOCK
+		size_t blockSize;								// size of allocations on this list
+	}; // FreeHeader
+
+	// must be first fields for alignment
+	__spinlock_t extlock;								// protects allocation-buffer extension
+	FreeHeader freeLists[NoBucketSizes];				// buckets for different allocation sizes
+
+	void * heapBegin;									// start of heap
+	void * heapEnd;										// logical end of heap
+	size_t heapRemaining;								// amount of storage not allocated in the current chunk
+}; // HeapManager
+
+#if BUCKETLOCK == LOCKFREE
+static inline {
+	Link(HeapManager.Storage) * ?`next( HeapManager.Storage * this ) { return &this->header.kind.real.next; }
+	void ?{}( HeapManager.FreeHeader & ) {}
+	void ^?{}( HeapManager.FreeHeader & ) {}
+} // distribution
+#endif // LOCKFREE
+
+static inline size_t getKey( const HeapManager.FreeHeader & freeheader ) { return freeheader.blockSize; }
+
+
+#define FASTLOOKUP
+#define __STATISTICS__
+
+// Size of array must harmonize with NoBucketSizes and individual bucket sizes must be multiple of 16.
+// Smaller multiples of 16 and powers of 2 are common allocation sizes, so make them generate the minimum required bucket size.
+// malloc(0) returns 0p, so no bucket is necessary for 0 bytes returning an address that can be freed.
+static const unsigned int bucketSizes[] @= {			// different bucket sizes
+	16 + sizeof(HeapManager.Storage), 32 + sizeof(HeapManager.Storage), 48 + sizeof(HeapManager.Storage), 64 + sizeof(HeapManager.Storage), // 4
+	96 + sizeof(HeapManager.Storage), 112 + sizeof(HeapManager.Storage), 128 + sizeof(HeapManager.Storage), // 3
+	160, 192, 224, 256 + sizeof(HeapManager.Storage), // 4
+	320, 384, 448, 512 + sizeof(HeapManager.Storage), // 4
+	640, 768, 896, 1_024 + sizeof(HeapManager.Storage), // 4
+	1_536, 2_048 + sizeof(HeapManager.Storage), // 2
+	2_560, 3_072, 3_584, 4_096 + sizeof(HeapManager.Storage), // 4
+	6_144, 8_192 + sizeof(HeapManager.Storage), // 2
+	9_216, 10_240, 11_264, 12_288, 13_312, 14_336, 15_360, 16_384 + sizeof(HeapManager.Storage), // 8
+	18_432, 20_480, 22_528, 24_576, 26_624, 28_672, 30_720, 32_768 + sizeof(HeapManager.Storage), // 8
+	36_864, 40_960, 45_056, 49_152, 53_248, 57_344, 61_440, 65_536 + sizeof(HeapManager.Storage), // 8
+	73_728, 81_920, 90_112, 98_304, 106_496, 114_688, 122_880, 131_072 + sizeof(HeapManager.Storage), // 8
+	147_456, 163_840, 180_224, 196_608, 212_992, 229_376, 245_760, 262_144 + sizeof(HeapManager.Storage), // 8
+	294_912, 327_680, 360_448, 393_216, 425_984, 458_752, 491_520, 524_288 + sizeof(HeapManager.Storage), // 8
+	655_360, 786_432, 917_504, 1_048_576 + sizeof(HeapManager.Storage), // 4
+	1_179_648, 1_310_720, 1_441_792, 1_572_864, 1_703_936, 1_835_008, 1_966_080, 2_097_152 + sizeof(HeapManager.Storage), // 8
+	2_621_440, 3_145_728, 3_670_016, 4_194_304 + sizeof(HeapManager.Storage), // 4
+};
+
+static_assert( NoBucketSizes == sizeof(bucketSizes) / sizeof(bucketSizes[0] ), "size of bucket array wrong" );
+
+#ifdef FASTLOOKUP
+enum { LookupSizes = 65_536 + sizeof(HeapManager.Storage) }; // number of fast lookup sizes
+static unsigned char lookup[LookupSizes];				// O(1) lookup for small sizes
+#endif // FASTLOOKUP
+
+static const off_t mmapFd = -1;							// fake or actual fd for anonymous file
+#ifdef __CFA_DEBUG__
+static bool heapBoot = 0;								// detect recursion during boot
+#endif // __CFA_DEBUG__
+
+// The constructor for heapManager is called explicitly in memory_startup.
+static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
+
+
+#ifdef __STATISTICS__
+// Heap statistics counters.
+static unsigned int malloc_calls, malloc_0_calls;
+static unsigned long long int malloc_storage_request, malloc_storage_alloc;
+static unsigned int aalloc_calls, aalloc_0_calls;
+static unsigned long long int aalloc_storage_request, aalloc_storage_alloc;
+static unsigned int calloc_calls, calloc_0_calls;
+static unsigned long long int calloc_storage_request, calloc_storage_alloc;
+static unsigned int memalign_calls, memalign_0_calls;
+static unsigned long long int memalign_storage_request, memalign_storage_alloc;
+static unsigned int amemalign_calls, amemalign_0_calls;
+static unsigned long long int amemalign_storage_request, amemalign_storage_alloc;
+static unsigned int cmemalign_calls, cmemalign_0_calls;
+static unsigned long long int cmemalign_storage_request, cmemalign_storage_alloc;
+static unsigned int resize_calls, resize_0_calls;
+static unsigned long long int resize_storage_request, resize_storage_alloc;
+static unsigned int realloc_calls, realloc_0_calls;
+static unsigned long long int realloc_storage_request, realloc_storage_alloc;
+static unsigned int free_calls, free_null_calls;
+static unsigned long long int free_storage_request, free_storage_alloc;
+static unsigned int mmap_calls;
+static unsigned long long int mmap_storage_request, mmap_storage_alloc;
+static unsigned int munmap_calls;
+static unsigned long long int munmap_storage_request, munmap_storage_alloc;
+static unsigned int sbrk_calls;
+static unsigned long long int sbrk_storage;
+// Statistics file descriptor (changed by malloc_stats_fd).
+static int stats_fd = STDERR_FILENO;					// default stderr
+
+// Use "write" because streams may be shutdown when calls are made.
+static void printStats() {
+	char helpText[1024];
+	__cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
+								"\nHeap statistics: (storage request / allocation + header)\n"
+								"  malloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  aalloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  calloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  memalign  >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  amemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  cmemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  resize    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  realloc   >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  free      !null calls %'u; null calls %'u; storage %'llu / %'llu bytes\n"
+								"  sbrk      calls %'u; storage %'llu bytes\n"
+								"  mmap      calls %'u; storage %'llu / %'llu bytes\n"
+								"  munmap    calls %'u; storage %'llu / %'llu bytes\n",
+								malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc,
+								aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc,
+								calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc,
+								memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc,
+								amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc,
+								cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc,
+								resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc,
+								realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc,
+								free_calls, free_null_calls, free_storage_request, free_storage_alloc,
+								sbrk_calls, sbrk_storage,
+								mmap_calls, mmap_storage_request, mmap_storage_alloc,
+								munmap_calls, munmap_storage_request, munmap_storage_alloc
+		);
+} // printStats
+
+// Emit allocation statistics as pseudo-XML on 'stream', mirroring glibc malloc_info().
+// Returns the number of bytes written.  Builds the report in a fixed local buffer and
+// writes with __cfaabi_bits_write, which ensures all bytes are written or exits; no
+// allocation may occur here because this can run inside the allocator itself.
+static int printStatsXML( FILE * stream ) {				// see malloc_info
+	char helpText[1024];
+	int len = snprintf( helpText, sizeof(helpText),
+						"<malloc version=\"1\">\n"
+						"<heap nr=\"0\">\n"
+						"<sizes>\n"
+						"</sizes>\n"
+						"<total type=\"malloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"aalloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"calloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"memalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"amemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"cmemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"resize\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"realloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"free\" !null=\"%'u;\" 0 null=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"sbrk\" count=\"%'u;\" size=\"%'llu\"/> bytes\n"
+						"<total type=\"mmap\" count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"munmap\" count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"</malloc>",
+						malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc,
+						aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc,
+						calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc,
+						memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc,
+						amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc,
+						cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc,
+						resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc,
+						realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc,
+						free_calls, free_null_calls, free_storage_request, free_storage_alloc,
+						sbrk_calls, sbrk_storage,
+						mmap_calls, mmap_storage_request, mmap_storage_alloc,
+						munmap_calls, munmap_storage_request, munmap_storage_alloc
+		);
+	// snprintf returns the untruncated length; clamp so a truncated report does not read
+	// past the end of helpText.
+	if ( unlikely( len >= (int)sizeof(helpText) ) ) len = sizeof(helpText) - 1;
+	__cfaabi_bits_write( fileno( stream ), helpText, len );	// ensures all bytes written or exit
+	return len;
+} // printStatsXML
+#endif // __STATISTICS__
+
+
+// thunk problem
+// Lower-bound binary search: return the index of the first element of vals[0..dim)
+// that is >= key, or dim when every element is smaller.  vals must be sorted ascending.
+size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
+	size_t low = 0, high = dim;
+	while ( low < high ) {
+		size_t mid = low + (high - low) / 2;
+		if ( (unsigned int &)(vals[mid]) < key ) {		// cast away const
+			low = mid + 1;								// key lies strictly above mid
+		} else {
+			high = mid;									// key is at mid or below
+		} // if
+	} // while
+	return low;
+} // Bsearchl
+
+
+// Set the global mmap threshold: requests whose total size reaches mmapStart bypass the
+// bucket heap and are mmap()ed directly.  Rejects (returns false) a value smaller than a
+// page or larger than the largest bucket; on success also computes maxBucketsUsed, the
+// index of the closest bucket size >= mmapStart.
+static inline bool setMmapStart( size_t value ) {		// true => mmapped, false => sbrk
+  if ( value < __page_size || bucketSizes[NoBucketSizes - 1] < value ) return false;
+	mmapStart = value;									// set global
+
+	// find the closest bucket size greater than or equal to the mmapStart size
+	maxBucketsUsed = Bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
+	assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
+	assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
+	return true;
+} // setMmapStart
+
+
+// headerAddr: map a user data pointer back to the header immediately preceding it
+// (which may be a fake header for aligned allocations).
+// realHeader: from a fake header, step back by the stored offset to the real header.
+// <-------+----------------------------------------------------> bsize (bucket size)
+// |header |addr
+//==================================================================================
+//                   align/offset |
+// <-----------------<------------+-----------------------------> bsize (bucket size)
+//                   |fake-header | addr
+#define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
+#define realHeader( header ) ((HeapManager.Storage.Header *)((char *)header - header->kind.fake.offset))
+
+// dataStorage: user-visible bytes remaining in the bucket after the (possibly fake)
+// header and any alignment padding.
+// <-------<<--------------------- dsize ---------------------->> bsize (bucket size)
+// |header |addr
+//==================================================================================
+//                   align/offset |
+// <------------------------------<<---------- dsize --------->>> bsize (bucket size)
+//                   |fake-header |addr
+#define dataStorage( bsize, addr, header ) (bsize - ( (char *)addr - (char *)header ))
+
+
+// Abort unless 'alignment' is a power of 2 no smaller than the minimum heap alignment.
+static inline void checkAlign( size_t alignment ) {
+	if ( alignment >= libAlign() && is_pow2( alignment ) ) return; // valid alignment
+	abort( "Alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, libAlign() );
+} // checkAlign
+
+
+// Abort with a diagnostic naming the failed operation ('name') when 'check' indicates
+// the address lies outside the heap.
+static inline void checkHeader( bool check, const char name[], void * addr ) {
+	if ( likely( ! check ) ) return;					// good address => nothing to do
+	abort( "Attempt to %s storage %p with address outside the heap.\n"
+		   "Possible cause is duplicate free on same block or overwriting of memory.",
+		   name, addr );
+} // checkHeader
+
+
+// Resolve a possibly-fake header.  An odd alignment field (low bit set) marks a fake
+// header created for an aligned allocation: extract the real alignment by clearing the
+// flag bit and back up by the stored offset to the real header.  Otherwise the header is
+// already real and the alignment is the minimum.
+static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & alignment ) {
+	if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
+		alignment = header->kind.fake.alignment & -2;	// remove flag from value
+		#ifdef __CFA_DEBUG__
+		checkAlign( alignment );						// check alignment
+		#endif // __CFA_DEBUG__
+		header = realHeader( header );					// backup from fake to real header
+	} else {
+		alignment = libAlign();							// => no fake header
+	} // if
+} // fakeHeader
+
+
+// Classify the allocation at 'addr' and return (by reference) its real header, its
+// free-list bucket, its total block size, and its alignment.  Returns true when the
+// storage is mmap()ed (address outside the sbrk heap; freeElem is not set), false for
+// bucket storage.  In debug mode, validates that the header and bucket pointer lie
+// within the heap, aborting on corruption.
+static inline bool headers( const char name[] __attribute__(( unused )), void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem,
+							size_t & size, size_t & alignment ) with( heapManager ) {
+	header = headerAddr( addr );
+
+  if ( unlikely( addr < heapBegin || heapEnd < addr ) ) { // mmapped ?
+		fakeHeader( header, alignment );
+		size = header->kind.real.blockSize & -3;		// mmap size
+		return true;
+	} // if
+
+	#ifdef __CFA_DEBUG__
+	checkHeader( header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
+	#endif // __CFA_DEBUG__
+
+	// header may be safe to dereference
+	fakeHeader( header, alignment );
+	#ifdef __CFA_DEBUG__
+	checkHeader( header < (HeapManager.Storage.Header *)heapBegin || (HeapManager.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
+	#endif // __CFA_DEBUG__
+
+	// low 2 bits of the home pointer carry the zero-fill/alignment flags; mask them off
+	freeElem = (HeapManager.FreeHeader *)((size_t)header->kind.real.home & -3);
+	#ifdef __CFA_DEBUG__
+	if ( freeElem < &freeLists[0] || &freeLists[NoBucketSizes] <= freeElem ) {
+		abort( "Attempt to %s storage %p with corrupted header.\n"
+			   "Possible cause is duplicate free on same block or overwriting of header information.",
+			   name, addr );
+	} // if
+	#endif // __CFA_DEBUG__
+	size = freeElem->blockSize;
+	return false;
+} // headers
+
+// #ifdef __CFA_DEBUG__
+// #if __SIZEOF_POINTER__ == 4
+// #define MASK 0xdeadbeef
+// #else
+// #define MASK 0xdeadbeefdeadbeef
+// #endif
+// #define STRIDE size_t
+
+// static void * Memset( void * addr, STRIDE size ) {		// debug only
+// 	if ( size % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, size %zd not multiple of %zd.", size, sizeof(STRIDE) );
+// 	if ( (STRIDE)addr % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, addr %p not multiple of %zd.", addr, sizeof(STRIDE) );
+
+// 	STRIDE * end = (STRIDE *)addr + size / sizeof(STRIDE);
+// 	for ( STRIDE * p = (STRIDE *)addr; p < end; p += 1 ) *p = MASK;
+// 	return addr;
+// } // Memset
+// #endif // __CFA_DEBUG__
+
+
+// Message for unrecoverable out-of-memory paths.
+#define NO_MEMORY_MSG "insufficient heap memory available for allocating %zd new bytes."
+
+// Carve 'size' bytes off the end of the sbrk heap, growing the heap (by at least
+// heapExpand, rounded up to a page) when the remainder is too small.  Serialized by
+// extlock.  On failure, prints and _exit()s rather than abort()/strerror(), because
+// those may themselves call malloc and recurse into the allocator.
+static inline void * extend( size_t size ) with( heapManager ) {
+	lock( extlock __cfaabi_dbg_ctx2 );
+	ptrdiff_t rem = heapRemaining - size;
+	if ( rem < 0 ) {
+		// If the size requested is bigger than the current remaining storage, increase the size of the heap.
+
+		size_t increase = ceiling2( size > heapExpand ? size : heapExpand, __page_size );
+		// Do not call abort or strerror( errno ) as they may call malloc.
+		if ( sbrk( increase ) == (void *)-1 ) {			// failed, no memory ?
+			unlock( extlock );
+			__cfaabi_bits_print_nolock( STDERR_FILENO, NO_MEMORY_MSG, size );
+			_exit( EXIT_FAILURE );						// give up
+		} // if
+		// Make storage executable for thunks.
+		if ( mprotect( (char *)heapEnd + heapRemaining, increase, __map_prot ) ) {
+			unlock( extlock );
+			__cfaabi_bits_print_nolock( STDERR_FILENO, "extend() : internal error, mprotect failure, heapEnd:%p size:%zd, errno:%d.\n", heapEnd, increase, errno );
+			_exit( EXIT_FAILURE );
+		} // if
+		#ifdef __STATISTICS__
+		sbrk_calls += 1;
+		sbrk_storage += increase;
+		#endif // __STATISTICS__
+		#ifdef __CFA_DEBUG__
+		// Set new memory to garbage so subsequent uninitialized usages might fail.
+		memset( (char *)heapEnd + heapRemaining, '\xde', increase );
+		//Memset( (char *)heapEnd + heapRemaining, increase );
+		#endif // __CFA_DEBUG__
+		rem = heapRemaining + increase - size;
+	} // if
+
+	// carve the block off the front of the (possibly just-grown) remaining storage
+	HeapManager.Storage * block = (HeapManager.Storage *)heapEnd;
+	heapRemaining = rem;
+	heapEnd = (char *)heapEnd + size;
+	unlock( extlock );
+	return block;
+} // extend
+
+
+// Core allocation.  Small requests (header + data < mmapStart) are served from per-size
+// free-list buckets backed by the sbrk heap; large requests are mmap()ed directly.
+// Returns a pointer to the user data area just past the header, or 0p when the request
+// would overflow the size arithmetic.
+static inline void * doMalloc( size_t size ) with( heapManager ) {
+	HeapManager.Storage * block;						// pointer to new block of storage
+
+	// Look up size in the size list.  Make sure the user request includes space for the header that must be allocated
+	// along with the block and is a multiple of the alignment size.
+
+  if ( unlikely( size > ULONG_MAX - sizeof(HeapManager.Storage) ) ) return 0p;
+	size_t tsize = size + sizeof(HeapManager.Storage);
+	if ( likely( tsize < mmapStart ) ) {				// small size => sbrk
+		size_t posn;
+		#ifdef FASTLOOKUP
+		if ( tsize < LookupSizes ) posn = lookup[tsize];
+		else
+		#endif // FASTLOOKUP
+			posn = Bsearchl( (unsigned int)tsize, bucketSizes, (size_t)maxBucketsUsed );
+		HeapManager.FreeHeader * freeElem = &freeLists[posn];
+		verify( freeElem <= &freeLists[maxBucketsUsed] ); // subscripting error ?
+		verify( tsize <= freeElem->blockSize );			// search failure ?
+		tsize = freeElem->blockSize;					// total space needed for request
+
+		// Spin until the lock is acquired for this particular size of block.
+
+		#if BUCKETLOCK == SPINLOCK
+		lock( freeElem->lock __cfaabi_dbg_ctx2 );
+		block = freeElem->freeList;						// remove node from stack
+		#else
+		block = pop( freeElem->freeList );
+		#endif // BUCKETLOCK
+		if ( unlikely( block == 0p ) ) {				// no free block ?
+			#if BUCKETLOCK == SPINLOCK
+			unlock( freeElem->lock );
+			#endif // BUCKETLOCK
+
+			// Freelist for that size was empty, so carve it out of the heap if there's enough left, or get some more
+			// and then carve it off.
+
+			block = (HeapManager.Storage *)extend( tsize );	// mutual exclusion on call
+		#if BUCKETLOCK == SPINLOCK
+		} else {
+			freeElem->freeList = block->header.kind.real.next;
+			unlock( freeElem->lock );
+		#endif // BUCKETLOCK
+		} // if
+
+		block->header.kind.real.home = freeElem;		// pointer back to free list of apropriate size
+	} else {											// large size => mmap
+  if ( unlikely( size > ULONG_MAX - __page_size ) ) return 0p;
+		tsize = ceiling2( tsize, __page_size );			// must be multiple of page size
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &mmap_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &mmap_storage_request, size, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &mmap_storage_alloc, tsize, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		block = (HeapManager.Storage *)mmap( 0, tsize, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, mmapFd, 0 );
+		if ( block == (HeapManager.Storage *)MAP_FAILED ) { // failed ?
+			if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, tsize ); // no memory
+			// Do not call strerror( errno ) as it may call malloc.
+			abort( "(HeapManager &)0x%p.doMalloc() : internal error, mmap failure, size:%zu errno:%d.", &heapManager, tsize, errno );
+		} //if
+		#ifdef __CFA_DEBUG__
+		// Set new memory to garbage so subsequent uninitialized usages might fail.
+		memset( block, '\xde', tsize );
+		//Memset( block, tsize );
+		#endif // __CFA_DEBUG__
+		block->header.kind.real.blockSize = tsize;		// storage size for munmap
+	} // if
+
+	block->header.kind.real.size = size;				// store allocation size
+	void * addr = &(block->data);						// adjust off header to user bytes
+	verify( ((uintptr_t)addr & (libAlign() - 1)) == 0 ); // minimum alignment ?
+
+	#ifdef __CFA_DEBUG__
+	__atomic_add_fetch( &allocUnfreed, tsize, __ATOMIC_SEQ_CST );
+	if ( traceHeap() ) {
+		enum { BufferSize = 64 };
+		char helpText[BufferSize];
+		int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", addr, size, tsize );
+		__cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
+	} // if
+	#endif // __CFA_DEBUG__
+
+	return addr;
+} // doMalloc
+
+
+// Core deallocation.  Classifies 'addr' via headers(): mmap()ed storage is unmapped;
+// bucket (sbrk) storage is pushed back on its size-class free list.  Aborts on a
+// corrupted or foreign pointer (detection in headers()/munmap).
+static inline void doFree( void * addr ) with( heapManager ) {
+	#ifdef __CFA_DEBUG__
+	if ( unlikely( heapManager.heapBegin == 0p ) ) {
+		abort( "doFree( %p ) : internal error, called before heap is initialized.", addr );
+	} // if
+	#endif // __CFA_DEBUG__
+
+	HeapManager.Storage.Header * header;
+	HeapManager.FreeHeader * freeElem;
+	size_t size, alignment;								// not used (see realloc)
+
+	if ( headers( "free", addr, header, freeElem, size, alignment ) ) { // mmapped ?
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &munmap_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &munmap_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &munmap_storage_alloc, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+		if ( munmap( header, size ) == -1 ) {
+			abort( "Attempt to deallocate storage %p not allocated or with corrupt header.\n"
+				   "Possible cause is invalid pointer.",
+				   addr );
+		} // if
+	} else {
+		#ifdef __CFA_DEBUG__
+		// Set free memory to garbage so subsequent usages might fail.
+		memset( ((HeapManager.Storage *)header)->data, '\xde', freeElem->blockSize - sizeof( HeapManager.Storage ) );
+		//Memset( ((HeapManager.Storage *)header)->data, freeElem->blockSize - sizeof( HeapManager.Storage ) );
+		#endif // __CFA_DEBUG__
+
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &free_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &free_storage_alloc, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		// return the block to its size-class free list
+		#if BUCKETLOCK == SPINLOCK
+		lock( freeElem->lock __cfaabi_dbg_ctx2 );		// acquire spin lock
+		header->kind.real.next = freeElem->freeList;	// push on stack
+		freeElem->freeList = (HeapManager.Storage *)header;
+		unlock( freeElem->lock );						// release spin lock
+		#else
+		push( freeElem->freeList, *(HeapManager.Storage *)header );
+		#endif // BUCKETLOCK
+	} // if
+
+	#ifdef __CFA_DEBUG__
+	__atomic_add_fetch( &allocUnfreed, -size, __ATOMIC_SEQ_CST );
+	if ( traceHeap() ) {
+		char helpText[64];
+		int len = snprintf( helpText, sizeof(helpText), "Free( %p ) size:%zu\n", addr, size );
+		__cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
+	} // if
+	#endif // __CFA_DEBUG__
+} // doFree
+
+
+// Walk every free list, optionally printing a per-bucket census, and return the number
+// of heap bytes NOT on any free list (i.e. in use or fragmentation).
+size_t prtFree( HeapManager & manager ) with( manager ) {
+	size_t total = 0;
+	#ifdef __STATISTICS__
+	__cfaabi_bits_acquire();
+	__cfaabi_bits_print_nolock( STDERR_FILENO, "\nBin lists (bin size : free blocks on list)\n" );
+	#endif // __STATISTICS__
+	for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {
+		size_t size = freeLists[i].blockSize;
+		#ifdef __STATISTICS__
+		unsigned int N = 0;
+		#endif // __STATISTICS__
+
+		#if BUCKETLOCK == SPINLOCK
+		for ( HeapManager.Storage * p = freeLists[i].freeList; p != 0p; p = p->header.kind.real.next ) {
+		#else
+			// NOTE(review): this non-SPINLOCK variant looks unfinished -- an unconditional
+			// for(;;) with no advance or exit (see FIX ME comments below); confirm the
+			// BUCKETLOCK != SPINLOCK configuration is not currently built.
+			for(;;) {
+//		for ( HeapManager.Storage * p = top( freeLists[i].freeList ); p != 0p; p = (p)`next->top ) {
+//		for ( HeapManager.Storage * p = top( freeLists[i].freeList ); p != 0p; /* p = getNext( p )->top */) {
+//			HeapManager.Storage * temp = p->header.kind.real.next.top; // FIX ME: direct assignent fails, initialization works`
+//			typeof(p) temp = (( p )`next)->top;			// FIX ME: direct assignent fails, initialization works`
+//			p = temp;
+		#endif // BUCKETLOCK
+			total += size;
+			#ifdef __STATISTICS__
+			N += 1;
+			#endif // __STATISTICS__
+		} // for
+
+		#ifdef __STATISTICS__
+		__cfaabi_bits_print_nolock( STDERR_FILENO, "%7zu, %-7u  ", size, N );
+		if ( (i + 1) % 8 == 0 ) __cfaabi_bits_print_nolock( STDERR_FILENO, "\n" );
+		#endif // __STATISTICS__
+	} // for
+	#ifdef __STATISTICS__
+	__cfaabi_bits_print_nolock( STDERR_FILENO, "\ntotal free blocks:%zu\n", total );
+	__cfaabi_bits_release();
+	#endif // __STATISTICS__
+	return (char *)heapEnd - (char *)heapBegin - total;
+} // prtFree
+
+
+// Heap bootstrap: record the page size and mapping protection, build the per-size free
+// lists (and the small-size lookup table under FASTLOOKUP), set the mmap threshold, and
+// round the initial program break up to a page so heapBegin/heapEnd start page aligned.
+static void ?{}( HeapManager & manager ) with( manager ) {
+	__page_size = sysconf( _SC_PAGESIZE );
+	__map_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+
+	for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
+		freeLists[i].blockSize = bucketSizes[i];
+	} // for
+
+	#ifdef FASTLOOKUP
+	// lookup[s] = index of the smallest bucket that can hold s bytes, for s < LookupSizes
+	unsigned int idx = 0;
+	for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
+		if ( i > bucketSizes[idx] ) idx += 1;
+		lookup[i] = idx;
+	} // for
+	#endif // FASTLOOKUP
+
+	if ( ! setMmapStart( default_mmap_start() ) ) {
+		abort( "HeapManager : internal error, mmap start initialization failure." );
+	} // if
+	heapExpand = default_heap_expansion();
+
+	char * end = (char *)sbrk( 0 );
+	heapBegin = heapEnd = sbrk( (char *)ceiling2( (long unsigned int)end, __page_size ) - end ); // move start of heap to multiple of alignment
+} // HeapManager
+
+
+// Heap teardown: optionally dump statistics; the storage itself is reclaimed by the OS
+// at process exit, so nothing is unmapped here.
+static void ^?{}( HeapManager & ) {
+	#ifdef __STATISTICS__
+	if ( traceHeapTerm() ) {
+		printStats();
+		// prtUnfreed() called in heapAppStop()
+	} // if
+	#endif // __STATISTICS__
+} // ~HeapManager
+
+
+// Runs at constructor priority STARTUP_PRIORITY_MEMORY so the heap exists before any
+// other constructor allocates.  May also be reached implicitly by an earlier allocation
+// during boot, hence the idempotence check on heapBegin before constructing.
+static void memory_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_MEMORY ) ));
+void memory_startup( void ) {
+	#ifdef __CFA_DEBUG__
+	if ( heapBoot ) {									// check for recursion during system boot
+		abort( "boot() : internal error, recursively invoked during system boot." );
+	} // if
+	heapBoot = true;
+	#endif // __CFA_DEBUG__
+
+	//verify( heapManager.heapBegin != 0 );
+	//heapManager{};
+	if ( heapManager.heapBegin == 0p ) heapManager{};	// sanity check
+} // memory_startup
+
+// Destructor-priority counterpart of memory_startup: runs the HeapManager destructor
+// (statistics dump) at process shutdown.
+static void memory_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_MEMORY ) ));
+void memory_shutdown( void ) {
+	^heapManager{};
+} // memory_shutdown
+
+
+// Common allocation path without statistics counting; callers do their own bookkeeping.
+// Returns 0p for a 0-byte request.
+static inline void * mallocNoStats( size_t size ) {		// necessary for malloc statistics
+	verify( heapManager.heapBegin != 0p );				// called before memory_startup ?
+	// FIX: was "unlikely( size ) == 0" -- the misplaced parenthesis put the comparison
+	// outside the branch-prediction hint, hinting on the raw size value instead.
+  if ( unlikely( size == 0 ) ) return 0p;				// 0 BYTE ALLOCATION RETURNS NULL POINTER
+
+#if __SIZEOF_POINTER__ == 8
+	verify( size < ((typeof(size_t))1 << 48) );			// sanity: 64-bit VA uses <= 48 bits
+#endif // __SIZEOF_POINTER__ == 8
+	return doMalloc( size );
+} // mallocNoStats
+
+
+// Aligned allocation path without statistics counting.  For alignments above the default,
+// over-allocates and plants a "fake header" just before the aligned user address so free()
+// can recover the real header.  Returns 0p for a 0-byte request.
+static inline void * memalignNoStats( size_t alignment, size_t size ) {
+	// FIX: was "unlikely( size ) == 0" -- misplaced parenthesis left the comparison
+	// outside the branch-prediction hint.
+  if ( unlikely( size == 0 ) ) return 0p;				// 0 BYTE ALLOCATION RETURNS NULL POINTER
+
+	#ifdef __CFA_DEBUG__
+	checkAlign( alignment );							// check alignment
+	#endif // __CFA_DEBUG__
+
+	// if alignment <= default alignment, do normal malloc as two headers are unnecessary
+  if ( unlikely( alignment <= libAlign() ) ) return mallocNoStats( size );
+
+	// Allocate enough storage to guarantee an address on the alignment boundary, and sufficient space before it for
+	// administrative storage. NOTE, WHILE THERE ARE 2 HEADERS, THE FIRST ONE IS IMPLICITLY CREATED BY DOMALLOC.
+	//      .-------------v-----------------v----------------v----------,
+	//      | Real Header | ... padding ... |   Fake Header  | data ... |
+	//      `-------------^-----------------^-+--------------^----------'
+	//      |<--------------------------------' offset/align |<-- alignment boundary
+
+	// subtract libAlign() because it is already the minimum alignment
+	// add sizeof(Storage) for fake header
+	char * addr = (char *)mallocNoStats( size + alignment - libAlign() + sizeof(HeapManager.Storage) );
+
+	// address in the block of the "next" alignment address
+	char * user = (char *)ceiling2( (uintptr_t)(addr + sizeof(HeapManager.Storage)), alignment );
+
+	// address of header from malloc
+	HeapManager.Storage.Header * realHeader = headerAddr( addr );
+	realHeader->kind.real.size = size;					// correct size to eliminate above alignment offset
+	// address of fake header * before* the alignment location
+	HeapManager.Storage.Header * fakeHeader = headerAddr( user );
+	// SKULLDUGGERY: insert the offset to the start of the actual storage block and remember alignment
+	fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)realHeader;
+	// SKULLDUGGERY: odd alignment imples fake header
+	fakeHeader->kind.fake.alignment = alignment | 1;
+
+	return user;
+} // memalignNoStats
+
+
+extern "C" {
+	// Allocates size bytes and returns a pointer to the allocated memory.  The contents are undefined. If size is 0,
+	// then malloc() returns a unique pointer value that can later be successfully passed to free().
+	void * malloc( size_t size ) {
+		#ifdef __STATISTICS__
+		if ( unlikely( size == 0 ) ) {
+			__atomic_add_fetch( &malloc_0_calls, 1, __ATOMIC_SEQ_CST );
+		} else {
+			__atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &malloc_storage_request, size, __ATOMIC_SEQ_CST );
+		} // if
+		#endif // __STATISTICS__
+
+		return mallocNoStats( size );
+	} // malloc
+
+
+	// Same as malloc() except size bytes is an array of dim elements each of elemSize bytes.
+	// NOTE(review): dim * elemSize is not checked for overflow here -- a huge dim/elemSize
+	// pair wraps and silently allocates a small block; confirm this is the intended
+	// contract (calloc below has the same property).
+	void * aalloc( size_t dim, size_t elemSize ) {
+		size_t size = dim * elemSize;
+		#ifdef __STATISTICS__
+		if ( likely( size > 0 ) ) {
+			__atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &aalloc_storage_request, size, __ATOMIC_SEQ_CST );
+		} else {
+			__atomic_add_fetch( &aalloc_0_calls, 1, __ATOMIC_SEQ_CST );
+		} // if
+		#endif // __STATISTICS__
+
+		return mallocNoStats( size );
+	} // aalloc
+
+
+	// Same as aalloc() with memory set to zero.
+	void * calloc( size_t dim, size_t elemSize ) {
+		size_t size = dim * elemSize;
+		// FIX: was "unlikely( size ) == 0" -- misplaced parenthesis left the comparison
+		// outside the branch-prediction hint.
+	  if ( unlikely( size == 0 ) ) {			// 0 BYTE ALLOCATION RETURNS NULL POINTER
+			#ifdef __STATISTICS__
+			__atomic_add_fetch( &calloc_0_calls, 1, __ATOMIC_SEQ_CST );
+			#endif // __STATISTICS__
+			return 0p;
+		} // if
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &calloc_storage_request, size, __ATOMIC_SEQ_CST ); // use size, not dim * elemSize again
+		#endif // __STATISTICS__
+
+		char * addr = (char *)mallocNoStats( size );
+
+		HeapManager.Storage.Header * header;
+		HeapManager.FreeHeader * freeElem;
+		size_t bsize, alignment;
+
+		#ifndef __CFA_DEBUG__
+		bool mapped =
+			#endif // __CFA_DEBUG__
+			headers( "calloc", addr, header, freeElem, bsize, alignment );
+
+		#ifndef __CFA_DEBUG__
+		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
+		if ( ! mapped )
+		#endif // __CFA_DEBUG__
+			// <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined
+			// `-header`-addr                      `-size
+			memset( addr, '\0', size );					// set to zeros
+
+		header->kind.real.blockSize |= 2;				// mark as zero filled
+		return addr;
+	} // calloc
+
+
+	// Change the size of the memory block pointed to by oaddr to size bytes. The contents are undefined.  If oaddr is
+	// 0p, then the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and oaddr is
+	// not 0p, then the call is equivalent to free(oaddr). Unless oaddr is 0p, it must have been returned by an earlier
+	// call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done.
+	void * resize( void * oaddr, size_t size ) {
+		// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
+	  if ( unlikely( size == 0 ) ) {					// special cases
+			#ifdef __STATISTICS__
+			__atomic_add_fetch( &resize_0_calls, 1, __ATOMIC_SEQ_CST );
+			#endif // __STATISTICS__
+			free( oaddr );
+			return 0p;
+		} // if
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+	  if ( unlikely( oaddr == 0p ) ) {
+			#ifdef __STATISTICS__
+			__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
+			#endif // __STATISTICS__
+			return mallocNoStats( size );
+		} // if
+
+		HeapManager.Storage.Header * header;
+		HeapManager.FreeHeader * freeElem;
+		size_t bsize, oalign;
+		headers( "resize", oaddr, header, freeElem, bsize, oalign );
+
+		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+		// same size, DO NOT preserve STICKY PROPERTIES.
+		// Reuse the existing block when not over-aligned and the new size fits with at
+		// most 50% wasted storage; unlike realloc, contents/zero-fill are NOT preserved.
+		if ( oalign == libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
+			header->kind.real.blockSize &= -2;			// no alignment and turn off 0 fill
+			header->kind.real.size = size;				// reset allocation size
+			return oaddr;
+		} // if
+
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		// change size, DO NOT preserve STICKY PROPERTIES.
+		free( oaddr );
+		return mallocNoStats( size );					// create new area
+	} // resize
+
+
+	// Same as resize() but the contents are unchanged in the range from the start of the region up to the minimum of
+	// the old and new sizes.  Sticky properties (zero-fill, alignment) ARE preserved.
+	void * realloc( void * oaddr, size_t size ) {
+		// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
+	  if ( unlikely( size == 0 ) ) {					// special cases
+			#ifdef __STATISTICS__
+			__atomic_add_fetch( &realloc_0_calls, 1, __ATOMIC_SEQ_CST );
+			#endif // __STATISTICS__
+			free( oaddr );
+			return 0p;
+		} // if
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+	  if ( unlikely( oaddr == 0p ) ) {
+			#ifdef __STATISTICS__
+			__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
+			#endif // __STATISTICS__
+			return mallocNoStats( size );
+		} // if
+
+		HeapManager.Storage.Header * header;
+		HeapManager.FreeHeader * freeElem;
+		size_t bsize, oalign;
+		headers( "realloc", oaddr, header, freeElem, bsize, oalign );
+
+		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+		size_t osize = header->kind.real.size;			// old allocation size
+		bool ozfill = (header->kind.real.blockSize & 2); // old allocation zero filled
+		// FIX: was "unlikely( size <= odsize ) && odsize <= size * 2" -- the hint covered
+		// only the first conjunct; hint the whole condition as the siblings do.
+	  if ( unlikely( size <= odsize && odsize <= size * 2 ) ) { // allow up to 50% wasted storage
+			header->kind.real.size = size;				// reset allocation size
+			if ( unlikely( ozfill ) && size > osize ) {	// previous request zero fill and larger ?
+				memset( (char *)oaddr + osize, '\0', size - osize ); // initialize added storage
+			} // if
+			return oaddr;
+		} // if
+
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		// change size and copy old content to new storage
+
+		void * naddr;
+		if ( likely( oalign == libAlign() ) ) {			// previous request not aligned ?
+			naddr = mallocNoStats( size );				// create new area
+		} else {
+			naddr = memalignNoStats( oalign, size );	// create new aligned area
+		} // if
+
+		headers( "realloc", naddr, header, freeElem, bsize, oalign ); // reacquire header for NEW block
+		memcpy( naddr, oaddr, min( osize, size ) );		// copy bytes
+		free( oaddr );
+
+		if ( unlikely( ozfill ) ) {						// previous request zero fill ?
+			header->kind.real.blockSize |= 2;			// mark new request as zero filled
+			if ( size > osize ) {						// previous request larger ?
+				memset( (char *)naddr + osize, '\0', size - osize ); // initialize added storage
+			} // if
+		} // if
+		return naddr;
+	} // realloc
+
+
+	// Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete)
+	void * memalign( size_t alignment, size_t size ) {
+		#ifdef __STATISTICS__
+		if ( unlikely( size == 0 ) ) {
+			__atomic_add_fetch( &memalign_0_calls, 1, __ATOMIC_SEQ_CST );
+		} else {
+			__atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &memalign_storage_request, size, __ATOMIC_SEQ_CST );
+		} // if
+		#endif // __STATISTICS__
+
+		return memalignNoStats( alignment, size );
+	} // memalign
+
+
+	// Same as aalloc() with memory alignment.
+	void * amemalign( size_t alignment, size_t dim, size_t elemSize ) {
+		size_t size = dim * elemSize;
+		#ifdef __STATISTICS__
+		// FIX: this routine previously bumped the cmemalign_* counters (copy-paste error),
+		// double-counting cmemalign and leaving the amemalign_* statistics always zero.
+		if ( likely( size > 0 ) ) {
+			__atomic_add_fetch( &amemalign_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &amemalign_storage_request, size, __ATOMIC_SEQ_CST );
+		} else {
+			__atomic_add_fetch( &amemalign_0_calls, 1, __ATOMIC_SEQ_CST );
+		} // if
+		#endif // __STATISTICS__
+
+		return memalignNoStats( alignment, size );
+	} // amemalign
+
+
+	// Same as calloc() with memory alignment.
+	void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) {
+		size_t size = dim * elemSize;
+		// FIX: was "unlikely( size ) == 0" -- misplaced parenthesis left the comparison
+		// outside the branch-prediction hint.
+	  if ( unlikely( size == 0 ) ) {					// 0 BYTE ALLOCATION RETURNS NULL POINTER
+			#ifdef __STATISTICS__
+			__atomic_add_fetch( &cmemalign_0_calls, 1, __ATOMIC_SEQ_CST );
+			#endif // __STATISTICS__
+			return 0p;
+		} // if
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &cmemalign_storage_request, size, __ATOMIC_SEQ_CST ); // use size, not dim * elemSize again
+		#endif // __STATISTICS__
+
+		char * addr = (char *)memalignNoStats( alignment, size );
+
+		HeapManager.Storage.Header * header;
+		HeapManager.FreeHeader * freeElem;
+		size_t bsize;
+
+		#ifndef __CFA_DEBUG__
+		bool mapped =
+			#endif // __CFA_DEBUG__
+			headers( "cmemalign", addr, header, freeElem, bsize, alignment );
+
+		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
+		#ifndef __CFA_DEBUG__
+		if ( ! mapped )
+		#endif // __CFA_DEBUG__
+			// <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined
+			// `-header`-addr                      `-size
+			memset( addr, '\0', size );					// set to zeros
+
+		header->kind.real.blockSize |= 2;				// mark as zero filled
+		return addr;
+	} // cmemalign
+
+
+	// Same as memalign(), but ISO/IEC 2011 C11 Section 7.22.2 states: the value of size shall be an integral multiple
+    // of alignment. This requirement is universally ignored.
+	void * aligned_alloc( size_t alignment, size_t size ) {
+		return memalign( alignment, size );
+	} // aligned_alloc
+
+
+	// Allocates size bytes and places the address of the allocated memory in *memptr. The address of the allocated
+	// memory shall be a multiple of alignment, which must be a power of two and a multiple of sizeof(void *). If size
+	// is 0, then posix_memalign() returns either 0p, or a unique pointer value that can later be successfully passed to
+	// free(3).
+	// NOTE(review): unconditionally returns 0 after a successful argument check because
+	// this allocator aborts/_exit()s on OOM rather than returning 0p; if that policy
+	// changes, ENOMEM must be reported here per POSIX.
+	int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
+	  if ( alignment < libAlign() || ! is_pow2( alignment ) ) return EINVAL; // check alignment
+		* memptr = memalign( alignment, size );
+		return 0;
+	} // posix_memalign
+
+
+	// Allocates size bytes and returns a pointer to the allocated memory. The memory address shall be a multiple of the
+	// page size.  It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
+	void * valloc( size_t size ) {
+		return memalign( __page_size, size );
+	} // valloc
+
+
+	// Same as valloc but rounds size up to a multiple of the page size before allocating.
+	void * pvalloc( size_t size ) {
+		return memalign( __page_size, ceiling2( size, __page_size ) );
+	} // pvalloc
+
+
+	// Frees the memory space pointed to by ptr, which must have been returned by a previous call to malloc(), calloc()
+	// or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is
+	// 0p, no operation is performed (other than counting the null free under __STATISTICS__).
+	void free( void * addr ) {
+	  if ( unlikely( addr == 0p ) ) {					// special case
+			#ifdef __STATISTICS__
+			__atomic_add_fetch( &free_null_calls, 1, __ATOMIC_SEQ_CST );
+			#endif // __STATISTICS__
+
+			// #ifdef __CFA_DEBUG__
+			// if ( traceHeap() ) {
+			// 	#define nullmsg "Free( 0x0 ) size:0\n"
+			// 	// Do not debug print free( 0p ), as it can cause recursive entry from sprintf.
+			// 	__cfaabi_dbg_write( nullmsg, sizeof(nullmsg) - 1 );
+			// } // if
+			// #endif // __CFA_DEBUG__
+			return;
+		} // exit
+
+		doFree( addr );
+	} // free
+
+
+	// Returns the alignment of an allocation.  A null pointer reports the minimum
+	// alignment; an odd alignment field marks a fake header carrying the real alignment.
+	size_t malloc_alignment( void * addr ) {
+	  if ( unlikely( addr == 0p ) ) return libAlign();	// minimum alignment
+		HeapManager.Storage.Header * header = headerAddr( addr );
+		if ( (header->kind.fake.alignment & 1) != 1 ) return libAlign(); // no fake header => minimum alignment
+		return header->kind.fake.alignment & -2;		// strip fake-header flag bit
+	} // malloc_alignment
+
+
+	// Set the alignment for the allocation and return previous alignment or 0 if no alignment.
+	size_t malloc_alignment_set$( void * addr, size_t alignment ) {
+	  if ( unlikely( addr == 0p ) ) return libAlign();	// minimum alignment
+		size_t ret;
+		HeapManager.Storage.Header * header = headerAddr( addr );
+		if ( (header->kind.fake.alignment & 1) == 1 ) {	// fake header ?
+			ret = header->kind.fake.alignment & -2;		// remove flag from old value
+			header->kind.fake.alignment = alignment | 1; // add flag to new value
+		} else {
+			ret = 0;									// => no alignment to change
+		} // if
+		return ret;
+	} // malloc_alignment_set$
+
+
+	// Returns true if the allocation is zero filled, e.g., allocated by calloc().
+	bool malloc_zero_fill( void * addr ) {
+	  if ( unlikely( addr == 0p ) ) return false;		// null allocation is not zero fill
+		HeapManager.Storage.Header * header = headerAddr( addr );
+		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
+			header = realHeader( header );				// backup from fake to real header
+		} // if
+		return (header->kind.real.blockSize & 2) != 0;	// zero filled ?
+	} // malloc_zero_fill
+
+	// Set allocation is zero filled and return previous zero filled.
+	bool malloc_zero_fill_set$( void * addr ) {
+	  if ( unlikely( addr == 0p ) ) return false;		// null allocation is not zero fill
+		HeapManager.Storage.Header * header = headerAddr( addr );
+		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
+			header = realHeader( header );				// backup from fake to real header
+		} // if
+		bool ret = (header->kind.real.blockSize & 2) != 0; // zero filled ?
+		header->kind.real.blockSize |= 2;				// mark as zero filled
+		return ret;
+	} // malloc_zero_fill_set$
+
+
+	// Returns original total allocation size (not bucket size) => array size is dimension * sizeof(T).
+	size_t malloc_size( void * addr ) {
+	  if ( unlikely( addr == 0p ) ) return 0;			// null allocation has zero size
+		HeapManager.Storage.Header * header = headerAddr( addr );
+		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
+			header = realHeader( header );				// backup from fake to real header
+		} // if
+		return header->kind.real.size;
+	} // malloc_size
+
+	// Set allocation size and return previous size.
+	size_t malloc_size_set$( void * addr, size_t size ) {
+	  if ( unlikely( addr == 0p ) ) return 0;			// null allocation has 0 size
+		HeapManager.Storage.Header * header = headerAddr( addr );
+		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
+			header = realHeader( header );				// backup from fake to real header
+		} // if
+		size_t ret = header->kind.real.size;
+		header->kind.real.size = size;
+		return ret;
+	} // malloc_size_set$
+
+
+	// Returns the number of usable bytes in the block pointed to by ptr, a pointer to a block of memory allocated by
+	// malloc or a related function.
+	size_t malloc_usable_size( void * addr ) {
+	  if ( unlikely( addr == 0p ) ) return 0;			// null allocation has 0 size
+		HeapManager.Storage.Header * header;
+		HeapManager.FreeHeader * freeElem;
+		size_t bsize, alignment;
+
+		headers( "malloc_usable_size", addr, header, freeElem, bsize, alignment );
+		return dataStorage( bsize, addr, header );		// data storage in bucket
+	} // malloc_usable_size
+
+
+	// Prints (on default standard error) statistics about memory allocated by malloc and related functions.
+	void malloc_stats( void ) {
+		#ifdef __STATISTICS__
+		printStats();
+		if ( prtFree() ) prtFree( heapManager );
+		#endif // __STATISTICS__
+	} // malloc_stats
+
+
+	// Changes the file descriptor where malloc_stats() writes statistics.
+	int malloc_stats_fd( int fd __attribute__(( unused )) ) {
+		#ifdef __STATISTICS__
+		int temp = stats_fd;
+		stats_fd = fd;
+		return temp;
+		#else
+		return -1;
+		#endif // __STATISTICS__
+	} // malloc_stats_fd
+
+
+	// Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument
+	// specifies the parameter to be modified, and value specifies the new value for that parameter.
+	int mallopt( int option, int value ) {
+		choose( option ) {
+		  case M_TOP_PAD:
+			heapExpand = ceiling2( value, __page_size ); return 1;
+		  case M_MMAP_THRESHOLD:
+			if ( setMmapStart( value ) ) return 1;
+			break;
+		} // switch
+		return 0;										// error, unsupported
+	} // mallopt
+
+
+	// Attempt to release free memory at the top of the heap (by calling sbrk with a suitable argument).
+	int malloc_trim( size_t ) {
+		return 0;										// => impossible to release memory
+	} // malloc_trim
+
+
+	// Exports an XML string that describes the current state of the memory-allocation implementation in the caller.
+	// The string is printed on the file stream stream.  The exported string includes information about all arenas (see
+	// malloc).
+	int malloc_info( int options, FILE * stream __attribute__(( unused )) ) {
+	  if ( options != 0 ) { errno = EINVAL; return -1; }
+		#ifdef __STATISTICS__
+		return printStatsXML( stream );
+		#else
+		return 0;										// unsupported
+		#endif // __STATISTICS__
+	} // malloc_info
+
+
+	// Records the current state of all malloc internal bookkeeping variables (but not the actual contents of the heap
+	// or the state of malloc_hook functions pointers).  The state is recorded in a system-dependent opaque data
+	// structure dynamically allocated via malloc, and a pointer to that data structure is returned as the function
+	// result.  (The caller must free this memory.)
+	void * malloc_get_state( void ) {
+		return 0p;										// unsupported
+	} // malloc_get_state
+
+
+	// Restores the state of all malloc internal bookkeeping variables to the values recorded in the opaque data
+	// structure pointed to by state.
+	int malloc_set_state( void * ) {
+		return 0;										// unsupported
+	} // malloc_set_state
+} // extern "C"
+
+
+// Must have CFA linkage to overload with C linkage realloc.
+void * resize( void * oaddr, size_t nalign, size_t size ) {
+	// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
+  if ( unlikely( size == 0 ) ) {						// special cases
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &resize_0_calls, 1, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+		free( oaddr );
+		return 0p;
+	} // if
+
+	if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum
+	#ifdef __CFA_DEBUG__
+	else checkAlign( nalign );							// check alignment
+	#endif // __CFA_DEBUG__
+
+  if ( unlikely( oaddr == 0p ) ) {
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+		return memalignNoStats( nalign, size );
+	} // if
+
+	// Attempt to reuse existing alignment.
+	HeapManager.Storage.Header * header = headerAddr( oaddr );
+	bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
+	size_t oalign;
+	if ( isFakeHeader ) {
+		oalign = header->kind.fake.alignment & -2;		// old alignment
+		if ( (uintptr_t)oaddr % nalign == 0				// lucky match ?
+			 && ( oalign <= nalign						// going down
+				  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
+			) {
+			headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
+			HeapManager.FreeHeader * freeElem;
+			size_t bsize, oalign;
+			headers( "resize", oaddr, header, freeElem, bsize, oalign );
+			size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+
+			if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage
+				headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
+
+				header->kind.real.blockSize &= -2;		// turn off 0 fill
+				header->kind.real.size = size;			// reset allocation size
+				return oaddr;
+			} // if
+		} // if
+	} else if ( ! isFakeHeader							// old real header (aligned on libAlign) ?
+				&& nalign == libAlign() ) {				// new alignment also on libAlign => no fake header needed
+		return resize( oaddr, size );					// duplicate special case checks
+	} // if
+
+	#ifdef __STATISTICS__
+	__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
+	#endif // __STATISTICS__
+
+	// change size, DO NOT preserve STICKY PROPERTIES.
+	free( oaddr );
+	return memalignNoStats( nalign, size );				// create new aligned area
+} // resize
+
+
+void * realloc( void * oaddr, size_t nalign, size_t size ) {
+	// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
+  if ( unlikely( size == 0 ) ) {						// special cases
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &realloc_0_calls, 1, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+		free( oaddr );
+		return 0p;
+	} // if
+
+	if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum
+	#ifdef __CFA_DEBUG__
+	else checkAlign( nalign );							// check alignment
+	#endif // __CFA_DEBUG__
+
+  if ( unlikely( oaddr == 0p ) ) {
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+		return memalignNoStats( nalign, size );
+	} // if
+
+	// Attempt to reuse existing alignment.
+	HeapManager.Storage.Header * header = headerAddr( oaddr );
+	bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
+	size_t oalign;
+	if ( isFakeHeader ) {
+		oalign = header->kind.fake.alignment & -2;		// old alignment
+		if ( (uintptr_t)oaddr % nalign == 0				// lucky match ?
+			 && ( oalign <= nalign						// going down
+				  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
+			) {
+			headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
+			return realloc( oaddr, size );				// duplicate alignment and special case checks
+		} // if
+	} else if ( ! isFakeHeader							// old real header (aligned on libAlign) ?
+				&& nalign == libAlign() )				// new alignment also on libAlign => no fake header needed
+		return realloc( oaddr, size );					// duplicate alignment and special case checks
+
+	#ifdef __STATISTICS__
+	__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
+	__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
+	#endif // __STATISTICS__
+
+	HeapManager.FreeHeader * freeElem;
+	size_t bsize;
+	headers( "realloc", oaddr, header, freeElem, bsize, oalign );
+
+	// change size and copy old content to new storage
+
+	size_t osize = header->kind.real.size;				// old allocation size
+	bool ozfill = (header->kind.real.blockSize & 2);	// old allocation zero filled
+
+	void * naddr = memalignNoStats( nalign, size );		// create new aligned area
+
+	headers( "realloc", naddr, header, freeElem, bsize, oalign );
+	memcpy( naddr, oaddr, min( osize, size ) );			// copy bytes
+	free( oaddr );
+
+	if ( unlikely( ozfill ) ) {							// previous request zero fill ?
+		header->kind.real.blockSize |= 2;				// mark new request as zero filled
+		if ( size > osize ) {							// previous request larger ?
+			memset( (char *)naddr + osize, '\0', size - osize ); // initialize added storage
+		} // if
+	} // if
+	return naddr;
+} // realloc
+
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa -nodebug -O2 heap.cfa" //
+// End: //
Index: libcfa/src/heap.h
===================================================================
--- libcfa/src/heap.h	(revision f69fac7faef547c274fcfbc767d8c423a7f3a207)
+++ 	(revision )
@@ -1,51 +1,0 @@
-#pragma once
-
-#include <stdlib.h>
-#include <malloc.h>
-
-// supported mallopt options
-#ifndef M_MMAP_THRESHOLD
-#define M_MMAP_THRESHOLD (-1)
-#endif // M_MMAP_THRESHOLD
-
-#ifndef M_TOP_PAD
-#define M_TOP_PAD (-2)
-#endif // M_TOP_PAD
-
-extern "C" {
-	void * malloc( size_t size );
-	void * aalloc( size_t dim, size_t elemSize );
-	void * calloc( size_t dim, size_t elemSize );
-	void * resize( void * oaddr, size_t size );
-	void * realloc( void * oaddr, size_t size );
-	void * memalign( size_t alignment, size_t size );
-	void * amemalign( size_t align, size_t dim, size_t elemSize );
-	void * cmemalign( size_t align, size_t dim, size_t elemSize );
-	void * valloc( size_t size );
-	void * pvalloc( size_t size );
-	void free( void * addr );
-	size_t malloc_alignment( void * addr );
-	bool malloc_zero_fill( void * addr );
-	size_t malloc_size( void * addr );
-	size_t malloc_usable_size( void * addr );
-	int malloc_stats_fd( int fd );
-	int malloc_info( int options, FILE * stream );
-	int mallopt( int param_number, int value );
-	size_t malloc_expansion() __attribute__((weak));	// heap expansion size (bytes)
-	size_t malloc_mmap_start() __attribute__((weak));	// crossover allocation size from sbrk to mmap
-	size_t malloc_unfreed() __attribute__((weak));		// heap unfreed size (bytes)
-} // extern "C"
-
-#ifdef __cforall
-void * resize( void * oaddr, size_t alignment, size_t size );
-void * realloc( void * oaddr, size_t alignment, size_t size );
-#else
-extern "C" {
-	void * _X6resizeFPv_Pvmm__1( void * oaddr, size_t alignment, size_t size );
-	void * _X7reallocFPv_Pvmm__1( void * oaddr, size_t alignment, size_t size );
-}
-#endif
-
-// Local Variables: //
-// tab-width: 4 //
-// End: //
Index: libcfa/src/heap.hfa
===================================================================
--- libcfa/src/heap.hfa	(revision a182ad5ee947c0c1f30075a88713688a0acd0307)
+++ libcfa/src/heap.hfa	(revision a182ad5ee947c0c1f30075a88713688a0acd0307)
@@ -0,0 +1,59 @@
+// 
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+// 
+// heap.hfa -- 
+// 
+// Author           : Peter A. Buhr
+// Created On       : Tue May 26 11:23:55 2020
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Sat Aug  8 17:36:48 2020
+// Update Count     : 16
+// 
+
+#pragma once
+
+size_t default_mmap_start();							// CFA extras
+size_t default_heap_expansion();
+
+bool traceHeap();
+bool traceHeapOn();
+bool traceHeapOff();
+
+bool traceHeapTerm();
+bool traceHeapTermOn();
+bool traceHeapTermOff();
+
+bool checkFree();
+bool checkFreeOn();
+bool checkFreeOff();
+
+// supported mallopt options
+#ifndef M_MMAP_THRESHOLD
+#define M_MMAP_THRESHOLD (-1)
+#endif // M_MMAP_THRESHOLD
+#ifndef M_TOP_PAD
+#define M_TOP_PAD (-2)
+#endif // M_TOP_PAD
+
+extern "C" {
+	void * aalloc( size_t dim, size_t elemSize );
+	void * resize( void * oaddr, size_t size );
+	void * amemalign( size_t align, size_t dim, size_t elemSize );
+	void * cmemalign( size_t align, size_t dim, size_t elemSize );
+	size_t malloc_alignment( void * addr );
+	bool malloc_zero_fill( void * addr );
+	size_t malloc_size( void * addr );
+	size_t malloc_usable_size( void * addr );
+	int malloc_stats_fd( int fd );
+} // extern "C"
+
+void * resize( void * oaddr, size_t nalign, size_t size );
+void * realloc( void * oaddr, size_t nalign, size_t size );
+
+// Local Variables: //
+// mode: c //
+// tab-width: 4 //
+// End: //
Index: libcfa/src/startup.cfa
===================================================================
--- libcfa/src/startup.cfa	(revision f69fac7faef547c274fcfbc767d8c423a7f3a207)
+++ libcfa/src/startup.cfa	(revision a182ad5ee947c0c1f30075a88713688a0acd0307)
@@ -27,16 +27,16 @@
 	void __cfaabi_appready_startup( void ) {
 		tzset();										// initialize time global variables
-		#ifdef __CFA_DEBUG__FIXME
+		#ifdef __CFA_DEBUG__
 		extern void heapAppStart();
 		heapAppStart();
-		#endif // __CFA_DEBUG__FIXME
+		#endif // __CFA_DEBUG__
 	} // __cfaabi_appready_startup
 
 	void __cfaabi_appready_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_APPREADY ) ));
 	void __cfaabi_appready_shutdown( void ) {
-		#ifdef __CFA_DEBUG__FIXME
+		#ifdef __CFA_DEBUG__
 		extern void heapAppStop();
 		heapAppStop();
-		#endif // __CFA_DEBUG__FIXME
+		#endif // __CFA_DEBUG__
 	} // __cfaabi_appready_shutdown
 
Index: libcfa/src/stdhdr/malloc.h
===================================================================
--- libcfa/src/stdhdr/malloc.h	(revision f69fac7faef547c274fcfbc767d8c423a7f3a207)
+++ libcfa/src/stdhdr/malloc.h	(revision a182ad5ee947c0c1f30075a88713688a0acd0307)
@@ -18,5 +18,5 @@
 } // extern "C"
 
-#include <heap.h>
+#include <heap.hfa>
 
 // Local Variables: //
Index: libcfa/src/stdlib.hfa
===================================================================
--- libcfa/src/stdlib.hfa	(revision f69fac7faef547c274fcfbc767d8c423a7f3a207)
+++ libcfa/src/stdlib.hfa	(revision a182ad5ee947c0c1f30075a88713688a0acd0307)
@@ -21,5 +21,5 @@
 
 #include <stdlib.h>										// *alloc, strto*, ato*
-#include <heap.h>
+#include <heap.hfa>
 
 
