Index: src/libcfa/heap.c
===================================================================
--- src/libcfa/heap.c	(revision c4f68dcd3fea60da49bcba686a6f6a3b92f92e2f)
+++ src/libcfa/heap.c	(revision c4f68dcd3fea60da49bcba686a6f6a3b92f92e2f)
@@ -0,0 +1,897 @@
+// 
+// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+// 
+// heap.c -- 
+// 
+// Author           : Peter A. Buhr
+// Created On       : Tue Dec 19 21:58:35 2017
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Tue Jul 24 08:57:22 2018
+// Update Count     : 388
+// 
+
+#include <unistd.h>										// sbrk, sysconf
+#include <stdbool.h>									// true, false
+#include <stdio.h>										// snprintf, fileno
+#include <errno.h>										// errno
+extern "C" {
+#include <sys/mman.h>									// mmap, munmap
+} // extern "C"
+
+#include "bits/align.h"									// libPow2
+#include "bits/defs.h"									// likely, unlikely
+#include "bits/locks.h"									// __spinlock_t
+#include "stdlib"										// bsearchl
+#include "malloc.h"
+
+
+enum {
+	__CFA_DEFAULT_MMAP_START__ = (512 * 1024 + 1),
+	__CFA_DEFAULT_HEAP_EXPANSION__ = (1 * 1024 * 1024),
+};
+
+size_t default_mmap_start() __attribute__(( weak )) {
+    return __CFA_DEFAULT_MMAP_START__;
+} // default_mmap_start
+
+size_t default_heap_expansion() __attribute__(( weak )) {
+    return __CFA_DEFAULT_HEAP_EXPANSION__;
+} // default_heap_expansion
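+
+// Both routines are weak symbols, so an application can tune the allocator by supplying its own definitions,
+// e.g. (hypothetical overrides; any values accepted by setMmapStart/setHeapExpand work):
+//
+//   size_t default_mmap_start() { return 1 * 1024 * 1024 + 1; }	// mmap requests larger than 1M
+//   size_t default_heap_expansion() { return 4 * 1024 * 1024; }	// grow the sbrk area 4M at a time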
+
+
+// supported mallopt options
+#ifndef M_MMAP_THRESHOLD
+#define M_MMAP_THRESHOLD (-1)
+#endif // M_MMAP_THRESHOLD
+#ifndef M_TOP_PAD
+#define M_TOP_PAD (-2)
+#endif // M_TOP_PAD
+
+#define FASTLOOKUP
+#define __STATISTICS__
+
+#define SPINLOCK 0
+#define LOCKFREE 1
+#define BUCKETLOCK SPINLOCK
+#if BUCKETLOCK == LOCKFREE
+#include <uStackLF.h>
+#endif // LOCKFREE
+
+#define ALIGN 16
+
+// enum { NoBucketSizes = 93,								// number of bucket sizes
+// #ifdef FASTLOOKUP
+// 	   LookupSizes = 65536,								// number of fast lookup sizes
+// #endif // FASTLOOKUP
+// };
+#define NoBucketSizes 93								// number of bucket sizes
+#ifdef FASTLOOKUP
+#define LookupSizes 65536								// number of fast lookup sizes
+#endif // FASTLOOKUP
+
+
+struct HeapManager {
+//	struct FreeHeader;									// forward declaration
+
+	struct Storage {
+	    struct Header {									// header
+			union Kind {
+				struct RealHeader {
+					union {
+						struct {						// 32-bit word => 64-bit header, 64-bit word => 128-bit header
+							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __U_WORDSIZE__ == 32
+							uint32_t padding;			// unused, force home/blockSize to overlay alignment in fake header
+							#endif // __ORDER_BIG_ENDIAN__ && __U_WORDSIZE__ == 32
+
+							union {
+//								FreeHeader * home;		// allocated block points back to home location (must overlay alignment)
+								void * home;			// allocated block points back to home location (must overlay alignment)
+								size_t blockSize;		// size for munmap (must overlay alignment)
+								#if BUCKETLOCK == SPINLOCK
+								Storage * next;			// freed block points to next freed block of same size
+								#endif // SPINLOCK
+							};
+
+							#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __U_WORDSIZE__ == 32
+							uint32_t padding;			// unused, force home/blockSize to overlay alignment in fake header
+							#endif // __ORDER_LITTLE_ENDIAN__ && __U_WORDSIZE__ == 32
+
+						};
+						#if BUCKETLOCK == LOCKFREE
+						Stack<Storage>::Link next;		// freed block points to next freed block of same size (double-wide)
+						#endif // LOCKFREE
+					};
+				} real;
+				struct FakeHeader {
+					#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
+					#endif // __BYTE_ORDER__
+
+					uint32_t offset;
+
+					#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
+					#endif // __BYTE_ORDER__
+				} fake;
+			} kind;
+	    } header; // Header
+	    char pad[ALIGN - sizeof( Header )];
+	    char data[0];									// storage
+	}; // Storage
+
+	static_assert( ALIGN >= sizeof( Storage ), "ALIGN < sizeof( Storage )" );
+
+	struct FreeHeader {
+		#if BUCKETLOCK == SPINLOCK
+	    __spinlock_t lock;								// must be first field for alignment
+	    Storage * freeList;
+		#elif BUCKETLOCK == LOCKFREE
+	    StackLF<Storage> freeList;
+		#else
+			#error undefined lock type for bucket lock
+		#endif // SPINLOCK
+	    size_t blockSize;								// size of allocations on this list
+	}; // FreeHeader
+
+	// must be first fields for alignment
+	__spinlock_t extlock;								// protects allocation-buffer extension
+	FreeHeader freeLists[NoBucketSizes];				// buckets for different allocation sizes
+
+	void * heapBegin;									// start of heap
+	void * heapEnd;										// logical end of heap
+	size_t heapRemaining;								// amount of storage not allocated in the current chunk
+}; // HeapManager
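+
+// Layout sketch, assuming a 64-bit build (sizeof(Header) == 8, ALIGN == 16): user data always starts ALIGN bytes
+// past the block start, so headerAddr simply backs up sizeof(Storage) from the data address.
+//
+//   sbrk block (allocated) : | home | pad | data ... |			home => bucket FreeHeader, low-order bits are flags
+//   mmap block (allocated) : | blockSize | pad | data ... |	blockSize => length passed to munmap
+//   sbrk block (free)      : | next | pad | unused ... |		next => following free block in the same bucket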
+
+#ifdef __CFA_DEBUG__
+static _Bool heapBoot = 0;								// detect recursion during boot
+#endif // __CFA_DEBUG__
+static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
+
+static inline size_t getKey( const HeapManager.FreeHeader & freeheader ) { return freeheader.blockSize; }
+
+// statically allocated variables => zero filled.
+
+static size_t pageSize;									// architecture pagesize
+static size_t heapExpand;								// sbrk advance
+static size_t mmapStart;								// cross over point for mmap
+static unsigned int maxBucketsUsed;						// maximum number of buckets in use
+static unsigned int bucketSizes[NoBucketSizes] = {		// different bucket sizes
+    16, 32, 48, 64,
+    80, 96, 112, 128, 144, 160, 192, 224,
+    256, 320, 384, 448, 512, 640, 768, 896,
+    1024, 1536, 2048, 2560, 3072, 3584, 4096, 6144,
+    8192, 9216, 10240, 11264, 12288, 13312, 14336, 15360,
+    16384, 18432, 20480, 22528, 24576, 26624, 28672, 30720,
+    32768, 36864, 40960, 45056, 49152, 53248, 57344, 61440,
+    65536, 73728, 81920, 90112, 98304, 106496, 114688, 122880,
+    131072, 147456, 163840, 180224, 196608, 212992, 229376, 245760,
+    262144, 294912, 327680, 360448, 393216, 425984, 458752, 491520,
+    524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792,
+    1572864, 1703936, 1835008, 1966080, 2097152, 2621440, 3145728, 3670016,
+    4194304
+};
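+// Worked example: malloc( 100 ) needs 100 + sizeof(HeapManager.Storage) = 116 bytes on a 64-bit build, which the
+// binary search (or FASTLOOKUP table) rounds up to the 128-byte bucket, leaving 12 bytes of internal fragmentation.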
+#ifdef FASTLOOKUP
+static unsigned char lookup[LookupSizes];				// O(1) lookup for small sizes
+#endif // FASTLOOKUP
+static int mmapFd = -1;									// fake or actual fd for anonymous file
+#ifdef __CFA_DEBUG__
+static unsigned int allocfree;							// running total of allocations minus frees
+#endif // __CFA_DEBUG__
+
+
+#ifdef __STATISTICS__
+// Heap statistics
+static unsigned long long int mmap_storage;
+static unsigned int mmap_calls;
+static unsigned long long int munmap_storage;
+static unsigned int munmap_calls;
+static unsigned long long int sbrk_storage;
+static unsigned int sbrk_calls;
+static unsigned long long int malloc_storage;
+static unsigned int malloc_calls;
+static unsigned long long int free_storage;
+static unsigned int free_calls;
+static unsigned long long int calloc_storage;
+static unsigned int calloc_calls;
+static unsigned long long int memalign_storage;
+static unsigned int memalign_calls;
+static unsigned long long int cmemalign_storage;
+static unsigned int cmemalign_calls;
+static unsigned long long int realloc_storage;
+static unsigned int realloc_calls;
+static int statfd = STDERR_FILENO;						// file descriptor for statistics output (default standard error)
+
+
+// Use "write" because streams may be shutdown when calls are made.
+static void print() {
+    char helpText[512];
+    int len = snprintf( helpText, 512,
+						"\nHeap statistics:\n"
+						"  malloc: calls %u / storage %llu\n"
+						"  calloc: calls %u / storage %llu\n"
+						"  memalign: calls %u / storage %llu\n"
+						"  cmemalign: calls %u / storage %llu\n"
+						"  realloc: calls %u / storage %llu\n"
+						"  free: calls %u / storage %llu\n"
+						"  mmap: calls %u / storage %llu\n"
+						"  munmap: calls %u / storage %llu\n"
+						"  sbrk: calls %u / storage %llu\n",
+						malloc_calls, malloc_storage,
+						calloc_calls, calloc_storage,
+						memalign_calls, memalign_storage,
+						cmemalign_calls, cmemalign_storage,
+						realloc_calls, realloc_storage,
+						free_calls, free_storage,
+						mmap_calls, mmap_storage,
+						munmap_calls, munmap_storage,
+						sbrk_calls, sbrk_storage
+		);
+    write( statfd, helpText, len );
+} // print
+
+
+static int printXML( FILE * stream ) {
+    char helpText[512];
+    int len = snprintf( helpText, 512,
+						"<malloc version=\"1\">\n"
+						"<heap nr=\"0\">\n"
+						"<sizes>\n"
+						"</sizes>\n"
+						"<total type=\"malloc\" count=\"%u\" size=\"%llu\"/>\n"
+						"<total type=\"calloc\" count=\"%u\" size=\"%llu\"/>\n"
+						"<total type=\"memalign\" count=\"%u\" size=\"%llu\"/>\n"
+						"<total type=\"cmemalign\" count=\"%u\" size=\"%llu\"/>\n"
+						"<total type=\"realloc\" count=\"%u\" size=\"%llu\"/>\n"
+						"<total type=\"free\" count=\"%u\" size=\"%llu\"/>\n"
+						"<total type=\"mmap\" count=\"%u\" size=\"%llu\"/>\n"
+						"<total type=\"munmap\" count=\"%u\" size=\"%llu\"/>\n"
+						"<total type=\"sbrk\" count=\"%u\" size=\"%llu\"/>\n"
+						"</malloc>",
+						malloc_calls, malloc_storage,
+						calloc_calls, calloc_storage,
+						memalign_calls, memalign_storage,
+						cmemalign_calls, cmemalign_storage,
+						realloc_calls, realloc_storage,
+						free_calls, free_storage,
+						mmap_calls, mmap_storage,
+						munmap_calls, munmap_storage,
+						sbrk_calls, sbrk_storage
+		);
+    return write( fileno( stream ), helpText, len );	// -1 => error
+} // printXML
+#endif // __STATISTICS__
+
+
+static inline void noMemory() {
+    abort( "Heap memory exhausted at %zu bytes.\n"
+			"Possible cause is very large memory allocation and/or large amount of unfreed storage allocated by the program or system/library routines.",
+			((char *)(sbrk( 0 )) - (char *)(heapManager.heapBegin)) );
+} // noMemory
+
+
+static inline void checkAlign( size_t alignment ) {
+    if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) {
+		abort( "Alignment %zu for memory allocation is less than sizeof(void *) and/or not a power of 2.", alignment );
+    } // if
+} // checkAlign
+
+
+static inline _Bool setHeapExpand( size_t value ) {	// true => error
+    if ( value < pageSize ) return true;				// expansion amount must be at least one page
+    heapExpand = value;
+    return false;
+} // setHeapExpand
+
+
+static inline _Bool setMmapStart( size_t value ) {		// true => error
+    if ( value < pageSize || bucketSizes[NoBucketSizes-1] < value ) return true;
+    mmapStart = value;									// set global
+
+    // find the closest bucket size greater than or equal to the mmapStart size
+    maxBucketsUsed = bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
+    assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
+    assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
+    return false;
+} // setMmapStart
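+
+// E.g., with the default mmap start of 512K + 1, bsearchl selects the first bucket holding at least 524289 bytes
+// (the 640K bucket), so any request whose total size is at most 512K is serviced from the buckets rather than mmap.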
+
+
+static inline void checkHeader( _Bool check, const char * name, void * addr ) {
+    if ( unlikely( check ) ) {							// bad address ?
+		abort( "Attempt to %s storage %p with address outside the heap.\n"
+				"Possible cause is duplicate free on same block or overwriting of memory.",
+				name, addr );
+    } // if
+} // checkHeader
+
+static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & size, size_t & alignment ) {
+    if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
+		size_t offset = header->kind.fake.offset;
+		alignment = header->kind.fake.alignment & -2;	// remove flag from value
+		#ifdef __CFA_DEBUG__
+		checkAlign( alignment );						// check alignment
+		#endif // __CFA_DEBUG__
+		header = (HeapManager.Storage.Header *)((char *)header - offset);
+    } // if
+} // fakeHeader
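+
+// E.g., storage from memalign( 64, ... ) stores 64 | 1 = 65 in the fake header: the odd low-order bit marks the
+// header as fake, and offset rewinds to the real header created by doMalloc.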
+
+
+#define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
+
+static inline _Bool headers( const char * name, void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem, size_t & size, size_t & alignment ) with ( heapManager ) {
+    header = headerAddr( addr );
+
+    if ( unlikely( heapEnd < addr ) ) {					// mmapped ?
+		fakeHeader( header, size, alignment );
+		size = header->kind.real.blockSize & -3;		// mmap size
+		return true;
+    } // if
+
+	#ifdef __CFA_DEBUG__
+    checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
+	#endif // __CFA_DEBUG__
+    // header may be safe to dereference
+    fakeHeader( header, size, alignment );
+	#ifdef __CFA_DEBUG__
+    checkHeader( header < (HeapManager.Storage.Header *)heapBegin || (HeapManager.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
+	#endif // __CFA_DEBUG__
+
+    freeElem = (HeapManager.FreeHeader *)((size_t)header->kind.real.home & -3);
+	#ifdef __CFA_DEBUG__
+    if ( freeElem < &freeLists[0] || &freeLists[NoBucketSizes] <= freeElem ) {
+		abort( "Attempt to %s storage %p with corrupted header.\n"
+			   "Possible cause is duplicate free on same block or overwriting of header information.",
+			   name, addr );
+    } // if
+	#endif // __CFA_DEBUG__
+    size = freeElem->blockSize;
+    return false;
+} // headers
+
+
+static inline void * extend( size_t size ) with ( heapManager ) {
+    lock( extlock __cfaabi_dbg_ctx2 );
+    ptrdiff_t rem = heapRemaining - size;
+    if ( rem < 0 ) {
+		// If the size requested is bigger than the current remaining storage, increase the size of the heap.
+
+		size_t increase = libCeiling( size > heapExpand ? size : heapExpand, libAlign() );
+		if ( sbrk( increase ) == (void *)-1 ) {
+			unlock( extlock );
+			errno = ENOMEM;
+			return 0;
+		} // if
+#ifdef __STATISTICS__
+		sbrk_calls += 1;
+		sbrk_storage += increase;
+#endif // __STATISTICS__
+#ifdef __CFA_DEBUG__
+		// Set new memory to garbage so subsequent uninitialized usages might fail.
+		memset( (char *)heapEnd + heapRemaining, '\377', increase );
+#endif // __CFA_DEBUG__
+		rem = heapRemaining + increase - size;
+    } // if
+
+    HeapManager.Storage * block = (HeapManager.Storage *)heapEnd;
+    heapRemaining = rem;
+    heapEnd = (char *)heapEnd + size;
+    unlock( extlock );
+    return block;
+} // extend
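+
+// NOTE: extend assumes the sbrk area is contiguous and private to this allocator; any independent call to sbrk by
+// the program or another library invalidates heapEnd/heapRemaining.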
+
+
+static inline void * doMalloc( size_t size ) with ( heapManager ) {
+    HeapManager.Storage * block;
+
+    // Look up size in the size list.  Make sure the user request includes space for the header that must be allocated
+    // along with the block and is a multiple of the alignment size.
+
+    size_t tsize = size + sizeof(HeapManager.Storage);
+    if ( likely( tsize < mmapStart ) ) {				// small size => sbrk
+		HeapManager.FreeHeader * freeElem =
+			#ifdef FASTLOOKUP
+			tsize < LookupSizes ? &freeLists[lookup[tsize]] :
+			#endif // FASTLOOKUP
+			bsearchl( tsize, freeLists, (size_t)maxBucketsUsed ); // binary search
+		assert( freeElem <= &freeLists[maxBucketsUsed] ); // subscripting error ?
+		assert( tsize <= freeElem->blockSize );			// search failure ?
+		tsize = freeElem->blockSize;					// total space needed for request
+
+		// Spin until the lock is acquired for this particular size of block.
+
+		#if BUCKETLOCK == SPINLOCK
+		lock( freeElem->lock __cfaabi_dbg_ctx2 );
+		block = freeElem->freeList;						// remove node from stack
+		#else
+		block = freeElem->freeList.pop();
+		#endif // SPINLOCK
+		if ( unlikely( block == 0 ) ) {					// no free block ?
+			#if BUCKETLOCK == SPINLOCK
+			unlock( freeElem->lock );
+			#endif // SPINLOCK
+			// Freelist for that size was empty, so carve it out of the heap if there's enough left, or get some more
+			// and then carve it off.
+
+			block = (HeapManager.Storage *)extend( tsize );	// mutual exclusion on call
+			if ( unlikely( block == 0 ) ) return 0;
+			#if BUCKETLOCK == SPINLOCK
+		} else {
+			freeElem->freeList = block->header.kind.real.next;
+			unlock( freeElem->lock );
+			#endif // SPINLOCK
+		} // if
+
+		block->header.kind.real.home = freeElem;		// pointer back to free list of appropriate size
+    } else {											// large size => mmap
+		tsize = libCeiling( tsize, pageSize );			// must be multiple of page size
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &mmap_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &mmap_storage, tsize, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+		block = (HeapManager.Storage *)mmap( 0, tsize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, mmapFd, 0 );
+		if ( block == (HeapManager.Storage *)MAP_FAILED ) {
+			// Do not call strerror( errno ) as it may call malloc.
+			abort( "(HeapManager &)0x%p.doMalloc() : internal error, mmap failure, size:%zu error:%d.", &heapManager, tsize, errno );
+		} // if
+#ifdef __CFA_DEBUG__
+		// Set new memory to garbage so subsequent uninitialized usages might fail.
+		memset( block, '\377', tsize );
+#endif // __CFA_DEBUG__
+		block->header.kind.real.blockSize = tsize;		// storage size for munmap
+    } // if
+
+    void * area = &(block->data);						// adjust past the header to the user storage
+
+	#ifdef __CFA_DEBUG__
+    assert( ((uintptr_t)area & (libAlign() - 1)) == 0 ); // minimum alignment ?
+    __atomic_add_fetch( &allocfree, tsize, __ATOMIC_SEQ_CST );
+//    if ( uHeapControl::traceHeap() ) {
+//		enum { BufferSize = 64 };
+//		char helpText[BufferSize];
+//		int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", area, size, tsize );
+		//int len = snprintf( helpText, BufferSize, "Malloc %p %zu\n", area, size );
+//		uDebugWrite( STDERR_FILENO, helpText, len );
+//    } // if
+	#endif // __CFA_DEBUG__
+
+    return area;
+} // doMalloc
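+
+// E.g., doMalloc( 24 ) on a 64-bit build computes tsize = 24 + 16 = 40, selects the 48-byte bucket, pops a free
+// block (or carves one from the sbrk area), records the bucket address in home, and returns block->data.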
+
+
+static inline void doFree( void * addr ) with ( heapManager ) {
+	#ifdef __CFA_DEBUG__
+    if ( unlikely( heapManager.heapBegin == 0 ) ) {
+		abort( "doFree( %p ) : internal error, called before heap is initialized.", addr );
+    } // if
+	#endif // __CFA_DEBUG__
+
+    HeapManager.Storage.Header * header;
+    HeapManager.FreeHeader * freeElem;
+    size_t size, alignment;								// alignment not used here (see realloc)
+
+    if ( headers( "free", addr, header, freeElem, size, alignment ) ) { // mmapped ?
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &munmap_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &munmap_storage, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+		if ( munmap( header, size ) == -1 ) {
+			#ifdef __CFA_DEBUG__
+			abort( "Attempt to deallocate storage %p not allocated or with corrupt header.\n"
+					"Possible cause is invalid pointer.",
+					addr );
+			#endif // __CFA_DEBUG__
+		} // if
+    } else {
+		#ifdef __CFA_DEBUG__
+		// Set free memory to garbage so subsequent usages might fail.
+		memset( ((HeapManager.Storage *)header)->data, '\377', freeElem->blockSize - sizeof( HeapManager.Storage ) );
+		#endif // __CFA_DEBUG__
+
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &free_storage, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+		#if BUCKETLOCK == SPINLOCK
+		lock( freeElem->lock __cfaabi_dbg_ctx2 );		// acquire spin lock
+		header->kind.real.next = freeElem->freeList;	// push on stack
+		freeElem->freeList = (HeapManager.Storage *)header;
+		unlock( freeElem->lock );						// release spin lock
+		#else
+		freeElem->freeList.push( *(HeapManager.Storage *)header );
+		#endif // SPINLOCK
+    } // if
+
+	#ifdef __CFA_DEBUG__
+    __atomic_add_fetch( &allocfree, -size, __ATOMIC_SEQ_CST );
+    // if ( uHeapControl::traceHeap() ) {
+	// 	enum { BufferSize = 64 };
+	// 	char helpText[BufferSize];
+	// 	int len = snprintf( helpText, BufferSize, "Free( %p ) size:%zu\n", addr, size );
+	// 	uDebugWrite( STDERR_FILENO, helpText, len );
+    // } // if
+	#endif // __CFA_DEBUG__
+} // doFree
+
+
+// size_t checkFree( _Bool prt ) {
+//     size_t total = 0;
+// #ifdef __STATISTICS__
+//     uDebugAcquire();
+//     if ( prt ) uDebugPrt2( "\nBin lists (bin size : free blocks on list)\n" );
+// #endif // __STATISTICS__
+//     for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {
+// 	size_t size = freeLists[i].blockSize;
+// #ifdef __STATISTICS__
+// 	unsigned int N = 0;
+// #endif // __STATISTICS__
+// #if defined( SPINLOCK )
+// 	for ( Storage * p = freeLists[i].freeList; p != 0; p = p->header.kind.real.next ) {
+// #else
+// 	    for ( Storage * p = freeLists[i].freeList.top(); p != 0; p = p->header.kind.real.next.top ) {
+// #endif // SPINLOCK
+// 		total += size;
+// #ifdef __STATISTICS__
+// 		N += 1;
+// #endif // __STATISTICS__
+// 	    } // for
+// #ifdef __STATISTICS__
+// 	    if ( prt ) uDebugPrt2( "%7zu, %-7u  ", size, N );
+// 	    if ( (i + 1) % 8 == 0 ) uDebugPrt2( "\n" );
+// #endif // __STATISTICS__
+// 	} // for
+// #ifdef __STATISTICS__
+// 	if ( prt ) uDebugPrt2( "\ntotal free blocks:%zu\n", total );
+// 	uDebugRelease();
+// #endif // __STATISTICS__
+// 	return (char *)heapEnd - (char *)heapBegin - total;
+//     } // for
+// } // checkFree
+
+
+static void ?{}( HeapManager & manager ) with ( manager ) {
+    pageSize = sysconf( _SC_PAGESIZE );
+    
+    for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
+		freeLists[i].blockSize = bucketSizes[i];
+    } // for
+
+	#ifdef FASTLOOKUP
+    unsigned int idx = 0;
+    for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
+		if ( i > bucketSizes[idx] ) idx += 1;
+		lookup[i] = idx;
+    } // for
+	#endif // FASTLOOKUP
+
+    if ( setMmapStart( default_mmap_start() ) ) {
+		abort( "HeapManager : internal error, mmap start initialization failure." );
+    } // if
+    heapExpand = default_heap_expansion();
+
+    char * End = (char *)sbrk( 0 );
+    sbrk( (char *)libCeiling( (long unsigned int)End, libAlign() ) - End ); // move start of heap to multiple of alignment
+    heapBegin = heapEnd = sbrk( 0 );					// get new start point
+} // HeapManager
+
+
+static void ^?{}( HeapManager & ) {
+	#ifdef __STATISTICS__
+	// if ( prtHeapterm ) {
+	// 	print();
+	// 	heapManager.checkFree( true );
+	// } // if
+	#endif // __STATISTICS__
+
+	#ifdef __CFA_DEBUG__
+    if ( allocfree != 0 ) {
+		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
+		char helpText[512];
+		int len = snprintf( helpText, 512, "CFA warning (UNIX pid:%ld) : program terminating with %u(0x%x) bytes of storage allocated but not freed.\n"
+							"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
+							(long int)getpid(), allocfree, allocfree ); // always print the UNIX pid
+		__cfaabi_dbg_bits_write( helpText, len );
+    } // if
+	#endif // __CFA_DEBUG__
+} // ~HeapManager
+
+
+static inline void * malloc2( size_t size ) {			// necessary for malloc statistics
+    if ( unlikely( heapManager.heapBegin == 0 ) ) {
+		#ifdef __CFA_DEBUG__
+		if ( unlikely( heapBoot ) ) {					// check for recursion during system boot
+			// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
+			abort( "boot() : internal error, recursively invoked during system boot." );
+		} // if
+		heapBoot = true;
+		#endif // __CFA_DEBUG__
+
+		heapManager{};
+	} // if
+
+    void * area = doMalloc( size );
+    if ( unlikely( area == 0 ) ) errno = ENOMEM;		// POSIX
+    return area;
+} // malloc2
+
+
+static inline void * memalign2( size_t alignment, size_t size ) { // necessary for malloc statistics
+#ifdef __CFA_DEBUG__
+    checkAlign( alignment );							// check alignment
+#endif // __CFA_DEBUG__
+
+    // if alignment <= default alignment, do normal malloc as two headers are unnecessary
+    if ( unlikely( alignment <= libAlign() ) ) return malloc2( size );
+
+    // Allocate enough storage to guarantee an address on the alignment boundary, and sufficient space before it for
+    // administrative storage. NOTE, WHILE THERE ARE 2 HEADERS, THE FIRST ONE IS IMPLICITLY CREATED BY DOMALLOC.
+    //      .-------------v-----------------v----------------v----------,
+    //      | Real Header | ... padding ... |   Fake Header  | data ... |
+    //      `-------------^-----------------^-+--------------^----------'
+    //      |<--------------------------------' offset/align |<-- alignment boundary
+
+    // subtract libAlign() because it is already the minimum alignment
+    // add sizeof(Storage) for fake header
+    char * area = (char *)doMalloc( size + alignment - libAlign() + sizeof(HeapManager.Storage) );
+    if ( unlikely( area == 0 ) ) return area;
+
+    // address in the block of the "next" alignment address
+    char * user = (char *)libCeiling( (uintptr_t)(area + sizeof(HeapManager.Storage)), alignment );
+
+    // address of header from malloc
+    HeapManager.Storage.Header * realHeader = headerAddr( area );
+    // address of fake header *before* the alignment location
+    HeapManager.Storage.Header * fakeHeader = headerAddr( user );
+    // SKULLDUGGERY: insert the offset to the start of the actual storage block and remember alignment
+    fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)realHeader;
+    // SKULLDUGGERY: odd alignment implies fake header
+    fakeHeader->kind.fake.alignment = alignment | 1;
+
+    return user;
+} // memalign2
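+
+// E.g., memalign2( 64, 100 ) calls doMalloc( 100 + 64 - 16 + 16 ) = doMalloc( 164 ) (assuming libAlign() == 16),
+// rounds the first address past the real header up to a 64-byte boundary, and builds the fake header just below it.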
+
+
+extern "C" {
+    void * malloc( size_t size ) {
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &malloc_storage, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		return malloc2( size );
+    } // malloc
+
+
+    void * calloc( size_t noOfElems, size_t elemSize ) {
+		size_t size = noOfElems * elemSize;
+		if ( unlikely( elemSize != 0 && size / elemSize != noOfElems ) ) { errno = ENOMEM; return 0; } // multiplication overflow ?
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &calloc_storage, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		char * area = (char *)malloc2( size );
+		if ( unlikely( area == 0 ) ) return 0;
+		HeapManager.Storage.Header * header;
+		HeapManager.FreeHeader * freeElem;
+		size_t asize, alignment;
+		_Bool mapped __attribute__(( unused )) = headers( "calloc", area, header, freeElem, asize, alignment );
+		#ifndef __CFA_DEBUG__
+		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 
+		if ( ! mapped )
+		#endif // __CFA_DEBUG__
+			memset( area, '\0', asize - sizeof(HeapManager.Storage) ); // set to zeros
+		header->kind.real.blockSize |= 2;		// mark as zero filled
+		return area;
+    } // calloc
+
+
+    void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {
+		size_t size = noOfElems * elemSize;
+		if ( unlikely( elemSize != 0 && size / elemSize != noOfElems ) ) { errno = ENOMEM; return 0; } // multiplication overflow ?
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		char * area = (char *)memalign2( alignment, size );
+		if ( unlikely( area == 0 ) ) return 0;
+		HeapManager.Storage.Header * header;
+		HeapManager.FreeHeader * freeElem;
+		size_t asize;
+		_Bool mapped __attribute__(( unused )) = headers( "cmemalign", area, header, freeElem, asize, alignment );
+		#ifndef __CFA_DEBUG__
+		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 
+		if ( ! mapped )
+		#endif // __CFA_DEBUG__
+			memset( area, '\0', asize - ( (char *)area - (char *)header ) ); // set to zeros
+		header->kind.real.blockSize |= 2;				// mark as zero filled
+
+		return area;
+    } // cmemalign
+
+
+    void * realloc( void * addr, size_t size ) {
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		if ( unlikely( addr == 0 ) ) return malloc2( size ); // special cases
+		if ( unlikely( size == 0 ) ) { free( addr ); return 0; }
+
+		HeapManager.Storage.Header * header;
+		HeapManager.FreeHeader * freeElem;
+		size_t asize, alignment = 0;
+		headers( "realloc", addr, header, freeElem, asize, alignment );
+
+		size_t usize = asize - ( (char *)addr - (char *)header ); // compute the amount of user storage in the block
+		if ( usize >= size ) {							// already sufficient storage
+			// This case does not result in a new profiler entry because the previous one still exists and it must match with
+			// the free for this memory.  Hence, this realloc does not appear in the profiler output.
+			return addr;
+		} // if
+
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		void * area;
+		if ( unlikely( alignment != 0 ) ) {				// previous request memalign?
+			area = memalign2( alignment, size );		// create new aligned area; memalign2 avoids recounting memalign statistics
+		} else {
+			area = malloc2( size );						// create new area
+		} // if
+		if ( unlikely( area == 0 ) ) return 0;
+		if ( unlikely( header->kind.real.blockSize & 2 ) ) { // previous request zero fill (calloc/cmemalign) ?
+			assert( (header->kind.real.blockSize & 1) == 0 );
+			_Bool mapped __attribute__(( unused )) = headers( "realloc", area, header, freeElem, asize, alignment );
+			#ifndef __CFA_DEBUG__
+			// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 
+			if ( ! mapped )
+			#endif // __CFA_DEBUG__
+				memset( (char *)area + usize, '\0', asize - ( (char *)area - (char *)header ) - usize ); // zero-fill back part
+			header->kind.real.blockSize |= 2;			// mark new request as zero fill
+		} // if
+		memcpy( area, addr, usize );					// copy bytes
+		free( addr );
+		return area;
+    } // realloc
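+
+// Note realloc copies only the old user bytes (usize) before freeing the old storage, and a block that originated
+// from calloc/cmemalign keeps its zero-fill guarantee: the newly exposed tail is re-zeroed and the mark propagated.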
+
+
+    void * memalign( size_t alignment, size_t size ) {
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &memalign_storage, size, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		void * area = memalign2( alignment, size );
+
+		return area;
+    } // memalign
+
+
+    void * aligned_alloc( size_t alignment, size_t size ) {
+		return memalign( alignment, size );
+    } // aligned_alloc
+
+
+    int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
+		if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) return EINVAL; // check alignment
+		* memptr = memalign( alignment, size );
+		if ( unlikely( * memptr == 0 ) ) return ENOMEM;
+		return 0;
+    } // posix_memalign
+
+
+    void * valloc( size_t size ) {
+		return memalign( pageSize, size );
+    } // valloc
+
+
+    void free( void * addr ) {
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		if ( unlikely( addr == 0 ) ) {					// special case
+// #ifdef __CFA_DEBUG__
+// 	    if ( uHeapControl::traceHeap() ) {
+// #		define nullmsg "Free( 0x0 ) size:0\n"
+// 		// Do not debug print free( 0 ), as it can cause recursive entry from sprintf.
+// 		uDebugWrite( STDERR_FILENO, nullmsg, sizeof(nullmsg) - 1 );
+// 	    } // if
+// #endif // __CFA_DEBUG__
+			return;
+		} // if
+
+		doFree( addr );
+    } // free
+
+    int mallopt( int option, int value ) {
+		choose( option ) {
+		  case M_TOP_PAD:
+			if ( setHeapExpand( value ) ) fallthru default; // bad value => error
+		  case M_MMAP_THRESHOLD:
+			if ( setMmapStart( value ) ) fallthru default; // bad value => error
+		  default:
+			return 0;									// error or unsupported option
+		} // choose
+		return 1;										// success
+    } // mallopt
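+
+// E.g., mallopt( M_MMAP_THRESHOLD, 1024 * 1024 ) moves the sbrk/mmap crossover to 1M and returns 1 (success);
+// a value below the page size or above the largest bucket is rejected and mallopt returns 0.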
+
+
+	int malloc_trim( size_t ) {
+		return 0;										// => impossible to release memory
+	} // malloc_trim
+
+    size_t malloc_usable_size( void * addr ) {
+		if ( unlikely( addr == 0 ) ) return 0;			// null allocation has 0 size
+		HeapManager.Storage.Header * header;
+		HeapManager.FreeHeader * freeElem;
+		size_t size, alignment;
+
+		headers( "malloc_usable_size", addr, header, freeElem, size, alignment );
+		size_t usize = size - ( (char *)addr - (char *)header ); // compute the amount of user storage in the block
+		return usize;
+    } // malloc_usable_size
+
+
+    size_t malloc_alignment( void * addr ) {
+		if ( unlikely( addr == 0 ) ) return libAlign();	// minimum alignment
+		HeapManager.Storage.Header * header = (HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) );
+		if ( (header->kind.fake.alignment & 1) == 1 ) {	// fake header ?
+			return header->kind.fake.alignment & -2;	// remove flag from value
+		} else {
+			return libAlign();							// minimum alignment
+		} // if
+    } // malloc_alignment
+
+
+    _Bool malloc_zero_fill( void * addr ) {
+		if ( unlikely( addr == 0 ) ) return false;		// null allocation is not zero fill
+		HeapManager.Storage.Header * header = (HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) );
+		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
+			header = (HeapManager.Storage.Header *)((char *)header - header->kind.fake.offset);
+		} // if
+		return (header->kind.real.blockSize & 2) != 0;	// zero filled (calloc/cmemalign) ?
+    } // malloc_zero_fill
+
+
+    void malloc_stats( void ) {
+		#ifdef __STATISTICS__
+		print();
+		// heapManager.checkFree( true );
+		#endif // __STATISTICS__
+    } // malloc_stats
+
+
+    int malloc_stats_fd( int fd ) {
+		#ifdef __STATISTICS__
+		int temp = statfd;
+		statfd = fd;
+		return temp;
+		#else
+		return -1;
+		#endif // __STATISTICS__
+    } // malloc_stats_fd
+
+
+	int malloc_info( int options, FILE * stream ) {
+		#ifdef __STATISTICS__
+		return printXML( stream );						// note: options argument currently ignored
+		#else
+		return 0;
+		#endif // __STATISTICS__
+	} // malloc_info
+
+
+	void * malloc_get_state( void ) {
+		return 0;
+	} // malloc_get_state
+
+
+	int malloc_set_state( void * ptr ) {
+		return 0;
+	} // malloc_set_state
+} // extern "C"
+
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa -nodebug -O2 heap.c" //
+// End: //
