// 
// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
// 
// heap.c -- 
// 
// Author           : Peter A. Buhr
// Created On       : Tue Dec 19 21:58:35 2017
// Last Modified By : Peter A. Buhr
// Last Modified On : Thu Jul 26 22:28:23 2018
// Update Count     : 449
// 

#include <unistd.h>										// sbrk, sysconf
#include <stdbool.h>									// true, false
#include <stdio.h>										// snprintf, fileno
#include <errno.h>										// errno
extern "C" {
#include <sys/mman.h>									// mmap, munmap
} // extern "C"

#include "bits/align.h"									// libPow2
#include "bits/defs.h"									// likely, unlikely
#include "bits/locks.h"									// __spinlock_t
#include "startup.h"									// STARTUP_PRIORITY_MEMORY
#include "stdlib"										// bsearchl
#include "malloc.h"


enum {
	__CFA_DEFAULT_MMAP_START__ = (512 * 1024 + 1),		// crossover from bucket (sbrk) to mmap allocation; +1 so a total size of exactly 512K still uses the buckets
	__CFA_DEFAULT_HEAP_EXPANSION__ = (1 * 1024 * 1024),	// minimum amount sbrk extends the heap when it is exhausted
};

size_t default_mmap_start() __attribute__(( weak )) {
    return __CFA_DEFAULT_MMAP_START__;
} // default_mmap_start

size_t default_heap_expansion() __attribute__(( weak )) {
    return __CFA_DEFAULT_HEAP_EXPANSION__;
} // default_heap_expansion


// supported mallopt options
#ifndef M_MMAP_THRESHOLD
#define M_MMAP_THRESHOLD (-1)
#endif // M_MMAP_THRESHOLD
#ifndef M_TOP_PAD
#define M_TOP_PAD (-2)
#endif // M_TOP_PAD

#define FASTLOOKUP
#define __STATISTICS__

#define SPINLOCK 0
#define LOCKFREE 1
#define BUCKETLOCK SPINLOCK
#if BUCKETLOCK == LOCKFREE
#include <uStackLF.h>
#endif // LOCKFREE

#define ALIGN 16

// enum { NoBucketSizes = 93,							// number of bucket sizes
// #ifdef FASTLOOKUP
// 	   LookupSizes = 65536,								// number of fast lookup sizes
// #endif // FASTLOOKUP
// };
#define NoBucketSizes 93								// number of bucket sizes
#ifdef FASTLOOKUP
#define LookupSizes 65536								// number of fast lookup sizes
#endif // FASTLOOKUP
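
// With FASTLOOKUP, the bucket for any total size below LookupSizes is found by a single table lookup,
// lookup[size] => bucket index, instead of a binary search of bucketSizes (see doMalloc).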


static _Bool traceHeap = false;

inline _Bool traceHeap() {
	return traceHeap;
} // traceHeap

_Bool traceHeapOn() {
	_Bool temp = traceHeap;
	traceHeap = true;
	return temp;
} // traceHeapOn

_Bool traceHeapOff() {
	_Bool temp = traceHeap;
	traceHeap = false;
	return temp;
} // traceHeapOff


// static _Bool prtHeapTerm = false;

// inline _Bool prtHeapTerm() {
// 	return prtHeapTerm;
// } // prtHeapTerm

// _Bool prtHeapTermOn() {
// 	_Bool temp = prtHeapTerm;
// 	prtHeapTerm = true;
// 	return temp;
// } // prtHeapTermOn

// _Bool prtHeapTermOff() {
// 	_Bool temp = prtHeapTerm;
// 	prtHeapTerm = false;
// 	return temp;
// } // prtHeapTermOff


#ifdef __CFA_DEBUG__
static unsigned int allocfree;							// running total of allocations minus frees
static unsigned int appStart;							// storage allocation when application starts

static void checkUnfreed() {
	unsigned int total = allocfree - appStart;
    if ( total != 0 ) {
		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
		char helpText[512];
		int len = snprintf( helpText, 512, "CFA warning (UNIX pid:%ld) : program terminating with %u(0x%x) bytes of storage allocated but not freed.\n"
							"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
							(long int)getpid(), total, total ); // always print the UNIX pid
		__cfaabi_dbg_bits_write( helpText, len );
    } // if
} // checkUnfreed

extern "C" {
void heapAppStart() {									// called by __cfaabi_appready_startup
	appStart = allocfree;
} // heapAppStart

void heapAppStop() {									// called by __cfaabi_appready_startdown
	checkUnfreed();
} // heapAppStop
} // extern "C"
#endif // __CFA_DEBUG__


// statically allocated variables => zero filled.

static size_t pageSize;									// architecture pagesize
static size_t heapExpand;								// sbrk advance
static size_t mmapStart;								// cross over point for mmap
static unsigned int maxBucketsUsed;						// maximum number of buckets in use
static unsigned int bucketSizes[NoBucketSizes] = {		// different bucket sizes
    16, 32, 48, 64,
    80, 96, 112, 128, 144, 160, 192, 224,
    256, 320, 384, 448, 512, 640, 768, 896,
    1024, 1536, 2048, 2560, 3072, 3584, 4096, 6144,
    8192, 9216, 10240, 11264, 12288, 13312, 14336, 15360,
    16384, 18432, 20480, 22528, 24576, 26624, 28672, 30720,
    32768, 36864, 40960, 45056, 49152, 53248, 57344, 61440,
    65536, 73728, 81920, 90112, 98304, 106496, 114688, 122880,
    131072, 147456, 163840, 180224, 196608, 212992, 229376, 245760,
    262144, 294912, 327680, 360448, 393216, 425984, 458752, 491520,
    524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792,
    1572864, 1703936, 1835008, 1966080, 2097152, 2621440, 3145728, 3670016,
    4194304
};
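
// Bucket sizes increase in larger steps as sizes grow, from 16-byte spacing at the small end to 512K at the
// large end, bounding internal fragmentation to the gap between adjacent buckets.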
#ifdef FASTLOOKUP
static unsigned char lookup[LookupSizes];				// O(1) lookup for small sizes
#endif // FASTLOOKUP
static int mmapFd = -1;									// fake or actual fd for anonymous file


struct HeapManager {
//	struct FreeHeader;									// forward declaration

	struct Storage {
	    struct Header {									// header
			union Kind {
				struct RealHeader {
					union {
						struct {						// 32-bit word => 64-bit header, 64-bit word => 128-bit header
							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
							uint32_t padding;			// unused, force home/blocksize to overlay alignment in fake header
							#endif // __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4

							union {
//								FreeHeader * home;		// allocated block points back to home locations (must overlay alignment)
								void * home;			// allocated block points back to home locations (must overlay alignment)
								size_t blockSize;		// size for munmap (must overlay alignment)
								#if BUCKETLOCK == SPINLOCK
								Storage * next;			// freed block points next freed block of same size
								#endif // SPINLOCK
							};

							#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
							uint32_t padding;			// unused, force home/blocksize to overlay alignment in fake header
							#endif // __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4

						};
						#if BUCKETLOCK == LOCKFREE
						Stack<Storage>::Link next;		// freed block points next freed block of same size (double-wide)
						#endif // LOCKFREE
					};
				} real;
				struct FakeHeader {
					#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
					#endif // __ORDER_LITTLE_ENDIAN__

					uint32_t offset;

					#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
					#endif // __ORDER_BIG_ENDIAN__
				} fake;
			} kind;
	    } header; // Header
	    char pad[ALIGN - sizeof( Header )];
	    char data[0];									// storage
	}; // Storage
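
	// SKULLDUGGERY: allocations are at least libAlign() (16-byte) aligned, so the low-order bits of the
	// home/blockSize field are free to encode storage properties: bit 0 marks a fake (alignment) header
	// (stripped with "& -2") and bit 1 marks zero-filled (calloc/cmemalign) storage (stripped with "& -3").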

	static_assert( ALIGN >= sizeof( Storage ), "ALIGN < sizeof( Storage )" );

	struct FreeHeader {
		#if BUCKETLOCK == SPINLOCK
	    __spinlock_t lock;								// must be first field for alignment
	    Storage * freeList;
		#elif BUCKETLOCK == LOCKFREE
	    StackLF<Storage> freeList;
		#else
			#error undefined lock type for bucket lock
		#endif // SPINLOCK
	    size_t blockSize;								// size of allocations on this list
	}; // FreeHeader

	// must be first fields for alignment
	__spinlock_t extlock;								// protects allocation-buffer extension
	FreeHeader freeLists[NoBucketSizes];				// buckets for different allocation sizes

	void * heapBegin;									// start of heap
	void * heapEnd;										// logical end of heap
	size_t heapRemaining;								// amount of storage not allocated in the current chunk
}; // HeapManager


static inline _Bool setMmapStart( size_t value ) {
    if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
    mmapStart = value;									// set global

    // find the closest bucket size greater than or equal to the mmapStart size
    maxBucketsUsed = bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
    assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
    assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
    return false;
} // setMmapStart


static void ?{}( HeapManager & manager ) with ( manager ) {
    pageSize = sysconf( _SC_PAGESIZE );
    
    for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
		freeLists[i].blockSize = bucketSizes[i];
    } // for

	#ifdef FASTLOOKUP
    unsigned int idx = 0;
    for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
		if ( i > bucketSizes[idx] ) idx += 1;
		lookup[i] = idx;
    } // for
	#endif // FASTLOOKUP

    if ( setMmapStart( default_mmap_start() ) ) {
		abort( "HeapManager : internal error, mmap start initialization failure." );
    } // if
    heapExpand = default_heap_expansion();

    char * End = (char *)sbrk( 0 );
    sbrk( (char *)libCeiling( (long unsigned int)End, libAlign() ) - End ); // move start of heap to multiple of alignment
    heapBegin = heapEnd = sbrk( 0 );					// get new start point
} // HeapManager


static void ^?{}( HeapManager & ) {
	#ifdef __STATISTICS__
	// if ( prtHeapTerm() ) {
	// 	printStats();
	// 	checkFree( heapManager, true );
	// } // if
	#endif // __STATISTICS__
} // ~HeapManager


#ifdef __CFA_DEBUG__
static _Bool heapBoot = 0;								// detect recursion during boot
#endif // __CFA_DEBUG__
static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing

static void memory_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_MEMORY ) ));
void memory_startup( void ) {
	#ifdef __CFA_DEBUG__
	if ( unlikely( heapBoot ) ) {					// check for recursion during system boot
		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
		abort( "boot() : internal error, recursively invoked during system boot." );
	} // if
	heapBoot = true;
	#endif // __CFA_DEBUG__

	assert( heapManager.heapBegin == 0 );
	heapManager{};
} // memory_startup

static void memory_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_MEMORY ) ));
void memory_shutdown( void ) {
	^heapManager{};
} // memory_shutdown

static inline size_t getKey( const HeapManager.FreeHeader & freeheader ) { return freeheader.blockSize; } // bucket key for bsearchl over freeLists


#ifdef __STATISTICS__
static unsigned long long int mmap_storage;				// heap statistics counters
static unsigned int mmap_calls;
static unsigned long long int munmap_storage;
static unsigned int munmap_calls;
static unsigned long long int sbrk_storage;
static unsigned int sbrk_calls;
static unsigned long long int malloc_storage;
static unsigned int malloc_calls;
static unsigned long long int free_storage;
static unsigned int free_calls;
static unsigned long long int calloc_storage;
static unsigned int calloc_calls;
static unsigned long long int memalign_storage;
static unsigned int memalign_calls;
static unsigned long long int cmemalign_storage;
static unsigned int cmemalign_calls;
static unsigned long long int realloc_storage;
static unsigned int realloc_calls;

static int statfd = STDERR_FILENO;						// statistics file descriptor (changed by malloc_stats_fd)


// Use "write" because streams may be shutdown when calls are made.
static void printStats() {
    char helpText[512];
    int len = snprintf( helpText, 512,
						"\nHeap statistics:\n"
						"  malloc: calls %u / storage %llu\n"
						"  calloc: calls %u / storage %llu\n"
						"  memalign: calls %u / storage %llu\n"
						"  cmemalign: calls %u / storage %llu\n"
						"  realloc: calls %u / storage %llu\n"
						"  free: calls %u / storage %llu\n"
						"  mmap: calls %u / storage %llu\n"
						"  munmap: calls %u / storage %llu\n"
						"  sbrk: calls %u / storage %llu\n",
						malloc_calls, malloc_storage,
						calloc_calls, calloc_storage,
						memalign_calls, memalign_storage,
						cmemalign_calls, cmemalign_storage,
						realloc_calls, realloc_storage,
						free_calls, free_storage,
						mmap_calls, mmap_storage,
						munmap_calls, munmap_storage,
						sbrk_calls, sbrk_storage
		);
    write( statfd, helpText, len );
} // printStats


static int printStatsXML( FILE * stream ) {
    char helpText[512];
    int len = snprintf( helpText, 512,
						"<malloc version=\"1\">\n"
						"<heap nr=\"0\">\n"
						"<sizes>\n"
						"</sizes>\n"
						"<total type=\"malloc\" count=\"%u\" size=\"%llu\"/>\n"
						"<total type=\"calloc\" count=\"%u\" size=\"%llu\"/>\n"
						"<total type=\"memalign\" count=\"%u\" size=\"%llu\"/>\n"
						"<total type=\"cmemalign\" count=\"%u\" size=\"%llu\"/>\n"
						"<total type=\"realloc\" count=\"%u\" size=\"%llu\"/>\n"
						"<total type=\"free\" count=\"%u\" size=\"%llu\"/>\n"
						"<total type=\"mmap\" count=\"%u\" size=\"%llu\"/>\n"
						"<total type=\"munmap\" count=\"%u\" size=\"%llu\"/>\n"
						"<total type=\"sbrk\" count=\"%u\" size=\"%llu\"/>\n"
						"</heap>\n"
						"</malloc>",
						malloc_calls, malloc_storage,
						calloc_calls, calloc_storage,
						memalign_calls, memalign_storage,
						cmemalign_calls, cmemalign_storage,
						realloc_calls, realloc_storage,
						free_calls, free_storage,
						mmap_calls, mmap_storage,
						munmap_calls, munmap_storage,
						sbrk_calls, sbrk_storage
		);
    return write( fileno( stream ), helpText, len );	// -1 => error
} // printStatsXML
#endif // __STATISTICS__


static inline void noMemory() {
    abort( "Heap memory exhausted at %zu bytes.\n"
			"Possible cause is very large memory allocation and/or large amount of unfreed storage allocated by the program or system/library routines.",
			((char *)(sbrk( 0 )) - (char *)(heapManager.heapBegin)) );
} // noMemory


static inline void checkAlign( size_t alignment ) {
    if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) {
		abort( "Alignment %zu for memory allocation is less than sizeof(void *) and/or not a power of 2.", alignment );
    } // if
} // checkAlign


static inline _Bool setHeapExpand( size_t value ) {
    if ( value < pageSize ) return true;
    heapExpand = value;
    return false;
} // setHeapExpand


static inline void checkHeader( _Bool check, const char * name, void * addr ) {
    if ( unlikely( check ) ) {							// bad address ?
		abort( "Attempt to %s storage %p with address outside the heap.\n"
				"Possible cause is duplicate free on same block or overwriting of memory.",
				name, addr );
    } // if
} // checkHeader


static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & size, size_t & alignment ) {
    if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
		size_t offset = header->kind.fake.offset;
		alignment = header->kind.fake.alignment & -2;	// remove flag from value
		#ifdef __CFA_DEBUG__
		checkAlign( alignment );						// check alignment
		#endif // __CFA_DEBUG__
		header = (HeapManager.Storage.Header *)((char *)header - offset);
    } // if
} // fakeHeader


#define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
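
// For example, with the default 16-byte ALIGN, headerAddr( addr ) is (char *)addr - 16, the Storage prefix
// immediately in front of the user data.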

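// Locate the header, free-list element, and storage size/alignment for addr; returns true if the storage is
// mmap'ed (freeElem is not set) and false for bucket (sbrk) storage.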
static inline _Bool headers( const char * name, void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem, size_t & size, size_t & alignment ) with ( heapManager ) {
    header = headerAddr( addr );

    if ( unlikely( heapEnd < addr ) ) {					// mmapped ?
		fakeHeader( header, size, alignment );
		size = header->kind.real.blockSize & -3;		// mmap size
		return true;
    } // if

	#ifdef __CFA_DEBUG__
    checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
	#endif // __CFA_DEBUG__
    // header may be safe to dereference
    fakeHeader( header, size, alignment );
	#ifdef __CFA_DEBUG__
    checkHeader( header < (HeapManager.Storage.Header *)heapBegin || (HeapManager.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
	#endif // __CFA_DEBUG__

    freeElem = (HeapManager.FreeHeader *)((size_t)header->kind.real.home & -3);
	#ifdef __CFA_DEBUG__
    if ( freeElem < &freeLists[0] || &freeLists[NoBucketSizes] <= freeElem ) {
		abort( "Attempt to %s storage %p with corrupted header.\n"
			   "Possible cause is duplicate free on same block or overwriting of header information.",
			   name, addr );
    } // if
	#endif // __CFA_DEBUG__
    size = freeElem->blockSize;
    return false;
} // headers


static inline void * extend( size_t size ) with ( heapManager ) {
    lock( extlock __cfaabi_dbg_ctx2 );
    ptrdiff_t rem = heapRemaining - size;
    if ( rem < 0 ) {
		// If the size requested is bigger than the current remaining storage, increase the size of the heap.

		size_t increase = libCeiling( size > heapExpand ? size : heapExpand, libAlign() );
		if ( sbrk( increase ) == (void *)-1 ) {
			unlock( extlock );
			errno = ENOMEM;
			return 0;
		} // if
#ifdef __STATISTICS__
		sbrk_calls += 1;
		sbrk_storage += increase;
#endif // __STATISTICS__
#ifdef __CFA_DEBUG__
		// Set new memory to garbage so subsequent uninitialized usages might fail.
		memset( (char *)heapEnd + heapRemaining, '\377', increase );
#endif // __CFA_DEBUG__
		rem = heapRemaining + increase - size;
    } // if

    HeapManager.Storage * block = (HeapManager.Storage *)heapEnd;
    heapRemaining = rem;
    heapEnd = (char *)heapEnd + size;
    unlock( extlock );
    return block;
} // extend


static inline void * doMalloc( size_t size ) with ( heapManager ) {
    HeapManager.Storage * block;

    // Look up size in the size list.  Make sure the user request includes space for the header that must be allocated
    // along with the block and is a multiple of the alignment size.

    size_t tsize = size + sizeof(HeapManager.Storage);
    if ( likely( tsize < mmapStart ) ) {				// small size => sbrk
		HeapManager.FreeHeader * freeElem =
			#ifdef FASTLOOKUP
			tsize < LookupSizes ? &freeLists[lookup[tsize]] :
			#endif // FASTLOOKUP
			bsearchl( tsize, freeLists, (size_t)maxBucketsUsed ); // binary search
		assert( freeElem <= &freeLists[maxBucketsUsed] ); // subscripting error ?
		assert( tsize <= freeElem->blockSize );			// search failure ?
		tsize = freeElem->blockSize;					// total space needed for request

		// Spin until the lock is acquired for this particular size of block.

		#if BUCKETLOCK == SPINLOCK
		lock( freeElem->lock __cfaabi_dbg_ctx2 );
		block = freeElem->freeList;						// remove node from stack
		#else
		block = freeElem->freeList.pop();
		#endif // SPINLOCK
		if ( unlikely( block == 0 ) ) {					// no free block ?
			#if BUCKETLOCK == SPINLOCK
			unlock( freeElem->lock );
			#endif // SPINLOCK
			// Freelist for that size was empty, so carve it out of the heap if there's enough left, or get some more
			// and then carve it off.

			block = (HeapManager.Storage *)extend( tsize );	// mutual exclusion on call
			if ( unlikely( block == 0 ) ) return 0;
			#if BUCKETLOCK == SPINLOCK
		} else {
			freeElem->freeList = block->header.kind.real.next;
			unlock( freeElem->lock );
			#endif // SPINLOCK
		} // if

		block->header.kind.real.home = freeElem;		// pointer back to free list of appropriate size
    } else {											// large size => mmap
		tsize = libCeiling( tsize, pageSize );			// must be multiple of page size
		#ifdef __STATISTICS__
		__atomic_add_fetch( &mmap_calls, 1, __ATOMIC_SEQ_CST );
		__atomic_add_fetch( &mmap_storage, tsize, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__
		block = (HeapManager.Storage *)mmap( 0, tsize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, mmapFd, 0 );
		if ( block == (HeapManager.Storage *)MAP_FAILED ) {
			// Do not call strerror( errno ) as it may call malloc.
			abort( "(HeapManager &)0x%p.doMalloc() : internal error, mmap failure, size:%zu error:%d.", &heapManager, tsize, errno );
		} // if
#ifdef __CFA_DEBUG__
		// Set new memory to garbage so subsequent uninitialized usages might fail.
		memset( block, '\377', tsize );
#endif // __CFA_DEBUG__
		block->header.kind.real.blockSize = tsize;		// storage size for munmap
    } // if

    void * area = &(block->data);						// adjust off header to user bytes

	#ifdef __CFA_DEBUG__
    assert( ((uintptr_t)area & (libAlign() - 1)) == 0 ); // minimum alignment ?
    __atomic_add_fetch( &allocfree, tsize, __ATOMIC_SEQ_CST );
	if ( traceHeap() ) {
		enum { BufferSize = 64 };
		char helpText[BufferSize];
		int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", area, size, tsize );
		// int len = snprintf( helpText, BufferSize, "Malloc %p %zu\n", area, size );
		__cfaabi_dbg_bits_write( helpText, len );
	} // if
	#endif // __CFA_DEBUG__

    return area;
} // doMalloc


static inline void doFree( void * addr ) with ( heapManager ) {
	#ifdef __CFA_DEBUG__
    if ( unlikely( heapManager.heapBegin == 0 ) ) {
		abort( "doFree( %p ) : internal error, called before heap is initialized.", addr );
    } // if
	#endif // __CFA_DEBUG__

    HeapManager.Storage.Header * header;
    HeapManager.FreeHeader * freeElem;
    size_t size, alignment;								// not used (see realloc)

    if ( headers( "free", addr, header, freeElem, size, alignment ) ) { // mmapped ?
		#ifdef __STATISTICS__
		__atomic_add_fetch( &munmap_calls, 1, __ATOMIC_SEQ_CST );
		__atomic_add_fetch( &munmap_storage, size, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__
		if ( munmap( header, size ) == -1 ) {
			#ifdef __CFA_DEBUG__
			abort( "Attempt to deallocate storage %p not allocated or with corrupt header.\n"
					"Possible cause is invalid pointer.",
					addr );
			#endif // __CFA_DEBUG__
		} // if
    } else {
		#ifdef __CFA_DEBUG__
		// Set free memory to garbage so subsequent usages might fail.
		memset( ((HeapManager.Storage *)header)->data, '\377', freeElem->blockSize - sizeof( HeapManager.Storage ) );
		#endif // __CFA_DEBUG__

		#ifdef __STATISTICS__
		__atomic_add_fetch( &free_storage, size, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__
		#if BUCKETLOCK == SPINLOCK
		lock( freeElem->lock __cfaabi_dbg_ctx2 );		// acquire spin lock
		header->kind.real.next = freeElem->freeList;	// push on stack
		freeElem->freeList = (HeapManager.Storage *)header;
		unlock( freeElem->lock );						// release spin lock
		#else
		freeElem->freeList.push( *(HeapManager.Storage *)header );
		#endif // SPINLOCK
    } // if

	#ifdef __CFA_DEBUG__
    __atomic_add_fetch( &allocfree, -size, __ATOMIC_SEQ_CST );
    if ( traceHeap() ) {
		enum { BufferSize = 64 };
		char helpText[BufferSize];
		int len = snprintf( helpText, BufferSize, "Free( %p ) size:%zu\n", addr, size );
		__cfaabi_dbg_bits_write( helpText, len );
    } // if
	#endif // __CFA_DEBUG__
} // doFree


size_t checkFree( HeapManager & manager, _Bool prt ) with ( manager ) {
    size_t total = 0;
	#ifdef __STATISTICS__
    __cfaabi_dbg_bits_acquire();
    if ( prt ) __cfaabi_dbg_bits_print_nolock( "\nBin lists (bin size : free blocks on list)\n" );
	#endif // __STATISTICS__
    for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {
		size_t size = freeLists[i].blockSize;
		#ifdef __STATISTICS__
		unsigned int N = 0;
		#endif // __STATISTICS__
		#if BUCKETLOCK == SPINLOCK
		for ( HeapManager.Storage * p = freeLists[i].freeList; p != 0; p = p->header.kind.real.next ) {
		#else
		for ( HeapManager.Storage * p = freeLists[i].freeList.top(); p != 0; p = p->header.kind.real.next.top ) {
		#endif // SPINLOCK
			total += size;
			#ifdef __STATISTICS__
			N += 1;
			#endif // __STATISTICS__
	    } // for
		#ifdef __STATISTICS__
	    if ( prt ) __cfaabi_dbg_bits_print_nolock( "%7zu, %-7u  ", size, N );
	    if ( prt && (i + 1) % 8 == 0 ) __cfaabi_dbg_bits_print_nolock( "\n" );
		#endif // __STATISTICS__
	} // for
	#ifdef __STATISTICS__
	if ( prt ) __cfaabi_dbg_bits_print_nolock( "\ntotal free blocks:%zu\n", total );
	__cfaabi_dbg_bits_release();
	#endif // __STATISTICS__
	return (char *)heapEnd - (char *)heapBegin - total;	// sbrk storage allocated (not on free lists)
} // checkFree


static inline void * malloc2( size_t size ) {			// necessary for malloc statistics
	assert( heapManager.heapBegin != 0 );
    void * area = doMalloc( size );
    if ( unlikely( area == 0 ) ) errno = ENOMEM;		// POSIX
    return area;
} // malloc2


static inline void * memalign2( size_t alignment, size_t size ) { // necessary for malloc statistics
#ifdef __CFA_DEBUG__
    checkAlign( alignment );							// check alignment
#endif // __CFA_DEBUG__

    // if alignment <= default alignment, do normal malloc as two headers are unnecessary
    if ( unlikely( alignment <= libAlign() ) ) return malloc2( size );

    // Allocate enough storage to guarantee an address on the alignment boundary, and sufficient space before it for
    // administrative storage. NOTE, WHILE THERE ARE 2 HEADERS, THE FIRST ONE IS IMPLICITLY CREATED BY DOMALLOC.
    //      .-------------v-----------------v----------------v----------,
    //      | Real Header | ... padding ... |   Fake Header  | data ... |
    //      `-------------^-----------------^-+--------------^----------'
    //      |<--------------------------------' offset/align |<-- alignment boundary
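
    // For example, with 16-byte libAlign() and sizeof(HeapManager.Storage), memalign2( 64, 100 ) calls
    // doMalloc( 100 + 64 - 16 + 16 ) == doMalloc( 164 ); user is the first 64-byte boundary at or after
    // area + sizeof(HeapManager.Storage), and the fake header in the 16 bytes before user stores
    // offset = user - area and alignment = 64 | 1 = 65.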

    // subtract libAlign() because it is already the minimum alignment
    // add sizeof(Storage) for fake header
    char * area = (char *)doMalloc( size + alignment - libAlign() + sizeof(HeapManager.Storage) );
    if ( unlikely( area == 0 ) ) return area;

    // address in the block of the "next" alignment address
    char * user = (char *)libCeiling( (uintptr_t)(area + sizeof(HeapManager.Storage)), alignment );

    // address of header from malloc
    HeapManager.Storage.Header * realHeader = headerAddr( area );
    // address of fake header *before* the alignment location
    HeapManager.Storage.Header * fakeHeader = headerAddr( user );
    // SKULLDUGGERY: insert the offset to the start of the actual storage block and remember alignment
    fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)realHeader;
    // SKULLDUGGERY: odd alignment implies fake header
    fakeHeader->kind.fake.alignment = alignment | 1;

    return user;
} // memalign2


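// Standard C allocation API: these definitions interpose the C library routines, wrapping doMalloc/doFree
// with statistics and debugging support.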
extern "C" {
    void * malloc( size_t size ) {
		#ifdef __STATISTICS__
		__atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST );
		__atomic_add_fetch( &malloc_storage, size, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__

		return malloc2( size );
    } // malloc


    void * calloc( size_t noOfElems, size_t elemSize ) {
		size_t size = noOfElems * elemSize;
		if ( unlikely( noOfElems != 0 && size / noOfElems != elemSize ) ) { errno = ENOMEM; return 0; } // multiplication overflow ?
		#ifdef __STATISTICS__
		__atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
		__atomic_add_fetch( &calloc_storage, size, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__

		char * area = (char *)malloc2( size );
		if ( unlikely( area == 0 ) ) return 0;
		HeapManager.Storage.Header * header;
		HeapManager.FreeHeader * freeElem;
		size_t asize, alignment;
		_Bool mapped __attribute__(( unused )) = headers( "calloc", area, header, freeElem, asize, alignment );
		#ifndef __CFA_DEBUG__
		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 
		if ( ! mapped )
		#endif // __CFA_DEBUG__
			memset( area, '\0', asize - sizeof(HeapManager.Storage) ); // set to zeros
		header->kind.real.blockSize |= 2;		// mark as zero filled
		return area;
    } // calloc


    void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {
		size_t size = noOfElems * elemSize;
		if ( unlikely( noOfElems != 0 && size / noOfElems != elemSize ) ) { errno = ENOMEM; return 0; } // multiplication overflow ?
		#ifdef __STATISTICS__
		__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
		__atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__

		char * area = (char *)memalign2( alignment, size );
		if ( unlikely( area == 0 ) ) return 0;
		HeapManager.Storage.Header * header;
		HeapManager.FreeHeader * freeElem;
		size_t asize;
		_Bool mapped __attribute__(( unused )) = headers( "cmemalign", area, header, freeElem, asize, alignment );
		#ifndef __CFA_DEBUG__
		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 
		if ( ! mapped )
		#endif // __CFA_DEBUG__
			memset( area, '\0', asize - ( (char *)area - (char *)header ) ); // set to zeros
		header->kind.real.blockSize |= 2;				// mark as zero filled

		return area;
    } // cmemalign


    void * realloc( void * addr, size_t size ) {
		#ifdef __STATISTICS__
		__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__

		if ( unlikely( addr == 0 ) ) return malloc2( size ); // special cases
		if ( unlikely( size == 0 ) ) { free( addr ); return 0; }

		HeapManager.Storage.Header * header;
		HeapManager.FreeHeader * freeElem;
		size_t asize, alignment = 0;
		headers( "realloc", addr, header, freeElem, asize, alignment );

		size_t usize = asize - ( (char *)addr - (char *)header ); // compute the amount of user storage in the block
		if ( usize >= size ) {							// already sufficient storage
			// This case does not result in a new profiler entry because the previous one still exists and it must match with
			// the free for this memory.  Hence, this realloc does not appear in the profiler output.
			return addr;
		} // if

		#ifdef __STATISTICS__
		__atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__

		void * area;
		if ( unlikely( alignment != 0 ) ) {				// previous request memalign?
			area = memalign2( alignment, size );		// create new area without recounting memalign statistics
		} else {
			area = malloc2( size );	// create new area
		} // if
		if ( unlikely( area == 0 ) ) return 0;
		if ( unlikely( header->kind.real.blockSize & 2 ) ) { // previous request zero fill (calloc/cmemalign) ?
			assert( (header->kind.real.blockSize & 1) == 0 );
			_Bool mapped __attribute__(( unused )) = headers( "realloc", area, header, freeElem, asize, alignment );
			#ifndef __CFA_DEBUG__
			// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero. 
			if ( ! mapped )
			#endif // __CFA_DEBUG__
				memset( (char *)area + usize, '\0', asize - ( (char *)area - (char *)header ) - usize ); // zero-fill back part
			header->kind.real.blockSize |= 2;			// mark new request as zero fill
		} // if
		memcpy( area, addr, usize );					// copy bytes
		free( addr );
		return area;
    } // realloc


    void * memalign( size_t alignment, size_t size ) {
		#ifdef __STATISTICS__
		__atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST );
		__atomic_add_fetch( &memalign_storage, size, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__

		void * area = memalign2( alignment, size );

		return area;
    } // memalign


    void * aligned_alloc( size_t alignment, size_t size ) {
		return memalign( alignment, size );
    } // aligned_alloc


    int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
		if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) return EINVAL; // check alignment
		* memptr = memalign( alignment, size );
		if ( unlikely( * memptr == 0 ) ) return ENOMEM;
		return 0;
    } // posix_memalign


    void * valloc( size_t size ) {
		return memalign( pageSize, size );
    } // valloc


    void free( void * addr ) {
		#ifdef __STATISTICS__
		__atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST );
		#endif // __STATISTICS__

		if ( unlikely( addr == 0 ) ) {					// special case
			#ifdef __CFA_DEBUG__
			if ( traceHeap() ) {
				#define nullmsg "Free( 0x0 ) size:0\n"
				// Do not debug print free( 0 ), as it can cause recursive entry from sprintf.
				__cfaabi_dbg_bits_write( nullmsg, sizeof(nullmsg) - 1 );
			} // if
			#endif // __CFA_DEBUG__
			return;
		} // if

		doFree( addr );
    } // free

    int mallopt( int option, int value ) {
		choose( option ) {
		  case M_TOP_PAD:
			if ( setHeapExpand( value ) ) fallthru default; // error => bad value
		  case M_MMAP_THRESHOLD:
			if ( setMmapStart( value ) ) fallthru default; // error => bad value
		  default:
			return 0;									// error, bad value or unsupported option
		} // choose
		return 1;										// success
    } // mallopt


	int malloc_trim( size_t ) {
		return 0;										// => impossible to release memory
	} // malloc_trim

    size_t malloc_usable_size( void * addr ) {
		if ( unlikely( addr == 0 ) ) return 0;			// null allocation has 0 size
		HeapManager.Storage.Header * header;
		HeapManager.FreeHeader * freeElem;
		size_t size, alignment;

		headers( "malloc_usable_size", addr, header, freeElem, size, alignment );
		size_t usize = size - ( (char *)addr - (char *)header ); // compute the amount of user storage in the block
		return usize;
    } // malloc_usable_size


    size_t malloc_alignment( void * addr ) {
		if ( unlikely( addr == 0 ) ) return libAlign();	// minimum alignment
		HeapManager.Storage.Header * header = (HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) );
		if ( (header->kind.fake.alignment & 1) == 1 ) {	// fake header ?
			return header->kind.fake.alignment & -2;	// remove flag from value
		} else {
			return libAlign();							// minimum alignment
		} // if
    } // malloc_alignment


    _Bool malloc_zero_fill( void * addr ) {
		if ( unlikely( addr == 0 ) ) return false;		// null allocation is not zero fill
		HeapManager.Storage.Header * header = (HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) );
		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
			header = (HeapManager.Storage.Header *)((char *)header - header->kind.fake.offset);
		} // if
		return (header->kind.real.blockSize & 2) != 0;	// zero filled (calloc/cmemalign) ?
    } // malloc_zero_fill


    void malloc_stats( void ) {
		#ifdef __STATISTICS__
		printStats();
		checkFree( heapManager, true );
		#endif // __STATISTICS__
    } // malloc_stats


    int malloc_stats_fd( int fd ) {
		#ifdef __STATISTICS__
		int temp = statfd;
		statfd = fd;
		return temp;
		#else
		return -1;
		#endif // __STATISTICS__
    } // malloc_stats_fd


	int malloc_info( int options, FILE * stream ) {
		return printStatsXML( stream );
	} // malloc_info


	void * malloc_get_state( void ) {
		return 0;
	} // malloc_get_state


	int malloc_set_state( void * ptr ) {
		return 0;
	} // malloc_set_state
} // extern "C"


// Local Variables: //
// tab-width: 4 //
// compile-command: "cfa -nodebug -O2 heap.c" //
// End: //
