Index: src/libcfa/heap.c
===================================================================
--- src/libcfa/heap.c	(revision 891790ef574169e94ce79c6bee7f17e97e98b8b6)
+++ src/libcfa/heap.c	(revision d46ed6e6d373f0d84c7cf499d51da48c2bb6685b)
@@ -10,6 +10,6 @@
 // Created On       : Tue Dec 19 21:58:35 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Jul 24 08:57:22 2018
-// Update Count     : 388
+// Last Modified On : Wed Jul 25 16:42:02 2018
+// Update Count     : 438
 // 
 
@@ -25,4 +25,5 @@
 #include "bits/defs.h"									// likely, unlikely
 #include "bits/locks.h"									// __spinlock_t
+#include "startup.h"									// STARTUP_PRIORITY_MEMORY
 #include "stdlib"										// bsearchl
 #include "malloc.h"
@@ -74,82 +75,41 @@
 
 
-struct HeapManager {
-//	struct FreeHeader;									// forward declaration
-
-	struct Storage {
-	    struct Header {									// header
-			union Kind {
-				struct RealHeader {
-					union {
-						struct {						// 32-bit word => 64-bit header, 64-bit word => 128-bit header
-							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __U_WORDSIZE__ == 32
-							uint32_t padding;			// unused, force home/blocksize to overlay alignment in fake header
-							#endif // __U_WORDSIZE__ == 32 && __U_WORDSIZE__ == 32
-
-							union {
-//								FreeHeader * home;		// allocated block points back to home locations (must overlay alignment)
-								void * home;			// allocated block points back to home locations (must overlay alignment)
-								size_t blockSize;		// size for munmap (must overlay alignment)
-								#if BUCKLOCK == SPINLOCK
-								Storage * next;			// freed block points next freed block of same size
-								#endif // SPINLOCK
-							};
-
-							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __U_WORDSIZE__ == 32
-							uint32_t padding;			// unused, force home/blocksize to overlay alignment in fake header
-							#endif // __U_WORDSIZE__ == 32 && __U_WORDSIZE__ == 32
-
-						};
-						#if BUCKLOCK == LOCKFREE
-						Stack<Storage>::Link next;		// freed block points next freed block of same size (double-wide)
-						#endif // LOCKFREE
-					};
-				} real;
-				struct FakeHeader {
-					#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
-					#endif // __BYTE_ORDER__
-
-					uint32_t offset;
-
-					#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
-					#endif // __BYTE_ORDER__
-				} fake;
-			} kind;
-	    } header; // Header
-	    char pad[ALIGN - sizeof( Header )];
-	    char data[0];									// storage
-	}; // Storage
-
-	static_assert( ALIGN >= sizeof( Storage ), "ALIGN < sizeof( Storage )" );
-
-	struct FreeHeader {
-		#if BUCKLOCK == SPINLOCK
-	    __spinlock_t lock;								// must be first field for alignment
-	    Storage * freeList;
-		#elif BUCKLOCK == LOCKFREE
-	    StackLF<Storage> freeList;
-		#else
-			#error undefined lock type for bucket lock
-		#endif // SPINLOCK
-	    size_t blockSize;								// size of allocations on this list
-	}; // FreeHeader
-
-	// must be first fields for alignment
-	__spinlock_t extlock;								// protects allocation-buffer extension
-	FreeHeader freeLists[NoBucketSizes];				// buckets for different allocation sizes
-
-	void * heapBegin;									// start of heap
-	void * heapEnd;										// logical end of heap
-	size_t heapRemaining;								// amount of storage not allocated in the current chunk
-}; // HeapManager
-
-#ifdef __CFA_DEBUG__
-static _Bool heapBoot = 0;								// detect recursion during boot
-#endif // __CFA_DEBUG__
-static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
-
-static inline size_t getKey( const HeapManager.FreeHeader & freeheader ) { return freeheader.blockSize; }
+static _Bool traceHeap = false;
+
+inline _Bool traceHeap() {
+	return traceHeap;
+} // traceHeap
+
+_Bool traceHeapOn() {
+	_Bool temp = traceHeap;
+	traceHeap = true;
+	return temp;
+} // traceHeapOn
+
+_Bool traceHeapOff() {
+	_Bool temp = traceHeap;
+	traceHeap = false;
+	return temp;
+} // traceHeapOff
+
+
+// static _Bool prtHeapTerm = false;
+
+// inline _Bool prtHeapTerm() {
+// 	return prtHeapTerm;
+// } // prtHeapTerm
+
+// _Bool prtHeapTermOn() {
+// 	_Bool temp = prtHeapTerm;
+// 	prtHeapTerm = true;
+// 	return temp;
+// } // prtHeapTermOn
+
+// _Bool prtHeapTermOff() {
+// 	_Bool temp = prtHeapTerm;
+// 	prtHeapTerm = false;
+// 	return temp;
+// } // prtHeapTermOff
+
 
 // statically allocated variables => zero filled.
@@ -178,12 +138,184 @@
 #endif // FASTLOOKUP
 static int mmapFd = -1;									// fake or actual fd for anonymous file
+
+static unsigned int allocfree;							// running total of allocations minus frees
+static unsigned int appStart;							// storage allocation when application starts
+
+static void checkUnfreed() {
+	#ifdef __CFA_DEBUG__
+	unsigned int total = allocfree - appStart;
+    if ( total != 0 ) {
+		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
+		// char helpText[512];
+		// int len = snprintf( helpText, 512, "CFA warning (UNIX pid:%ld) : program terminating with %u(0x%x) bytes of storage allocated but not freed.\n"
+		// 					"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
+		// 					(long int)getpid(), total, total ); // always print the UNIX pid
+		// __cfaabi_dbg_bits_write( helpText, len );
+    } // if
+	#endif // __CFA_DEBUG__
+} // checkUnfreed
+
 #ifdef __CFA_DEBUG__
-static unsigned int allocfree;							// running total of allocations minus frees
+extern "C" {
+void heapAppStart() {									// called by __cfaabi_appready_startup
+	appStart = allocfree;
+} // heapAppStart
+
+void heapAppStop() {									// called by __cfaabi_appready_startdown
+	checkUnfreed();
+} // heapAppStop
+} // extern "C"
 #endif // __CFA_DEBUG__
 
 
+struct HeapManager {
+//	struct FreeHeader;									// forward declaration
+
+	struct Storage {
+	    struct Header {									// header
+			union Kind {
+				struct RealHeader {
+					union {
+						struct {						// 32-bit word => 64-bit header, 64-bit word => 128-bit header
+							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __U_WORDSIZE__ == 32
+							uint32_t padding;			// unused, force home/blocksize to overlay alignment in fake header
+							#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __U_WORDSIZE__ == 32
+
+							union {
+//								FreeHeader * home;		// allocated block points back to home locations (must overlay alignment)
+								void * home;			// allocated block points back to home locations (must overlay alignment)
+								size_t blockSize;		// size for munmap (must overlay alignment)
+								#if BUCKLOCK == SPINLOCK
+								Storage * next;			// freed block points next freed block of same size
+								#endif // SPINLOCK
+							};
+
+							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __U_WORDSIZE__ == 32
+							uint32_t padding;			// unused, force home/blocksize to overlay alignment in fake header
+							#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __U_WORDSIZE__ == 32
+
+						};
+						#if BUCKLOCK == LOCKFREE
+						Stack<Storage>::Link next;		// freed block points next freed block of same size (double-wide)
+						#endif // LOCKFREE
+					};
+				} real;
+				struct FakeHeader {
+					#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
+					#endif // __BYTE_ORDER__
+
+					uint32_t offset;
+
+					#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
+					#endif // __BYTE_ORDER__
+				} fake;
+			} kind;
+	    } header; // Header
+	    char pad[ALIGN - sizeof( Header )];
+	    char data[0];									// storage
+	}; // Storage
+
+	static_assert( ALIGN >= sizeof( Storage ), "ALIGN < sizeof( Storage )" );
+
+	struct FreeHeader {
+		#if BUCKLOCK == SPINLOCK
+	    __spinlock_t lock;								// must be first field for alignment
+	    Storage * freeList;
+		#elif BUCKLOCK == LOCKFREE
+	    StackLF<Storage> freeList;
+		#else
+			#error undefined lock type for bucket lock
+		#endif // SPINLOCK
+	    size_t blockSize;								// size of allocations on this list
+	}; // FreeHeader
+
+	// must be first fields for alignment
+	__spinlock_t extlock;								// protects allocation-buffer extension
+	FreeHeader freeLists[NoBucketSizes];				// buckets for different allocation sizes
+
+	void * heapBegin;									// start of heap
+	void * heapEnd;										// logical end of heap
+	size_t heapRemaining;								// amount of storage not allocated in the current chunk
+}; // HeapManager
+
+
+static inline _Bool setMmapStart( size_t value ) {
+    if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
+    mmapStart = value;									// set global
+
+    // find the closest bucket size less than or equal to the mmapStart size
+    maxBucketsUsed = bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
+    assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
+    assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
+    return false;
+} // setMmapStart
+
+
+static void ?{}( HeapManager & manager ) with ( manager ) {
+    pageSize = sysconf( _SC_PAGESIZE );
+    
+    for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
+		freeLists[i].blockSize = bucketSizes[i];
+    } // for
+
+	#ifdef FASTLOOKUP
+    unsigned int idx = 0;
+    for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
+		if ( i > bucketSizes[idx] ) idx += 1;
+		lookup[i] = idx;
+    } // for
+	#endif // FASTLOOKUP
+
+    if ( setMmapStart( default_mmap_start() ) ) {
+		abort( "HeapManager : internal error, mmap start initialization failure." );
+    } // if
+    heapExpand = default_heap_expansion();
+
+    char * End = (char *)sbrk( 0 );
+    sbrk( (char *)libCeiling( (long unsigned int)End, libAlign() ) - End ); // move start of heap to multiple of alignment
+    heapBegin = heapEnd = sbrk( 0 );					// get new start point
+} // HeapManager
+
+
+static void ^?{}( HeapManager & ) {
+	#ifdef __STATISTICS__
+	// if ( prtHeapTerm() ) {
+	// 	printStats();
+	// 	checkFree( heapManager, true );
+	// } // if
+	#endif // __STATISTICS__
+} // ~HeapManager
+
+
+#ifdef __CFA_DEBUG__
+static _Bool heapBoot = 0;								// detect recursion during boot
+#endif // __CFA_DEBUG__
+static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
+
+static void memory_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_MEMORY ) ));
+void memory_startup( void ) {
+	#ifdef __CFA_DEBUG__
+	if ( unlikely( heapBoot ) ) {					// check for recursion during system boot
+		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
+		abort( "boot() : internal error, recursively invoked during system boot." );
+	} // if
+	heapBoot = true;
+	#endif // __CFA_DEBUG__
+
+	heapManager{};
+} // memory_startup
+
+static void memory_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_MEMORY ) ));
+void memory_shutdown( void ) {
+	^heapManager{};
+} // memory_shutdown
+
+static inline size_t getKey( const HeapManager.FreeHeader & freeheader ) { return freeheader.blockSize; }
+
+
 #ifdef __STATISTICS__
-// Heap statistics
-static unsigned long long int mmap_storage;
+static unsigned long long int mmap_storage;				// heap statistics counters
 static unsigned int mmap_calls;
 static unsigned long long int munmap_storage;
@@ -203,9 +335,10 @@
 static unsigned long long int realloc_storage;
 static unsigned int realloc_calls;
-static int statfd;
+
+static int statfd;										// statistics file descriptor (changed by malloc_stats_fd)
 
 
 // Use "write" because streams may be shutdown when calls are made.
-static void print() {
+static void printStats() {
     char helpText[512];
     int len = snprintf( helpText, 512,
@@ -231,8 +364,8 @@
 		);
     write( statfd, helpText, len );
-} // print
-
-
-static int printXML( FILE * stream ) {
+} // printStats
+
+
+static int printStatsXML( FILE * stream ) {
     char helpText[512];
     int len = snprintf( helpText, 512,
@@ -262,5 +395,5 @@
 		);
     return write( fileno( stream ), helpText, len );	// -1 => error
-} // printXML
+} // printStatsXML
 #endif // __STATISTICS__
 
@@ -287,16 +420,4 @@
 
 
-static inline _Bool setMmapStart( size_t value ) {
-    if ( value < pageSize || bucketSizes[NoBucketSizes-1] < value ) return true;
-    mmapStart = value;									// set global
-
-    // find the closest bucket size less than or equal to the mmapStart size
-    maxBucketsUsed = bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
-    assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
-    assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
-    return false;
-} // setMmapStart
-
-
 static inline void checkHeader( _Bool check, const char * name, void * addr ) {
     if ( unlikely( check ) ) {							// bad address ?
@@ -306,4 +427,5 @@
     } // if
 } // checkHeader
+
 
 static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & size, size_t & alignment ) {
@@ -448,11 +570,11 @@
     assert( ((uintptr_t)area & (libAlign() - 1)) == 0 ); // minimum alignment ?
     __atomic_add_fetch( &allocfree, tsize, __ATOMIC_SEQ_CST );
-//    if ( uHeapControl::traceHeap() ) {
-//		enum { BufferSize = 64 };
-//		char helpText[BufferSize];
-//		int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", area, size, tsize );
-		//int len = snprintf( helpText, BufferSize, "Malloc %p %zu\n", area, size );
-//		uDebugWrite( STDERR_FILENO, helpText, len );
-//    } // if
+	if ( traceHeap() ) {
+		enum { BufferSize = 64 };
+		char helpText[BufferSize];
+		int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", area, size, tsize );
+		// int len = snprintf( helpText, BufferSize, "Malloc %p %zu\n", area, size );
+		__cfaabi_dbg_bits_write( helpText, len );
+	} // if
 	#endif // __CFA_DEBUG__
 
@@ -505,109 +627,50 @@
 	#ifdef __CFA_DEBUG__
     __atomic_add_fetch( &allocfree, -size, __ATOMIC_SEQ_CST );
-    // if ( uHeapControl::traceHeap() ) {
-	// 	enum { BufferSize = 64 };
-	// 	char helpText[BufferSize];
-	// 	int len = snprintf( helpText, BufferSize, "Free( %p ) size:%zu\n", addr, size );
-	// 	uDebugWrite( STDERR_FILENO, helpText, len );
-    // } // if
+    if ( traceHeap() ) {
+		enum { BufferSize = 64 };
+		char helpText[BufferSize];
+		int len = snprintf( helpText, BufferSize, "Free( %p ) size:%zu\n", addr, size );
+		__cfaabi_dbg_bits_write( helpText, len );
+    } // if
 	#endif // __CFA_DEBUG__
 } // doFree
 
 
-// size_t checkFree( _Bool prt ) {
-//     size_t total = 0;
-// #ifdef __STATISTICS__
-//     uDebugAcquire();
-//     if ( prt ) uDebugPrt2( "\nBin lists (bin size : free blocks on list)\n" );
-// #endif // __STATISTICS__
-//     for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {
-// 	size_t size = freeLists[i].blockSize;
-// #ifdef __STATISTICS__
-// 	unsigned int N = 0;
-// #endif // __STATISTICS__
-// #if defined( SPINLOCK )
-// 	for ( Storage * p = freeLists[i].freeList; p != 0; p = p->header.kind.real.next ) {
-// #else
-// 	    for ( Storage * p = freeLists[i].freeList.top(); p != 0; p = p->header.kind.real.next.top ) {
-// #endif // SPINLOCK
-// 		total += size;
-// #ifdef __STATISTICS__
-// 		N += 1;
-// #endif // __STATISTICS__
-// 	    } // for
-// #ifdef __STATISTICS__
-// 	    if ( prt ) uDebugPrt2( "%7zu, %-7u  ", size, N );
-// 	    if ( (i + 1) % 8 == 0 ) uDebugPrt2( "\n" );
-// #endif // __STATISTICS__
-// 	} // for
-// #ifdef __STATISTICS__
-// 	if ( prt ) uDebugPrt2( "\ntotal free blocks:%zu\n", total );
-// 	uDebugRelease();
-// #endif // __STATISTICS__
-// 	return (char *)heapEnd - (char *)heapBegin - total;
-//     } // for
-// } // checkFree
-
-
-static void ?{}( HeapManager & manager ) with ( manager ) {
-    pageSize = sysconf( _SC_PAGESIZE );
-    
-    for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
-		freeLists[i].blockSize = bucketSizes[i];
-    } // for
-
-	#ifdef FASTLOOKUP
-    unsigned int idx = 0;
-    for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
-		if ( i > bucketSizes[idx] ) idx += 1;
-		lookup[i] = idx;
-    } // for
-	#endif // FASTLOOKUP
-
-    if ( setMmapStart( default_mmap_start() ) ) {
-		abort( "HeapManager : internal error, mmap start initialization failure." );
-    } // if
-    heapExpand = default_heap_expansion();
-
-    char * End = (char *)sbrk( 0 );
-    sbrk( (char *)libCeiling( (long unsigned int)End, libAlign() ) - End ); // move start of heap to multiple of alignment
-    heapBegin = heapEnd = sbrk( 0 );					// get new start point
-} // HeapManager
-
-
-static void ^?{}( HeapManager & ) {
+size_t checkFree( HeapManager & manager, _Bool prt ) with ( manager ) {
+    size_t total = 0;
 	#ifdef __STATISTICS__
-	// if ( prtHeapterm ) {
-	// 	print();
-	// 	heapManager.checkFree( true );
-	// } // if
+    __cfaabi_dbg_bits_acquire();
+    if ( prt ) __cfaabi_dbg_bits_print_nolock( "\nBin lists (bin size : free blocks on list)\n" );
 	#endif // __STATISTICS__
-
-	#ifdef __CFA_DEBUG__
-    if ( allocfree != 0 ) {
-		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
-		char helpText[512];
-		int len = snprintf( helpText, 512, "CFA warning (UNIX pid:%ld) : program terminating with %u(0x%x) bytes of storage allocated but not freed.\n"
-							"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
-							(long int)getpid(), allocfree, allocfree ); // always print the UNIX pid
-		__cfaabi_dbg_bits_write( helpText, len );
-    } // if
-	#endif // __CFA_DEBUG__
-} // ~HeapManager
+    for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {
+		size_t size = freeLists[i].blockSize;
+		#ifdef __STATISTICS__
+		unsigned int N = 0;
+		#endif // __STATISTICS__
+		#if defined( SPINLOCK )
+		for ( HeapManager.Storage * p = freeLists[i].freeList; p != 0; p = p->header.kind.real.next ) {
+		#else
+		for ( HeapManager.Storage * p = freeLists[i].freeList.top(); p != 0; p = p->header.kind.real.next.top ) {
+		#endif // SPINLOCK
+			total += size;
+			#ifdef __STATISTICS__
+			N += 1;
+			#endif // __STATISTICS__
+	    } // for
+		#ifdef __STATISTICS__
+	    if ( prt ) __cfaabi_dbg_bits_print_nolock( "%7zu, %-7u  ", size, N );
+	    if ( (i + 1) % 8 == 0 ) __cfaabi_dbg_bits_print_nolock( "\n" );
+		#endif // __STATISTICS__
+	} // for
+	#ifdef __STATISTICS__
+	if ( prt ) __cfaabi_dbg_bits_print_nolock( "\ntotal free blocks:%zu\n", total );
+	__cfaabi_dbg_bits_release();
+	#endif // __STATISTICS__
+	return (char *)heapEnd - (char *)heapBegin - total;
+} // checkFree
 
 
 static inline void * malloc2( size_t size ) {			// necessary for malloc statistics
-    if ( unlikely( heapManager.heapBegin == 0 ) ) {
-		#ifdef __CFA_DEBUG__
-		if ( unlikely( heapBoot ) ) {					// check for recursion during system boot
-			// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
-			abort( "boot() : internal error, recursively invoked during system boot." );
-		} // if
-		heapBoot = true;
-		#endif // __CFA_DEBUG__
-
-		heapManager{};
-	} // if
-
+    assert( heapManager.heapBegin != 0 ) ;				// heap started
     void * area = doMalloc( size );
     if ( unlikely( area == 0 ) ) errno = ENOMEM;		// POSIX
@@ -793,16 +856,15 @@
 
 		if ( unlikely( addr == 0 ) ) {					// special case
-// #ifdef __CFA_DEBUG__
-// 	    if ( uHeapControl::traceHeap() ) {
-// #		define nullmsg "Free( 0x0 ) size:0\n"
-// 		// Do not debug print free( 0 ), as it can cause recursive entry from sprintf.
-// 		uDebugWrite( STDERR_FILENO, nullmsg, sizeof(nullmsg) - 1 );
-// 	    } // if
-// #endif // __CFA_DEBUG__
+			#ifdef __CFA_DEBUG__
+			if ( traceHeap() ) {
+				#define nullmsg "Free( 0x0 ) size:0\n"
+				// Do not debug print free( 0 ), as it can cause recursive entry from sprintf.
+				__cfaabi_dbg_bits_write( nullmsg, sizeof(nullmsg) - 1 );
+			} // if
+			#endif // __CFA_DEBUG__
 			return;
 		} // exit
 
 		doFree( addr );
-		// Do not debug print free( 0 ), as it can cause recursive entry from sprintf.
     } // free
 
@@ -859,6 +921,6 @@
     void malloc_stats( void ) {
 		#ifdef __STATISTICS__
-		print();
-		// heapManager.checkFree( true );
+		printStats();
+		checkFree( heapManager, true );
 		#endif // __STATISTICS__
     } // malloc_stats
@@ -877,5 +939,5 @@
 
 	int malloc_info( int options, FILE * stream ) {
-		return printXML( stream );
+		return printStatsXML( stream );
 	} // malloc_info
 
Index: src/libcfa/stdhdr/malloc.h
===================================================================
--- src/libcfa/stdhdr/malloc.h	(revision 891790ef574169e94ce79c6bee7f17e97e98b8b6)
+++ src/libcfa/stdhdr/malloc.h	(revision d46ed6e6d373f0d84c7cf499d51da48c2bb6685b)
@@ -10,7 +10,17 @@
 // Created On       : Thu Jul 20 15:58:16 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Jul 20 16:00:12 2017
-// Update Count     : 4
+// Last Modified On : Mon Jul 23 18:20:32 2018
+// Update Count     : 8
 // 
+
+
+size_t default_mmap_start();							// CFA extras
+size_t default_heap_expansion();
+extern "C" {
+size_t malloc_alignment( void * );
+_Bool malloc_zero_fill( void * );
+int malloc_stats_fd( int fd );
+void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize );
+} // extern "C"
 
 extern "C" {
Index: src/libcfa/stdlib
===================================================================
--- src/libcfa/stdlib	(revision 891790ef574169e94ce79c6bee7f17e97e98b8b6)
+++ src/libcfa/stdlib	(revision d46ed6e6d373f0d84c7cf499d51da48c2bb6685b)
@@ -10,15 +10,16 @@
 // Created On       : Thu Jan 28 17:12:35 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Jul 12 08:03:58 2018
-// Update Count     : 337
+// Last Modified On : Mon Jul 23 07:44:47 2018
+// Update Count     : 341
 //
 
 #pragma once
 
-#include <stdlib.h>										// allocation, strto*, ato*
+#include <stdlib.h>										// *alloc, strto*, ato*
 extern "C" {
 	void * memalign( size_t align, size_t size );		// malloc.h
 	void * memset( void * dest, int c, size_t size );	// string.h
 	void * memcpy( void * dest, const void * src, size_t size ); // string.h
+    void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ); // CFA
 } // extern "C"
 
@@ -36,5 +37,4 @@
 
 	T * malloc( void ) {
-		// printf( "* malloc\n" );
 		return (T *)(void *)malloc( (size_t)sizeof(T) ); // C malloc
 	} // malloc
@@ -48,25 +48,20 @@
 
 	T * calloc( size_t dim ) {
-		//printf( "X2\n" );
 		return (T *)(void *)calloc( dim, sizeof(T) );	// C calloc
 	} // calloc
 
 	T * realloc( T * ptr, size_t size ) {
-		//printf( "X3\n" );
 		return (T *)(void *)realloc( (void *)ptr, size );
 	} // realloc
 
 	T * memalign( size_t align ) {
-		//printf( "X4\n" );
 		return (T *)memalign( align, sizeof(T) );
 	} // memalign
 
 	T * aligned_alloc( size_t align ) {
-		//printf( "X5\n" );
 		return (T *)aligned_alloc( align, sizeof(T) );
 	} // aligned_alloc
 
 	int posix_memalign( T ** ptr, size_t align ) {
-		//printf( "X6\n" );
 		return posix_memalign( (void **)ptr, align, sizeof(T) ); // C posix_memalign
 	} // posix_memalign
@@ -76,10 +71,8 @@
 
 	T * alloc( void ) {
-		//printf( "X7\n" );
 		return (T *)(void *)malloc( (size_t)sizeof(T) ); // C malloc
 	} // alloc
 
 	T * alloc( char fill ) {
-		//printf( "X8\n" );
 		T * ptr = (T *)(void *)malloc( (size_t)sizeof(T) );	// C malloc
 		return (T *)memset( ptr, (int)fill, sizeof(T) );	// initial with fill value
@@ -87,10 +80,8 @@
 
 	T * alloc( size_t dim ) {
-		//printf( "X9\n" );
 		return (T *)(void *)malloc( dim * (size_t)sizeof(T) ); // C malloc
 	} // alloc
 
 	T * alloc( size_t dim, char fill ) {
-		//printf( "X10\n" );
 		T * ptr = (T *)(void *)malloc( dim * (size_t)sizeof(T) ); // C malloc
 		return (T *)memset( ptr, (int)fill, dim * sizeof(T) );	  // initial with fill value
@@ -98,5 +89,4 @@
 
 	T * alloc( T ptr[], size_t dim ) {
-		//printf( "X11\n" );
 		return (T *)(void *)realloc( (void *)ptr, dim * (size_t)sizeof(T) ); // C realloc
 	} // alloc
@@ -109,10 +99,8 @@
 static inline forall( dtype T | sized(T) ) {
 	T * align_alloc( size_t align ) {
-		//printf( "X13\n" );
 		return (T *)memalign( align, sizeof(T) );
 	} // align_alloc
 
 	T * align_alloc( size_t align, char fill ) {
-		//printf( "X14\n" );
 		T * ptr = (T *)memalign( align, sizeof(T) );
 		return (T *)memset( ptr, (int)fill, sizeof(T) );
@@ -120,12 +108,16 @@
 
 	T * align_alloc( size_t align, size_t dim ) {
-		//printf( "X15\n" );
 		return (T *)memalign( align, dim * sizeof(T) );
 	} // align_alloc
 
 	T * align_alloc( size_t align, size_t dim, char fill ) {
-		//printf( "X16\n" );
-		T * ptr = (T *)memalign( align, dim * sizeof(T) );
-		return (T *)memset( ptr, (int)fill, dim * sizeof(T) );
+		T * ptr;
+		if ( fill == '\0' ) {
+			ptr = (T *)cmemalign( align, dim, sizeof(T) );
+		} else {
+			ptr = (T *)memalign( align, dim * sizeof(T) );
+			return (T *)memset( ptr, (int)fill, dim * sizeof(T) );
+		} // if
+		return ptr;
 	} // align_alloc
 } // distribution
@@ -136,10 +128,8 @@
 
 	T * memset( T * dest, char c ) {
-		//printf( "X17\n" );
 		return (T *)memset( dest, c, sizeof(T) );
 	} // memset
 
 	T * memcpy( T * dest, const T * src ) {
-		//printf( "X18\n" );
 		return (T *)memcpy( dest, src, sizeof(T) );
 	} // memcpy
@@ -150,10 +140,8 @@
 
 	T * memset( T dest[], size_t dim, char c ) {
-		//printf( "X19\n" );
 		return (T *)(void *)memset( dest, c, dim * sizeof(T) );	// C memset
 	} // memset
 
 	T * memcpy( T dest[], const T src[], size_t dim ) {
-		//printf( "X20\n" );
 		return (T *)(void *)memcpy( dest, src, dim * sizeof(T) ); // C memcpy
 	} // memcpy
