Index: libcfa/src/bits/locks.hfa
===================================================================
--- libcfa/src/bits/locks.hfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ libcfa/src/bits/locks.hfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -13,6 +13,6 @@
 // Created On       : Tue Oct 31 15:14:38 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Sep 19 18:51:53 2022
-// Update Count     : 17
+// Last Modified On : Tue Sep 20 22:09:50 2022
+// Update Count     : 18
 //
 
@@ -64,5 +64,5 @@
 			#ifndef NOEXPBACK
 				// exponential spin
-			for ( volatile unsigned int s; 0 ~ spin ) Pause();
+				for ( volatile unsigned int s; 0 ~ spin ) Pause();
 
 				// slowly increase by powers of 2
Index: libcfa/src/concurrency/kernel/fwd.hfa
===================================================================
--- libcfa/src/concurrency/kernel/fwd.hfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ libcfa/src/concurrency/kernel/fwd.hfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -276,5 +276,5 @@
 			// intented to be use by wait, wait_any, waitfor, etc. rather than used directly
 			bool retract( future_t & this, oneshot & wait_ctx ) {
-				struct oneshot * expected = this.ptr;
+				struct oneshot * expected = &wait_ctx;
 
 				// attempt to remove the context so it doesn't get consumed.
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -184,4 +184,7 @@
 
 
+extern void heapManagerCtor();
+extern void heapManagerDtor();
+
 //=============================================================================================
 // Kernel Setup logic
@@ -365,4 +368,6 @@
 	proc->local_data = &__cfaabi_tls;
 
+	heapManagerCtor();									// initialize heap
+
 	__cfa_io_start( proc );
 	register_tls( proc );
@@ -416,4 +421,6 @@
 	unregister_tls( proc );
 	__cfa_io_stop( proc );
+
+	heapManagerDtor();									// de-initialize heap
 
 	return 0p;
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ libcfa/src/concurrency/preemption.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -232,42 +232,9 @@
 // available.
 
-//-----------------------------------------------------------------------------
-// Some assembly required
-#define __cfaasm_label(label, when) when: asm volatile goto(".global __cfaasm_" #label "_" #when "\n" "__cfaasm_" #label "_" #when ":":::"memory":when)
-
 //----------
 // special case for preemption since used often
-__attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() libcfa_nopreempt libcfa_public {
-	// create a assembler label before
-	// marked as clobber all to avoid movement
-	__cfaasm_label(check, before);
-
+bool __preemption_enabled() libcfa_nopreempt libcfa_public {
 	// access tls as normal
-	bool enabled = __cfaabi_tls.preemption_state.enabled;
-
-	// Check if there is a pending preemption
-	processor   * proc = __cfaabi_tls.this_processor;
-	bool pending = proc ? proc->pending_preemption : false;
-	if( enabled && pending ) proc->pending_preemption = false;
-
-	// create a assembler label after
-	// marked as clobber all to avoid movement
-	__cfaasm_label(check, after);
-
-	// If we can preempt and there is a pending one
-	// this is a good time to yield
-	if( enabled && pending ) {
-		force_yield( __POLL_PREEMPTION );
-	}
-	return enabled;
-}
-
-struct asm_region {
-	void * before;
-	void * after;
-};
-
-static inline bool __cfaasm_in( void * ip, struct asm_region & region ) {
-	return ip >= region.before && ip <= region.after;
+	return __cfaabi_tls.preemption_state.enabled;
 }
 
@@ -293,15 +260,7 @@
 uintptr_t __cfatls_get( unsigned long int offset ) libcfa_nopreempt libcfa_public; //no inline to avoid problems
 uintptr_t __cfatls_get( unsigned long int offset ) {
-	// create a assembler label before
-	// marked as clobber all to avoid movement
-	__cfaasm_label(get, before);
-
 	// access tls as normal (except for pointer arithmetic)
 	uintptr_t val = *(uintptr_t*)((uintptr_t)&__cfaabi_tls + offset);
 
-	// create a assembler label after
-	// marked as clobber all to avoid movement
-	__cfaasm_label(get, after);
-
 	// This is used everywhere, to avoid cost, we DO NOT poll pending preemption
 	return val;
@@ -310,31 +269,20 @@
 extern "C" {
 	// Disable interrupts by incrementing the counter
-	void disable_interrupts() libcfa_nopreempt libcfa_public {
-		// create a assembler label before
-		// marked as clobber all to avoid movement
-		__cfaasm_label(dsable, before);
-
-		with( __cfaabi_tls.preemption_state ) {
-			#if GCC_VERSION > 50000
-			static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
-			#endif
-
-			// Set enabled flag to false
-			// should be atomic to avoid preemption in the middle of the operation.
-			// use memory order RELAXED since there is no inter-thread on this variable requirements
-			__atomic_store_n(&enabled, false, __ATOMIC_RELAXED);
-
-			// Signal the compiler that a fence is needed but only for signal handlers
-			__atomic_signal_fence(__ATOMIC_ACQUIRE);
-
-			__attribute__((unused)) unsigned short new_val = disable_count + 1;
-			disable_count = new_val;
-			verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
-		}
-
-		// create a assembler label after
-		// marked as clobber all to avoid movement
-		__cfaasm_label(dsable, after);
-
+	void disable_interrupts() libcfa_nopreempt libcfa_public with( __cfaabi_tls.preemption_state ) {
+		#if GCC_VERSION > 50000
+		static_assert(__atomic_always_lock_free(sizeof(enabled), &enabled), "Must be lock-free");
+		#endif
+
+		// Set enabled flag to false
+		// should be atomic to avoid preemption in the middle of the operation.
+		// use memory order RELAXED since there is no inter-thread on this variable requirements
+		__atomic_store_n(&enabled, false, __ATOMIC_RELAXED);
+
+		// Signal the compiler that a fence is needed but only for signal handlers
+		__atomic_signal_fence(__ATOMIC_ACQUIRE);
+
+		__attribute__((unused)) unsigned short new_val = disable_count + 1;
+		disable_count = new_val;
+		verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
 	}
 
@@ -379,17 +327,15 @@
 	// i.e. on a real processor and not in the kernel
 	// (can return true even if no preemption was pending)
-	bool poll_interrupts() libcfa_public {
+	bool poll_interrupts() libcfa_nopreempt libcfa_public {
 		// Cache the processor now since interrupts can start happening after the atomic store
-		processor   * proc = publicTLS_get( this_processor );
+		processor   * proc =  __cfaabi_tls.this_processor;
 		if ( ! proc ) return false;
-		if ( ! __preemption_enabled() ) return false;
-
-		with( __cfaabi_tls.preemption_state ){
-			// Signal the compiler that a fence is needed but only for signal handlers
-			__atomic_signal_fence(__ATOMIC_RELEASE);
-			if( proc->pending_preemption ) {
-				proc->pending_preemption = false;
-				force_yield( __POLL_PREEMPTION );
-			}
+		if ( ! __cfaabi_tls.preemption_state.enabled ) return false;
+
+		// Signal the compiler that a fence is needed but only for signal handlers
+		__atomic_signal_fence(__ATOMIC_RELEASE);
+		if( unlikely( proc->pending_preemption ) ) {
+			proc->pending_preemption = false;
+			force_yield( __POLL_PREEMPTION );
 		}
 
Index: libcfa/src/heap.cfa
===================================================================
--- libcfa/src/heap.cfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ libcfa/src/heap.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -10,8 +10,9 @@
 // Created On       : Tue Dec 19 21:58:35 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Apr 29 19:05:03 2022
-// Update Count     : 1167
+// Last Modified On : Thu Oct 13 22:21:52 2022
+// Update Count     : 1557
 //
 
+#include <stdio.h>
 #include <string.h>										// memset, memcpy
 #include <limits.h>										// ULONG_MAX
@@ -21,15 +22,51 @@
 #include <malloc.h>										// memalign, malloc_usable_size
 #include <sys/mman.h>									// mmap, munmap
+extern "C" {
 #include <sys/sysinfo.h>								// get_nprocs
+} // extern "C"
 
 #include "bits/align.hfa"								// libAlign
 #include "bits/defs.hfa"								// likely, unlikely
-#include "bits/locks.hfa"								// __spinlock_t
+#include "concurrency/kernel/fwd.hfa"					// __POLL_PREEMPTION
 #include "startup.hfa"									// STARTUP_PRIORITY_MEMORY
-#include "math.hfa"										// min
+#include "math.hfa"										// ceiling, min
 #include "bitmanip.hfa"									// is_pow2, ceiling2
 
-#define FASTLOOKUP
-#define __STATISTICS__
+// supported mallopt options
+#ifndef M_MMAP_THRESHOLD
+#define M_MMAP_THRESHOLD (-1)
+#endif // M_MMAP_THRESHOLD
+
+#ifndef M_TOP_PAD
+#define M_TOP_PAD (-2)
+#endif // M_TOP_PAD
+
+#define FASTLOOKUP										// use O(1) table lookup from allocation size to bucket size
+#define RETURNSPIN										// toggle spinlock / lockfree stack
+#define OWNERSHIP										// return freed memory to owner thread
+
+#define CACHE_ALIGN 64
+#define CALIGN __attribute__(( aligned(CACHE_ALIGN) ))
+
+#define TLSMODEL __attribute__(( tls_model("initial-exec") ))
+
+//#define __STATISTICS__
+
+enum {													// compile-time default sizes, all in bytes
+	// The default extension heap amount in units of bytes. When the current heap reaches the brk address, the brk
+	// address is extended by the extension amount.
+	__CFA_DEFAULT_HEAP_EXPANSION__ = 10 * 1024 * 1024,
+
+	// The mmap crossover point during allocation. Allocations less than this amount are allocated from buckets; values
+	// greater than or equal to this value are mmap from the operating system.
+	__CFA_DEFAULT_MMAP_START__ = 512 * 1024 + 1,
+
+	// The default unfreed storage amount in units of bytes. When the program ends, this amount is subtracted from
+	// the malloc/free counter to adjust for storage the program does not free.
+	__CFA_DEFAULT_HEAP_UNFREED__ = 0
+}; // enum
+
+
+//####################### Heap Trace/Print ####################
 
 
@@ -55,9 +92,9 @@
 static bool prtFree = false;
 
-static bool prtFree() {
+bool prtFree() {
 	return prtFree;
 } // prtFree
 
-static bool prtFreeOn() {
+bool prtFreeOn() {
 	bool temp = prtFree;
 	prtFree = true;
@@ -65,5 +102,5 @@
 } // prtFreeOn
 
-static bool prtFreeOff() {
+bool prtFreeOff() {
 	bool temp = prtFree;
 	prtFree = false;
@@ -72,17 +109,34 @@
 
 
-enum {
-	// The default extension heap amount in units of bytes. When the current heap reaches the brk address, the brk
-	// address is extended by the extension amount.
-	__CFA_DEFAULT_HEAP_EXPANSION__ = 10 * 1024 * 1024,
-
-	// The mmap crossover point during allocation. Allocations less than this amount are allocated from buckets; values
-	// greater than or equal to this value are mmap from the operating system.
-	__CFA_DEFAULT_MMAP_START__ = 512 * 1024 + 1,
-
-	// The default unfreed storage amount in units of bytes. When the uC++ program ends it subtracts this amount from
-	// the malloc/free counter to adjust for storage the program does not free.
-	__CFA_DEFAULT_HEAP_UNFREED__ = 0
-}; // enum
+//######################### Spin Lock #########################
+
+
+// pause to prevent excess processor bus usage
+#if defined( __i386 ) || defined( __x86_64 )
+	#define Pause() __asm__ __volatile__ ( "pause" : : : )
+#elif defined(__ARM_ARCH)
+	#define Pause() __asm__ __volatile__ ( "YIELD" : : : )
+#else
+	#error unsupported architecture
+#endif
+
+typedef volatile uintptr_t SpinLock_t CALIGN;			// aligned addressable word-size
+
+static inline __attribute__((always_inline)) void lock( volatile SpinLock_t & slock ) { // spin, with capped exponential backoff, until slock is acquired
+	enum { SPIN_START = 4, SPIN_END = 64 * 1024, };		// initial and maximum number of Pause iterations between attempts
+	unsigned int spin = SPIN_START;
+
+	for ( unsigned int i = 1;; i += 1 ) {				// i counts attempts; only referenced by the disabled line below
+	  if ( slock == 0 && __atomic_test_and_set( &slock, __ATOMIC_SEQ_CST ) == 0 ) break; // Fence; plain read first so the atomic is only attempted when the lock looks free
+		for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause(); // exponential spin
+		spin += spin;									// powers of 2
+		//if ( i % 64 == 0 ) spin += spin;				// slowly increase by powers of 2
+		if ( spin > SPIN_END ) spin = SPIN_END;			// cap spinning
+	} // for
+} // lock
+
+static inline __attribute__((always_inline)) void unlock( volatile SpinLock_t & slock ) { // release slock by clearing it
+	__atomic_clear( &slock, __ATOMIC_SEQ_CST );			// Fence
+} // unlock
 
 
@@ -120,6 +174,6 @@
 		unsigned int free_calls, free_null_calls;
 		unsigned long long int free_storage_request, free_storage_alloc;
-		unsigned int away_pulls, away_pushes;
-		unsigned long long int away_storage_request, away_storage_alloc;
+		unsigned int return_pulls, return_pushes;
+		unsigned long long int return_storage_request, return_storage_alloc;
 		unsigned int mmap_calls, mmap_0_calls;			// no zero calls
 		unsigned long long int mmap_storage_request, mmap_storage_alloc;
@@ -131,5 +185,5 @@
 
 static_assert( sizeof(HeapStatistics) == CntTriples * sizeof(StatsOverlay),
- 			   "Heap statistics counter-triplets does not match with array size" );
+			   "Heap statistics counter-triplets does not match with array size" );
 
 static void HeapStatisticsCtor( HeapStatistics & stats ) {
@@ -203,24 +257,41 @@
 	static_assert( libAlign() >= sizeof( Storage ), "minimum alignment < sizeof( Storage )" );
 
-	struct FreeHeader {
-		size_t blockSize __attribute__(( aligned (8) )); // size of allocations on this list
+	struct __attribute__(( aligned (8) )) FreeHeader {
+		size_t blockSize __attribute__(( aligned(8) )); // size of allocations on this list
 		#if BUCKETLOCK == SPINLOCK
-		__spinlock_t lock;
-		Storage * freeList;
+		#ifdef OWNERSHIP
+		#ifdef RETURNSPIN
+		SpinLock_t returnLock;
+		#endif // RETURNSPIN
+		Storage * returnList;							// other thread return list
+		#endif // OWNERSHIP
+		Storage * freeList;								// thread free list
 		#else
 		StackLF(Storage) freeList;
 		#endif // BUCKETLOCK
-	} __attribute__(( aligned (8) )); // FreeHeader
+		Heap * homeManager;								// heap owner (free storage to bucket, from bucket to heap)
+	}; // FreeHeader
 
 	FreeHeader freeLists[NoBucketSizes];				// buckets for different allocation sizes
-
-	__spinlock_t extlock;								// protects allocation-buffer extension
-	void * heapBegin;									// start of heap
-	void * heapEnd;										// logical end of heap
-	size_t heapRemaining;								// amount of storage not allocated in the current chunk
+	void * heapBuffer;									// start of free storage in buffer
+	size_t heapReserve;									// amount of remaining free storage in buffer
+
+	#if defined( __STATISTICS__ ) || defined( __CFA_DEBUG__ )
+	Heap * nextHeapManager;								// intrusive link of existing heaps; traversed to collect statistics or check unfreed storage
+	#endif // __STATISTICS__ || __CFA_DEBUG__
+	Heap * nextFreeHeapManager;							// intrusive link of free heaps from terminated threads; reused by new threads
+
+	#ifdef __CFA_DEBUG__
+	int64_t allocUnfreed;								// running total of allocations minus frees; can be negative
+	#endif // __CFA_DEBUG__
+
+	#ifdef __STATISTICS__
+	HeapStatistics stats;								// local statistic table for this heap
+	#endif // __STATISTICS__
 }; // Heap
 
 #if BUCKETLOCK == LOCKFREE
-static inline {
+inline __attribute__((always_inline))
+static {
 	Link(Heap.Storage) * ?`next( Heap.Storage * this ) { return &this->header.kind.real.next; }
 	void ?{}( Heap.FreeHeader & ) {}
@@ -229,16 +300,46 @@
 #endif // LOCKFREE
 
-static inline size_t getKey( const Heap.FreeHeader & freeheader ) { return freeheader.blockSize; }
+
+struct HeapMaster {										// state shared by all heaps; one static instance (heapMaster) is declared below
+	SpinLock_t extLock;									// protects allocation-buffer extension
+	SpinLock_t mgrLock;									// protects freeHeapManagersList, heapManagersList, heapManagersStorage, heapManagersStorageEnd
+
+	void * heapBegin;									// start of heap
+	void * heapEnd;										// logical end of heap
+	size_t heapRemaining;								// amount of storage not allocated in the current chunk
+	size_t pageSize;									// architecture pagesize
+	size_t heapExpand;									// sbrk advance
+	size_t mmapStart;									// cross over point for mmap
+	unsigned int maxBucketsUsed;						// maximum number of buckets in use
+
+	Heap * heapManagersList;							// heap-list head
+	Heap * freeHeapManagersList;						// free-list head
+
+	// Heap superblocks are not linked; heaps in superblocks are linked via intrusive links.
+	Heap * heapManagersStorage;							// next heap to use in heap superblock
+	Heap * heapManagersStorageEnd;						// logical heap outside of superblock's end
+
+	#ifdef __STATISTICS__
+	HeapStatistics stats;								// global stats to which thread-local heaps add their counters when exiting
+	unsigned long int threads_started, threads_exited;	// counts threads that have started and exited
+	unsigned long int reused_heap, new_heap;			// counts reusability of heaps
+	unsigned int sbrk_calls;							// number of sbrk calls
+	unsigned long long int sbrk_storage;				// storage obtained through sbrk, in bytes
+	int stats_fd;										// file descriptor used when printing statistics
+	#endif // __STATISTICS__
+}; // HeapMaster
 
 
 #ifdef FASTLOOKUP
-enum { LookupSizes = 65_536 + sizeof(Heap.Storage) }; // number of fast lookup sizes
+enum { LookupSizes = 65_536 + sizeof(Heap.Storage) };	// number of fast lookup sizes
 static unsigned char lookup[LookupSizes];				// O(1) lookup for small sizes
 #endif // FASTLOOKUP
 
-static const off_t mmapFd = -1;							// fake or actual fd for anonymous file
-#ifdef __CFA_DEBUG__
-static bool heapBoot = 0;								// detect recursion during boot
-#endif // __CFA_DEBUG__
+static volatile bool heapMasterBootFlag = false;		// trigger for first heap
+static HeapMaster heapMaster @= {};						// program global
+
+static void heapMasterCtor();
+static void heapMasterDtor();
+static Heap * getHeap();
 
 
@@ -268,45 +369,241 @@
 static_assert( NoBucketSizes == sizeof(bucketSizes) / sizeof(bucketSizes[0] ), "size of bucket array wrong" );
 
-// The constructor for heapManager is called explicitly in memory_startup.
-static Heap heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
+
+// extern visibility, used by runtime kernel
+libcfa_public size_t __page_size;						// architecture pagesize
+libcfa_public int __map_prot;							// common mmap/mprotect protection
+
+
+// Thread-local storage is allocated lazily when the storage is accessed.
+static __thread size_t PAD1 CALIGN TLSMODEL __attribute__(( unused )); // protect false sharing
+static __thread Heap * volatile heapManager CALIGN TLSMODEL;
+static __thread size_t PAD2 CALIGN TLSMODEL __attribute__(( unused )); // protect further false sharing
+
+
+// declare helper functions for HeapMaster
+void noMemory();										// forward, called by "builtin_new" when malloc returns 0
+
+
+// generic Bsearchl does not inline, so substitute with hand-coded binary-search.  Returns the lowest index whose value is >= key, or dim if every value is < key; vals must be sorted ascending.
+inline __attribute__((always_inline))
+static size_t Bsearchl( unsigned int key, const unsigned int vals[], size_t dim ) {
+	size_t l = 0, m, h = dim;							// search the half-open index range [l, h)
+	while ( l < h ) {
+		m = (l + h) / 2;
+		if ( (unsigned int &)(vals[m]) < key ) {		// cast away const
+			l = m + 1;									// vals[m] too small, answer is to the right of m
+		} else {
+			h = m;										// vals[m] >= key, m remains a candidate
+		} // if
+	} // while
+	return l;
+} // Bsearchl
+
+
+void heapMasterCtor() with( heapMaster ) {				// one-time initialization of the program-global heapMaster
+	// Singleton pattern to initialize heap master
+
+	verify( bucketSizes[0] == (16 + sizeof(Heap.Storage)) );
+
+	pageSize = __page_size = sysconf( _SC_PAGESIZE );	// also set heapMaster.pageSize, which the mmapStart verify below reads
+	__map_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+
+	?{}( extLock );
+	?{}( mgrLock );
+
+	char * end = (char *)sbrk( 0 );
+	heapBegin = heapEnd = sbrk( (char *)ceiling2( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment; NOTE(review): sbrk returns the previous break, confirm the pre-alignment address is intended here
+	heapRemaining = 0;
+	heapExpand = malloc_expansion();
+	mmapStart = malloc_mmap_start();
+
+	// find the smallest bucket size greater than or equal to the mmapStart size
+	maxBucketsUsed = Bsearchl( mmapStart, bucketSizes, NoBucketSizes ); // binary search
+
+	verify( (mmapStart >= pageSize) && (bucketSizes[NoBucketSizes - 1] >= mmapStart) );
+	verify( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
+	verify( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
+
+	heapManagersList = 0p;								// no heaps created yet
+	freeHeapManagersList = 0p;							// no retired heaps to reuse yet
+
+	heapManagersStorage = 0p;							// no heap superblock mapped yet
+	heapManagersStorageEnd = 0p;
+
+	#ifdef __STATISTICS__
+	HeapStatisticsCtor( stats );						// clear statistic counters
+	threads_started = threads_exited = 0;
+	reused_heap = new_heap = 0;
+	sbrk_calls = sbrk_storage = 0;
+	stats_fd = STDERR_FILENO;
+	#endif // __STATISTICS__
+
+	#ifdef FASTLOOKUP
+	for ( unsigned int i = 0, idx = 0; i < LookupSizes; i += 1 ) { // map each small size i to the index of the first bucket that can hold it
+		if ( i > bucketSizes[idx] ) idx += 1;
+		lookup[i] = idx;
+		verify( i <= bucketSizes[idx] );
+		verify( (i <= 32 && idx == 0) || (i > bucketSizes[idx - 1]) );
+	} // for
+	#endif // FASTLOOKUP
+
+	heapMasterBootFlag = true;							// heapManagerCtor tests this flag to decide whether to call this routine
+} // heapMasterCtor
+
+
+#define NO_MEMORY_MSG "**** Error **** insufficient heap memory available to allocate %zd new bytes."
+
+Heap * getHeap() with( heapMaster ) {					// return a heap: reuse a retired one, otherwise take the next slot of a superblock
+	Heap * heap;										// not synchronized here; the visible caller (heapManagerCtor) holds mgrLock
+	if ( freeHeapManagersList ) {						// free heap for reused ?
+		heap = freeHeapManagersList;
+		freeHeapManagersList = heap->nextFreeHeapManager;
+
+		#ifdef __STATISTICS__
+		reused_heap += 1;
+		#endif // __STATISTICS__
+	} else {											// free heap not found, create new
+		// Heap size is about 12K, FreeHeader (128 bytes because of cache alignment) * NoBucketSizes (91) => 128 heaps *
+		// 12K ~= 120K byte superblock.  Where 128-heap superblock handles a medium sized multi-processor server.
+		size_t remaining = heapManagersStorageEnd - heapManagersStorage; // remaining free heaps in superblock
+		if ( ! heapManagersStorage || remaining == 0 ) { // no superblock yet or current superblock exhausted ?
+			// Each block of heaps is a multiple of the number of cores on the computer.
+			int HeapDim = get_nprocs();					// get_nprocs_conf does not work
+			size_t size = HeapDim * sizeof( Heap );
+
+			heapManagersStorage = (Heap *)mmap( 0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 );
+			if ( unlikely( heapManagersStorage == (Heap *)MAP_FAILED ) ) { // failed ?
+				if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, size ); // no memory
+				// Do not call strerror( errno ) as it may call malloc.
+				abort( "**** Error **** attempt to allocate block of heaps of size %zu bytes and mmap failed with errno %d.", size, errno );
+			} // if
+			heapManagersStorageEnd = &heapManagersStorage[HeapDim]; // outside array
+		} // if
+
+		heap = heapManagersStorage;						// take next unused heap in superblock
+		heapManagersStorage = heapManagersStorage + 1; // bump next heap
+
+		#if defined( __STATISTICS__ ) || defined( __CFA_DEBUG__ )
+		heap->nextHeapManager = heapManagersList;		// push onto list of all heaps
+		#endif // __STATISTICS__ || __CFA_DEBUG__
+		heapManagersList = heap;
+
+		#ifdef __STATISTICS__
+		new_heap += 1;
+		#endif // __STATISTICS__
+
+		with( *heap ) {
+			for ( unsigned int j = 0; j < NoBucketSizes; j += 1 ) { // initialize free lists
+				#ifdef OWNERSHIP
+				#ifdef RETURNSPIN
+				?{}( freeLists[j].returnLock );
+				#endif // RETURNSPIN
+				freeLists[j].returnList = 0p;
+				#endif // OWNERSHIP
+				freeLists[j].freeList = 0p;
+				freeLists[j].homeManager = heap;
+				freeLists[j].blockSize = bucketSizes[j];
+			} // for
+	
+			heapBuffer = 0p;							// new heap starts with no buffer storage
+			heapReserve = 0;
+			nextFreeHeapManager = 0p;
+			#ifdef __CFA_DEBUG__
+			allocUnfreed = 0;
+			#endif // __CFA_DEBUG__
+		} // with
+	} // if
+
+	return heap;
+} // getHeap
+
+
+void heapManagerCtor() libcfa_public {					// assign a heap to the calling thread's heapManager, initializing heapMaster first if needed
+	if ( unlikely( ! heapMasterBootFlag ) ) heapMasterCtor();
+
+	lock( heapMaster.mgrLock );		// protect heapMaster counters
+
+	// get storage for heap manager
+
+	heapManager = getHeap();							// thread-local pointer; getHeap reuses a free heap or creates one
+
+	#ifdef __STATISTICS__
+	HeapStatisticsCtor( heapManager->stats );			// heap local
+	heapMaster.threads_started += 1;
+	#endif // __STATISTICS__
+
+	unlock( heapMaster.mgrLock );
+} // heapManagerCtor
+
+
+void heapManagerDtor() libcfa_public {					// retire the calling thread's heap onto heapMaster's free list
+	lock( heapMaster.mgrLock );
+
+	// place heap on list of free heaps for reusability
+	heapManager->nextFreeHeapManager = heapMaster.freeHeapManagersList;
+	heapMaster.freeHeapManagersList = heapManager;		// push at head; getHeap pops from the head
+
+	#ifdef __STATISTICS__
+	heapMaster.threads_exited += 1;
+	#endif // __STATISTICS__
+
+	// Do not set heapManager to NULL because it is used after Cforall is shutdown but before the program shuts down.
+
+	unlock( heapMaster.mgrLock );
+} // heapManagerDtor
 
 
 //####################### Memory Allocation Routines Helpers ####################
 
-
-#ifdef __CFA_DEBUG__
-static size_t allocUnfreed;								// running total of allocations minus frees
-
-static void prtUnfreed() {
-	if ( allocUnfreed != 0 ) {
-		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
-		char helpText[512];
-		__cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
-									"CFA warning (UNIX pid:%ld) : program terminating with %zu(0x%zx) bytes of storage allocated but not freed.\n"
-									"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
-									(long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid
-	} // if
-} // prtUnfreed
 
 extern int cfa_main_returned;							// from interpose.cfa
 extern "C" {
+	void memory_startup( void ) {						// create the calling thread's heap unless heapMaster is already booted
+		if ( ! heapMasterBootFlag ) heapManagerCtor();	// sanity check
+	} // memory_startup
+
+	void memory_shutdown( void ) {						// retire the calling thread's heap
+		heapManagerDtor();
+	} // memory_shutdown
+
 	void heapAppStart() {								// called by __cfaabi_appready_startup
-		allocUnfreed = 0;
+		verify( heapManager );
+		#ifdef __CFA_DEBUG__
+		heapManager->allocUnfreed = 0;					// clear prior allocation counts
+		#endif // __CFA_DEBUG__
+
+		#ifdef __STATISTICS__
+		HeapStatisticsCtor( heapManager->stats );		// clear prior statistic counters
+		#endif // __STATISTICS__
 	} // heapAppStart
 
 	void heapAppStop() {								// called by __cfaabi_appready_startdown
-		fclose( stdin ); fclose( stdout );
-		if ( cfa_main_returned ) prtUnfreed();			// do not check unfreed storage if exit called
+		fclose( stdin ); fclose( stdout );				// free buffer storage
+	  if ( ! cfa_main_returned ) return;				// do not check unfreed storage if exit called
+
+		#ifdef __CFA_DEBUG__
+		// allocUnfreed is set to 0 when a heap is created and it accumulates any unfreed storage during its multiple thread
+		// usages.  At the end, add up each heap allocUnfreed value across all heaps to get the total unfreed storage.
+		int64_t allocUnfreed = 0;
+		for ( Heap * heap = heapMaster.heapManagersList; heap; heap = heap->nextHeapManager ) {
+			allocUnfreed += heap->allocUnfreed;
+		} // for
+
+		allocUnfreed -= malloc_unfreed();				// subtract any user specified unfreed storage
+		if ( allocUnfreed > 0 ) {
+			// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
+			char helpText[512];
+			__cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
+										"CFA warning (UNIX pid:%ld) : program terminating with %ju(0x%jx) bytes of storage allocated but not freed.\n"
+										"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
+										(long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid
+		} // if
+		#endif // __CFA_DEBUG__
 	} // heapAppStop
 } // extern "C"
-#endif // __CFA_DEBUG__
 
 
 #ifdef __STATISTICS__
 static HeapStatistics stats;							// zero filled
-static unsigned int sbrk_calls;
-static unsigned long long int sbrk_storage;
-// Statistics file descriptor (changed by malloc_stats_fd).
-static int stats_fd = STDERR_FILENO;					// default stderr
 
 #define prtFmt \
@@ -321,24 +618,30 @@
 	"  realloc   >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n" \
 	"  free      !null calls %'u; null calls %'u; storage %'llu / %'llu bytes\n" \
-	"  sbrk      calls %'u; storage %'llu bytes\n"						\
-	"  mmap      calls %'u; storage %'llu / %'llu bytes\n"				\
-	"  munmap    calls %'u; storage %'llu / %'llu bytes\n"				\
+	"  return    pulls %'u; pushes %'u; storage %'llu / %'llu bytes\n" \
+	"  sbrk      calls %'u; storage %'llu bytes\n" \
+	"  mmap      calls %'u; storage %'llu / %'llu bytes\n" \
+	"  munmap    calls %'u; storage %'llu / %'llu bytes\n" \
+	"  threads   started %'lu; exited %'lu\n" \
+	"  heaps     new %'lu; reused %'lu\n"
 
 // Use "write" because streams may be shutdown when calls are made.
-static int printStats() {								// see malloc_stats
+static int printStats( HeapStatistics & stats ) with( heapMaster, stats ) {	// see malloc_stats
 	char helpText[sizeof(prtFmt) + 1024];				// space for message and values
-	return __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText), prtFmt,
-			stats.malloc_calls, stats.malloc_0_calls, stats.malloc_storage_request, stats.malloc_storage_alloc,
-			stats.aalloc_calls, stats.aalloc_0_calls, stats.aalloc_storage_request, stats.aalloc_storage_alloc,
-			stats.calloc_calls, stats.calloc_0_calls, stats.calloc_storage_request, stats.calloc_storage_alloc,
-			stats.memalign_calls, stats.memalign_0_calls, stats.memalign_storage_request, stats.memalign_storage_alloc,
-			stats.amemalign_calls, stats.amemalign_0_calls, stats.amemalign_storage_request, stats.amemalign_storage_alloc,
-			stats.cmemalign_calls, stats.cmemalign_0_calls, stats.cmemalign_storage_request, stats.cmemalign_storage_alloc,
-			stats.resize_calls, stats.resize_0_calls, stats.resize_storage_request, stats.resize_storage_alloc,
-			stats.realloc_calls, stats.realloc_0_calls, stats.realloc_storage_request, stats.realloc_storage_alloc,
-			stats.free_calls, stats.free_null_calls, stats.free_storage_request, stats.free_storage_alloc,
+	return __cfaabi_bits_print_buffer( stats_fd, helpText, sizeof(helpText), prtFmt,
+			malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc,
+			aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc,
+			calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc,
+			memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc,
+			amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc,
+			cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc,
+			resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc,
+			realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc,
+			free_calls, free_null_calls, free_storage_request, free_storage_alloc,
+			return_pulls, return_pushes, return_storage_request, return_storage_alloc,
 			sbrk_calls, sbrk_storage,
-			stats.mmap_calls, stats.mmap_storage_request, stats.mmap_storage_alloc,
-			stats.munmap_calls, stats.munmap_storage_request, stats.munmap_storage_alloc
+			mmap_calls, mmap_storage_request, mmap_storage_alloc,
+			munmap_calls, munmap_storage_request, munmap_storage_alloc,
+			threads_started, threads_exited,
+			new_heap, reused_heap
 		);
 } // printStats
@@ -358,62 +661,55 @@
 	"<total type=\"realloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" \
 	"<total type=\"free\" !null=\"%'u;\" 0 null=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" \
+	"<total type=\"return\" pulls=\"%'u;\" 0 pushes=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" \
 	"<total type=\"sbrk\" count=\"%'u;\" size=\"%'llu\"/> bytes\n" \
 	"<total type=\"mmap\" count=\"%'u;\" size=\"%'llu / %'llu\" / > bytes\n" \
 	"<total type=\"munmap\" count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n" \
+	"<total type=\"threads\" started=\"%'lu;\" exited=\"%'lu\"/>\n" \
+	"<total type=\"heaps\" new=\"%'lu;\" reused=\"%'lu\"/>\n" \
 	"</malloc>"
 
-static int printStatsXML( FILE * stream ) {				// see malloc_info
+static int printStatsXML( HeapStatistics & stats, FILE * stream ) with( heapMaster, stats ) { // see malloc_info
 	char helpText[sizeof(prtFmtXML) + 1024];			// space for message and values
 	return __cfaabi_bits_print_buffer( fileno( stream ), helpText, sizeof(helpText), prtFmtXML,
-			stats.malloc_calls, stats.malloc_0_calls, stats.malloc_storage_request, stats.malloc_storage_alloc,
-			stats.aalloc_calls, stats.aalloc_0_calls, stats.aalloc_storage_request, stats.aalloc_storage_alloc,
-			stats.calloc_calls, stats.calloc_0_calls, stats.calloc_storage_request, stats.calloc_storage_alloc,
-			stats.memalign_calls, stats.memalign_0_calls, stats.memalign_storage_request, stats.memalign_storage_alloc,
-			stats.amemalign_calls, stats.amemalign_0_calls, stats.amemalign_storage_request, stats.amemalign_storage_alloc,
-			stats.cmemalign_calls, stats.cmemalign_0_calls, stats.cmemalign_storage_request, stats.cmemalign_storage_alloc,
-			stats.resize_calls, stats.resize_0_calls, stats.resize_storage_request, stats.resize_storage_alloc,
-			stats.realloc_calls, stats.realloc_0_calls, stats.realloc_storage_request, stats.realloc_storage_alloc,
-			stats.free_calls, stats.free_null_calls, stats.free_storage_request, stats.free_storage_alloc,
+			malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc,
+			aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc,
+			calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc,
+			memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc,
+			amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc,
+			cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc,
+			resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc,
+			realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc,
+			free_calls, free_null_calls, free_storage_request, free_storage_alloc,
+			return_pulls, return_pushes, return_storage_request, return_storage_alloc,
 			sbrk_calls, sbrk_storage,
-			stats.mmap_calls, stats.mmap_storage_request, stats.mmap_storage_alloc,
-			stats.munmap_calls, stats.munmap_storage_request, stats.munmap_storage_alloc
+			mmap_calls, mmap_storage_request, mmap_storage_alloc,
+		    munmap_calls, munmap_storage_request, munmap_storage_alloc,
+			threads_started, threads_exited,
+			new_heap, reused_heap
 		);
 } // printStatsXML
+
+static HeapStatistics & collectStats( HeapStatistics & stats ) with( heapMaster ) { // add master and per-heap statistics into stats
+	lock( mgrLock );
+	// stats is accumulated into (+=), not cleared, so the caller's existing values are kept.
+	stats += heapMaster.stats;
+	for ( Heap * heap = heapManagersList; heap; heap = heap->nextHeapManager ) {
+		stats += heap->stats;
+	} // for
+
+	unlock( mgrLock );
+	return stats;										// same object that was passed in
+} // collectStats
 #endif // __STATISTICS__
 
 
-// statically allocated variables => zero filled.
-static size_t heapExpand;								// sbrk advance
-static size_t mmapStart;								// cross over point for mmap
-static unsigned int maxBucketsUsed;						// maximum number of buckets in use
-// extern visibility, used by runtime kernel
-// would be cool to remove libcfa_public but it's needed for libcfathread
-libcfa_public size_t __page_size;							// architecture pagesize
-libcfa_public int __map_prot;								// common mmap/mprotect protection
-
-
-// thunk problem
-size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
-	size_t l = 0, m, h = dim;
-	while ( l < h ) {
-		m = (l + h) / 2;
-		if ( (unsigned int &)(vals[m]) < key ) {		// cast away const
-			l = m + 1;
-		} else {
-			h = m;
-		} // if
-	} // while
-	return l;
-} // Bsearchl
-
-
-static inline bool setMmapStart( size_t value ) {		// true => mmapped, false => sbrk
+static bool setMmapStart( size_t value ) with( heapMaster ) { // true => mmapped, false => sbrk
   if ( value < __page_size || bucketSizes[NoBucketSizes - 1] < value ) return false;
 	mmapStart = value;									// set global
 
 	// find the closest bucket size less than or equal to the mmapStart size
-	maxBucketsUsed = Bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
-	assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
-	assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
+	maxBucketsUsed = Bsearchl( mmapStart, bucketSizes, NoBucketSizes ); // binary search
+	verify( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
+	verify( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
 	return true;
 } // setMmapStart
@@ -438,5 +734,6 @@
 
 
-static inline void checkAlign( size_t alignment ) {
+inline __attribute__((always_inline))
+static void checkAlign( size_t alignment ) {
 	if ( unlikely( alignment < libAlign() || ! is_pow2( alignment ) ) ) {
 		abort( "**** Error **** alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, libAlign() );
@@ -445,5 +742,6 @@
 
 
-static inline void checkHeader( bool check, const char name[], void * addr ) {
+inline __attribute__((always_inline))
+static void checkHeader( bool check, const char name[], void * addr ) {
 	if ( unlikely( check ) ) {							// bad address ?
 		abort( "**** Error **** attempt to %s storage %p with address outside the heap.\n"
@@ -470,5 +768,6 @@
 
 
-static inline void fakeHeader( Heap.Storage.Header *& header, size_t & alignment ) {
+inline __attribute__((always_inline))
+static void fakeHeader( Heap.Storage.Header *& header, size_t & alignment ) {
 	if ( unlikely( AlignmentBit( header ) ) ) {			// fake header ?
 		alignment = ClearAlignmentBit( header );		// clear flag from value
@@ -483,6 +782,7 @@
 
 
-static inline bool headers( const char name[] __attribute__(( unused )), void * addr, Heap.Storage.Header *& header,
-							Heap.FreeHeader *& freeHead, size_t & size, size_t & alignment ) with( heapManager ) {
+inline __attribute__((always_inline))
+static bool headers( const char name[] __attribute__(( unused )), void * addr, Heap.Storage.Header *& header,
+							Heap.FreeHeader *& freeHead, size_t & size, size_t & alignment ) with( heapMaster, *heapManager ) {
 	header = HeaderAddr( addr );
 
@@ -509,7 +809,9 @@
 	checkHeader( header < (Heap.Storage.Header *)heapBegin || (Heap.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
 
+	Heap * homeManager;
 	if ( unlikely( freeHead == 0p || // freed and only free-list node => null link
 				   // freed and link points at another free block not to a bucket in the bucket array.
-				   freeHead < &freeLists[0] || &freeLists[NoBucketSizes] <= freeHead ) ) {
+				   (homeManager = freeHead->homeManager, freeHead < &homeManager->freeLists[0] ||
+					&homeManager->freeLists[NoBucketSizes] <= freeHead ) ) ) {
 		abort( "**** Error **** attempt to %s storage %p with corrupted header.\n"
 			   "Possible cause is duplicate free on same block or overwriting of header information.",
@@ -521,27 +823,7 @@
 } // headers
 
-// #ifdef __CFA_DEBUG__
-// #if __SIZEOF_POINTER__ == 4
-// #define MASK 0xdeadbeef
-// #else
-// #define MASK 0xdeadbeefdeadbeef
-// #endif
-// #define STRIDE size_t
-
-// static void * Memset( void * addr, STRIDE size ) {		// debug only
-// 	if ( size % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, size %zd not multiple of %zd.", size, sizeof(STRIDE) );
-// 	if ( (STRIDE)addr % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, addr %p not multiple of %zd.", addr, sizeof(STRIDE) );
-
-// 	STRIDE * end = (STRIDE *)addr + size / sizeof(STRIDE);
-// 	for ( STRIDE * p = (STRIDE *)addr; p < end; p += 1 ) *p = MASK;
-// 	return addr;
-// } // Memset
-// #endif // __CFA_DEBUG__
-
-
-#define NO_MEMORY_MSG "insufficient heap memory available for allocating %zd new bytes."
-
-static inline void * extend( size_t size ) with( heapManager ) {
-	lock( extlock __cfaabi_dbg_ctx2 );
+
+static void * master_extend( size_t size ) with( heapMaster ) {
+	lock( extLock );
 
 	ptrdiff_t rem = heapRemaining - size;
@@ -549,18 +831,18 @@
 		// If the size requested is bigger than the current remaining storage, increase the size of the heap.
 
-		size_t increase = ceiling2( size > heapExpand ? size : heapExpand, __page_size );
+		size_t increase = ceiling2( size > heapExpand ? size : heapExpand, libAlign() );
 		// Do not call abort or strerror( errno ) as they may call malloc.
-		if ( sbrk( increase ) == (void *)-1 ) {			// failed, no memory ?
-			unlock( extlock );
-			__cfaabi_bits_print_nolock( STDERR_FILENO, NO_MEMORY_MSG, size );
-			_exit( EXIT_FAILURE );						// give up
+		if ( unlikely( sbrk( increase ) == (void *)-1 ) ) {	// failed, no memory ?
+			unlock( extLock );
+			abort( NO_MEMORY_MSG, size );				// no memory
 		} // if
 
 		// Make storage executable for thunks.
 		if ( mprotect( (char *)heapEnd + heapRemaining, increase, __map_prot ) ) {
-			unlock( extlock );
-			__cfaabi_bits_print_nolock( STDERR_FILENO, "extend() : internal error, mprotect failure, heapEnd:%p size:%zd, errno:%d.\n", heapEnd, increase, errno );
-			_exit( EXIT_FAILURE );
-		} // if
+			unlock( extLock );
+			abort( "**** Error **** attempt to make heap storage executable for thunks and mprotect failed with errno %d.", errno );
+		} // if
+
+		rem = heapRemaining + increase - size;
 
 		#ifdef __STATISTICS__
@@ -568,12 +850,4 @@
 		sbrk_storage += increase;
 		#endif // __STATISTICS__
-
-		#ifdef __CFA_DEBUG__
-		// Set new memory to garbage so subsequent uninitialized usages might fail.
-		memset( (char *)heapEnd + heapRemaining, '\xde', increase );
-		//Memset( (char *)heapEnd + heapRemaining, increase );
-		#endif // __CFA_DEBUG__
-
-		rem = heapRemaining + increase - size;
 	} // if
 
@@ -581,75 +855,200 @@
 	heapRemaining = rem;
 	heapEnd = (char *)heapEnd + size;
-	unlock( extlock );
+
+	unlock( extLock );
 	return block;
-} // extend
-
-
-static inline void * doMalloc( size_t size ) with( heapManager ) {
-	Heap.Storage * block;						// pointer to new block of storage
+} // master_extend
+
+
+__attribute__(( noinline ))								// carve size bytes from this heap's reserve; refill from master_extend when too small
+static void * manager_extend( size_t size ) with( *heapManager ) {
+	ptrdiff_t rem = heapReserve - size;
+
+	if ( unlikely( rem < 0 ) ) {						// negative
+		// If the size requested is bigger than the current remaining reserve, use the current reserve to populate
+		// smaller freeLists, and increase the reserve.
+
+		rem = heapReserve;								// positive
+
+		if ( rem >= bucketSizes[0] ) {					// minimal size ? otherwise ignore
+			size_t bucket;
+			#ifdef FASTLOOKUP
+			if ( likely( rem < LookupSizes ) ) bucket = lookup[rem]; else // table hit => skip the binary search below
+			#endif // FASTLOOKUP
+				bucket = Bsearchl( rem, bucketSizes, heapMaster.maxBucketsUsed );
+			verify( 0 <= bucket && bucket <= heapMaster.maxBucketsUsed );
+			Heap.FreeHeader * freeHead = &(freeLists[bucket]);
+
+			// The remaining storage may not be bucket size, whereas all other allocations are. Round down to previous
+			// bucket size in this case.
+			if ( unlikely( freeHead->blockSize > (size_t)rem ) ) freeHead -= 1;
+			Heap.Storage * block = (Heap.Storage *)heapBuffer;
+
+			block->header.kind.real.next = freeHead->freeList; // push on stack
+			freeHead->freeList = block;
+		} // if
+
+		size_t increase = ceiling( size > ( heapMaster.heapExpand / 10 ) ? size : ( heapMaster.heapExpand / 10 ), libAlign() );
+		heapBuffer = master_extend( increase );
+		rem = increase - size;
+	} // if
+
+	Heap.Storage * block = (Heap.Storage *)heapBuffer;
+	heapReserve = rem;
+	heapBuffer = (char *)heapBuffer + size;
+	// block is the old start of heapBuffer; the reserve now begins size bytes after it
+	return block;
+} // manager_extend
+
+
+#define BOOT_HEAP_MANAGER \
+  	if ( unlikely( ! heapMasterBootFlag ) ) { \
+		heapManagerCtor(); /* trigger for first heap */ \
+	} /* if */
+
+#ifdef __STATISTICS__
+#define STAT_NAME __counter
+#define STAT_PARM , unsigned int STAT_NAME
+#define STAT_ARG( name ) , name
+#define STAT_0_CNT( counter ) stats.counters[counter].calls_0 += 1
+#else
+#define STAT_NAME
+#define STAT_PARM
+#define STAT_ARG( name )
+#define STAT_0_CNT( counter )
+#endif // __STATISTICS__
+
+#define PROLOG( counter, ... ) \
+	BOOT_HEAP_MANAGER; \
+	if ( unlikely( size == 0 ) ||						/* 0 BYTE ALLOCATION RETURNS NULL POINTER */ \
+		unlikely( size > ULONG_MAX - sizeof(Heap.Storage) ) ) { /* error check */ \
+		STAT_0_CNT( counter ); \
+		__VA_ARGS__; \
+		return 0p; \
+	} /* if */
+
+
+#define SCRUB_SIZE 1024lu
+// Do not use '\xfe' for scrubbing because dereferencing an address composed of it causes a SIGSEGV *without* a valid IP
+// pointer in the interrupt frame.
+#define SCRUB '\xff'
+
+static void * doMalloc( size_t size STAT_PARM ) libcfa_nopreempt with( *heapManager ) {
+	PROLOG( STAT_NAME );
+
+	verify( heapManager );
+	Heap.Storage * block;								// pointer to new block of storage
 
 	// Look up size in the size list.  Make sure the user request includes space for the header that must be allocated
 	// along with the block and is a multiple of the alignment size.
-
 	size_t tsize = size + sizeof(Heap.Storage);
 
-	if ( likely( tsize < mmapStart ) ) {				// small size => sbrk
-		size_t posn;
+	#ifdef __STATISTICS__
+	stats.counters[STAT_NAME].calls += 1;
+	stats.counters[STAT_NAME].request += size;
+	#endif // __STATISTICS__
+
+	#ifdef __CFA_DEBUG__
+	allocUnfreed += size;
+	#endif // __CFA_DEBUG__
+
+	if ( likely( tsize < heapMaster.mmapStart ) ) {		// small size => sbrk
+		size_t bucket;
 		#ifdef FASTLOOKUP
-		if ( tsize < LookupSizes ) posn = lookup[tsize];
+		if ( likely( tsize < LookupSizes ) ) bucket = lookup[tsize];
 		else
 		#endif // FASTLOOKUP
-			posn = Bsearchl( (unsigned int)tsize, bucketSizes, (size_t)maxBucketsUsed );
-		Heap.FreeHeader * freeElem = &freeLists[posn];
-		verify( freeElem <= &freeLists[maxBucketsUsed] ); // subscripting error ?
-		verify( tsize <= freeElem->blockSize );			// search failure ?
-		tsize = freeElem->blockSize;					// total space needed for request
+			bucket = Bsearchl( tsize, bucketSizes, heapMaster.maxBucketsUsed );
+		verify( 0 <= bucket && bucket <= heapMaster.maxBucketsUsed );
+		Heap.FreeHeader * freeHead = &freeLists[bucket];
+
+		verify( freeHead <= &freeLists[heapMaster.maxBucketsUsed] ); // subscripting error ?
+		verify( tsize <= freeHead->blockSize );			// search failure ?
+
+		tsize = freeHead->blockSize;					// total space needed for request
+		#ifdef __STATISTICS__
+		stats.counters[STAT_NAME].alloc += tsize;
+		#endif // __STATISTICS__
 
 		// Spin until the lock is acquired for this particular size of block.
 
 		#if BUCKETLOCK == SPINLOCK
-		lock( freeElem->lock __cfaabi_dbg_ctx2 );
-		block = freeElem->freeList;						// remove node from stack
+		block = freeHead->freeList;						// remove node from stack
 		#else
-		block = pop( freeElem->freeList );
+		block = pop( freeHead->freeList );
 		#endif // BUCKETLOCK
 		if ( unlikely( block == 0p ) ) {				// no free block ?
+			#ifdef OWNERSHIP
+			// Freelist for that size is empty, so carve it out of the heap, if there is enough left, or get some more
+			// and then carve it off.
+			#ifdef RETURNSPIN
 			#if BUCKETLOCK == SPINLOCK
-			unlock( freeElem->lock );
+			lock( freeHead->returnLock );
+			block = freeHead->returnList;
+			freeHead->returnList = 0p;
+			unlock( freeHead->returnLock );
+			#else
+			block = __atomic_exchange_n( &freeHead->returnList, nullptr, __ATOMIC_SEQ_CST );
+			#endif // RETURNSPIN
+
+			if ( likely( block == 0p ) ) {			// return list also empty?
+			#endif // OWNERSHIP
+				// Do not leave kernel thread as manager_extend accesses heapManager.
+				disable_interrupts();
+				block = (Heap.Storage *)manager_extend( tsize ); // mutual exclusion on call
+				enable_interrupts( false );
+
+				// OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED.
+
+				#ifdef __CFA_DEBUG__
+				// Scrub new memory so subsequent uninitialized usages might fail. Only scrub the first 1024 bytes.
+				memset( block->data, SCRUB, min( SCRUB_SIZE, tsize - sizeof(Heap.Storage) ) );
+				#endif // __CFA_DEBUG__
 			#endif // BUCKETLOCK
-
-			// Freelist for that size was empty, so carve it out of the heap if there's enough left, or get some more
-			// and then carve it off.
-
-			block = (Heap.Storage *)extend( tsize );	// mutual exclusion on call
-		#if BUCKETLOCK == SPINLOCK
+			#ifdef OWNERSHIP
+			} else {									// merge returnList into freeHead
+				#ifdef __STATISTICS__
+				stats.return_pulls += 1;
+				#endif // __STATISTICS__
+
+				// OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED.
+
+				freeHead->freeList = block->header.kind.real.next;
+			} // if
+			#endif // OWNERSHIP
 		} else {
-			freeElem->freeList = block->header.kind.real.next;
-			unlock( freeElem->lock );
-		#endif // BUCKETLOCK
-		} // if
-
-		block->header.kind.real.home = freeElem;		// pointer back to free list of apropriate size
+			// Memory is scrubbed in doFree.
+			freeHead->freeList = block->header.kind.real.next;
+		} // if
+
+		block->header.kind.real.home = freeHead;		// pointer back to free list of apropriate size
 	} else {											// large size => mmap
   if ( unlikely( size > ULONG_MAX - __page_size ) ) return 0p;
 		tsize = ceiling2( tsize, __page_size );			// must be multiple of page size
 		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.mmap_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &stats.mmap_storage_request, size, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &stats.mmap_storage_alloc, tsize, __ATOMIC_SEQ_CST );
+		stats.counters[STAT_NAME].alloc += tsize;
+		stats.mmap_calls += 1;
+		stats.mmap_storage_request += size;
+		stats.mmap_storage_alloc += tsize;
 		#endif // __STATISTICS__
 
-		block = (Heap.Storage *)mmap( 0, tsize, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, mmapFd, 0 );
-		if ( block == (Heap.Storage *)MAP_FAILED ) { // failed ?
+		disable_interrupts();
+		block = (Heap.Storage *)mmap( 0, tsize, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 );
+		enable_interrupts( false );
+
+		// OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED.
+
+		if ( unlikely( block == (Heap.Storage *)MAP_FAILED ) ) { // failed ?
 			if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, tsize ); // no memory
 			// Do not call strerror( errno ) as it may call malloc.
-			abort( "(Heap &)0x%p.doMalloc() : internal error, mmap failure, size:%zu errno:%d.", &heapManager, tsize, errno );
-		} //if
+			abort( "**** Error **** attempt to allocate large object (> %zu) of size %zu bytes and mmap failed with errno %d.", heapMaster.mmapStart, size, errno ); // threshold first, then request size
+		} // if
+		block->header.kind.real.blockSize = MarkMmappedBit( tsize ); // storage size for munmap
+
 		#ifdef __CFA_DEBUG__
-		// Set new memory to garbage so subsequent uninitialized usages might fail.
-		memset( block, '\xde', tsize );
-		//Memset( block, tsize );
+		// Scrub new memory so subsequent uninitialized usages might fail. Only scrub the first 1024 bytes.  The rest of
+		// the storage set to 0 by mmap.
+		memset( block->data, SCRUB, min( SCRUB_SIZE, tsize - sizeof(Heap.Storage) ) );
 		#endif // __CFA_DEBUG__
-		block->header.kind.real.blockSize = MarkMmappedBit( tsize ); // storage size for munmap
 	} // if
 
@@ -659,5 +1058,4 @@
 
 	#ifdef __CFA_DEBUG__
-	__atomic_add_fetch( &allocUnfreed, tsize, __ATOMIC_SEQ_CST );
 	if ( traceHeap() ) {
 		char helpText[64];
@@ -667,55 +1065,102 @@
 	#endif // __CFA_DEBUG__
 
+//	poll_interrupts();									// call rollforward
+
 	return addr;
 } // doMalloc
 
 
-static inline void doFree( void * addr ) with( heapManager ) {
+static void doFree( void * addr ) libcfa_nopreempt with( *heapManager ) {
+	verify( addr );
+
+	// detect free after thread-local storage destruction and use global stats in that case
+
+	Heap.Storage.Header * header;
+	Heap.FreeHeader * freeHead;
+	size_t size, alignment;
+
+	bool mapped = headers( "free", addr, header, freeHead, size, alignment );
+	#if defined( __STATISTICS__ ) || defined( __CFA_DEBUG__ )
+	size_t rsize = header->kind.real.size;				// optimization
+	#endif // __STATISTICS__ || __CFA_DEBUG__
+
+	#ifdef __STATISTICS__
+	stats.free_storage_request += rsize;
+	stats.free_storage_alloc += size;
+	#endif // __STATISTICS__
+
 	#ifdef __CFA_DEBUG__
-	if ( unlikely( heapManager.heapBegin == 0p ) ) {
-		abort( "doFree( %p ) : internal error, called before heap is initialized.", addr );
-	} // if
+	allocUnfreed -= rsize;
 	#endif // __CFA_DEBUG__
 
-	Heap.Storage.Header * header;
-	Heap.FreeHeader * freeElem;
-	size_t size, alignment;								// not used (see realloc)
-
-	if ( headers( "free", addr, header, freeElem, size, alignment ) ) { // mmapped ?
+	if ( unlikely( mapped ) ) {							// mmapped ?
 		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.munmap_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &stats.munmap_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &stats.munmap_storage_alloc, size, __ATOMIC_SEQ_CST );
+		stats.munmap_calls += 1;
+		stats.munmap_storage_request += rsize;
+		stats.munmap_storage_alloc += size;
 		#endif // __STATISTICS__
-		if ( munmap( header, size ) == -1 ) {
-			abort( "Attempt to deallocate storage %p not allocated or with corrupt header.\n"
-				   "Possible cause is invalid pointer.",
-				   addr );
+
+		// OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED.
+
+		// Does not matter where this storage is freed.
+		if ( unlikely( munmap( header, size ) == -1 ) ) {
+			// Do not call strerror( errno ) as it may call malloc.
+			abort( "**** Error **** attempt to deallocate large object %p and munmap failed with errno %d.\n"
+				   "Possible cause is invalid delete pointer: either not allocated or with corrupt header.",
+				   addr, errno );
 		} // if
 	} else {
 		#ifdef __CFA_DEBUG__
-		// Set free memory to garbage so subsequent usages might fail.
-		memset( ((Heap.Storage *)header)->data, '\xde', freeElem->blockSize - sizeof( Heap.Storage ) );
-		//Memset( ((Heap.Storage *)header)->data, freeElem->blockSize - sizeof( Heap.Storage ) );
+		// memset is NOT always inlined!
+		disable_interrupts();
+		// Scrub old memory so subsequent usages might fail. Only scrub the first/last SCRUB_SIZE bytes.
+		char * data = ((Heap.Storage *)header)->data;	// data address
+		size_t dsize = size - sizeof(Heap.Storage);		// data size
+		if ( dsize <= SCRUB_SIZE * 2 ) {
+			memset( data, SCRUB, dsize );				// scrub all
+		} else {
+			memset( data, SCRUB, SCRUB_SIZE );			// scrub front
+			memset( data + dsize - SCRUB_SIZE, SCRUB, SCRUB_SIZE ); // scrub back
+		} // if
+		enable_interrupts( false );
 		#endif // __CFA_DEBUG__
 
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.free_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &stats.free_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &stats.free_storage_alloc, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-		#if BUCKETLOCK == SPINLOCK
-		lock( freeElem->lock __cfaabi_dbg_ctx2 );		// acquire spin lock
-		header->kind.real.next = freeElem->freeList;	// push on stack
-		freeElem->freeList = (Heap.Storage *)header;
-		unlock( freeElem->lock );						// release spin lock
-		#else
-		push( freeElem->freeList, *(Heap.Storage *)header );
-		#endif // BUCKETLOCK
+		if ( likely( heapManager == freeHead->homeManager ) ) { // belongs to this thread
+			header->kind.real.next = freeHead->freeList; // push on stack
+			freeHead->freeList = (Heap.Storage *)header;
+		} else {										// return to thread owner
+			verify( heapManager );
+
+			#ifdef OWNERSHIP
+			#ifdef RETURNSPIN
+			lock( freeHead->returnLock );
+			header->kind.real.next = freeHead->returnList; // push to bucket return list
+			freeHead->returnList = (Heap.Storage *)header;
+			unlock( freeHead->returnLock );
+			#else										// lock free
+			header->kind.real.next = freeHead->returnList; // link new node to top node
+			// CAS resets header->kind.real.next = freeHead->returnList on failure
+			while ( ! __atomic_compare_exchange_n( &freeHead->returnList, &header->kind.real.next, header,
+												   false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) );
+			#endif // RETURNSPIN
+
+			#else										// no OWNERSHIP
+
+			freeHead = &heapManager->freeLists[ClearStickyBits( header->kind.real.home ) - &freeHead->homeManager->freeLists[0]]; // same bucket index, but in this thread's heap
+			header->kind.real.next = freeHead->freeList; // push on stack
+			freeHead->freeList = (Heap.Storage *)header;
+			#endif // ! OWNERSHIP
+
+			#ifdef __STATISTICS__
+			stats.return_pushes += 1;
+			stats.return_storage_request += rsize;
+			stats.return_storage_alloc += size;
+			#endif // __STATISTICS__
+
+			// OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED.
+		} // if
 	} // if
 
 	#ifdef __CFA_DEBUG__
-	__atomic_add_fetch( &allocUnfreed, -size, __ATOMIC_SEQ_CST );
 	if ( traceHeap() ) {
 		char helpText[64];
@@ -724,8 +1169,10 @@
 	} // if
 	#endif // __CFA_DEBUG__
+
+//	poll_interrupts();									// call rollforward
 } // doFree
 
 
-static size_t prtFree( Heap & manager ) with( manager ) {
+size_t prtFree( Heap & manager ) with( manager ) {
 	size_t total = 0;
 	#ifdef __STATISTICS__
@@ -733,5 +1180,5 @@
 	__cfaabi_bits_print_nolock( STDERR_FILENO, "\nBin lists (bin size : free blocks on list)\n" );
 	#endif // __STATISTICS__
-	for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {
+	for ( unsigned int i = 0; i < heapMaster.maxBucketsUsed; i += 1 ) {
 		size_t size = freeLists[i].blockSize;
 		#ifdef __STATISTICS__
@@ -764,84 +1211,30 @@
 	__cfaabi_bits_release();
 	#endif // __STATISTICS__
-	return (char *)heapEnd - (char *)heapBegin - total;
+	return (char *)heapMaster.heapEnd - (char *)heapMaster.heapBegin - total;
 } // prtFree
 
 
-static void ?{}( Heap & manager ) with( manager ) {
-	__page_size = sysconf( _SC_PAGESIZE );
-	__map_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
-
-	for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
-		freeLists[i].blockSize = bucketSizes[i];
-	} // for
-
-	#ifdef FASTLOOKUP
-	unsigned int idx = 0;
-	for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
-		if ( i > bucketSizes[idx] ) idx += 1;
-		lookup[i] = idx;
-	} // for
-	#endif // FASTLOOKUP
-
-	if ( ! setMmapStart( malloc_mmap_start() ) ) {
-		abort( "Heap : internal error, mmap start initialization failure." );
-	} // if
-	heapExpand = malloc_expansion();
-
-	char * end = (char *)sbrk( 0 );
-	heapBegin = heapEnd = sbrk( (char *)ceiling2( (long unsigned int)end, __page_size ) - end ); // move start of heap to multiple of alignment
-} // Heap
-
-
-static void ^?{}( Heap & ) {
-	#ifdef __STATISTICS__
-	if ( traceHeapTerm() ) {
-		printStats();
-		// prtUnfreed() called in heapAppStop()
-	} // if
-	#endif // __STATISTICS__
-} // ~Heap
-
-
-static void memory_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_MEMORY ) ));
-void memory_startup( void ) {
-	#ifdef __CFA_DEBUG__
-	if ( heapBoot ) {									// check for recursion during system boot
-		abort( "boot() : internal error, recursively invoked during system boot." );
-	} // if
-	heapBoot = true;
-	#endif // __CFA_DEBUG__
-
-	//verify( heapManager.heapBegin != 0 );
-	//heapManager{};
-	if ( heapManager.heapBegin == 0p ) heapManager{};	// sanity check
-} // memory_startup
-
-static void memory_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_MEMORY ) ));
-void memory_shutdown( void ) {
-	^heapManager{};
-} // memory_shutdown
-
-
-static inline void * mallocNoStats( size_t size ) {		// necessary for malloc statistics
-	verify( heapManager.heapBegin != 0p );				// called before memory_startup ?
-  if ( unlikely( size ) == 0 ) return 0p;				// 0 BYTE ALLOCATION RETURNS NULL POINTER
-
-#if __SIZEOF_POINTER__ == 8
-	verify( size < ((typeof(size_t))1 << 48) );
-#endif // __SIZEOF_POINTER__ == 8
-	return doMalloc( size );
-} // mallocNoStats
-
-
-static inline void * memalignNoStats( size_t alignment, size_t size ) {
-  if ( unlikely( size ) == 0 ) return 0p;				// 0 BYTE ALLOCATION RETURNS NULL POINTER
-
-	#ifdef __CFA_DEBUG__
+#ifdef __STATISTICS__
+static void incCalls( intptr_t statName ) libcfa_nopreempt { // add 1 to calls of counter statName in the current heapManager's stats
+	heapManager->stats.counters[statName].calls += 1;
+} // incCalls
+
+static void incZeroCalls( intptr_t statName ) libcfa_nopreempt { // add 1 to calls_0 of counter statName in the current heapManager's stats
+	heapManager->stats.counters[statName].calls_0 += 1;
+} // incZeroCalls
+#endif // __STATISTICS__
+
+#ifdef __CFA_DEBUG__
+static void incUnfreed( intptr_t offset ) libcfa_nopreempt { // add signed offset (callers may pass a negative value) to allocUnfreed
+	heapManager->allocUnfreed += offset;
+} // incUnfreed
+#endif // __CFA_DEBUG__
+
+
+static void * memalignNoStats( size_t alignment, size_t size STAT_PARM ) {
 	checkAlign( alignment );							// check alignment
-	#endif // __CFA_DEBUG__
-
-	// if alignment <= default alignment, do normal malloc as two headers are unnecessary
-  if ( unlikely( alignment <= libAlign() ) ) return mallocNoStats( size );
+
+	// if alignment <= default alignment or size == 0, do normal malloc as two headers are unnecessary
+  if ( unlikely( alignment <= libAlign() || size == 0 ) ) return doMalloc( size STAT_ARG( STAT_NAME ) );
 
 	// Allocate enough storage to guarantee an address on the alignment boundary, and sufficient space before it for
@@ -854,5 +1247,6 @@
 	// subtract libAlign() because it is already the minimum alignment
 	// add sizeof(Storage) for fake header
-	char * addr = (char *)mallocNoStats( size + alignment - libAlign() + sizeof(Heap.Storage) );
+	size_t offset = alignment - libAlign() + sizeof(Heap.Storage);
+	char * addr = (char *)doMalloc( size + offset STAT_ARG( STAT_NAME ) );
 
 	// address in the block of the "next" alignment address
@@ -860,10 +1254,15 @@
 
 	// address of header from malloc
-	Heap.Storage.Header * RealHeader = HeaderAddr( addr );
-	RealHeader->kind.real.size = size;					// correct size to eliminate above alignment offset
-	// address of fake header * before* the alignment location
+	Heap.Storage.Header * realHeader = HeaderAddr( addr );
+	realHeader->kind.real.size = size;					// correct size to eliminate above alignment offset
+	#ifdef __CFA_DEBUG__
+	incUnfreed( -offset );								// adjustment off the offset from call to doMalloc
+	#endif // __CFA_DEBUG__
+
+	// address of fake header *before* the alignment location
 	Heap.Storage.Header * fakeHeader = HeaderAddr( user );
+
 	// SKULLDUGGERY: insert the offset to the start of the actual storage block and remember alignment
-	fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)RealHeader;
+	fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)realHeader;
 	// SKULLDUGGERY: odd alignment implies fake header
 	fakeHeader->kind.fake.alignment = MarkAlignmentBit( alignment );
@@ -880,14 +1279,5 @@
 	// then malloc() returns a unique pointer value that can later be successfully passed to free().
 	void * malloc( size_t size ) libcfa_public {
-		#ifdef __STATISTICS__
-		if ( likely( size > 0 ) ) {
-			__atomic_add_fetch( &stats.malloc_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &stats.malloc_storage_request, size, __ATOMIC_SEQ_CST );
-		} else {
-			__atomic_add_fetch( &stats.malloc_0_calls, 1, __ATOMIC_SEQ_CST );
-		} // if
-		#endif // __STATISTICS__
-
-		return mallocNoStats( size );
+		return doMalloc( size STAT_ARG( MALLOC ) );
 	} // malloc
 
@@ -895,15 +1285,5 @@
 	// Same as malloc() except size bytes is an array of dim elements each of elemSize bytes.
 	void * aalloc( size_t dim, size_t elemSize ) libcfa_public {
-		size_t size = dim * elemSize;
-		#ifdef __STATISTICS__
-		if ( likely( size > 0 ) ) {
-			__atomic_add_fetch( &stats.aalloc_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &stats.aalloc_storage_request, size, __ATOMIC_SEQ_CST );
-		} else {
-			__atomic_add_fetch( &stats.aalloc_0_calls, 1, __ATOMIC_SEQ_CST );
-		} // if
-		#endif // __STATISTICS__
-
-		return mallocNoStats( size );
+		return doMalloc( dim * elemSize STAT_ARG( AALLOC ) );
 	} // aalloc
 
@@ -912,19 +1292,10 @@
 	void * calloc( size_t dim, size_t elemSize ) libcfa_public {
 		size_t size = dim * elemSize;
-	  if ( unlikely( size ) == 0 ) {			// 0 BYTE ALLOCATION RETURNS NULL POINTER
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &stats.calloc_0_calls, 1, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			return 0p;
-		} // if
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.calloc_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &stats.calloc_storage_request, dim * elemSize, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-		char * addr = (char *)mallocNoStats( size );
+		char * addr = (char *)doMalloc( size STAT_ARG( CALLOC ) );
+
+	  if ( unlikely( addr == NULL ) ) return NULL;		// stop further processing if 0p is returned
 
 		Heap.Storage.Header * header;
-		Heap.FreeHeader * freeElem;
+		Heap.FreeHeader * freeHead;
 		size_t bsize, alignment;
 
@@ -932,9 +1303,9 @@
 		bool mapped =
 			#endif // __CFA_DEBUG__
-			headers( "calloc", addr, header, freeElem, bsize, alignment );
+			headers( "calloc", addr, header, freeHead, bsize, alignment );
 
 		#ifndef __CFA_DEBUG__
 		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
-		if ( ! mapped )
+		if ( likely( ! mapped ) )
 		#endif // __CFA_DEBUG__
 			// <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined
@@ -952,27 +1323,14 @@
 	// call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done.
 	void * resize( void * oaddr, size_t size ) libcfa_public {
-		// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-	  if ( unlikely( size == 0 ) ) {					// special cases
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &stats.resize_0_calls, 1, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			free( oaddr );
-			return 0p;
-		} // if
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.resize_calls, 1, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-	  if ( unlikely( oaddr == 0p ) ) {
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &stats.resize_storage_request, size, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			return mallocNoStats( size );
-		} // if
+	  if ( unlikely( oaddr == 0p ) ) {				// => malloc( size )
+			return doMalloc( size STAT_ARG( RESIZE ) );
+		} // if
+
+		PROLOG( RESIZE, doFree( oaddr ) );				// => free( oaddr )
 
 		Heap.Storage.Header * header;
-		Heap.FreeHeader * freeElem;
+		Heap.FreeHeader * freeHead;
 		size_t bsize, oalign;
-		headers( "resize", oaddr, header, freeElem, bsize, oalign );
+		headers( "resize", oaddr, header, freeHead, bsize, oalign );
 
 		size_t odsize = DataStorage( bsize, oaddr, header ); // data storage available in bucket
@@ -980,15 +1338,18 @@
 		if ( oalign == libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
 			ClearZeroFillBit( header );					// no alignment and turn off 0 fill
+			#ifdef __CFA_DEBUG__
+			incUnfreed( size - header->kind.real.size ); // adjust by the change in requested size (new - old)
+			#endif // __CFA_DEBUG__
 			header->kind.real.size = size;				// reset allocation size
+			#ifdef __STATISTICS__
+			incCalls( RESIZE );
+			#endif // __STATISTICS__
 			return oaddr;
 		} // if
 
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.resize_storage_request, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
 		// change size, DO NOT preserve STICKY PROPERTIES.
-		free( oaddr );
-		return mallocNoStats( size );					// create new area
+		doFree( oaddr );								// free previous storage
+
+		return doMalloc( size STAT_ARG( RESIZE ) );		// create new area
 	} // resize
 
@@ -997,27 +1358,14 @@
 	// the old and new sizes.
 	void * realloc( void * oaddr, size_t size ) libcfa_public {
-		// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-	  if ( unlikely( size == 0 ) ) {					// special cases
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &stats.realloc_0_calls, 1, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			free( oaddr );
-			return 0p;
-		} // if
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.realloc_calls, 1, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-	  if ( unlikely( oaddr == 0p ) ) {
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &stats.realloc_storage_request, size, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			return mallocNoStats( size );
-		} // if
+	  if ( unlikely( oaddr == 0p ) ) {					// => malloc( size )
+		  return doMalloc( size STAT_ARG( REALLOC ) );
+		} // if
+
+		PROLOG( REALLOC, doFree( oaddr ) );				// => free( oaddr )
 
 		Heap.Storage.Header * header;
-		Heap.FreeHeader * freeElem;
+		Heap.FreeHeader * freeHead;
 		size_t bsize, oalign;
-		headers( "realloc", oaddr, header, freeElem, bsize, oalign );
+		headers( "realloc", oaddr, header, freeHead, bsize, oalign );
 
 		size_t odsize = DataStorage( bsize, oaddr, header ); // data storage available in bucket
@@ -1025,27 +1373,30 @@
 		bool ozfill = ZeroFillBit( header );			// old allocation zero filled
 	  if ( unlikely( size <= odsize ) && odsize <= size * 2 ) { // allow up to 50% wasted storage
-	  		header->kind.real.size = size;				// reset allocation size
+			#ifdef __CFA_DEBUG__
+			incUnfreed( size - header->kind.real.size ); // adjust by the change in requested size (new - old)
+			#endif // __CFA_DEBUG__
+			header->kind.real.size = size;				// reset allocation size
 	  		if ( unlikely( ozfill ) && size > osize ) {	// previous request zero fill and larger ?
 	  			memset( (char *)oaddr + osize, '\0', size - osize ); // initialize added storage
 	  		} // if
+			#ifdef __STATISTICS__
+			incCalls( REALLOC );
+			#endif // __STATISTICS__
 			return oaddr;
 		} // if
 
-		#ifdef __STATISTICS__
-	  	__atomic_add_fetch( &stats.realloc_storage_request, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
 		// change size and copy old content to new storage
 
 		void * naddr;
-		if ( likely( oalign == libAlign() ) ) {			// previous request not aligned ?
-			naddr = mallocNoStats( size );				// create new area
+		if ( likely( oalign <= libAlign() ) ) {			// previous request not aligned ?
+			naddr = doMalloc( size STAT_ARG( REALLOC ) ); // create new area
 		} else {
-			naddr = memalignNoStats( oalign, size );	// create new aligned area
-		} // if
-
-		headers( "realloc", naddr, header, freeElem, bsize, oalign );
+			naddr = memalignNoStats( oalign, size STAT_ARG( REALLOC ) ); // create new aligned area
+		} // if
+
+		headers( "realloc", naddr, header, freeHead, bsize, oalign );
+		// To preserve prior fill, the entire bucket must be copied versus the size.
 		memcpy( naddr, oaddr, min( osize, size ) );		// copy bytes
-		free( oaddr );
+		doFree( oaddr );								// free previous storage
 
 		if ( unlikely( ozfill ) ) {						// previous request zero fill ?
@@ -1067,14 +1418,5 @@
 	// Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete)
 	void * memalign( size_t alignment, size_t size ) libcfa_public {
-		#ifdef __STATISTICS__
-		if ( likely( size > 0 ) ) {
-			__atomic_add_fetch( &stats.memalign_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &stats.memalign_storage_request, size, __ATOMIC_SEQ_CST );
-		} else {
-			__atomic_add_fetch( &stats.memalign_0_calls, 1, __ATOMIC_SEQ_CST );
-		} // if
-		#endif // __STATISTICS__
-
-		return memalignNoStats( alignment, size );
+		return memalignNoStats( alignment, size STAT_ARG( MEMALIGN ) );
 	} // memalign
 
@@ -1082,15 +1424,5 @@
 	// Same as aalloc() with memory alignment.
 	void * amemalign( size_t alignment, size_t dim, size_t elemSize ) libcfa_public {
-		size_t size = dim * elemSize;
-		#ifdef __STATISTICS__
-		if ( likely( size > 0 ) ) {
-			__atomic_add_fetch( &stats.cmemalign_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &stats.cmemalign_storage_request, size, __ATOMIC_SEQ_CST );
-		} else {
-			__atomic_add_fetch( &stats.cmemalign_0_calls, 1, __ATOMIC_SEQ_CST );
-		} // if
-		#endif // __STATISTICS__
-
-		return memalignNoStats( alignment, size );
+		return memalignNoStats( alignment, dim * elemSize STAT_ARG( AMEMALIGN ) );
 	} // amemalign
 
@@ -1099,19 +1431,10 @@
 	void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) libcfa_public {
 		size_t size = dim * elemSize;
-	  if ( unlikely( size ) == 0 ) {					// 0 BYTE ALLOCATION RETURNS NULL POINTER
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &stats.cmemalign_0_calls, 1, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			return 0p;
-		} // if
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.cmemalign_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &stats.cmemalign_storage_request, dim * elemSize, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-		char * addr = (char *)memalignNoStats( alignment, size );
+		char * addr = (char *)memalignNoStats( alignment, size STAT_ARG( CMEMALIGN ) );
+
+	  if ( unlikely( addr == NULL ) ) return NULL;		// stop further processing if 0p is returned
 
 		Heap.Storage.Header * header;
-		Heap.FreeHeader * freeElem;
+		Heap.FreeHeader * freeHead;
 		size_t bsize;
 
@@ -1119,5 +1442,5 @@
 		bool mapped =
 			#endif // __CFA_DEBUG__
-			headers( "cmemalign", addr, header, freeElem, bsize, alignment );
+			headers( "cmemalign", addr, header, freeHead, bsize, alignment );
 
 		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
@@ -1169,20 +1492,19 @@
 	// 0p, no operation is performed.
 	void free( void * addr ) libcfa_public {
+//		verify( heapManager );
+
 	  if ( unlikely( addr == 0p ) ) {					// special case
 			#ifdef __STATISTICS__
-			__atomic_add_fetch( &stats.free_null_calls, 1, __ATOMIC_SEQ_CST );
+		  if ( heapManager )
+			incZeroCalls( FREE );
 			#endif // __STATISTICS__
-
-			// #ifdef __CFA_DEBUG__
-			// if ( traceHeap() ) {
-			// 	#define nullmsg "Free( 0x0 ) size:0\n"
-			// 	// Do not debug print free( 0p ), as it can cause recursive entry from sprintf.
-			// 	__cfaabi_dbg_write( nullmsg, sizeof(nullmsg) - 1 );
-			// } // if
-			// #endif // __CFA_DEBUG__
 			return;
-		} // exit
-
-		doFree( addr );
+		} // if
+
+		#ifdef __STATISTICS__
+		incCalls( FREE );
+		#endif // __STATISTICS__
+
+		doFree( addr );									// handles heapManager == nullptr
 	} // free
 
@@ -1227,8 +1549,8 @@
 	  if ( unlikely( addr == 0p ) ) return 0;			// null allocation has 0 size
 		Heap.Storage.Header * header;
-		Heap.FreeHeader * freeElem;
+		Heap.FreeHeader * freeHead;
 		size_t bsize, alignment;
 
-		headers( "malloc_usable_size", addr, header, freeElem, bsize, alignment );
+		headers( "malloc_usable_size", addr, header, freeHead, bsize, alignment );
 		return DataStorage( bsize, addr, header );		// data storage in bucket
 	} // malloc_usable_size
@@ -1238,7 +1560,13 @@
 	void malloc_stats( void ) libcfa_public {
 		#ifdef __STATISTICS__
-		printStats();
-		if ( prtFree() ) prtFree( heapManager );
+		HeapStatistics stats;
+		HeapStatisticsCtor( stats );
+		if ( printStats( collectStats( stats ) ) == -1 ) {
+		#else
+		#define MALLOC_STATS_MSG "malloc_stats statistics disabled.\n"
+		if ( write( STDERR_FILENO, MALLOC_STATS_MSG, sizeof( MALLOC_STATS_MSG ) - 1 /* size includes '\0' */ ) == -1 ) {
 		#endif // __STATISTICS__
+			abort( "**** Error **** write failed in malloc_stats" );
+		} // if
 	} // malloc_stats
 
@@ -1247,6 +1575,6 @@
 	int malloc_stats_fd( int fd __attribute__(( unused )) ) libcfa_public {
 		#ifdef __STATISTICS__
-		int temp = stats_fd;
-		stats_fd = fd;
+		int temp = heapMaster.stats_fd;
+		heapMaster.stats_fd = fd;
 		return temp;
 		#else
@@ -1262,5 +1590,7 @@
 	  if ( options != 0 ) { errno = EINVAL; return -1; }
 		#ifdef __STATISTICS__
-		return printStatsXML( stream );
+		HeapStatistics stats;
+		HeapStatisticsCtor( stats );
+		return printStatsXML( collectStats( stats ), stream ); // returns bytes written or -1
 		#else
 		return 0;										// unsupported
@@ -1275,5 +1605,5 @@
 		choose( option ) {
 		  case M_TOP_PAD:
-			heapExpand = ceiling2( value, __page_size );
+			heapMaster.heapExpand = ceiling2( value, __page_size );
 			return 1;
 		  case M_MMAP_THRESHOLD:
@@ -1319,25 +1649,9 @@
 // Must have CFA linkage to overload with C linkage realloc.
 void * resize( void * oaddr, size_t nalign, size_t size ) libcfa_public {
-	// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-  if ( unlikely( size == 0 ) ) {						// special cases
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.resize_0_calls, 1, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-		free( oaddr );
-		return 0p;
+  if ( unlikely( oaddr == 0p ) ) {						// no old storage => memalign( nalign, size )
+		return memalignNoStats( nalign, size STAT_ARG( RESIZE ) );
 	} // if
 
-	if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum
-	#ifdef __CFA_DEBUG__
-	else checkAlign( nalign );							// check alignment
-	#endif // __CFA_DEBUG__
-
-  if ( unlikely( oaddr == 0p ) ) {
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.resize_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &stats.resize_storage_request, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-		return memalignNoStats( nalign, size );
-	} // if
+	PROLOG( RESIZE, doFree( oaddr ) );					// => free( oaddr )
 
 	// Attempt to reuse existing alignment.
@@ -1347,4 +1661,5 @@
 
 	if ( unlikely( isFakeHeader ) ) {
+		checkAlign( nalign );							// check alignment
 		oalign = ClearAlignmentBit( header );			// old alignment
 		if ( unlikely( (uintptr_t)oaddr % nalign == 0	// lucky match ?
@@ -1353,7 +1668,7 @@
 			) ) {
 			HeaderAddr( oaddr )->kind.fake.alignment = MarkAlignmentBit( nalign ); // update alignment (could be the same)
-			Heap.FreeHeader * freeElem;
+			Heap.FreeHeader * freeHead;
 			size_t bsize, oalign;
-			headers( "resize", oaddr, header, freeElem, bsize, oalign );
+			headers( "resize", oaddr, header, freeHead, bsize, oalign );
 			size_t odsize = DataStorage( bsize, oaddr, header ); // data storage available in bucket
 
@@ -1361,5 +1676,11 @@
 				HeaderAddr( oaddr )->kind.fake.alignment = MarkAlignmentBit( nalign ); // update alignment (could be the same)
 				ClearZeroFillBit( header );				// turn off 0 fill
+				#ifdef __CFA_DEBUG__
+				incUnfreed( size - header->kind.real.size ); // adjust by the change in requested size (new - old)
+				#endif // __CFA_DEBUG__
 				header->kind.real.size = size;			// reset allocation size
+				#ifdef __STATISTICS__
+				incCalls( RESIZE );
+				#endif // __STATISTICS__
 				return oaddr;
 			} // if
@@ -1370,36 +1691,16 @@
 	} // if
 
-	#ifdef __STATISTICS__
-	__atomic_add_fetch( &stats.resize_storage_request, size, __ATOMIC_SEQ_CST );
-	#endif // __STATISTICS__
-
 	// change size, DO NOT preserve STICKY PROPERTIES.
-	free( oaddr );
-	return memalignNoStats( nalign, size );				// create new aligned area
+	doFree( oaddr );									// free previous storage
+	return memalignNoStats( nalign, size STAT_ARG( RESIZE ) ); // create new aligned area
 } // resize
 
 
 void * realloc( void * oaddr, size_t nalign, size_t size ) libcfa_public {
-	// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-  if ( unlikely( size == 0 ) ) {						// special cases
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.realloc_0_calls, 1, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-		free( oaddr );
-		return 0p;
+  if ( unlikely( oaddr == 0p ) ) {						// no old storage => memalign( nalign, size )
+		return memalignNoStats( nalign, size STAT_ARG( REALLOC ) );
 	} // if
 
-	if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum
-	#ifdef __CFA_DEBUG__
-	else checkAlign( nalign );							// check alignment
-	#endif // __CFA_DEBUG__
-
-  if ( unlikely( oaddr == 0p ) ) {
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &stats.realloc_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &stats.realloc_storage_request, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-		return memalignNoStats( nalign, size );
-	} // if
+	PROLOG( REALLOC, doFree( oaddr ) );					// => free( oaddr )
 
 	// Attempt to reuse existing alignment.
@@ -1408,4 +1709,5 @@
 	size_t oalign;
 	if ( unlikely( isFakeHeader ) ) {
+		checkAlign( nalign );							// check alignment
 		oalign = ClearAlignmentBit( header );			// old alignment
 		if ( unlikely( (uintptr_t)oaddr % nalign == 0	// lucky match ?
@@ -1421,12 +1723,7 @@
 	} // if
 
-	#ifdef __STATISTICS__
-	__atomic_add_fetch( &stats.realloc_calls, 1, __ATOMIC_SEQ_CST );
-	__atomic_add_fetch( &stats.realloc_storage_request, size, __ATOMIC_SEQ_CST );
-	#endif // __STATISTICS__
-
-	Heap.FreeHeader * freeElem;
+	Heap.FreeHeader * freeHead;
 	size_t bsize;
-	headers( "realloc", oaddr, header, freeElem, bsize, oalign );
+	headers( "realloc", oaddr, header, freeHead, bsize, oalign );
 
 	// change size and copy old content to new storage
@@ -1435,9 +1732,9 @@
 	bool ozfill = ZeroFillBit( header );				// old allocation zero filled
 
-	void * naddr = memalignNoStats( nalign, size );		// create new aligned area
-
-	headers( "realloc", naddr, header, freeElem, bsize, oalign );
+	void * naddr = memalignNoStats( nalign, size STAT_ARG( REALLOC ) ); // create new aligned area
+
+	headers( "realloc", naddr, header, freeHead, bsize, oalign );
 	memcpy( naddr, oaddr, min( osize, size ) );			// copy bytes
-	free( oaddr );
+	doFree( oaddr );									// free previous storage
 
 	if ( unlikely( ozfill ) ) {							// previous request zero fill ?
@@ -1451,4 +1748,9 @@
 
 
+void * reallocarray( void * oaddr, size_t nalign, size_t dim, size_t elemSize ) __THROW {
+	return realloc( oaddr, nalign, dim * elemSize );
+} // reallocarray
+
+
 // Local Variables: //
 // tab-width: 4 //
Index: libcfa/src/heap.hfa
===================================================================
--- libcfa/src/heap.hfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ libcfa/src/heap.hfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -10,6 +10,6 @@
 // Created On       : Tue May 26 11:23:55 2020
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Apr 21 22:52:25 2022
-// Update Count     : 21
+// Last Modified On : Tue Oct  4 19:08:55 2022
+// Update Count     : 23
 // 
 
@@ -30,13 +30,4 @@
 bool checkFreeOff();
 
-// supported mallopt options
-#ifndef M_MMAP_THRESHOLD
-#define M_MMAP_THRESHOLD (-1)
-#endif // M_MMAP_THRESHOLD
-
-#ifndef M_TOP_PAD
-#define M_TOP_PAD (-2)
-#endif // M_TOP_PAD
-
 extern "C" {
 	// New allocation operations.
@@ -49,5 +40,4 @@
 	size_t malloc_size( void * addr );
 	int malloc_stats_fd( int fd );
-	size_t malloc_usable_size( void * addr );
 	size_t malloc_expansion();							// heap expansion size (bytes)
 	size_t malloc_mmap_start();							// crossover allocation size from sbrk to mmap
Index: libcfa/src/parseargs.cfa
===================================================================
--- libcfa/src/parseargs.cfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ libcfa/src/parseargs.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -1,4 +1,21 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// parseargs.cfa
+// implementation of arguments parsing (argc, argv)
+//
+// Author           : Thierry Delisle
+// Created On       : Wed Oct 12 15:28:01 2022
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
 #include "parseargs.hfa"
 
+#include <assert.h>
 #include <ctype.h>
 #include <stdint.h>
@@ -146,23 +163,60 @@
 }
 
+static inline int next_newline(const char * str) {	// offset of the first '\n' in str, or MAX when str has none
+	int ret;
+	const char * ptr = strstr(str, "\n");
+	if(!ptr) return MAX;	// no newline found in str
+
+	/* paranoid */ verify( str <= ptr);
+	intptr_t low = (intptr_t)str;
+	intptr_t hi  = (intptr_t)ptr;
+	ret = hi - low;	// byte distance from str to the newline
+
+	return ret;
+}
+
 //-----------------------------------------------------------------------------
 // Print usage
 static void printopt(FILE * out, int width, int max, char sn, const char * ln, const char * help) {
+	// check how wide we should be printing
+	// this includes all options and the help message
 	int hwidth = max - (11 + width);
 	if(hwidth <= 0) hwidth = max;
 
-	char sname[4] = { ' ', ' ', ' ', '\0' };
-	if(sn != '\0') {
-		sname[0] = '-';
-		sname[1] = sn;
-		sname[2] = ',';
-	}
-
-	fprintf(out, "  %s --%-*s   %.*s\n", sname, width, ln, hwidth, help);
-	for() {
-		help += min(strlen(help), hwidth);
-		if('\0' == *help) break;
-		fprintf(out, "%*s%.*s\n", width + 11, "", hwidth, help);
-	}
+	// check which pieces we have
+	bool has_ln = ln && strcmp("", ln);
+	bool has_help = help && strcmp("", help);
+
+	// print the small name if present
+	if(sn != '\0') fprintf(out, "  -%c", sn);
+	else fprintf(out, "    ");
+
+	// print a comma if we have both short and long names
+	if(sn != '\0' && has_ln) fprintf(out, ", ");
+	else fprintf(out, "  ");
+
+	// print the long name if present
+	if(has_ln)        fprintf(out, "--%-*s", width, ln);
+	else if(has_help) fprintf(out, "  %-*s", width, "");
+
+	if(has_help) {
+		// print the help
+		// We need to wrap at the max width, and also indent newlines so everything is nice and pretty
+
+		// for each line to print
+		for() {
+			//find out if there is a newline
+			int nextnl = next_newline(help);
+			int real = min(min(strlen(help), hwidth), nextnl);
+
+			fprintf(out, "   %.*s", real, help);
+			// printf("%d %d\n", real, nextnl);
+			help += real;
+			if( nextnl == real ) help++;
+			if('\0' == *help) break;
+			fprintf(out, "\n%*s", width + 8, "");
+		}
+	}
+	fprintf(out, "\n");
 }
 
Index: libcfa/src/parseargs.hfa
===================================================================
--- libcfa/src/parseargs.hfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ libcfa/src/parseargs.hfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -1,2 +1,17 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// parseargs.hfa -- PUBLIC
+// API for arguments parsing (argc, argv)
+//
+// Author           : Thierry Delisle
+// Created On       : Wed Oct 12 15:28:01 2022
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
 #pragma once
 
Index: libcfa/src/startup.cfa
===================================================================
--- libcfa/src/startup.cfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ libcfa/src/startup.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jul 24 16:21:57 2018
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Jan 17 16:41:54 2022
-// Update Count     : 55
+// Last Modified On : Thu Oct  6 13:51:57 2022
+// Update Count     : 57
 //
 
@@ -24,19 +24,27 @@
 
 extern "C" {
+	void __cfaabi_memory_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_MEMORY ) ));
+	void __cfaabi_memory_startup( void ) {
+		extern void memory_startup();
+		memory_startup();
+	} // __cfaabi_memory_startup
+
+	void __cfaabi_memory_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_MEMORY ) ));
+	void __cfaabi_memory_shutdown( void ) {
+		extern void memory_shutdown();
+		memory_shutdown();
+	} // __cfaabi_memory_shutdown
+
 	void __cfaabi_appready_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_APPREADY ) ));
 	void __cfaabi_appready_startup( void ) {
 		tzset();										// initialize time global variables
-		#ifdef __CFA_DEBUG__
 		extern void heapAppStart();
 		heapAppStart();
-		#endif // __CFA_DEBUG__
 	} // __cfaabi_appready_startup
 
 	void __cfaabi_appready_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_APPREADY ) ));
 	void __cfaabi_appready_shutdown( void ) {
-		#ifdef __CFA_DEBUG__
 		extern void heapAppStop();
 		heapAppStop();
-		#endif // __CFA_DEBUG__
 	} // __cfaabi_appready_shutdown
 
Index: libcfa/src/stdhdr/assert.h
===================================================================
--- libcfa/src/stdhdr/assert.h	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ libcfa/src/stdhdr/assert.h	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -10,6 +10,6 @@
 // Created On       : Mon Jul  4 23:25:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Feb  4 12:58:49 2020
-// Update Count     : 15
+// Last Modified On : Sun Oct  9 21:28:22 2022
+// Update Count     : 16
 //
 
@@ -31,5 +31,5 @@
 #endif
 
-#if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__))
+#if ! defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__))
 	#define __CFA_WITH_VERIFY__
 	#define verify(x) assert(x)
Index: src/AST/Pass.hpp
===================================================================
--- src/AST/Pass.hpp	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ src/AST/Pass.hpp	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -167,5 +167,5 @@
 	const ast::Expr *             visit( const ast::UntypedExpr          * ) override final;
 	const ast::Expr *             visit( const ast::NameExpr             * ) override final;
-	const ast::Expr *			  visit( const ast::QualifiedNameExpr	 * ) override final;
+	const ast::Expr *             visit( const ast::QualifiedNameExpr	 * ) override final;
 	const ast::Expr *             visit( const ast::AddressExpr          * ) override final;
 	const ast::Expr *             visit( const ast::LabelAddressExpr     * ) override final;
Index: src/GenPoly/ScrubTyVars.h
===================================================================
--- src/GenPoly/ScrubTyVars.h	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ src/GenPoly/ScrubTyVars.h	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -116,5 +116,5 @@
 		node_t const * target, const TypeVarMap & typeVars ) {
 	return strict_dynamic_cast<node_t const *>(
-			scrubTypeVars<ast::Node>( target ) );
+			scrubTypeVars<ast::Node>( target, typeVars ) );
 }
 
Index: src/Parser/ParseNode.h
===================================================================
--- src/Parser/ParseNode.h	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ src/Parser/ParseNode.h	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -10,6 +10,6 @@
 // Created On       : Sat May 16 13:28:16 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Feb  2 09:15:49 2022
-// Update Count     : 905
+// Last Modified On : Tue Oct 18 14:15:37 2022
+// Update Count     : 936
 //
 
@@ -465,5 +465,9 @@
 			errors.append( e );
 		} // try
-		cur = dynamic_cast< NodeType * >( cur->get_next() );
+		const ParseNode * temp = (cur->get_next());		// keep the untyped successor so it can still be reported
+		cur = dynamic_cast< const NodeType * >( temp );	// should not return nullptr
+		if ( ! cur && temp ) {							// cast failed on a real node => non-homogeneous list
+			SemanticError( temp->location, "internal error, non-homogeneous nodes founds in buildList processing." ); // cur is null here, so take the location from temp
+		} // if
 	} // while
 	if ( ! errors.isEmpty() ) {
Index: src/Parser/lex.ll
===================================================================
--- src/Parser/lex.ll	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ src/Parser/lex.ll	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -10,6 +10,6 @@
  * Created On       : Sat Sep 22 08:58:10 2001
  * Last Modified By : Peter A. Buhr
- * Last Modified On : Tue Sep 20 21:18:55 2022
- * Update Count     : 762
+ * Last Modified On : Thu Oct 13 20:46:04 2022
+ * Update Count     : 764
  */
 
@@ -331,5 +331,6 @@
 __uint128_t		{ KEYWORD_RETURN(UINT128); }			// GCC
 unsigned		{ KEYWORD_RETURN(UNSIGNED); }
-__builtin_va_list { KEYWORD_RETURN(VALIST); }			// GCC
+__builtin_va_arg { KEYWORD_RETURN(VA_ARG); }			// GCC
+__builtin_va_list { KEYWORD_RETURN(VA_LIST); }			// GCC
 virtual			{ KEYWORD_RETURN(VIRTUAL); }			// CFA
 void			{ KEYWORD_RETURN(VOID); }
Index: src/Parser/parser.yy
===================================================================
--- src/Parser/parser.yy	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ src/Parser/parser.yy	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -10,6 +10,6 @@
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Oct  8 08:21:18 2022
-// Update Count     : 5709
+// Last Modified On : Fri Oct 14 14:04:43 2022
+// Update Count     : 5751
 //
 
@@ -305,5 +305,5 @@
 %token TYPEDEF
 %token EXTERN STATIC AUTO REGISTER
-%token THREADLOCALGCC THREADLOCALC11						// GCC, C11
+%token THREADLOCALGCC THREADLOCALC11					// GCC, C11
 %token INLINE FORTRAN									// C99, extension ISO/IEC 9899:1999 Section J.5.9(1)
 %token NORETURN											// C11
@@ -318,5 +318,5 @@
 %token DECIMAL32 DECIMAL64 DECIMAL128					// GCC
 %token ZERO_T ONE_T										// CFA
-%token SIZEOF TYPEOF VALIST AUTO_TYPE					// GCC
+%token SIZEOF TYPEOF VA_LIST VA_ARG AUTO_TYPE			// GCC
 %token OFFSETOF BASETYPEOF TYPEID						// CFA
 %token ENUM STRUCT UNION
@@ -409,5 +409,5 @@
 // declarations
 %type<decl> abstract_declarator abstract_ptr abstract_array abstract_function array_dimension multi_array_dimension
-%type<decl> abstract_parameter_declarator abstract_parameter_ptr abstract_parameter_array abstract_parameter_function array_parameter_dimension array_parameter_1st_dimension
+%type<decl> abstract_parameter_declarator_opt abstract_parameter_declarator abstract_parameter_ptr abstract_parameter_array abstract_parameter_function array_parameter_dimension array_parameter_1st_dimension
 %type<decl> abstract_parameter_declaration
 
@@ -698,7 +698,7 @@
 	primary_expression
 	| postfix_expression '[' assignment_expression ',' tuple_expression_list ']'
-			// Historic, transitional: Disallow commas in subscripts.
-			// Switching to this behaviour may help check if a C compatibilty case uses comma-exprs in subscripts.
-			// Current: Commas in subscripts make tuples.
+		// Historic, transitional: Disallow commas in subscripts.
+		// Switching to this behaviour may help check if a C compatibility case uses comma-exprs in subscripts.
+		// Current: Commas in subscripts make tuples.
 		{ $$ = new ExpressionNode( build_binary_val( OperKinds::Index, $1, new ExpressionNode( build_tuple( (ExpressionNode *)($3->set_last( $5 ) ) )) ) ); }
 	| postfix_expression '[' assignment_expression ']'
@@ -720,4 +720,8 @@
 	| postfix_expression '(' argument_expression_list_opt ')'
 		{ $$ = new ExpressionNode( build_func( $1, $3 ) ); }
+	| VA_ARG '(' primary_expression ',' declaration_specifier_nobody abstract_parameter_declarator_opt ')'
+		// { SemanticError( yylloc, "va_arg is currently unimplemented." ); $$ = nullptr; }
+		{ $$ = new ExpressionNode( build_func( new ExpressionNode( build_varref( new string( "__builtin_va_arg") ) ),
+											   (ExpressionNode *)($3->set_last( (ExpressionNode *)($6 ? $6->addType( $5 ) : $5) )) ) ); }
 	| postfix_expression '`' identifier					// CFA, postfix call
 		{ $$ = new ExpressionNode( build_func( new ExpressionNode( build_varref( build_postfix_name( $3 ) ) ), $1 ) ); }
@@ -2156,5 +2160,5 @@
 	| LONG
 		{ $$ = DeclarationNode::newLength( DeclarationNode::Long ); }
-	| VALIST											// GCC, __builtin_va_list
+	| VA_LIST											// GCC, __builtin_va_list
 		{ $$ = DeclarationNode::newBuiltinType( DeclarationNode::Valist ); }
 	| AUTO_TYPE
@@ -3676,4 +3680,10 @@
 // functions.
 
+abstract_parameter_declarator_opt:
+	// empty
+		{ $$ = nullptr; }
+	| abstract_parameter_declarator
+	;
+
 abstract_parameter_declarator:
 	abstract_parameter_ptr
Index: tests/.expect/alloc.txt
===================================================================
--- tests/.expect/alloc.txt	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/.expect/alloc.txt	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -35,4 +35,10 @@
 CFA realloc array alloc, fill
 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 
+CFA realloc array alloc, 5
+0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 
+CFA realloc array alloc, 5
+0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 
+CFA realloc array alloc, 5
+0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 0x5 
 
 C   memalign 42 42.5
Index: tests/Makefile.am
===================================================================
--- tests/Makefile.am	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/Makefile.am	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -76,15 +76,16 @@
 	pybin/tools.py \
 	long_tests.hfa \
+	avltree/avl-private.h \
+	avltree/avl.h \
+	concurrent/clib_tls.c \
+	concurrent/clib.c \
 	configs/.in/parseconfig-all.txt \
 	configs/.in/parseconfig-errors.txt \
 	configs/.in/parseconfig-missing.txt \
+	exceptions/except-io.hfa \
+	exceptions/with-threads.hfa \
 	io/.in/io.data \
 	io/.in/many_read.data \
-	avltree/avl.h \
-	avltree/avl-private.h \
-	concurrent/clib.c \
-	concurrent/clib_tls.c \
-	exceptions/with-threads.hfa \
-	exceptions/except-io.hfa \
+	meta/fork+exec.hfa \
 	unified_locking/mutex_test.hfa
 
Index: tests/alloc.cfa
===================================================================
--- tests/alloc.cfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/alloc.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -10,6 +10,6 @@
 // Created On       : Wed Feb  3 07:56:22 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Apr 18 17:13:52 2022
-// Update Count     : 433
+// Last Modified On : Fri Oct 14 09:31:39 2022
+// Update Count     : 491
 //
 
@@ -34,142 +34,142 @@
 	ip = (int *)malloc( sizeof(*ip) );					// C malloc, type unsafe
 	*ip = 0xdeadbeef;
-	printf( "C   malloc %#x\n", *ip );
+	sout | "C   malloc" | hex(*ip);
 	free( ip );
 
 	ip = malloc();										// CFA malloc, type safe
 	*ip = 0xdeadbeef;
-	printf( "CFA malloc %#x\n", *ip );
+	sout | "CFA malloc" | hex(*ip);
 	free( ip );
 
 	ip = alloc();										// CFA alloc, type safe
 	*ip = 0xdeadbeef;
-	printf( "CFA alloc %#x\n", *ip );
+	sout | "CFA alloc" | hex(*ip);
 	free( ip );
 
 	ip = alloc( fill`fill );							// CFA alloc, fill
-	printf( "CFA alloc, fill %08x\n", *ip );
+	sout | "CFA alloc, fill" | wd(8, nobase(hex(*ip)));
 	free( ip );
 
 	ip = alloc( 3`fill );								// CFA alloc, fill
-	printf( "CFA alloc, fill %d\n", *ip );
+	sout | "CFA alloc, fill" | *ip;
 	free( ip );
 
 
 	// allocation, array types
-	printf( "\n" );
+	sout | nl;
 
 	ip = (int *)calloc( dim, sizeof( *ip ) );			// C array calloc, type unsafe
-	printf( "C   array calloc, fill 0\n" );
-	for ( i; dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "C   array calloc, fill 0";
+	for ( i; dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	free( ip );
 
 	ip = calloc( dim );									// CFA array calloc, type safe
-	printf( "CFA array calloc, fill 0\n" );
-	for ( i; dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA array calloc, fill 0";
+	for ( i; dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	free( ip );
 
 	ip = alloc( dim );									// CFA array alloc, type safe
 	for ( i; dim ) { ip[i] = 0xdeadbeef; }
-	printf( "CFA array alloc, no fill\n" );
-	for ( i; dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA array alloc, no fill";
+	for ( i; dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	free( ip );
 
 	ip = alloc( 2 * dim, fill`fill );					// CFA array alloc, fill
-	printf( "CFA array alloc, fill %#hhx\n", fill );
-	for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA array alloc, fill" | hex(fill);
+	for ( i; 2 * dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	free( ip );
 
 	ip = alloc( 2 * dim, ((int)0xdeadbeef)`fill );		// CFA array alloc, fill
-	printf( "CFA array alloc, fill %#hhx\n", 0xdeadbeef );
-	for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA array alloc, fill" | hex((char)0xdeadbeef);
+	for ( i; 2 * dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	// do not free
 
 	ip1 = alloc( 2 * dim, [ip, 2 * dim]`fill );			// CFA array alloc, fill
-	printf( "CFA array alloc, fill from array\n" );
-	for ( i; 2 * dim ) { printf( "%#x %#x, ", ip[i], ip1[i] ); }
+	sout | "CFA array alloc, fill from array";
+	for ( i; 2 * dim ) { sout | hex(ip[i]) | hex(ip1[i]) | ", " | nonl; }
 	free( ip1 );
-	printf( "\n" );
+	sout | nl;
 
 
 	// realloc, non-array types
-	printf( "\n" );
+	sout | nl;
 
 	ip = (int *)realloc( ip, dim * sizeof(*ip) );		// C realloc
-	printf( "C realloc\n" );
-	for ( i; dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "C realloc";
+	for ( i; dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	// do not free
 
 	ip = realloc( ip, 2 * dim * sizeof(*ip) );			// CFA realloc
 	for ( i; dim ~ 2 * dim ) { ip[i] = 0x1010101; }
-	printf( "CFA realloc\n" );
-	for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA realloc";
+	for ( i; 2 * dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	// do not free
 
 
 	// realloc, array types
-	printf( "\n" );
+	sout | nl;
 
 	ip = alloc( dim, ip`realloc );						// CFA realloc array alloc
 	for ( i; dim ) { ip[i] = 0xdeadbeef; }
-	printf( "CFA realloc array alloc\n" );
-	for ( i; dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA realloc array alloc";
+	for ( i; dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	// do not free
 
 	ip = alloc( 2 * dim, ip`realloc );					// CFA realloc array alloc
 	for ( i; dim ~ 2 * dim ) { ip[i] = 0x1010101; }		// fill upper part
-	printf( "CFA realloc array alloc\n" );
-	for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA realloc array alloc";
+	for ( i; 2 * dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	// do not free
 
 	ip = alloc( dim, ip`realloc );						// CFA realloc array alloc
-	printf( "CFA realloc array alloc\n" );
-	for ( i; dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA realloc array alloc";
+	for ( i; dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	// do not free
 
 	ip = alloc( 3 * dim, ip`realloc, fill`fill );		// CFA realloc array alloc, fill
-	printf( "CFA realloc array alloc, fill\n" );
-	for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA realloc array alloc, fill";
+	for ( i; 3 * dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	// do not free
 
 	ip = alloc( dim, ip`realloc, fill`fill );			// CFA realloc array alloc, fill
-	printf( "CFA realloc array alloc, fill\n" );
-	for ( i; dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA realloc array alloc, fill";
+	for ( i; dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	// do not free
 
 	ip = alloc( 3 * dim, ip`realloc, fill`fill );		// CFA realloc array alloc, fill
-	printf( "CFA realloc array alloc, fill\n" );
-	for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
-	// do not free
-#if 0 // FIX ME
+	sout | "CFA realloc array alloc, fill";
+	for ( i; 3 * dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
+	// do not free
+
 	ip = alloc( 5 * dim, ip`realloc, 5`fill );			// CFA realloc array alloc, 5
-	printf( "CFA realloc array alloc, 5\n" );
-	for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA realloc array alloc, 5";
+	for ( i; 5 * dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	// do not free
 
 	ip = alloc( dim, ip`realloc, 5`fill );				// CFA realloc array alloc, 5
-	printf( "CFA realloc array alloc, 5\n" );
-	for ( i; dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
+	sout | "CFA realloc array alloc, 5";
+	for ( i; dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
 	// do not free
 
 	ip = alloc( 5 * dim, ip`realloc, 5`fill );			// CFA realloc array alloc, 5
-	printf( "CFA realloc array alloc, 5\n" );
-	for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); }
-	printf( "\n" );
-#endif // 0
+	sout | "CFA realloc array alloc, 5";
+	for ( i; 5 * dim ) { sout | hex(ip[i]) | ' ' | nonl; }
+	sout | nl;
+
 	free( ip );
 
@@ -180,30 +180,30 @@
 	};
 
-    ip = alloc();
+	ip = alloc();
 	*ip = 5;
-    double * dp = alloc( ip`resize );
+	double * dp = alloc( ip`resize );
 	*dp = 5.5;
-    S * sp = alloc( dp`resize );
+	S * sp = alloc( dp`resize );
 	*sp = (S){ {0, 1, 2, 3, 4} };
-    ip = alloc( sp`resize );
+	ip = alloc( sp`resize );
 	*ip = 3;
-    free( ip );
+	free( ip );
 
 
 	// resize, array types
 
-    ip = alloc( 5 );
+	ip = alloc( 5 );
 	for ( i; 5 ) { ip[i] = 5; }
-    dp = alloc( 5, ip`resize );
+	dp = alloc( 5, ip`resize );
 	for ( i; 5 ) { dp[i] = 5.5; }
-    sp = alloc( 5, dp`resize );
+	sp = alloc( 5, dp`resize );
 	for ( i; 5 ) { sp[i] = (S){ {0, 1, 2, 3, 4} }; }
-    ip = alloc( 3, sp`resize );
+	ip = alloc( 3, sp`resize );
 	for ( i; 3 ) { ip[i] = 3; }
-    ip = alloc( 7, ip`realloc );
+	ip = alloc( 7, ip`realloc );
 	for ( i; 7 ) { ip[i] = 7; }
-    ip = alloc( 7, ip`resize );
+	ip = alloc( 7, ip`resize );
 	for ( i; 7 ) { ip[i] = 7; }
-    free( ip );
+	free( ip );
 
 
@@ -217,19 +217,19 @@
 		const_count++;
 	}
-	void ^?{}( Struct & a ) {  dest_count++; }			// destruct
+	void ^?{}( Struct & a ) { dest_count++; }			// destruct
 	Struct st, st1, sta[dim], sta1[dim], * stp, * stp1;
 
 	// alignment, non-array types
-	printf( "\n" );
+	sout | nl;
 	enum { Alignment = 128 };
 
 	stp = &(*(Struct*)memalign( Alignment, sizeof( *stp ) ) ){ 42, 42.5 }; // C memalign
 	assert( (uintptr_t)stp % Alignment == 0 );
-	printf( "C   memalign %d %g\n", stp->x, stp->y );
+	sout | "C   memalign " | stp->x | stp->y;
 	free( stp );
 
 	stp = &(*memalign( Alignment )){ 42, 42.5 };		// CFA memalign
 	assert( (uintptr_t)stp % Alignment == 0 );
-	printf( "CFA memalign %d %g\n", stp->x, stp->y );
+	sout | "CFA memalign" | stp->x | stp->y;
 	free( stp );
 
@@ -237,5 +237,5 @@
 	*stp = (Struct){ 42, 42.5 };
 	assert( (uintptr_t)stp % Alignment == 0 );
-	printf( "CFA posix_memalign %d %g\n", stp->x, stp->y );
+	sout | "CFA posix_memalign" | stp->x | stp->y;
 	free( stp );
 
@@ -243,63 +243,63 @@
 	*stp = (Struct){ 42, 42.5 };
 	assert( (uintptr_t)stp % Alignment == 0 );
-	printf( "CFA posix_memalign %d %g\n", stp->x, stp->y );
+	sout | "CFA posix_memalign" | stp->x | stp->y;
 	free( stp );
 
 	stp = &(*alloc( Alignment`align)){ 42, 42.5 };		// CFA alloc_align
 	assert( (uintptr_t)stp % Alignment == 0 );
-	printf( "CFA alloc_align %d %g\n", stp->x, stp->y );
+	sout | "CFA alloc_align" | stp->x | stp->y;
 	free( stp );
 
 	stp = &(*alloc( Alignment`align )){ 42, 42.5 };		// CFA alloc_align
 	assert( (uintptr_t)stp % Alignment == 0 );
-	printf( "CFA alloc_align %d %g\n", stp->x, stp->y );
+	sout | "CFA alloc_align" | stp->x | stp->y;
 	free( stp );
 
 	stp = alloc( Alignment`align, fill`fill );			// CFA memalign, fill
 	assert( (uintptr_t)stp % Alignment == 0 );
-	printf( "CFA alloc_align fill %#x %a\n", stp->x, stp->y );
+	sout | "CFA alloc_align fill" | hex(stp->x) | hex(stp->y);
 	free( stp );
 
 	stp = alloc( Alignment`align, (Struct){ 42, 42.5 }`fill ); // CFA memalign, fill
 	assert( (uintptr_t)stp % Alignment == 0 );
-	printf( "CFA alloc_align fill %d %g\n", stp->x, stp->y );
+	sout | "CFA alloc_align fill" | stp->x | stp->y;
 	// do not free
 
 	stp = &(*alloc( stp`realloc, 4096`align )){ 42, 42.5 };	// CFA realign
 	assert( (uintptr_t)stp % 4096 == 0 );
-	printf( "CFA alloc_align %d %g\n", stp->x, stp->y );
+	sout | "CFA alloc_align" | stp->x | stp->y;
 	free( stp );
 
 
 	// alignment, array types
-	printf( "\n" );
+	sout | nl;
 
 	stp = alloc( dim, Alignment`align );                // CFA array memalign
 	assert( (uintptr_t)stp % Alignment == 0 );
 	for ( i; dim ) { stp[i] = (Struct){ 42, 42.5 }; }
-	printf( "CFA array alloc_align\n" );
-	for ( i; dim ) { printf( "%d %g, ", stp[i].x, stp[i].y ); }
-	printf( "\n" );
+	sout | "CFA array alloc_align";
+	for ( i; dim ) { sout | stp[i].x | stp[i].y | ", " | nonl; }
+	sout | nl;
 	free( stp );
 
 	stp = alloc( dim, Alignment`align, fill`fill );		// CFA array memalign, fill
 	assert( (uintptr_t)stp % Alignment == 0 );
-	printf( "CFA array alloc_align, fill\n" );
-	for ( i; dim ) { printf( "%#x %a, ", stp[i].x, stp[i].y ); }
-	printf( "\n" );
+	sout | "CFA array alloc_align, fill";
+	for ( i; dim ) { sout | hex(stp[i].x) | hex(stp[i].y) | ", " | nonl; }
+	sout | nl;
 	free( stp );
 
 	stp = alloc( dim, Alignment`align, ((Struct){ 42, 42.5 })`fill ); // CFA array memalign, fill
 	assert( (uintptr_t)stp % Alignment == 0 );
-	printf( "CFA array alloc_align, fill\n" );
-	for ( i; dim ) { printf( "%d %g, ", stp[i].x, stp[i].y ); }
-	printf( "\n" );
+	sout | "CFA array alloc_align, fill";
+	for ( i; dim ) { sout | stp[i].x | stp[i].y | ", " | nonl; }
+	sout | nl;
 	// do not free
 
 	stp1 = alloc( dim, Alignment`align, [stp, dim]`fill );	// CFA array memalign, fill
 	assert( (uintptr_t)stp % Alignment == 0 );
-	printf( "CFA array alloc_align, fill array\n" );
-	for ( i; dim ) { printf( "%d %g, ", stp1[i].x, stp1[i].y ); }
-	printf( "\n" );
+	sout | "CFA array alloc_align, fill array";
+	for ( i; dim ) { sout | stp1[i].x | stp1[i].y | ", " | nonl; }
+	sout | nl;
 	free( stp1 );
 
@@ -307,34 +307,34 @@
 	assert( (uintptr_t)stp % 4096 == 0 );
 	for ( i; dim ) { stp[i] = (Struct){ 42, 42.5 }; }
-	printf( "CFA realloc array alloc_align\n" );
-	for ( i; dim ) { printf( "%d %g, ", stp[i].x, stp[i].y ); }
-	printf( "\n" );
+	sout | "CFA realloc array alloc_align";
+	for ( i; dim ) { sout | stp[i].x | stp[i].y | ", " | nonl; }
+	sout | nl;
 	free( stp );
 
 
 	// data, non-array types
-	printf( "\n" );
+	sout | nl;
 
 	memset( &st, fill );                                // CFA memset, type safe
-	printf( "CFA memset %#x %a\n", st.x, st.y );
+	sout | "CFA memset" | hex(st.x) | hex(st.y);
 	memcpy( &st1, &st );                                // CFA memcpy, type safe
-	printf( "CFA memcpy %#x %a\n", st1.x, st1.y );
+	sout | "CFA memcpy" | hex(st1.x) | hex(st1.y);
 
 
 	// data, array types
-	printf( "\n" );
+	sout | nl;
 
 	amemset( sta, fill, dim );							// CFA array memset, type safe
-	printf( "CFA array memset\n" );
-	for ( i; dim ) { printf( "%#x %a, ", sta[i].x, sta[i].y ); }
-	printf( "\n" );
+	sout | "CFA array memset";
+	for ( i; dim ) { sout | hex(sta[i].x) | hex(sta[i].y) | ", " | nonl; }
+	sout | nl;
 
 	amemcpy( sta1, sta, dim );							// CFA array memcpy, type safe
-	printf( "CFA array memcpy\n" );
-	for ( i; dim ) { printf( "%#x %a, ", sta1[i].x, sta1[i].y ); }
-	printf( "\n" );
+	sout | "CFA array memcpy";
+	for ( i; dim ) { sout | hex(sta1[i].x) | hex(sta1[i].y) | ", " | nonl; }
+	sout | nl;
 
 	// new, non-array types
-	printf( "\n" );
+	sout | nl;
 
 	const_count = dest_count = 0;
@@ -344,5 +344,5 @@
 	assert( const_count == 2 && dest_count == 0 );		// assertion for testing
 
-	printf( "CFA new initialize\n%d %g %d %g\n", stp->x, stp->y, stp1->x, stp1->y );
+	sout | "CFA new initialize" | nl | stp->x | stp->y | stp1->x | stp1->y;
 	delete( stp, stp1 );
 	assert( const_count == 2 && dest_count == 2 );		// assertion for testing
@@ -351,25 +351,25 @@
 	stp = anew( dim, 42, 42.5 );
 	assert( const_count == 2 + dim && dest_count == 2 ); // assertion for testing
-	printf( "CFA array new initialize\n" );
-	for ( i; dim ) { printf( "%d %g, ", stp[i].x, stp[i].y ); }
-	printf( "\n" );
+	sout | "CFA array new initialize";
+	for ( i; dim ) { sout | stp[i].x | stp[i].y | ", " | nonl; }
+	sout | nl;
 
 	stp1 = anew( dim, 42, 42.5 );
 	assert( const_count == 2 + 2 * dim && dest_count == 2 ); // assertion for testing
-	for ( i; dim ) { printf( "%d %g, ", stp1[i].x, stp1[i].y ); }
-	printf( "\n" );
+	for ( i; dim ) { sout | stp1[i].x | stp1[i].y | ", " | nonl; }
+	sout | nl;
 	adelete( stp, stp1 );
 	assert( const_count == 2 + 2 * dim && dest_count == 2 + 2 * dim); // assertion for testing
 
 	// extras
-	printf( "\n" );
+	sout | nl;
 
 	float * fp = malloc() + 1;
-	printf( "pointer arithmetic %d\n", fp == fp - 1 );
+	sout | "pointer arithmetic" | fp == fp - 1;
 	free( fp - 1 );
 
 	ip = foo( bar( baz( malloc(), 0 ), 0 ), 0 );
 	*ip = 0xdeadbeef;
-	printf( "CFA deep malloc %#x\n", *ip );
+	sout | "CFA deep malloc" | hex(*ip);
 
 	dp = alloc(5.0`fill);								// just for testing multiple free
@@ -379,5 +379,5 @@
 #ifdef ERR1
 	stp = malloc();
-	printf( "\nSHOULD FAIL\n" );
+	sout | "\nSHOULD FAIL";
 	ip = realloc( stp, dim * sizeof( *stp ) );
 	ip = memset( stp, 10 );
Index: tests/alloc2.cfa
===================================================================
--- tests/alloc2.cfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/alloc2.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -1,2 +1,3 @@
+#include <fstream.hfa>									// sout
 #include <malloc.h>										// malloc_usable_size
 #include <stdint.h>										// uintptr_t
@@ -4,5 +5,4 @@
 #include <string.h>										// memcmp
 
-int last_failed;
 int tests_total;
 int tests_failed;
@@ -13,98 +13,98 @@
 void test_base( void * ip, size_t size, size_t align ) {
 	tests_total += 1;
-//	printf( "DEBUG: starting test %d\n", tests_total);
+	// sout | "DEBUG: starting test" | tests_total;
 	bool passed = (malloc_size( ip ) == size) && (malloc_usable_size( ip ) >= size) && (malloc_alignment( ip ) == align) && ((uintptr_t)ip % align  == 0);
 	if ( ! passed ) {
-		printf( "failed test %3d: %4zu %4zu but got %4zu ( %3zu ) %4zu\n", tests_total, size, align, malloc_size( ip ), malloc_usable_size( ip ), malloc_alignment( ip ) );
+		sout | "base failed test" | tests_total | "ip" | ip | "size" | size | "align" | align | "but got size" | malloc_size( ip ) | "usable" | malloc_usable_size( ip ) | "align" | malloc_alignment( ip );
 		tests_failed += 1;
 	} // if
-//	printf( "DEBUG: done test %d\n", tests_total);
+	// sout | "DEBUG: done test" | tests_total;
 }
 
 void test_fill( void * ip_, size_t start, size_t end, char fill ) {
 	tests_total += 1;
-//	printf( "DEBUG: starting test %d\n", tests_total );
+	// sout | "DEBUG: starting test" | tests_total;
 	bool passed = true;
 	char * ip = (char *) ip_;
 	for ( i; start ~ end ) passed = passed && (ip[i] == fill);
 	if ( ! passed ) {
-		printf( "failed test %3d: fill C\n", tests_total );
+		sout | "fill1 failed test" | tests_total | "fill C";
 		tests_failed += 1;
 	} // if
-//	printf( "DEBUG: done test %d\n", tests_total );
+	// sout | "DEBUG: done test" | tests_total;
 }
 
 void test_fill( void * ip_, size_t start, size_t end, int fill ) {
 	tests_total += 1;
-//	printf( "DEBUG: starting test %d\n", tests_total );
+	// sout | "DEBUG: starting test" | tests_total;
 	bool passed = true;
 	int * ip = (int *)ip_;
-	for (i; start ~ end ) passed = passed && (ip[i] == fill);
+	for ( i; start ~ end ) passed = passed && (ip[i] == fill);
 	if ( ! passed ) {
-		printf( "failed test %3d: fill int\n", tests_total );
+		sout | "fill2 failed test" | tests_total | "fill int";
 		tests_failed += 1;
 	} // if
-//	printf( "DEBUG: done test %d\n", tests_total );
+	// sout | "DEBUG: done test" | tests_total;
 }
 
 void test_fill( void * ip_, size_t start, size_t end, int * fill ) {
 	tests_total += 1;
-//	printf( "DEBUG: starting test %d\n", tests_total );
+	// sout | "DEBUG: starting test" | tests_total;
 	bool passed = memcmp((void*)((uintptr_t )ip_ + start ), (void*)fill, end ) == 0;
 	if ( ! passed ) {
-		printf( "failed test %3d: fill int A\n", tests_total );
+		sout | "fill3 failed test" | tests_total | "fill int A";
 		tests_failed += 1;
 	} // if
-//	printf( "DEBUG: done test %d\n", tests_total );
+	// sout | "DEBUG: done test" | tests_total;
 }
 
 void test_fill( void * ip_, size_t start, size_t end, T1 fill ) {
 	tests_total += 1;
-//	printf( "DEBUG: starting test %d\n", tests_total );
+	// sout | "DEBUG: starting test" | tests_total;
 	bool passed = true;
 	T1 * ip = (T1 *) ip_;
 	for ( i; start ~ end ) passed = passed && (ip[i].data == fill.data );
 	if ( ! passed ) {
-		printf( "failed test %3d: fill T1\n", tests_total );
+		sout | "fill4 failed test" | tests_total | "fill T1";
 		tests_failed += 1;
 	} // if
-//	printf( "DEBUG: done test %d\n", tests_total );
+	// sout | "DEBUG: done test" | tests_total;
 }
 
 void test_fill( void * ip_, size_t start, size_t end, T1 * fill ) {
 	tests_total += 1;
-//	printf( "DEBUG: starting test %d\n", tests_total );
+	// sout | "DEBUG: starting test" | tests_total;
 	bool passed = memcmp( (void*)((uintptr_t )ip_ + start ), (void*)fill, end ) == 0;
 	if ( ! passed ) {
-		printf( "failed test %3d: fill T1 A\n", tests_total );
+		sout | "fill5 failed test" | tests_total | "fill T1 A";
 		tests_failed += 1;
 	} // if
-//	printf( "DEBUG: done test %d\n", tests_total );
+	// sout | "DEBUG: done test" | tests_total;
 }
 
 void test_use( int * ip, size_t dim ) {
 	tests_total += 1;
-//	printf( "DEBUG: starting test %d\n", tests_total );
+	// sout | "DEBUG: starting test" | tests_total;
 	bool passed = true;
 	for ( i; 0 ~ dim ) ip[i] = 0xdeadbeef;
 	for ( i; 0 ~ dim ) passed = passed &&  (ip[i] == 0xdeadbeef);
 	if ( ! passed ) {
-		printf( "failed test %3d: use int\n", tests_total );
+		sout | "use1 failed test" | tests_total | "use int";
 		tests_failed += 1;
 	} // if
-//	printf( "DEBUG: done test %d\n", tests_total );
+	// sout | "DEBUG: done test" | tests_total;
 }
 
 void test_use( T1 * ip, size_t dim ) {
 	tests_total += 1;
-//	printf( "DEBUG: starting test %d\n", tests_total );
+	// sout | "DEBUG: starting test" | tests_total;
 	bool passed = true;
 	for ( i; 0 ~ dim ) ip[i].data = 0xdeadbeef;
 	for ( i; 0 ~ dim ) passed = passed &&  (ip[i].data == 0xdeadbeef);
 	if ( ! passed ) {
-		printf( "failed test %3d: use T1\n", tests_total );
+		sout | "use2 failed test" | tests_total | "use T1";
 		tests_failed += 1;
 	} // if
-//	printf( "DEBUG: done test %d\n", tests_total );
+	// sout | "DEBUG: done test" | tests_total;
 }
 
@@ -117,4 +117,5 @@
 	char FillC = 'a';
 	int * FillA = calloc( dim / 4 );
+
 	T1 FillT1 = { FillT };
 	T1 * FillT1A = (T1 *)(void *) malloc( (dim / 4) * sizeof(T1) );
@@ -129,5 +130,4 @@
 	// testing alloc
 
-	last_failed = -1;
 	tests_total = 0;
 	tests_failed = 0;
@@ -153,5 +153,5 @@
 	free( ip );
 
-	ip = alloc( ((double *)0p)`resize );
+	ip = alloc( 0p`resize );
 	test_base( ip, elemSize, libAlign );
 	test_use( ip, elemSize / elemSize );
@@ -495,12 +495,11 @@
 	free( ip );
 
-	if ( tests_failed == 0 ) printf( "PASSED alloc tests\n\n" );
-	else printf( "failed alloc tests : %d/%d\n\n", tests_failed, tests_total );
-
-	// testing alloc ( aligned struct )
+	if ( tests_failed == 0 ) sout | "PASSED alloc tests" | nl | nl;
+	else sout | "failed alloc tests :" | tests_failed | tests_total | nl | nl;
+
+	// testing alloc (aligned struct)
 
 	elemSize = sizeof(T1);
 	size = dim * elemSize;
-	last_failed = -1;
 	tests_total = 0;
 	tests_failed = 0;
@@ -868,8 +867,8 @@
 	free( t1p );
 
-	if ( tests_failed == 0) printf( "PASSED alloc tests (aligned struct)\n\n");
-	else printf( "failed alloc tests ( aligned struct ) : %d/%d\n\n", tests_failed, tests_total );
-
-	printf( "(if applicable) alignment error below indicates memory trashing caused by test_use.\n\n");
+	if ( tests_failed == 0) sout | "PASSED alloc tests (aligned struct)" | nl | nl;
+	else sout | "failed alloc tests ( aligned struct ) :" | tests_failed | tests_total | nl;
+
+	sout | "(if applicable) alignment error below indicates memory trashing caused by test_use." | nl | nl;
 	free( FillA );
 	free( FillT1A );
Index: tests/configs/.expect/parsebools.txt
===================================================================
--- tests/configs/.expect/parsebools.txt	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/configs/.expect/parsebools.txt	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -7,5 +7,5 @@
 set false  :true
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 all true/set arg:
@@ -17,5 +17,5 @@
 set false  :false
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 all false/unset arg:
@@ -27,5 +27,5 @@
 set false  :true
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 gibberish arg 1:
@@ -43,5 +43,5 @@
   -h, --help        print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 gibberish arg 2:
@@ -59,5 +59,5 @@
   -h, --help        print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 gibberish arg 3:
@@ -74,5 +74,5 @@
   -h, --help        print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 gibberish arg 4:
@@ -89,5 +89,5 @@
   -h, --help        print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 All Done!
Index: tests/configs/.expect/parsenums.x64.txt
===================================================================
--- tests/configs/.expect/parsenums.x64.txt	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/configs/.expect/parsenums.x64.txt	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -6,5 +6,5 @@
 double             :3.3
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 all 0 arg:
@@ -15,5 +15,5 @@
 double             :0.
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 negative vals arg:
@@ -24,5 +24,5 @@
 double             :-1.
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 funky notation arg:
@@ -33,5 +33,5 @@
 double             :5000000.
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 big values arg:
@@ -42,5 +42,5 @@
 double             :5000000.
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 too big values arg:
@@ -57,5 +57,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 Argument '4294967296' for option u could not be parsed
@@ -71,5 +71,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 Argument '18446744073709551616' for option l could not be parsed
@@ -85,5 +85,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 Argument '18446744073709551616' for option L could not be parsed
@@ -99,5 +99,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 negative errors arg:
@@ -114,5 +114,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 Argument '-1' for option l could not be parsed
@@ -128,5 +128,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 Argument '-1' for option L could not be parsed
@@ -142,5 +142,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 All Done!
Index: tests/configs/.expect/parsenums.x86.txt
===================================================================
--- tests/configs/.expect/parsenums.x86.txt	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/configs/.expect/parsenums.x86.txt	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -6,5 +6,5 @@
 double             :3.3
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 all 0 arg:
@@ -15,5 +15,5 @@
 double             :0.
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 negative vals arg:
@@ -24,5 +24,5 @@
 double             :-1.
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 funky notation arg:
@@ -33,5 +33,5 @@
 double             :5000000.
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 big values arg:
@@ -42,5 +42,5 @@
 double             :5000000.
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 0    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 0    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
 
 too big values arg:
@@ -57,5 +57,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 Argument '4294967296' for option u could not be parsed
@@ -71,5 +71,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 Argument '4294967296' for option l could not be parsed
@@ -85,5 +85,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 Argument '18446744073709551616' for option L could not be parsed
@@ -99,5 +99,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 negative errors arg:
@@ -114,5 +114,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 Argument '-1' for option l could not be parsed
@@ -128,5 +128,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 Argument '-1' for option L could not be parsed
@@ -142,5 +142,5 @@
   -h, --help               print this help message
 Child status:
-    WIFEXITED   : 1    WEXITSTATUS : 1    WIFSIGNALED : 0    WTERMSIG    : 0    WCOREDUMP   : 0    WIFSTOPPED  : 0    WSTOPSIG    : 1    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
 
 All Done!
Index: tests/configs/.expect/usage.txt
===================================================================
--- tests/configs/.expect/usage.txt	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
+++ tests/configs/.expect/usage.txt	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -0,0 +1,66 @@
+No args, no errors
+Usage:
+  ./usage Test usage
+  -h, --help   print this help message
+Child status:
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
+
+No args, with errors
+Usage:
+  ./usage Test usage
+  -h, --help   print this help message
+Child status:
+IFEXITED   : 1, EXITSTATUS : 1, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 1, IFCONTINUED: 0
+
+Args with short names only:
+Usage:
+  ./usage Test usage
+  -a       First arg
+  -b       Second arg
+  -c       Third arg
+  -h, --help   print this help message
+Child status:
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
+
+Args with long names only:
+Usage:
+  ./usage Test usage
+      --AA   First arg
+      --BB   Second arg
+      --CC   Third arg
+  -h, --help   print this help message
+Child status:
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
+
+Mix of short and long args:
+Usage:
+  ./usage Test usage
+  -a           First arg
+  -b, --BBBB   Second arg
+      --CC     Third arg
+  -h, --help   print this help message
+Child status:
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
+
+Mix of short and long and some missing description:
+Usage:
+  ./usage Test usage
+  -a           First arg
+  -b, --BBBB
+      --CC     Third arg
+  -h, --help   print this help message
+Child status:
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
+
+Mix of short and long and some long description:
+Usage:
+  ./usage Test usage
+  -a           First arg
+               The description has multiple lines,
+               ...for some reason
+  -b, --BBBB   12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
+      --CC     Third arg
+  -h, --help   print this help message
+Child status:
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
+
Index: tests/configs/parsebools.cfa
===================================================================
--- tests/configs/parsebools.cfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/configs/parsebools.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -1,22 +1,25 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <errno.h>
-#include <signal.h>
-
-extern "C" {
-	#include <sys/types.h>
-	#include <sys/wait.h>
-	#include <unistd.h>
-}
+//
+// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// configs/parsebools.cfa
+// Testing parsing of boolean arguments
+//
+// Author           : Thierry Delisle
+// Created On       : Wed Oct 12 15:28:01 2022
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
 
 #include <parseargs.hfa>
 #include <fstream.hfa>
 
-int true_main(const char * exec);
+#include "../meta/fork+exec.hfa"
 
 int main(int argc, char * argv[]) {
-	if(!getenv("CFATEST_FORK_EXEC_TEXT")) return true_main(argv[0]);
+	check_main(argv[0]);
 
 	bool YN = false;
@@ -48,41 +51,5 @@
 }
 
-int do_wait(pid_t pid) {
-	int wstatus;
-	int options = 0;
-	pid_t ret = waitpid(pid, &wstatus, options);
-	fflush(stdout);
-	if(ret < 0) {
-		fprintf(stderr, "Fork returned with error: %d '%s'\n", errno, strerror(errno));
-		exit(1);
-	}
-	return wstatus;
-}
-
-pid_t strict_fork(void) {
-	fflush(stdout);
-	pid_t ret = fork();
-	if(ret < 0) {
-		fprintf(stderr, "Fork returned with error: %d '%s'\n", errno, strerror(errno));
-		exit(1);
-	}
-	return ret;
-}
-
-void print_status(int wstatus) {
-	printf("Child status:\n");
-	printf("    WIFEXITED   : %d", WIFEXITED(wstatus));
-	printf("    WEXITSTATUS : %d", WEXITSTATUS(wstatus));
-	printf("    WIFSIGNALED : %d", WIFSIGNALED(wstatus));
-	printf("    WTERMSIG    : %d", WTERMSIG(wstatus));
-	printf("    WCOREDUMP   : %d", WCOREDUMP(wstatus));
-	printf("    WIFSTOPPED  : %d", WIFSTOPPED(wstatus));
-	printf("    WSTOPSIG    : %d", WSTOPSIG(wstatus));
-	printf("    WIFCONTINUED: %d\n", WIFCONTINUED(wstatus));
-}
-
-int true_main(const char * path) {
-	char * env[] = { "CFATEST_FORK_EXEC_TEXT=1", 0p };
-
+int true_main(const char * path, char * env[]) {
 	printf("no arg:\n");
 	if(pid_t child = strict_fork(); child == 0) {
@@ -97,5 +64,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("all true/set arg:\n");
@@ -111,5 +77,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("all false/unset arg:\n");
@@ -125,5 +90,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("gibberish arg 1:\n");
@@ -139,5 +103,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("gibberish arg 2:\n");
@@ -153,5 +116,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("gibberish arg 3:\n");
@@ -167,5 +129,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("gibberish arg 4:\n");
@@ -181,5 +142,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("All Done!\n");
Index: tests/configs/parsenums.cfa
===================================================================
--- tests/configs/parsenums.cfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/configs/parsenums.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -1,17 +1,22 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <errno.h>
-#include <signal.h>
-
-extern "C" {
-	#include <sys/types.h>
-	#include <sys/wait.h>
-	#include <unistd.h>
-}
+//
+// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// configs/parsenums.cfa
+// Testing parsing of integer arguments
+//
+// Author           : Thierry Delisle
+// Created On       : Wed Oct 12 15:28:01 2022
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
 
 #include <parseargs.hfa>
 #include <fstream.hfa>
+
+#include "../meta/fork+exec.hfa"
 
 #if __SIZEOF_LONG__ == 4
@@ -28,5 +33,5 @@
 
 int main(int argc, char * argv[]) {
-	if(!getenv("CFATEST_FORK_EXEC_TEXT")) return true_main(argv[0]);
+	check_main(argv[0]);
 
 	int i = -3;
@@ -56,41 +61,5 @@
 }
 
-int do_wait(pid_t pid) {
-	int wstatus;
-	int options = 0;
-	pid_t ret = waitpid(pid, &wstatus, options);
-	fflush(stdout);
-	if(ret < 0) {
-		fprintf(stderr, "Fork returned with error: %d '%s'\n", errno, strerror(errno));
-		exit(1);
-	}
-	return wstatus;
-}
-
-pid_t strict_fork(void) {
-	fflush(stdout);
-	pid_t ret = fork();
-	if(ret < 0) {
-		fprintf(stderr, "Fork returned with error: %d '%s'\n", errno, strerror(errno));
-		exit(1);
-	}
-	return ret;
-}
-
-void print_status(int wstatus) {
-	printf("Child status:\n");
-	printf("    WIFEXITED   : %d", WIFEXITED(wstatus));
-	printf("    WEXITSTATUS : %d", WEXITSTATUS(wstatus));
-	printf("    WIFSIGNALED : %d", WIFSIGNALED(wstatus));
-	printf("    WTERMSIG    : %d", WTERMSIG(wstatus));
-	printf("    WCOREDUMP   : %d", WCOREDUMP(wstatus));
-	printf("    WIFSTOPPED  : %d", WIFSTOPPED(wstatus));
-	printf("    WSTOPSIG    : %d", WSTOPSIG(wstatus));
-	printf("    WIFCONTINUED: %d\n", WIFCONTINUED(wstatus));
-}
-
-int true_main(const char * path) {
-	char * env[] = { "CFATEST_FORK_EXEC_TEXT=1", 0p };
-
+int true_main(const char * path, char * env[]) {
 	printf("no arg:\n");
 	if(pid_t child = strict_fork(); child == 0) {
@@ -105,5 +74,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("all 0 arg:\n");
@@ -119,5 +87,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("negative vals arg:\n");
@@ -133,5 +100,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("funky notation arg:\n");
@@ -147,5 +113,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("big values arg:\n");
@@ -161,5 +126,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("too big values arg:\n");
@@ -175,5 +139,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	if(pid_t child = strict_fork(); child == 0) {
@@ -188,5 +151,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	if(pid_t child = strict_fork(); child == 0) {
@@ -201,5 +163,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	if(pid_t child = strict_fork(); child == 0) {
@@ -214,5 +175,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("negative errors arg:\n");
@@ -228,5 +188,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	if(pid_t child = strict_fork(); child == 0) {
@@ -241,5 +200,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	if(pid_t child = strict_fork(); child == 0) {
@@ -254,5 +212,4 @@
 		print_status(status);
 	}
-	printf("\n");
 
 	printf("All Done!\n");
Index: tests/configs/usage.cfa
===================================================================
--- tests/configs/usage.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
+++ tests/configs/usage.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -0,0 +1,122 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// configs/usage.cfa
+// Testing printing of usage for arguments
+//
+// Author           : Thierry Delisle
+// Created On       : Wed Oct 12 15:28:01 2022
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include <parseargs.hfa>
+#include <fstream.hfa>
+
+#include "../meta/fork+exec.hfa"
+
+int main() { // runs seven usage-printing scenarios; each forks a child that calls print_args_usage while the parent waits and prints the child's status
+	char * fake_argv[] = { "./usage" }; // stand-in argv passed to every print_args_usage call below (always with argc 1)
+
+	sout | "No args, no errors";
+	if(pid_t child = strict_fork(); child == 0) { // child branch
+		cfa_option opts[0]; // empty option table
+		print_args_usage(1, fake_argv, opts, 0, "Test usage", false); // NOTE(review): no explicit exit follows, so this call presumably terminates the child - confirm
+	}
+	else { // parent branch
+		int status = do_wait(child); // collect the child's wait status
+		print_status(status); // the .expect file shows EXITSTATUS 0 for this case
+	}
+
+	sout | "No args, with errors";
+	if(pid_t child = strict_fork(); child == 0) {
+		cfa_option opts[0];
+		print_args_usage(1, fake_argv, opts, 0, "Test usage", true); // same as above but last argument is true; the .expect file shows EXITSTATUS 1 for this case
+	}
+	else {
+		int status = do_wait(child);
+		print_status(status);
+	}
+
+	sout | "Args with short names only:";
+	if(pid_t child = strict_fork(); child == 0) {
+		int a, b, c; // option targets; never read in this test
+		cfa_option opts[] = {
+			{'a', "", "First arg", a }, // presumably: short name, long name (empty here), help text, target variable
+			{'b', "", "Second arg", b },
+			{'c', "", "Third arg", c },
+		};
+		print_args_usage(1, fake_argv, opts, 3, "Test usage", false); // 3 matches the number of entries in opts
+	}
+	else {
+		int status = do_wait(child);
+		print_status(status);
+	}
+
+	sout | "Args with long names only:";
+	if(pid_t child = strict_fork(); child == 0) {
+		int a, b, c;
+		cfa_option opts[] = {
+			{'\0', "AA", "First arg", a }, // '\0' in the first field: no short name is printed for this entry in the .expect file
+			{'\0', "BB", "Second arg", b },
+			{'\0', "CC", "Third arg", c },
+		};
+		print_args_usage(1, fake_argv, opts, 3, "Test usage", false);
+	}
+	else {
+		int status = do_wait(child);
+		print_status(status);
+	}
+
+	sout | "Mix of short and long args:";
+	if(pid_t child = strict_fork(); child == 0) {
+		int a, b, c;
+		cfa_option opts[] = {
+			{'a', "", "First arg", a }, // short name only
+			{'b', "BBBB", "Second arg", b }, // both short and long name
+			{'\0', "CC", "Third arg", c }, // long name only
+		};
+		print_args_usage(1, fake_argv, opts, 3, "Test usage", false);
+	}
+	else {
+		int status = do_wait(child);
+		print_status(status);
+	}
+
+	sout | "Mix of short and long and some missing description:";
+	if(pid_t child = strict_fork(); child == 0) {
+		int a, b, c;
+		cfa_option opts[] = {
+			{'a', "", "First arg", a },
+			{'b', "BBBB", "", b }, // empty help text
+			{'\0', "CC", "Third arg", c },
+		};
+		print_args_usage(1, fake_argv, opts, 3, "Test usage", false);
+	}
+	else {
+		int status = do_wait(child);
+		print_status(status);
+	}
+
+	sout | "Mix of short and long and some long description:";
+	if(pid_t child = strict_fork(); child == 0) {
+		int a, b, c;
+		cfa_option opts[] = {
+			{'a', "", "First arg\nThe description has multiple lines,\n...for some reason", a }, // help text with embedded newlines; the .expect file shows each line in the description column
+			{'b', "BBBB", "12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890", b }, // very long single-line help text; printed unwrapped in the .expect file
+			{'\0', "CC", "Third arg", c },
+		};
+		print_args_usage(1, fake_argv, opts, 3, "Test usage", false);
+	}
+	else {
+		int status = do_wait(child);
+		print_status(status);
+	}
+}
+
+// not used
+static int true_main(const char * path, char * env[]) { return 0; }
Index: tests/malloc.cfa
===================================================================
--- tests/malloc.cfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/malloc.cfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -1,7 +1,5 @@
-#include <assert.h>
+#include <fstream.hfa>									// sout
 #include <malloc.h>										// malloc_usable_size
 #include <stdint.h>										// uintptr_t
-#include <stdlib.h>										// posix_memalign
-#include <fstream.hfa>
 #include <stdlib.hfa>									// access C malloc, realloc
 #include <unistd.h>										// getpagesize
@@ -10,318 +8,316 @@
 int tests_failed;
 size_t tAlign = 32;
-struct S1 { int d1; } __attribute__((aligned(32)));
+struct S1 { int data; } __attribute__(( aligned(32)));
 typedef struct S1 T1;
 
-void test_base( void * ip, size_t size, size_t align) {
+void test_base( void * ip, size_t size, size_t align ) {
 	tests_total += 1;
-	bool passed = (malloc_size(ip) == size) && (malloc_usable_size(ip) >= size) && (malloc_alignment(ip) == align) && ((uintptr_t)ip % align  == 0);
-	if (!passed) {
-		printf("failed test %2d: %4lu %4lu but got %4lu ( %3lu ) %4lu\n", tests_total, size, align, malloc_size(ip), malloc_usable_size(ip), malloc_alignment(ip));
+	bool passed = (malloc_size( ip ) == size) && (malloc_usable_size( ip ) >= size) && (malloc_alignment( ip ) == align) && ((uintptr_t)ip % align  == 0);
+	if ( ! passed ) {
+		sout | "base failed test" | tests_total | "ip" | ip | "size" | size | "align" | align | "but got size" | malloc_size( ip ) | "usable" | malloc_usable_size( ip ) | "align" | malloc_alignment( ip );
+		tests_failed += 1;
+	} // if
+}
+
+void test_fill( void * ip_, size_t start, size_t end, char fill ) {
+	tests_total += 1;
+	bool passed = true;
+	char * ip = (char *) ip_;
+	for ( i; start ~ end ) passed = passed && (ip[i] == fill);
+	if ( ! passed ) {
+		sout | "fill1 failed test" | tests_total | "fill C";
+		tests_failed += 1;
+	} // if
+}
+
+void test_use( void * ip_ ) {
+	tests_total += 1;
+	bool passed = true;
+	int * ip = (int *) ip_;
+	size_t size = malloc_size( ip );
+	for ( i; 0 ~ size ~ sizeof(int)) ip[i/sizeof(int)] = 0xdeadbeef;
+	for ( i; 0 ~ size ~ sizeof(int)) passed = passed && (ip[i / sizeof(int)] == 0xdeadbeef);
+	size_t usize = malloc_usable_size( ip );
+	for ( i; size ~ usize ~ sizeof(int)) ip[i / sizeof(int)] = -1;
+	for ( i; size ~ usize ~ sizeof(int)) passed = passed &&  (ip[i / sizeof(int)] == -1);
+	if ( ! passed ) {
+		sout | "failed test" | tests_total | "use";
 		tests_failed += 1;
 	}
 }
 
-void test_fill( void * ip_, size_t start, size_t end, char fill) {
-	tests_total += 1;
-	bool passed = true;
-	char * ip = (char *) ip_;
-	for (i; start ~ end) passed = passed && (ip[i] == fill);
-	if (!passed) {
-		printf("failed test %2d: fill\n", tests_total);
-		tests_failed += 1;
-	}
-}
-
-void test_use( void * ip_) {
-	tests_total += 1;
-	bool passed = true;
-	int * ip = (int *) ip_;
-	size_t size = malloc_size(ip);
-	for (i; 0 ~ size ~ sizeof(int)) ip[i/sizeof(int)] = 0xdeadbeef;
-	for (i; 0 ~ size ~ sizeof(int)) passed = passed &&  (ip[i/sizeof(int)] == 0xdeadbeef);
-	size_t usize = malloc_usable_size(ip);
-	for (i; size ~ usize ~ sizeof(int)) ip[i/sizeof(int)] = -1;
-	for (i; size ~ usize ~ sizeof(int)) passed = passed &&  (ip[i/sizeof(int)] == -1);
-	if (!passed) {
-		printf("failed test %2d: use\n", tests_total);
-		tests_failed += 1;
-	}
-}
-
 int main( void ) {
+	enum { dim = 8, align = 64, libAlign = libAlign() };
 	size_t elemSize = sizeof(int);
-	size_t dim = 8;
 	size_t size = dim * elemSize;
-	size_t align = 64;
-	const size_t libAlign = libAlign();
 	char fill = '\xde';
 	int * ip;
 	T1 * tp;
 
-	// testing C   malloc
+	// testing C malloc
 
 	tests_total = 0;
 	tests_failed = 0;
 
-	ip = (int *) (void *) malloc( size );
-	test_base(ip, size, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) malloc( 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) aalloc( dim, elemSize );
-	test_base(ip, size, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) aalloc( 0, elemSize );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) aalloc( dim, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) aalloc( 0, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) calloc( dim, elemSize );
-	test_base(ip, size, libAlign);
-	test_fill(ip, 0, size, '\0');
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) calloc( 0, elemSize );
-	test_base(ip, 0, libAlign);
-	test_fill(ip, 0, 0, '\0');
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) calloc( dim, 0 );
-	test_base(ip, 0, libAlign);
-	test_fill(ip, 0, 0, '\0');
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) malloc( size );
-	ip = (int *) (void *) resize( (void *) ip, size / 4 );
-	test_base(ip, size / 4, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) malloc( size );
-	ip = (int *) (void *) resize( (void *) ip, size * 4 );
-	test_base(ip, size * 4, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) malloc( size );
-	ip = (int *) (void *) resize( (void *) ip, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) resize( NULL, size );
-	test_base(ip, size, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) resize( 0p, size );
-	test_base(ip, size, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) calloc( dim, elemSize );
-	ip = (int *) (void *) realloc( (void *) ip, size / 4 );
-	test_base(ip, size / 4, libAlign);
-	test_fill(ip, 0, size / 4, '\0');
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) calloc( dim, elemSize );
-	ip = (int *) (void *) realloc( (void *) ip, size * 4 );
-	test_base(ip, size * 4, libAlign);
-	test_fill(ip, 0, size * 4, '\0');
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) calloc( dim, elemSize );
-	ip = (int *) (void *) realloc( (void *) ip, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) realloc( NULL, size  );
-	test_base(ip, size , libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) realloc( 0p, size );
-	test_base(ip, size, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) memalign( align, size );
-	test_base(ip, size, align);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) memalign( align, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) amemalign( align, dim, elemSize );
-	test_base(ip, size, align);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) amemalign( align, 0, elemSize );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) amemalign( align, dim, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) cmemalign( align, dim, elemSize );
-	test_base(ip, size, align);
-	test_fill(ip, 0, size, '\0');
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) cmemalign( align, 0, elemSize );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) cmemalign( align, dim, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) aligned_alloc( align, size );
-	test_base(ip, size, align);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) aligned_alloc( align, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	(int) posix_memalign( (void **) &ip, align, size );
-	test_base(ip, size, align);
-	test_use(ip);
-	free(ip);
-
-	(int) posix_memalign( (void **) &ip, align, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) valloc( size );
-	test_base(ip, size, getpagesize());
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) valloc( 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) pvalloc( getpagesize() * 3 / 2 );
-	test_base(ip, getpagesize() * 2, getpagesize());
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) pvalloc( 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) malloc( size );
-	ip = (int *) (void *) resize( (void *) ip, libAlign, size / 2 );
-	test_base(ip, size / 2, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) aligned_alloc( align, size );
-	ip = (int *) (void *) resize( (void *) ip, align, size / 2 );
-	test_base(ip, size / 2, align);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) malloc( size );
-	ip = (int *) (void *) resize( (void *) ip, align, size / 4 );
-	test_base(ip, size / 4, align);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) malloc( size );
-	ip = (int *) (void *) resize( (void *) ip, align, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) resize( NULL, align, size );
-	test_base(ip, size, align);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) resize( 0p, align, size );
-	test_base(ip, size, align);
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) calloc( dim, elemSize );
-	ip = (int *) (void *) realloc( (void *) ip, libAlign, size / 2 );
-	test_base(ip, size / 2, libAlign);
-	test_fill(ip, 0, size / 2, '\0');
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) cmemalign( align, dim, elemSize );
-	ip = (int *) (void *) realloc( (void *) ip, align, size / 2 );
-	test_base(ip, size / 2, align);
-	test_fill(ip, 0, size / 2, '\0');
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) calloc( dim, elemSize );
-	ip = (int *) (void *) realloc( (void *) ip, align, size / 4 );
-	test_base(ip, size / 4, align);
-	test_fill(ip, 0, size / 4, '\0');
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) calloc( dim, elemSize );
-	ip = (int *) (void *) realloc( (void *) ip, 0, size * 4 );
-	test_base(ip, size * 4, libAlign);
-	test_fill(ip, 0, size * 4, '\0');
-	test_use(ip);
-	free(ip);
-
-	ip = (int *) (void *) calloc( dim, elemSize );
-	ip = (int *) (void *) realloc( (void *) ip, align, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	free( 0p ); // sanity check
-	free( NULL ); // sanity check
-
-	if (tests_failed == 0) printf("PASSED C malloc tests\n\n");
-	else printf("failed C malloc tests : %d/%d\n\n", tests_failed, tests_total);
+	ip = (int *)malloc( size );
+	test_base( ip, size, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)malloc( 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)aalloc( dim, elemSize );
+	test_base( ip, size, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)aalloc( 0, elemSize );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)aalloc( dim, 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)aalloc( 0, 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)calloc( dim, elemSize );
+	test_base( ip, size, libAlign );
+	test_fill( ip, 0, size, '\0' );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)calloc( 0, elemSize );
+	test_base( ip, 0, libAlign );
+	test_fill( ip, 0, 0, '\0' );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)calloc( dim, 0 );
+	test_base( ip, 0, libAlign );
+	test_fill( ip, 0, 0, '\0' );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)malloc( size );
+	ip = (int *)resize( ip, size / 4 );
+	test_base( ip, size / 4, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)malloc( size );
+	ip = (int *)resize( ip, size * 4 );
+	test_base( ip, size * 4, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)malloc( size );
+	ip = (int *)resize( ip, 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)resize( NULL, size );
+	test_base( ip, size, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)resize( 0p, size );
+	test_base( ip, size, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)calloc( dim, elemSize );
+	ip = (int *)realloc( ip, size / 4 );
+	test_base( ip, size / 4, libAlign );
+	test_fill( ip, 0, size / 4, '\0' );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)calloc( dim, elemSize );
+	ip = (int *)realloc( ip, size * 4 );
+	test_base( ip, size * 4, libAlign );
+	test_fill( ip, 0, size * 4, '\0' );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)calloc( dim, elemSize );
+	ip = (int *)realloc( ip, 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)realloc( NULL, size  );
+	test_base( ip, size , libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)realloc( 0p, size );
+	test_base( ip, size, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)memalign( align, size );
+	test_base( ip, size, align );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)memalign( align, 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)amemalign( align, dim, elemSize );
+	test_base( ip, size, align );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)amemalign( align, 0, elemSize );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)amemalign( align, dim, 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)cmemalign( align, dim, elemSize );
+	test_base( ip, size, align );
+	test_fill( ip, 0, size, '\0' );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)cmemalign( align, 0, elemSize );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)cmemalign( align, dim, 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)aligned_alloc( align, size );
+	test_base( ip, size, align );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)aligned_alloc( align, 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	posix_memalign( (void **) &ip, align, size );
+	test_base( ip, size, align );
+	test_use( ip );
+	free( ip );
+
+	posix_memalign( (void **) &ip, align, 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)valloc( size );
+	test_base( ip, size, getpagesize() );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)valloc( 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)pvalloc( getpagesize() * 3 / 2 );
+	test_base( ip, getpagesize() * 2, getpagesize() );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)pvalloc( 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)malloc( size );
+	ip = (int *)resize( ip, libAlign, size / 2 );
+	test_base( ip, size / 2, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)aligned_alloc( align, size );
+	ip = (int *)resize( ip, align, size / 2 );
+	test_base( ip, size / 2, align );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)malloc( size );
+	ip = (int *)resize( ip, align, size / 4 );
+	test_base( ip, size / 4, align );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)malloc( size );
+	ip = (int *)resize( ip, align, 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)resize( NULL, align, size );
+	test_base( ip, size, align );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)resize( 0p, align, size );
+	test_base( ip, size, align );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)calloc( dim, elemSize );
+	ip = (int *)realloc( ip, libAlign, size / 2 );
+	test_base( ip, size / 2, libAlign );
+	test_fill( ip, 0, size / 2, '\0' );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)cmemalign( align, dim, elemSize );
+	ip = (int *)realloc( ip, align, size / 2 );
+	test_base( ip, size / 2, align );
+	test_fill( ip, 0, size / 2, '\0' );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)calloc( dim, elemSize );
+	ip = (int *)realloc( ip, align, size / 4 );
+	test_base( ip, size / 4, align );
+	test_fill( ip, 0, size / 4, '\0' );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)calloc( dim, elemSize );
+	ip = (int *)realloc( ip, libAlign, size * 4 );
+	test_base( ip, size * 4, libAlign );
+	test_fill( ip, 0, size * 4, '\0' );
+	test_use( ip );
+	free( ip );
+
+	ip = (int *)calloc( dim, elemSize );
+	ip = (int *)realloc( ip, align, 0 );
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	free( 0p );											// sanity check
+	free( NULL );										// sanity check
+
+	if (tests_failed == 0) sout | "PASSED C malloc tests" | nl | nl;
+	else sout | "failed C malloc tests" | tests_failed | tests_total | nl | nl;
 
 	// testing CFA malloc
@@ -331,135 +327,135 @@
 
 	ip = malloc();
-	test_base(ip, elemSize, libAlign);
-	test_use(ip);
-	free(ip);
+	test_base( ip, elemSize, libAlign );
+	test_use( ip );
+	free( ip );
 
 	ip = aalloc( dim );
-	test_base(ip, size, libAlign);
-	test_use(ip);
-	free(ip);
+	test_base( ip, size, libAlign );
+	test_use( ip );
+	free( ip );
 
 	ip = aalloc( 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
 
 	ip = calloc( dim );
-	test_base(ip, size, libAlign);
-	test_fill(ip, 0, size, '\0');
-	test_use(ip);
-	free(ip);
+	test_base( ip, size, libAlign );
+	test_fill( ip, 0, size, '\0' );
+	test_use( ip );
+	free( ip );
 
 	ip = calloc( 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
 
 	ip = aalloc( dim );
 	ip = resize( ip, size / 4 );
-	test_base(ip, size / 4, libAlign);
-	test_use(ip);
-	free(ip);
+	test_base( ip, size / 4, libAlign );
+	test_use( ip );
+	free( ip );
 
 	ip = aalloc( dim );
 	ip = resize( ip, size * 4 );
-	test_base(ip, size * 4, libAlign);
-	test_use(ip);
-	free(ip);
+	test_base( ip, size * 4, libAlign );
+	test_use( ip );
+	free( ip );
 
 	ip = aalloc( dim );
 	ip = resize( ip, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = resize( (int*)0p, size );
-	test_base(ip, size, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = resize( (int*)0p, size );
-	test_base(ip, size, libAlign);
-	test_use(ip);
-	free(ip);
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = resize( 0p, size );
+	test_base( ip, size, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = resize( 0p, size );
+	test_base( ip, size, libAlign );
+	test_use( ip );
+	free( ip );
 
 	ip = calloc( dim );
 	ip = realloc( ip, size / 4 );
-	test_base(ip, size / 4, libAlign);
-	test_fill(ip, 0, size / 4, '\0');
-	test_use(ip);
-	free(ip);
+	test_base( ip, size / 4, libAlign );
+	test_fill( ip, 0, size / 4, '\0' );
+	test_use( ip );
+	free( ip );
 
 	ip = calloc( dim );
 	ip = realloc( ip, size * 4 );
-	test_base(ip, size * 4, libAlign);
-	test_fill(ip, 0, size, '\0');
-	test_use(ip);
-	free(ip);
+	test_base( ip, size * 4, libAlign );
+	test_fill( ip, 0, size, '\0' );
+	test_use( ip );
+	free( ip );
 
 	ip = calloc( dim );
 	ip = realloc( ip, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = realloc( (int*)0p, size );
-	test_base(ip, size , libAlign);
-	test_use(ip);
-	free(ip);
-
-	ip = realloc( (int*)0p, size );
-	test_base(ip, size, libAlign);
-	test_use(ip);
-	free(ip);
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = realloc( 0p, size );
+	test_base( ip, size , libAlign );
+	test_use( ip );
+	free( ip );
+
+	ip = realloc( 0p, size );
+	test_base( ip, size, libAlign );
+	test_use( ip );
+	free( ip );
 
 	ip = memalign( align );
-	test_base(ip, elemSize, align);
-	test_use(ip);
-	free(ip);
+	test_base( ip, elemSize, align );
+	test_use( ip );
+	free( ip );
 
 	ip = amemalign( align, dim );
-	test_base(ip, size, align);
-	test_use(ip);
-	free(ip);
+	test_base( ip, size, align );
+	test_use( ip );
+	free( ip );
 
 	ip = amemalign( align, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
 
 	ip = cmemalign( align, dim );
-	test_base(ip, size, align);
-	test_fill(ip, 0, size, '\0');
-	test_use(ip);
-	free(ip);
+	test_base( ip, size, align );
+	test_fill( ip, 0, size, '\0' );
+	test_use( ip );
+	free( ip );
 
 	ip = cmemalign( align, 0 );
-	test_base(ip, 0, libAlign);
-	test_use(ip);
-	free(ip);
+	test_base( ip, 0, libAlign );
+	test_use( ip );
+	free( ip );
 
 	ip = aligned_alloc( align );
-	test_base(ip, elemSize, align);
-	test_use(ip);
-	free(ip);
-
-	(int) posix_memalign( (int **) &ip, align );
-	test_base(ip, elemSize, align);
-	test_use(ip);
-	free(ip);
+	test_base( ip, elemSize, align );
+	test_use( ip );
+	free( ip );
+
+	posix_memalign( (int **) &ip, align );
+	test_base( ip, elemSize, align );
+	test_use( ip );
+	free( ip );
 
 	ip = valloc();
-	test_base(ip, elemSize, getpagesize());
-	test_use(ip);
-	free(ip);
+	test_base( ip, elemSize, getpagesize() );
+	test_use( ip );
+	free( ip );
 
 	ip = pvalloc();
-	test_base(ip, getpagesize(), getpagesize());
-	test_use(ip);
-	free(ip);
-
-	if (tests_failed == 0) printf("PASSED CFA malloc tests\n\n");
-	else printf("failed CFA malloc tests : %d/%d\n\n", tests_failed, tests_total);
+	test_base( ip, getpagesize(), getpagesize() );
+	test_use( ip );
+	free( ip );
+
+	if (tests_failed == 0) sout | "PASSED CFA malloc tests" | nl | nl;
+	else sout | "failed CFA malloc tests" | tests_failed | tests_total | nl | nl;
 
 	// testing CFA malloc with aligned struct
@@ -471,137 +467,135 @@
 
 	tp = malloc();
-	test_base(tp, elemSize, tAlign );
-	test_use(tp);
-	free(tp);
+	test_base( tp, elemSize, tAlign  );
+	test_use( tp );
+	free( tp );
 
 	tp = aalloc( dim );
-	test_base(tp, size, tAlign );
-	test_use(tp);
-	free(tp);
+	test_base( tp, size, tAlign  );
+	test_use( tp );
+	free( tp );
 
 	tp = aalloc( 0 );
-	test_base(tp, 0, libAlign);
-	test_use(tp);
-	free(tp);
+	test_base( tp, 0, libAlign );
+	test_use( tp );
+	free( tp );
 
 	tp = calloc( dim );
-	test_base(tp, size, tAlign );
-	test_fill(tp, 0, size, '\0');
-	test_use(tp);
-	free(tp);
+	test_base( tp, size, tAlign  );
+	test_fill( tp, 0, size, '\0' );
+	test_use( tp );
+	free( tp );
 
 	tp = calloc( 0 );
-	test_base(tp, 0, libAlign);
-	test_use(tp);
-	free(tp);
+	test_base( tp, 0, libAlign );
+	test_use( tp );
+	free( tp );
 
 	tp = aalloc( dim );
 	tp = resize( tp, size / 4 );
-	test_base(tp, size / 4, tAlign );
-	test_use(tp);
-	free(tp);
+	test_base( tp, size / 4, tAlign  );
+	test_use( tp );
+	free( tp );
 
 	tp = malloc();
 	tp = resize( tp, size * 4 );
-	test_base(tp, size * 4, tAlign );
-	test_use(tp);
-	free(tp);
+	test_base( tp, size * 4, tAlign  );
+	test_use( tp );
+	free( tp );
 
 	tp = aalloc( dim );
 	tp = resize( tp, 0 );
-	test_base(tp, 0, libAlign);
-	test_use(tp);
-	free(tp);
+	test_base( tp, 0, libAlign );
+	test_use( tp );
+	free( tp );
 
 	tp = resize( (T1*)0p, size );
-	test_base(tp, size, tAlign );
-	test_use(tp);
-	free(tp);
+	test_base( tp, size, tAlign  );
+	test_use( tp );
+	free( tp );
 
 	tp = resize( (T1*)0p, size );
-	test_base(tp, size, tAlign );
-	test_use(tp);
-	free(tp);
+	test_base( tp, size, tAlign  );
+	test_use( tp );
+	free( tp );
 
 	tp = calloc( dim );
 	tp = realloc( tp, size / 4 );
-	test_base(tp, size / 4, tAlign );
-	test_fill(tp, 0, size / 4, '\0');
-	test_use(tp);
-	free(tp);
+	test_base( tp, size / 4, tAlign  );
+	test_fill( tp, 0, size / 4, '\0' );
+	test_use( tp );
+	free( tp );
 
 	tp = calloc( dim );
 	tp = realloc( tp, size * 4 );
-	test_base(tp, size * 4, tAlign );
-	test_fill(tp, 0, size, '\0');
-	test_use(tp);
-	free(tp);
+	test_base( tp, size * 4, tAlign  );
+	test_fill( tp, 0, size, '\0' );
+	test_use( tp );
+	free( tp );
 
 	tp = calloc( dim );
 	tp = realloc( tp, 0 );
-	test_base(tp, 0, libAlign);
-	test_use(tp);
-	free(tp);
+	test_base( tp, 0, libAlign );
+	test_use( tp );
+	free( tp );
 
 	tp = realloc( (T1*)0p, size  );
-	test_base(tp, size , tAlign );
-	test_use(tp);
-	free(tp);
+	test_base( tp, size , tAlign  );
+	test_use( tp );
+	free( tp );
 
 	tp = realloc( (T1*)0p, size );
-	test_base(tp, size, tAlign );
-	test_use(tp);
-	free(tp);
+	test_base( tp, size, tAlign  );
+	test_use( tp );
+	free( tp );
 
 	tp = memalign( align );
-	test_base(tp, elemSize, align);
-	test_use(tp);
-	free(tp);
+	test_base( tp, elemSize, align );
+	test_use( tp );
+	free( tp );
 
 	tp = amemalign( align, dim );
-	test_base(tp, size, align);
-	test_use(tp);
-	free(tp);
+	test_base( tp, size, align );
+	test_use( tp );
+	free( tp );
 
 	tp = amemalign( align, 0 );
-	test_base(tp, 0, libAlign);
-	test_use(tp);
-	free(tp);
+	test_base( tp, 0, libAlign );
+	test_use( tp );
+	free( tp );
 
 	tp = cmemalign( align, dim );
-	test_base(tp, size, align);
-	test_fill(tp, 0, size, '\0');
-	test_use(tp);
-	free(tp);
+	test_base( tp, size, align );
+	test_fill( tp, 0, size, '\0' );
+	test_use( tp );
+	free( tp );
 
 	tp = cmemalign( align, 0 );
-	test_base(tp, 0, libAlign);
-	test_use(tp);
-	free(tp);
+	test_base( tp, 0, libAlign );
+	test_use( tp );
+	free( tp );
 
 	tp = aligned_alloc( align );
-	test_base(tp, elemSize, align);
-	test_use(tp);
-	free(tp);
-
-	(int) posix_memalign( (T1 **) &tp, align );
-	test_base(tp, elemSize, align);
-	test_use(tp);
-	free(tp);
+	test_base( tp, elemSize, align );
+	test_use( tp );
+	free( tp );
+
+	posix_memalign( (T1 **)&tp, align );
+	test_base( tp, elemSize, align );
+	test_use( tp );
+	free( tp );
 
 	tp = valloc();
-	test_base(tp, elemSize, getpagesize());
-	test_use(tp);
-	free(tp);
+	test_base( tp, elemSize, getpagesize() );
+	test_use( tp );
+	free( tp );
 
 	tp = pvalloc();
-	test_base(tp, getpagesize(), getpagesize());
-	test_use(tp);
-	free(tp);
-
-	if (tests_failed == 0) printf("PASSED CFA malloc tests (aligned struct)\n\n");
-	else printf("failed CFA malloc tests (aligned struct) : %d/%d\n\n", tests_failed, tests_total);
-
-	return 0;
+	test_base( tp, getpagesize(), getpagesize() );
+	test_use( tp );
+	free( tp );
+
+	if ( tests_failed == 0 ) sout | "PASSED CFA malloc tests (aligned struct)" | nl | nl;
+	else sout | "failed CFA malloc tests (aligned struct)" | tests_failed | tests_total | nl | nl;
 }
 
Index: tests/meta/.expect/fork+exec.txt
===================================================================
--- tests/meta/.expect/fork+exec.txt	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/meta/.expect/fork+exec.txt	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -4,12 +4,6 @@
 Success!
 Child status:
-    WIFEXITED   : 1
-    WEXITSTATUS : 0
-    WIFSIGNALED : 0
-    WTERMSIG    : 0
-    WCOREDUMP   : 0
-    WIFSTOPPED  : 0
-    WSTOPSIG    : 0
-    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
+
 1 arg:
 arguments are:
@@ -17,12 +11,6 @@
 Success!
 Child status:
-    WIFEXITED   : 1
-    WEXITSTATUS : 0
-    WIFSIGNALED : 0
-    WTERMSIG    : 0
-    WCOREDUMP   : 0
-    WIFSTOPPED  : 0
-    WSTOPSIG    : 0
-    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
+
 5 arg:
 arguments are:
@@ -34,11 +22,5 @@
 Success!
 Child status:
-    WIFEXITED   : 1
-    WEXITSTATUS : 0
-    WIFSIGNALED : 0
-    WTERMSIG    : 0
-    WCOREDUMP   : 0
-    WIFSTOPPED  : 0
-    WSTOPSIG    : 0
-    WIFCONTINUED: 0
+IFEXITED   : 1, EXITSTATUS : 0, IFSIGNALED : 0, TERMSIG    : 0, COREDUMP   : 0, IFSTOPPED  : 0, STOPSIG    : 0, IFCONTINUED: 0
+
 All Done!
Index: tests/meta/fork+exec.hfa
===================================================================
--- tests/meta/fork+exec.hfa	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/meta/fork+exec.hfa	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -28,5 +28,5 @@
 }
 
-static int true_main(const char * exec, char * env[]);
+static int true_main(const char * path, char * env[]);
 
 static int do_wait(pid_t pid) {
@@ -55,12 +55,14 @@
 static void print_status(int wstatus) {
 	printf("Child status:\n");
-	printf("    WIFEXITED   : %d\n", WIFEXITED(wstatus));
-	printf("    WEXITSTATUS : %d\n", WEXITSTATUS(wstatus));
-	printf("    WIFSIGNALED : %d\n", WIFSIGNALED(wstatus));
-	printf("    WTERMSIG    : %d\n", WTERMSIG(wstatus));
-	printf("    WCOREDUMP   : %d\n", WCOREDUMP(wstatus));
-	printf("    WIFSTOPPED  : %d\n", WIFSTOPPED(wstatus));
-	printf("    WSTOPSIG    : %d\n", WSTOPSIG(wstatus));
-	printf("    WIFCONTINUED: %d\n", WIFCONTINUED(wstatus));
+	printf("IFEXITED   : %d, ", WIFEXITED(wstatus));
+	printf("EXITSTATUS : %d, ", WEXITSTATUS(wstatus));
+	printf("IFSIGNALED : %d, ", WIFSIGNALED(wstatus));
+	printf("TERMSIG    : %d, ", WTERMSIG(wstatus));
+	printf("COREDUMP   : %d, ", WCOREDUMP(wstatus));
+	printf("IFSTOPPED  : %d, ", WIFSTOPPED(wstatus));
+	printf("STOPSIG    : %d, ", WSTOPSIG(wstatus));
+	printf("IFCONTINUED: %d", WIFCONTINUED(wstatus));
+	printf("\n");
+	printf("\n");
 }
 
Index: tests/test.py
===================================================================
--- tests/test.py	(revision 5408b5919f1f591121d6cdabf6c6b5e605e61e6b)
+++ tests/test.py	(revision c2b3243ebb592c712c00231ae4cd93fd4a4eea3e)
@@ -72,5 +72,5 @@
 				# this is a valid name, let's check if it already exists
 				found = [test for test in all_tests if canonical_path( test.target() ) == testname]
-				setup = itertools.product(settings.all_arch if options.arch else [None])
+				setup = settings.all_arch if options.arch else [None]
 				if not found:
 					# it's a new name, create it according to the name and specified architecture
