Index: libcfa/src/heap.cfa
===================================================================
--- libcfa/src/heap.cfa	(revision 2aa2056f64da59ab4717e6588935a91c2a7705c7)
+++ libcfa/src/heap.cfa	(revision b6830d74cf52c207d2a08ac866856197ee2e29b8)
@@ -1,2 +1,4 @@
+// #comment TD : this file uses both spaces and tabs for indentation
+
 //
 // Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
@@ -22,4 +24,5 @@
 } // extern "C"
 
+// #comment TD : Many of these should be merged into math I believe
 #include "bits/align.hfa"									// libPow2
 #include "bits/defs.hfa"									// likely, unlikely
@@ -36,9 +39,9 @@
 
 size_t default_mmap_start() __attribute__(( weak )) {
-    return __CFA_DEFAULT_MMAP_START__;
+	return __CFA_DEFAULT_MMAP_START__;
 } // default_mmap_start
 
 size_t default_heap_expansion() __attribute__(( weak )) {
-    return __CFA_DEFAULT_HEAP_EXPANSION__;
+	return __CFA_DEFAULT_HEAP_EXPANSION__;
 } // default_heap_expansion
 
@@ -62,4 +65,5 @@
 #endif // LOCKFREE
 
+// #comment TD : This define is significantly different from the __ALIGN__ define from locks.hfa
 #define ALIGN 16
 
@@ -136,5 +140,5 @@
 
 static void checkUnfreed() {
-    if ( allocFree != 0 ) {
+	if ( allocFree != 0 ) {
 		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
 		// char helpText[512];
@@ -143,5 +147,5 @@
 		// 					(long int)getpid(), allocFree, allocFree ); // always print the UNIX pid
 		// __cfaabi_dbg_bits_write( helpText, len );
-    } // if
+	} // if
 } // checkUnfreed
 
@@ -167,4 +171,6 @@
 				struct RealHeader {
 					union {
+						// #comment TD : this code uses byte sizes but the comment uses bit sizes
+
 						struct {						// 32-bit word => 64-bit header, 64-bit word => 128-bit header
 							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
@@ -186,4 +192,6 @@
 
 						};
+
+						// #comment TD : C++ code
 						#if BUCKLOCK == LOCKFREE
 						Stack<Storage>::Link next;		// freed block points next freed block of same size (double-wide)
@@ -215,4 +223,5 @@
 	    Storage * freeList;
 		#elif BUCKLOCK == LOCKFREE
+		// #comment TD : C++ code
 	    StackLF<Storage> freeList;
 		#else
@@ -240,19 +249,20 @@
 static unsigned int maxBucketsUsed;						// maximum number of buckets in use
 
+// #comment TD : This array is not const but it feels like it should be
 // Powers of 2 are common allocation sizes, so make powers of 2 generate the minimum required size.
 static unsigned int bucketSizes[NoBucketSizes] @= {		// different bucket sizes
-    16, 32, 48, 64,
-    64 + sizeof(HeapManager.Storage), 96, 112, 128, 128 + sizeof(HeapManager.Storage), 160, 192, 224,
-    256 + sizeof(HeapManager.Storage), 320, 384, 448, 512 + sizeof(HeapManager.Storage), 640, 768, 896,
-    1_024 + sizeof(HeapManager.Storage), 1_536, 2_048 + sizeof(HeapManager.Storage), 2_560, 3_072, 3_584, 4_096 + sizeof(HeapManager.Storage), 6_144,
-    8_192 + sizeof(HeapManager.Storage), 9_216, 10_240, 11_264, 12_288, 13_312, 14_336, 15_360,
-    16_384 + sizeof(HeapManager.Storage), 18_432, 20_480, 22_528, 24_576, 26_624, 28_672, 30_720,
-    32_768 + sizeof(HeapManager.Storage), 36_864, 40_960, 45_056, 49_152, 53_248, 57_344, 61_440,
-    65_536 + sizeof(HeapManager.Storage), 73_728, 81_920, 90_112, 98_304, 106_496, 114_688, 122_880,
-    131_072 + sizeof(HeapManager.Storage), 147_456, 163_840, 180_224, 196_608, 212_992, 229_376, 245_760,
-    262_144 + sizeof(HeapManager.Storage), 294_912, 327_680, 360_448, 393_216, 425_984, 458_752, 491_520,
-    524_288 + sizeof(HeapManager.Storage), 655_360, 786_432, 917_504, 1_048_576 + sizeof(HeapManager.Storage), 1_179_648, 1_310_720, 1_441_792,
-    1_572_864, 1_703_936, 1_835_008, 1_966_080, 2_097_152 + sizeof(HeapManager.Storage), 2_621_440, 3_145_728, 3_670_016,
-    4_194_304 + sizeof(HeapManager.Storage)
+	16, 32, 48, 64,
+	64 + sizeof(HeapManager.Storage), 96, 112, 128, 128 + sizeof(HeapManager.Storage), 160, 192, 224,
+	256 + sizeof(HeapManager.Storage), 320, 384, 448, 512 + sizeof(HeapManager.Storage), 640, 768, 896,
+	1_024 + sizeof(HeapManager.Storage), 1_536, 2_048 + sizeof(HeapManager.Storage), 2_560, 3_072, 3_584, 4_096 + sizeof(HeapManager.Storage), 6_144,
+	8_192 + sizeof(HeapManager.Storage), 9_216, 10_240, 11_264, 12_288, 13_312, 14_336, 15_360,
+	16_384 + sizeof(HeapManager.Storage), 18_432, 20_480, 22_528, 24_576, 26_624, 28_672, 30_720,
+	32_768 + sizeof(HeapManager.Storage), 36_864, 40_960, 45_056, 49_152, 53_248, 57_344, 61_440,
+	65_536 + sizeof(HeapManager.Storage), 73_728, 81_920, 90_112, 98_304, 106_496, 114_688, 122_880,
+	131_072 + sizeof(HeapManager.Storage), 147_456, 163_840, 180_224, 196_608, 212_992, 229_376, 245_760,
+	262_144 + sizeof(HeapManager.Storage), 294_912, 327_680, 360_448, 393_216, 425_984, 458_752, 491_520,
+	524_288 + sizeof(HeapManager.Storage), 655_360, 786_432, 917_504, 1_048_576 + sizeof(HeapManager.Storage), 1_179_648, 1_310_720, 1_441_792,
+	1_572_864, 1_703_936, 1_835_008, 1_966_080, 2_097_152 + sizeof(HeapManager.Storage), 2_621_440, 3_145_728, 3_670_016,
+	4_194_304 + sizeof(HeapManager.Storage)
 };
 #ifdef FASTLOOKUP
@@ -267,40 +277,40 @@
 static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
 
-
+// #comment TD : The return type of this function should be commented
 static inline bool setMmapStart( size_t value ) {
-    if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
-    mmapStart = value;									// set global
-
-    // find the closest bucket size less than or equal to the mmapStart size
-    maxBucketsUsed = bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
-    assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
-    assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
-    return false;
+	if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
+	mmapStart = value;									// set global
+
+	// find the closest bucket size less than or equal to the mmapStart size
+	maxBucketsUsed = bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
+	assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
+	assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
+	return false;
 } // setMmapStart
 
 
 static void ?{}( HeapManager & manager ) with ( manager ) {
-    pageSize = sysconf( _SC_PAGESIZE );
-
-    for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
+	pageSize = sysconf( _SC_PAGESIZE );
+
+	for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
 		freeLists[i].blockSize = bucketSizes[i];
-    } // for
+	} // for
 
 	#ifdef FASTLOOKUP
-    unsigned int idx = 0;
-    for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
+	unsigned int idx = 0;
+	for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
 		if ( i > bucketSizes[idx] ) idx += 1;
 		lookup[i] = idx;
-    } // for
+	} // for
 	#endif // FASTLOOKUP
 
-    if ( setMmapStart( default_mmap_start() ) ) {
+	if ( setMmapStart( default_mmap_start() ) ) {
 		abort( "HeapManager : internal error, mmap start initialization failure." );
-    } // if
-    heapExpand = default_heap_expansion();
-
-    char * End = (char *)sbrk( 0 );
-    sbrk( (char *)libCeiling( (long unsigned int)End, libAlign() ) - End ); // move start of heap to multiple of alignment
-    heapBegin = heapEnd = sbrk( 0 );					// get new start point
+	} // if
+	heapExpand = default_heap_expansion();
+
+	char * End = (char *)sbrk( 0 );
+	sbrk( (char *)libCeiling( (long unsigned int)End, libAlign() ) - End ); // move start of heap to multiple of alignment
+	heapBegin = heapEnd = sbrk( 0 );					// get new start point
 } // HeapManager
 
@@ -326,4 +336,5 @@
 	#endif // __CFA_DEBUG__
 
+	// #comment TD : This assertion seems redundant with the above code
 	assert( heapManager.heapBegin == 0 );
 	heapManager{};
@@ -361,5 +372,5 @@
 // Use "write" because streams may be shutdown when calls are made.
 static void printStats() {
-    char helpText[512];
+	char helpText[512];
 	__cfaabi_dbg_bits_print_buffer( helpText, sizeof(helpText),
 			"\nHeap statistics:\n"
@@ -385,8 +396,8 @@
 } // printStats
 
-
+// #comment TD : Why do we have this?
 static int printStatsXML( FILE * stream ) {
-    char helpText[512];
-    int len = snprintf( helpText, sizeof(helpText),
+	char helpText[512];
+	int len = snprintf( helpText, sizeof(helpText),
 						"<malloc version=\"1\">\n"
 						"<heap nr=\"0\">\n"
@@ -413,11 +424,11 @@
 						sbrk_calls, sbrk_storage
 		);
-    return write( fileno( stream ), helpText, len );	// -1 => error
+	return write( fileno( stream ), helpText, len );	// -1 => error
 } // printStatsXML
 #endif // __STATISTICS__
 
-
+// #comment TD : Is this the same thing as Out-of-Memory?
 static inline void noMemory() {
-    abort( "Heap memory exhausted at %zu bytes.\n"
+	abort( "Heap memory exhausted at %zu bytes.\n"
 			"Possible cause is very large memory allocation and/or large amount of unfreed storage allocated by the program or system/library routines.",
 			((char *)(sbrk( 0 )) - (char *)(heapManager.heapBegin)) );
@@ -426,28 +437,29 @@
 
 static inline void checkAlign( size_t alignment ) {
-    if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) {
+	if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) {
 		abort( "Alignment %zu for memory allocation is less than sizeof(void *) and/or not a power of 2.", alignment );
-    } // if
+	} // if
 } // checkAlign
 
 
 static inline bool setHeapExpand( size_t value ) {
-    if ( heapExpand < pageSize ) return true;
-    heapExpand = value;
-    return false;
+	if ( heapExpand < pageSize ) return true;
+	heapExpand = value;
+	return false;
 } // setHeapExpand
 
 
 static inline void checkHeader( bool check, const char * name, void * addr ) {
-    if ( unlikely( check ) ) {							// bad address ?
+	if ( unlikely( check ) ) {							// bad address ?
 		abort( "Attempt to %s storage %p with address outside the heap.\n"
 				"Possible cause is duplicate free on same block or overwriting of memory.",
 				name, addr );
-    } // if
+	} // if
 } // checkHeader
 
-
+// #comment TD : function should be commented and/or have a more evocative name
+//               this is neither a check nor a constructor, which is what I would expect this function to be
 static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & size, size_t & alignment ) {
-    if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
+	if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
 		size_t offset = header->kind.fake.offset;
 		alignment = header->kind.fake.alignment & -2;	// remove flag from value
@@ -456,45 +468,49 @@
 		#endif // __CFA_DEBUG__
 		header = (HeapManager.Storage.Header *)((char *)header - offset);
-    } // if
+	} // if
 } // fakeHeader
 
-
+// #comment TD : Why is this a define
 #define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
 
 static inline bool headers( const char * name, void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem, size_t & size, size_t & alignment ) with ( heapManager ) {
-    header = headerAddr( addr );
-
-    if ( unlikely( heapEnd < addr ) ) {					// mmapped ?
+	header = headerAddr( addr );
+
+	if ( unlikely( heapEnd < addr ) ) {					// mmapped ?
 		fakeHeader( header, size, alignment );
 		size = header->kind.real.blockSize & -3;		// mmap size
 		return true;
-    } // if
+	} // if
 
 	#ifdef __CFA_DEBUG__
-    checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
+			checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
 	#endif // __CFA_DEBUG__
-    // header may be safe to dereference
-    fakeHeader( header, size, alignment );
+
+	// #comment TD : This code looks weird...
+	//               It's called as the first statement of both branches of the last if, with the same parameters in all cases
+
+		// header may be safe to dereference
+		fakeHeader( header, size, alignment );
 	#ifdef __CFA_DEBUG__
-    checkHeader( header < (HeapManager.Storage.Header *)heapBegin || (HeapManager.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
+			checkHeader( header < (HeapManager.Storage.Header *)heapBegin || (HeapManager.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
 	#endif // __CFA_DEBUG__
 
-    freeElem = (HeapManager.FreeHeader *)((size_t)header->kind.real.home & -3);
+		freeElem = (HeapManager.FreeHeader *)((size_t)header->kind.real.home & -3);
 	#ifdef __CFA_DEBUG__
-    if ( freeElem < &freeLists[0] || &freeLists[NoBucketSizes] <= freeElem ) {
-		abort( "Attempt to %s storage %p with corrupted header.\n"
-			   "Possible cause is duplicate free on same block or overwriting of header information.",
-			   name, addr );
-    } // if
+			if ( freeElem < &freeLists[0] || &freeLists[NoBucketSizes] <= freeElem ) {
+			abort( "Attempt to %s storage %p with corrupted header.\n"
+				 "Possible cause is duplicate free on same block or overwriting of header information.",
+			   		name, addr );
+			} // if
 	#endif // __CFA_DEBUG__
-    size = freeElem->blockSize;
-    return false;
+		size = freeElem->blockSize;
+		return false;
 } // headers
 
 
 static inline void * extend( size_t size ) with ( heapManager ) {
-    lock( extlock __cfaabi_dbg_ctx2 );
-    ptrdiff_t rem = heapRemaining - size;
-    if ( rem < 0 ) {
+	lock( extlock __cfaabi_dbg_ctx2 );
+	ptrdiff_t rem = heapRemaining - size;
+	if ( rem < 0 ) {
 		// If the size requested is bigger than the current remaining storage, increase the size of the heap.
 
@@ -514,22 +530,22 @@
 #endif // __CFA_DEBUG__
 		rem = heapRemaining + increase - size;
-    } // if
-
-    HeapManager.Storage * block = (HeapManager.Storage *)heapEnd;
-    heapRemaining = rem;
-    heapEnd = (char *)heapEnd + size;
-    unlock( extlock );
-    return block;
+	} // if
+
+	HeapManager.Storage * block = (HeapManager.Storage *)heapEnd;
+	heapRemaining = rem;
+	heapEnd = (char *)heapEnd + size;
+	unlock( extlock );
+	return block;
 } // extend
 
 
 static inline void * doMalloc( size_t size ) with ( heapManager ) {
-    HeapManager.Storage * block;
-
-    // Look up size in the size list.  Make sure the user request includes space for the header that must be allocated
-    // along with the block and is a multiple of the alignment size.
-
-    size_t tsize = size + sizeof(HeapManager.Storage);
-    if ( likely( tsize < mmapStart ) ) {				// small size => sbrk
+	HeapManager.Storage * block;
+
+	// Look up size in the size list.  Make sure the user request includes space for the header that must be allocated
+	// along with the block and is a multiple of the alignment size.
+
+	size_t tsize = size + sizeof(HeapManager.Storage);
+	if ( likely( tsize < mmapStart ) ) {				// small size => sbrk
 		HeapManager.FreeHeader * freeElem =
 			#ifdef FASTLOOKUP
@@ -544,8 +560,8 @@
 
 		#if defined( SPINLOCK )
-		lock( freeElem->lock __cfaabi_dbg_ctx2 );
-		block = freeElem->freeList;						// remove node from stack
+			lock( freeElem->lock __cfaabi_dbg_ctx2 );
+			block = freeElem->freeList;						// remove node from stack
 		#else
-		block = freeElem->freeList.pop();
+			block = freeElem->freeList.pop();
 		#endif // SPINLOCK
 		if ( unlikely( block == 0 ) ) {					// no free block ?
@@ -566,9 +582,9 @@
 
 		block->header.kind.real.home = freeElem;		// pointer back to free list of apropriate size
-    } else {											// large size => mmap
+		} else {											// large size => mmap
 		tsize = libCeiling( tsize, pageSize );			// must be multiple of page size
 		#ifdef __STATISTICS__
-		__atomic_add_fetch( &mmap_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &mmap_storage, tsize, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &mmap_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &mmap_storage, tsize, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 		block = (HeapManager.Storage *)mmap( 0, tsize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, mmapFd, 0 );
@@ -582,21 +598,21 @@
 #endif // __CFA_DEBUG__
 		block->header.kind.real.blockSize = tsize;		// storage size for munmap
-    } // if
-
-    void * area = &(block->data);						// adjust off header to user bytes
+		} // if
+
+		void * area = &(block->data);						// adjust off header to user bytes
 
 	#ifdef __CFA_DEBUG__
-    assert( ((uintptr_t)area & (libAlign() - 1)) == 0 ); // minimum alignment ?
-    __atomic_add_fetch( &allocFree, tsize, __ATOMIC_SEQ_CST );
-	if ( traceHeap() ) {
-		enum { BufferSize = 64 };
-		char helpText[BufferSize];
-		int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", area, size, tsize );
-		// int len = snprintf( helpText, BufferSize, "Malloc %p %zu\n", area, size );
-		__cfaabi_dbg_bits_write( helpText, len );
-	} // if
+			assert( ((uintptr_t)area & (libAlign() - 1)) == 0 ); // minimum alignment ?
+			__atomic_add_fetch( &allocFree, tsize, __ATOMIC_SEQ_CST );
+		if ( traceHeap() ) {
+			enum { BufferSize = 64 };
+			char helpText[BufferSize];
+			int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", area, size, tsize );
+			// int len = snprintf( helpText, BufferSize, "Malloc %p %zu\n", area, size );
+			__cfaabi_dbg_bits_write( helpText, len );
+		} // if
 	#endif // __CFA_DEBUG__
 
-    return area;
+	return area;
 } // doMalloc
 
@@ -604,17 +620,17 @@
 static inline void doFree( void * addr ) with ( heapManager ) {
 	#ifdef __CFA_DEBUG__
-    if ( unlikely( heapManager.heapBegin == 0 ) ) {
-		abort( "doFree( %p ) : internal error, called before heap is initialized.", addr );
-    } // if
+		if ( unlikely( heapManager.heapBegin == 0 ) ) {
+			abort( "doFree( %p ) : internal error, called before heap is initialized.", addr );
+		} // if
 	#endif // __CFA_DEBUG__
 
-    HeapManager.Storage.Header * header;
-    HeapManager.FreeHeader * freeElem;
-    size_t size, alignment;								// not used (see realloc)
-
-    if ( headers( "free", addr, header, freeElem, size, alignment ) ) { // mmapped ?
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &munmap_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &munmap_storage, size, __ATOMIC_SEQ_CST );
+	HeapManager.Storage.Header * header;
+	HeapManager.FreeHeader * freeElem;
+	size_t size, alignment;								// not used (see realloc)
+
+	if ( headers( "free", addr, header, freeElem, size, alignment ) ) { // mmapped ?
+		#ifdef __STATISTICS__
+			__atomic_add_fetch( &munmap_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &munmap_storage, size, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 		if ( munmap( header, size ) == -1 ) {
@@ -625,30 +641,30 @@
 			#endif // __CFA_DEBUG__
 		} // if
-    } else {
+		} else {
 		#ifdef __CFA_DEBUG__
-		// Set free memory to garbage so subsequent usages might fail.
-		memset( ((HeapManager.Storage *)header)->data, '\377', freeElem->blockSize - sizeof( HeapManager.Storage ) );
+			// Set free memory to garbage so subsequent usages might fail.
+			memset( ((HeapManager.Storage *)header)->data, '\377', freeElem->blockSize - sizeof( HeapManager.Storage ) );
 		#endif // __CFA_DEBUG__
 
 		#ifdef __STATISTICS__
-		free_storage += size;
+			free_storage += size;
 		#endif // __STATISTICS__
 		#if defined( SPINLOCK )
-		lock( freeElem->lock __cfaabi_dbg_ctx2 );		// acquire spin lock
-		header->kind.real.next = freeElem->freeList;	// push on stack
-		freeElem->freeList = (HeapManager.Storage *)header;
-		unlock( freeElem->lock );						// release spin lock
+			lock( freeElem->lock __cfaabi_dbg_ctx2 );		// acquire spin lock
+			header->kind.real.next = freeElem->freeList;	// push on stack
+			freeElem->freeList = (HeapManager.Storage *)header;
+			unlock( freeElem->lock );						// release spin lock
 		#else
-		freeElem->freeList.push( *(HeapManager.Storage *)header );
+			freeElem->freeList.push( *(HeapManager.Storage *)header );
 		#endif // SPINLOCK
-    } // if
+		} // if
 
 	#ifdef __CFA_DEBUG__
-    __atomic_add_fetch( &allocFree, -size, __ATOMIC_SEQ_CST );
-    if ( traceHeap() ) {
-		char helpText[64];
-		int len = snprintf( helpText, sizeof(helpText), "Free( %p ) size:%zu\n", addr, size );
-		__cfaabi_dbg_bits_write( helpText, len );
-    } // if
+		__atomic_add_fetch( &allocFree, -size, __ATOMIC_SEQ_CST );
+		if ( traceHeap() ) {
+			char helpText[64];
+			int len = snprintf( helpText, sizeof(helpText), "Free( %p ) size:%zu\n", addr, size );
+			__cfaabi_dbg_bits_write( helpText, len );
+		} // if
 	#endif // __CFA_DEBUG__
 } // doFree
@@ -656,14 +672,15 @@
 
 size_t checkFree( HeapManager & manager ) with ( manager ) {
-    size_t total = 0;
+	size_t total = 0;
 	#ifdef __STATISTICS__
-    __cfaabi_dbg_bits_acquire();
-    __cfaabi_dbg_bits_print_nolock( "\nBin lists (bin size : free blocks on list)\n" );
+		__cfaabi_dbg_bits_acquire();
+		__cfaabi_dbg_bits_print_nolock( "\nBin lists (bin size : free blocks on list)\n" );
 	#endif // __STATISTICS__
-    for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {
+	for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {
 		size_t size = freeLists[i].blockSize;
 		#ifdef __STATISTICS__
 		unsigned int N = 0;
 		#endif // __STATISTICS__
+
 		#if defined( SPINLOCK )
 		for ( HeapManager.Storage * p = freeLists[i].freeList; p != 0; p = p->header.kind.real.next ) {
@@ -675,23 +692,24 @@
 			N += 1;
 			#endif // __STATISTICS__
-	    } // for
-		#ifdef __STATISTICS__
-	    __cfaabi_dbg_bits_print_nolock( "%7zu, %-7u  ", size, N );
-	    if ( (i + 1) % 8 == 0 ) __cfaabi_dbg_bits_print_nolock( "\n" );
+		} // for
+
+		#ifdef __STATISTICS__
+			__cfaabi_dbg_bits_print_nolock( "%7zu, %-7u  ", size, N );
+			if ( (i + 1) % 8 == 0 ) __cfaabi_dbg_bits_print_nolock( "\n" );
 		#endif // __STATISTICS__
 	} // for
 	#ifdef __STATISTICS__
-	__cfaabi_dbg_bits_print_nolock( "\ntotal free blocks:%zu\n", total );
-	__cfaabi_dbg_bits_release();
+		__cfaabi_dbg_bits_print_nolock( "\ntotal free blocks:%zu\n", total );
+		__cfaabi_dbg_bits_release();
 	#endif // __STATISTICS__
 	return (char *)heapEnd - (char *)heapBegin - total;
 } // checkFree
 
-
+// #comment TD : This is not a good name, plus this feels like it could easily be folded into doMalloc
 static inline void * malloc2( size_t size ) {			// necessary for malloc statistics
 	assert( heapManager.heapBegin != 0 );
-    void * area = doMalloc( size );
-    if ( unlikely( area == 0 ) ) errno = ENOMEM;		// POSIX
-    return area;
+	void * area = doMalloc( size );
+	if ( unlikely( area == 0 ) ) errno = ENOMEM;		// POSIX
+	return area;
 } // malloc2
 
@@ -699,58 +717,67 @@
 static inline void * memalign2( size_t alignment, size_t size ) { // necessary for malloc statistics
 #ifdef __CFA_DEBUG__
-    checkAlign( alignment );							// check alignment
+	checkAlign( alignment );							// check alignment
 #endif // __CFA_DEBUG__
 
-    // if alignment <= default alignment, do normal malloc as two headers are unnecessary
-    if ( unlikely( alignment <= libAlign() ) ) return malloc2( size );
-
-    // Allocate enough storage to guarantee an address on the alignment boundary, and sufficient space before it for
-    // administrative storage. NOTE, WHILE THERE ARE 2 HEADERS, THE FIRST ONE IS IMPLICITLY CREATED BY DOMALLOC.
-    //      .-------------v-----------------v----------------v----------,
-    //      | Real Header | ... padding ... |   Fake Header  | data ... |
-    //      `-------------^-----------------^-+--------------^----------'
-    //      |<--------------------------------' offset/align |<-- alignment boundary
-
-    // subtract libAlign() because it is already the minimum alignment
-    // add sizeof(Storage) for fake header
-    char * area = (char *)doMalloc( size + alignment - libAlign() + sizeof(HeapManager.Storage) );
-    if ( unlikely( area == 0 ) ) return area;
-
-    // address in the block of the "next" alignment address
-    char * user = (char *)libCeiling( (uintptr_t)(area + sizeof(HeapManager.Storage)), alignment );
-
-    // address of header from malloc
-    HeapManager.Storage.Header * realHeader = headerAddr( area );
-    // address of fake header * before* the alignment location
-    HeapManager.Storage.Header * fakeHeader = headerAddr( user );
-    // SKULLDUGGERY: insert the offset to the start of the actual storage block and remember alignment
-    fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)realHeader;
-    // SKULLDUGGERY: odd alignment imples fake header
-    fakeHeader->kind.fake.alignment = alignment | 1;
-
-    return user;
+	// if alignment <= default alignment, do normal malloc as two headers are unnecessary
+	if ( unlikely( alignment <= libAlign() ) ) return malloc2( size );
+
+	// Allocate enough storage to guarantee an address on the alignment boundary, and sufficient space before it for
+	// administrative storage. NOTE, WHILE THERE ARE 2 HEADERS, THE FIRST ONE IS IMPLICITLY CREATED BY DOMALLOC.
+	//      .-------------v-----------------v----------------v----------,
+	//      | Real Header | ... padding ... |   Fake Header  | data ... |
+	//      `-------------^-----------------^-+--------------^----------'
+	//      |<--------------------------------' offset/align |<-- alignment boundary
+
+	// subtract libAlign() because it is already the minimum alignment
+	// add sizeof(Storage) for fake header
+	// #comment TD : this is the only place that calls doMalloc without calling malloc2, why ?
+	char * area = (char *)doMalloc( size + alignment - libAlign() + sizeof(HeapManager.Storage) );
+	if ( unlikely( area == 0 ) ) return area;
+
+	// address in the block of the "next" alignment address
+	char * user = (char *)libCeiling( (uintptr_t)(area + sizeof(HeapManager.Storage)), alignment );
+
+	// address of header from malloc
+	HeapManager.Storage.Header * realHeader = headerAddr( area );
+	// address of fake header * before* the alignment location
+	HeapManager.Storage.Header * fakeHeader = headerAddr( user );
+	// SKULLDUGGERY: insert the offset to the start of the actual storage block and remember alignment
+	fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)realHeader;
+	// SKULLDUGGERY: odd alignment implies fake header
+	fakeHeader->kind.fake.alignment = alignment | 1;
+
+	return user;
 } // memalign2
 
 
 extern "C" {
-    void * malloc( size_t size ) {
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &malloc_storage, size, __ATOMIC_SEQ_CST );
+	// The malloc() function allocates size bytes and returns a pointer to the
+	// allocated memory. The memory is not initialized. If size is 0, then malloc()
+	// returns either NULL, or a unique pointer value that can later be successfully
+	// passed to free().
+	void * malloc( size_t size ) {
+		#ifdef __STATISTICS__
+			__atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &malloc_storage, size, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 
 		return malloc2( size );
-    } // malloc
-
-
-    void * calloc( size_t noOfElems, size_t elemSize ) {
+		} // malloc
+
+	// The calloc() function allocates memory for an array of nmemb elements of
+	// size bytes each and returns a pointer to the allocated memory. The memory
+	// is set to zero. If nmemb or size is 0, then calloc() returns either NULL,
+	// or a unique pointer value that can later be successfully passed to free().
+		void * calloc( size_t noOfElems, size_t elemSize ) {
 		size_t size = noOfElems * elemSize;
 		#ifdef __STATISTICS__
-		__atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &calloc_storage, size, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &calloc_storage, size, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 
 		char * area = (char *)malloc2( size );
 		if ( unlikely( area == 0 ) ) return 0;
+
 		HeapManager.Storage.Header * header;
 		HeapManager.FreeHeader * freeElem;
@@ -762,14 +789,15 @@
 		#endif // __CFA_DEBUG__
 			memset( area, '\0', asize - sizeof(HeapManager.Storage) ); // set to zeros
+
 		header->kind.real.blockSize |= 2;		// mark as zero filled
 		return area;
-    } // calloc
-
-
-    void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {
+		} // calloc
+
+	// #comment TD : Document this function
+	void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {
 		size_t size = noOfElems * elemSize;
 		#ifdef __STATISTICS__
-		__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 
@@ -788,10 +816,18 @@
 
 		return area;
-    } // cmemalign
-
-
-    void * realloc( void * addr, size_t size ) {
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
+		} // cmemalign
+
+	// The realloc() function changes the size of the memory block pointed to by
+	// ptr to size bytes. The contents will be unchanged in the range from the
+	// start of the region up to the minimum of the old and new sizes. If the new
+	// size is larger than the old size, the added memory will not be initialized.
+	// If ptr is NULL, then the call is equivalent to malloc(size), for all values
+	// of size; if size is equal to zero, and ptr is not NULL, then the call is
+	// equivalent to free(ptr). Unless ptr is NULL, it must have been returned by
+	// an earlier call to malloc(), calloc() or realloc(). If the area pointed to
+	// was moved, a free(ptr) is done.
+		void * realloc( void * addr, size_t size ) {
+		#ifdef __STATISTICS__
+			__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 
@@ -812,5 +848,5 @@
 
 		#ifdef __STATISTICS__
-		__atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 
@@ -835,8 +871,12 @@
 		free( addr );
 		return area;
-    } // realloc
-
-
-    void * memalign( size_t alignment, size_t size ) {
+	} // realloc
+
+
+	// The obsolete function memalign() allocates size bytes and returns
+	// a pointer to the allocated memory. The memory address will be a
+	// multiple of alignment, which must be a power of two.
+	void * memalign( size_t alignment, size_t size ) __attribute__ ((deprecated));
+		void * memalign( size_t alignment, size_t size ) {
 		#ifdef __STATISTICS__
 		__atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST );
@@ -847,37 +887,53 @@
 
 		return area;
-    } // memalign
-
-
-    void * aligned_alloc( size_t alignment, size_t size ) {
+		} // memalign
+
+	// The function aligned_alloc() is the same as memalign(), except for
+	// the added restriction that size should be a multiple of alignment.
+	void * aligned_alloc( size_t alignment, size_t size ) {
 		return memalign( alignment, size );
-    } // aligned_alloc
-
-
-    int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
+	} // aligned_alloc
+
+
+	// The function posix_memalign() allocates size bytes and places the address
+	// of the allocated memory in *memptr. The address of the allocated memory
+	// will be a multiple of alignment, which must be a power of two and a multiple
+	// of sizeof(void *). If size is 0, then posix_memalign() returns either NULL,
+	// or a unique pointer value that can later be successfully passed to free(3).
+	int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
 		if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) return EINVAL; // check alignment
 		* memptr = memalign( alignment, size );
 		if ( unlikely( * memptr == 0 ) ) return ENOMEM;
 		return 0;
-    } // posix_memalign
-
-
-    void * valloc( size_t size ) {
+	} // posix_memalign
+
+	// The obsolete function valloc() allocates size bytes and returns a pointer
+	// to the allocated memory. The memory address will be a multiple of the page size.
+	// It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
+	void * valloc( size_t size ) __attribute__ ((deprecated));
+	void * valloc( size_t size ) {
 		return memalign( pageSize, size );
-    } // valloc
-
-
-    void free( void * addr ) {
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
+	} // valloc
+
+
+	// The free() function frees the memory space pointed to by ptr, which must
+	// have been returned by a previous call to malloc(), calloc() or realloc().
+	// Otherwise, or if free(ptr) has already been called before, undefined
+	// behavior occurs. If ptr is NULL, no operation is performed.
+	void free( void * addr ) {
+		#ifdef __STATISTICS__
+			__atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		// #comment TD : To decrease nesting I would put the special case in the
+		//               else instead, plus it reads more naturally to have the
+		//               short / normal case instead
 		if ( unlikely( addr == 0 ) ) {					// special case
 			#ifdef __CFA_DEBUG__
-			if ( traceHeap() ) {
-				#define nullmsg "Free( 0x0 ) size:0\n"
-				// Do not debug print free( 0 ), as it can cause recursive entry from sprintf.
-				__cfaabi_dbg_bits_write( nullmsg, sizeof(nullmsg) - 1 );
-			} // if
+				if ( traceHeap() ) {
+					#define nullmsg "Free( 0x0 ) size:0\n"
+					// Do not debug print free( 0 ), as it can cause recursive entry from sprintf.
+					__cfaabi_dbg_bits_write( nullmsg, sizeof(nullmsg) - 1 );
+				} // if
 			#endif // __CFA_DEBUG__
 			return;
@@ -885,26 +941,35 @@
 
 		doFree( addr );
-    } // free
-
-
-    int mallopt( int option, int value ) {
+	} // free
+
+	// The mallopt() function adjusts parameters that control the behavior of the
+	// memory-allocation functions (see malloc(3)). The option argument specifies
+	// the parameter to be modified, and value specifies the new value for that
+	// parameter.
+		int mallopt( int option, int value ) {
 		choose( option ) {
-		  case M_TOP_PAD:
-			if ( setHeapExpand( value ) ) fallthru default;
-		  case M_MMAP_THRESHOLD:
-			if ( setMmapStart( value ) ) fallthru default;
-		  default:
-			return 1;									// success, or unsupported
+			case M_TOP_PAD:
+				if ( setHeapExpand( value ) ) fallthru default;
+			case M_MMAP_THRESHOLD:
+				if ( setMmapStart( value ) ) fallthru default;
+			default:
+				// #comment TD : 1 for unsupported feels wrong
+				return 1;									// success, or unsupported
 		} // switch
 		return 0;										// error
-    } // mallopt
-
-
+	} // mallopt
+
+	// The malloc_trim() function attempts to release free memory at the top
+	// of the heap (by calling sbrk(2) with a suitable argument).
 	int malloc_trim( size_t ) {
 		return 0;										// => impossible to release memory
 	} // malloc_trim
 
-    size_t malloc_usable_size( void * addr ) {
+	// The malloc_usable_size() function returns the number of usable bytes in the
+	// block pointed to by ptr, a pointer to a block of memory allocated by
+	// malloc(3) or a related function.
+		size_t malloc_usable_size( void * addr ) {
 		if ( unlikely( addr == 0 ) ) return 0;			// null allocation has 0 size
+
 		HeapManager.Storage.Header * header;
 		HeapManager.FreeHeader * freeElem;
@@ -914,8 +979,9 @@
 		size_t usize = size - ( (char *)addr - (char *)header ); // compute the amount of user storage in the block
 		return usize;
-    } // malloc_usable_size
-
-
-    size_t malloc_alignment( void * addr ) {
+	} // malloc_usable_size
+
+
+		// #comment TD : Document this function
+	size_t malloc_alignment( void * addr ) {
 		if ( unlikely( addr == 0 ) ) return libAlign();	// minimum alignment
 		HeapManager.Storage.Header * header = (HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) );
@@ -925,9 +991,11 @@
 			return libAlign ();							// minimum alignment
 		} // if
-    } // malloc_alignment
-
-
-    bool malloc_zero_fill( void * addr ) {
+		} // malloc_alignment
+
+
+		// #comment TD : Document this function
+	bool malloc_zero_fill( void * addr ) {
 		if ( unlikely( addr == 0 ) ) return false;		// null allocation is not zero fill
+
 		HeapManager.Storage.Header * header = (HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) );
 		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
@@ -935,26 +1003,28 @@
 		} // if
 		return (header->kind.real.blockSize & 2) != 0;	// zero filled (calloc/cmemalign) ?
-    } // malloc_zero_fill
-
-
-    void malloc_stats( void ) {
-		#ifdef __STATISTICS__
-		printStats();
-		if ( checkFree() ) checkFree( heapManager );
-		#endif // __STATISTICS__
-    } // malloc_stats
-
-
-    int malloc_stats_fd( int fd ) {
-		#ifdef __STATISTICS__
-		int temp = statfd;
-		statfd = fd;
-		return temp;
+		} // malloc_zero_fill
+
+
+	// #comment TD : Document this function
+	void malloc_stats( void ) {
+		#ifdef __STATISTICS__
+			printStats();
+			if ( checkFree() ) checkFree( heapManager );
+		#endif // __STATISTICS__
+		} // malloc_stats
+
+	// #comment TD : Document this function
+		int malloc_stats_fd( int fd ) {
+		#ifdef __STATISTICS__
+			int temp = statfd;
+			statfd = fd;
+			return temp;
 		#else
-		return -1;
-		#endif // __STATISTICS__
-    } // malloc_stats_fd
-
-
+			return -1;
+		#endif // __STATISTICS__
+		} // malloc_stats_fd
+
+
+	// #comment TD : Document this function
 	int malloc_info( int options, FILE * stream ) {
 		return printStatsXML( stream );
@@ -962,8 +1032,8 @@
 
 
+	// #comment TD : What are these two functions for?
 	void * malloc_get_state( void ) {
 		return 0;
 	} // malloc_get_state
-
 
 	int malloc_set_state( void * ptr ) {
