Index: libcfa/src/containers/stackLockFree.hfa
===================================================================
--- libcfa/src/containers/stackLockFree.hfa	(revision 9c438546124986f69543f60d68dfa32a64f1ad68)
+++ libcfa/src/containers/stackLockFree.hfa	(revision 9c438546124986f69543f60d68dfa32a64f1ad68)
@@ -0,0 +1,64 @@
+// 
+// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// stackLockFree.hfa -- Lock-free intrusive stack using compare-and-swap on a counted top pointer
+// 
+// Author           : Peter A. Buhr
+// Created On       : Wed May 13 20:58:58 2020
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Sun May 17 20:53:37 2020
+// Update Count     : 48
+// 
+
+#pragma once
+
+#include <stdint.h>
+
+forall( dtype T )
+union Link {									// stack-top word: viewed as a {top, count} pair or as one integer for CAS
+	struct {									// 32/64-bit x 2
+		T * top;								// pointer to stack top
+		uintptr_t count;						// count each push
+	};
+	#if __SIZEOF_INT128__ == 16					// compiler predefine; _GLIBCXX_USE_INT128 is a libstdc++ config macro, not a predefine
+	__int128									// gcc, 128-bit integer: spans both fields when they are 64-bit
+	#else
+	uint64_t									// 64-bit integer: spans both fields when they are 32-bit
+	#endif // __SIZEOF_INT128__ == 16
+	atom;										// must overlay top AND count so a CAS on atom also checks the push counter
+}; // Link
+
+forall( otype T | { Link(T) * getNext( T * ); } ) {	// T must supply getNext, returning the address of a node's Link
+    struct StackLF {
+		Link(T) stack;							// current top node plus push count; updated as a unit through stack.atom
+	}; // StackLF
+
+	static inline {
+		void ?{}( StackLF(T) & this ) with(this) { stack.atom = 0; }	// constructor: start with a zeroed atom
+
+		T * top( StackLF(T) & this ) with(this) { return stack.top; }	// peek: plain read of the top pointer, nothing is removed
+
+		void push( StackLF(T) & this, T & n ) with(this) {	// push node n; retries until the CAS on the stack top succeeds
+			for () {									// busy wait
+				*getNext( &n ) = stack;					// copy current top/count into n's link; atomic assignment unnecessary, or use CAA
+				if ( __atomic_compare_exchange_n( &stack.atom, &getNext( &n )->atom, (Link(T))@{ {&n, getNext( &n )->count + 1} }.atom, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) break; // attempt to update top node to {&n, count + 1}
+			} // for
+		} // push
+
+		T * pop( StackLF(T) & this ) with(this) {	// pop and return the top node, or 0p if the stack is empty
+			Link(T) t @= {};
+			for () {									// busy wait
+				t = stack;								// local copy of top/count; atomic assignment unnecessary, or use CAA
+			  if ( t.top == 0p ) return 0p;				// empty stack ?
+			  if ( __atomic_compare_exchange_n( &stack.atom, &t.atom, (Link(T))@{ {getNext( t.top )->top, t.count} }.atom, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) return t.top; // attempt to update top node to {successor of t.top, same count}
+			} // for
+		} // pop
+	} // distribution
+} // distribution
+
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: libcfa/src/heap.cfa
===================================================================
--- libcfa/src/heap.cfa	(revision 2223c8098e0dfae91c6eecb79a740021ef48f3d2)
+++ libcfa/src/heap.cfa	(revision 9c438546124986f69543f60d68dfa32a64f1ad68)
@@ -10,6 +10,6 @@
 // Created On       : Tue Dec 19 21:58:35 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed May  6 17:29:26 2020
-// Update Count     : 727
+// Last Modified On : Sun May 17 20:58:17 2020
+// Update Count     : 762
 //
 
@@ -128,6 +128,9 @@
 #define LOCKFREE 1
 #define BUCKETLOCK SPINLOCK
-#if BUCKETLOCK == LOCKFREE
-#include <uStackLF.h>
+#if BUCKETLOCK == SPINLOCK
+#elif BUCKETLOCK == LOCKFREE
+#include <stackLockFree.hfa>
+#else
+	#error undefined lock type for bucket lock
 #endif // LOCKFREE
 
@@ -137,6 +140,4 @@
 
 struct HeapManager {
-//	struct FreeHeader;									// forward declaration
-
 	struct Storage {
 		struct Header {									// header
@@ -146,31 +147,31 @@
 						struct {						// 4-byte word => 8-byte header, 8-byte word => 16-byte header
 							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
-							uint32_t padding;			// unused, force home/blocksize to overlay alignment in fake header
+							uint64_t padding;			// unused, force home/blocksize to overlay alignment in fake header
 							#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
 
 							union {
-//								FreeHeader * home;		// allocated block points back to home locations (must overlay alignment)
+								// FreeHeader * home;		// allocated block points back to home locations (must overlay alignment)
 								// 2nd low-order bit => zero filled
 								void * home;			// allocated block points back to home locations (must overlay alignment)
 								size_t blockSize;		// size for munmap (must overlay alignment)
-								#if BUCKLOCK == SPINLOCK
+								#if BUCKETLOCK == SPINLOCK
 								Storage * next;			// freed block points next freed block of same size
 								#endif // SPINLOCK
 							};
+							size_t size;				// allocation size in bytes
 
 							#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
-							uint32_t padding;			// unused, force home/blocksize to overlay alignment in fake header
+							uint64_t padding;			// unused, force home/blocksize to overlay alignment in fake header
 							#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
 						};
-						// future code
-						#if BUCKLOCK == LOCKFREE
-						Stack<Storage>::Link next;		// freed block points next freed block of same size (double-wide)
+						#if BUCKETLOCK == LOCKFREE
+						Link(Storage) next;				// freed block points next freed block of same size (double-wide)
 						#endif // LOCKFREE
 					};
 				} real; // RealHeader
+
 				struct FakeHeader {
 					#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-					// 1st low-order bit => fake header & alignment
-					uint32_t alignment;
+					uint32_t alignment;					// 1st low-order bit => fake header & alignment
 					#endif // __ORDER_LITTLE_ENDIAN__
 
@@ -182,5 +183,4 @@
 				} fake; // FakeHeader
 			} kind; // Kind
-			size_t size;								// allocation size in bytes
 		} header; // Header
 		char pad[libAlign() - sizeof( Header )];
@@ -191,13 +191,10 @@
 
 	struct FreeHeader {
-		#if BUCKLOCK == SPINLOCK
+		#if BUCKETLOCK == SPINLOCK
 		__spinlock_t lock;								// must be first field for alignment
 		Storage * freeList;
-		#elif BUCKLOCK == LOCKFREE
-		// future code
-		StackLF<Storage> freeList;
 		#else
-			#error undefined lock type for bucket lock
-		#endif // SPINLOCK
+		StackLF(Storage) freeList;
+		#endif // BUCKETLOCK
 		size_t blockSize;								// size of allocations on this list
 	}; // FreeHeader
@@ -211,4 +208,10 @@
 	size_t heapRemaining;								// amount of storage not allocated in the current chunk
 }; // HeapManager
+
+#if BUCKETLOCK == LOCKFREE
+static inline Link(HeapManager.Storage) * getNext( HeapManager.Storage * this ) { return &this->header.kind.real.next; } // satisfies StackLF's getNext requirement: address of the block's link field
+static inline void ?{}( HeapManager.FreeHeader & ) {}	// empty constructor: performs no initialization
+static inline void ^?{}( HeapManager.FreeHeader & ) {}	// empty destructor: performs no cleanup
+#endif // LOCKFREE
 
 static inline size_t getKey( const HeapManager.FreeHeader & freeheader ) { return freeheader.blockSize; }
@@ -251,4 +254,6 @@
 static bool heapBoot = 0;								// detect recursion during boot
 #endif // __CFA_DEBUG__
+
+// The constructor for heapManager is called explicitly in memory_startup.
 static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
 
@@ -354,11 +359,4 @@
 
 
-// static inline void noMemory() {
-// 	abort( "Heap memory exhausted at %zu bytes.\n"
-// 		   "Possible cause is very large memory allocation and/or large amount of unfreed storage allocated by the program or system/library routines.",
-// 		   ((char *)(sbrk( 0 )) - (char *)(heapManager.heapBegin)) );
-// } // noMemory
-
-
 // thunk problem
 size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
@@ -406,4 +404,11 @@
 
 
+// static inline void noMemory() {
+// 	abort( "Heap memory exhausted at %zu bytes.\n"
+// 		   "Possible cause is very large memory allocation and/or large amount of unfreed storage allocated by the program or system/library routines.",
+// 		   ((char *)(sbrk( 0 )) - (char *)(heapManager.heapBegin)) );
+// } // noMemory
+
+
 static inline void checkAlign( size_t alignment ) {
 	if ( alignment < libAlign() || ! libPow2( alignment ) ) {
@@ -433,5 +438,6 @@
 
 
-static inline bool headers( const char name[] __attribute__(( unused )), void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem, size_t & size, size_t & alignment ) with ( heapManager ) {
+static inline bool headers( const char name[] __attribute__(( unused )), void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem,
+							size_t & size, size_t & alignment ) with( heapManager ) {
 	header = headerAddr( addr );
 
@@ -465,5 +471,5 @@
 
 
-static inline void * extend( size_t size ) with ( heapManager ) {
+static inline void * extend( size_t size ) with( heapManager ) {
 	lock( extlock __cfaabi_dbg_ctx2 );
 	ptrdiff_t rem = heapRemaining - size;
@@ -496,5 +502,5 @@
 
 
-static inline void * doMalloc( size_t size ) with ( heapManager ) {
+static inline void * doMalloc( size_t size ) with( heapManager ) {
 	HeapManager.Storage * block;						// pointer to new block of storage
 
@@ -529,14 +535,14 @@
 		// Spin until the lock is acquired for this particular size of block.
 
-		#if defined( SPINLOCK )
+		#if BUCKETLOCK == SPINLOCK
 		lock( freeElem->lock __cfaabi_dbg_ctx2 );
 		block = freeElem->freeList;						// remove node from stack
 		#else
-		block = freeElem->freeList.pop();
-		#endif // SPINLOCK
+		block = pop( freeElem->freeList );
+		#endif // BUCKETLOCK
 		if ( unlikely( block == 0p ) ) {				// no free block ?
-			#if defined( SPINLOCK )
+			#if BUCKETLOCK == SPINLOCK
 			unlock( freeElem->lock );
-			#endif // SPINLOCK
+			#endif // BUCKETLOCK
 
 			// Freelist for that size was empty, so carve it out of the heap if there's enough left, or get some more
@@ -544,10 +550,10 @@
 
 			block = (HeapManager.Storage *)extend( tsize );	// mutual exclusion on call
-  if ( unlikely( block == 0p ) ) return 0p;
-		#if defined( SPINLOCK )
+	if ( unlikely( block == 0p ) ) return 0p;
+		#if BUCKETLOCK == SPINLOCK
 		} else {
 			freeElem->freeList = block->header.kind.real.next;
 			unlock( freeElem->lock );
-		#endif // SPINLOCK
+		#endif // BUCKETLOCK
 		} // if
 
@@ -572,5 +578,5 @@
 	} // if
 
-	block->header.size = size;							// store allocation size
+	block->header.kind.real.size = size;				// store allocation size
 	void * addr = &(block->data);						// adjust off header to user bytes
 
@@ -591,5 +597,5 @@
 
 
-static inline void doFree( void * addr ) with ( heapManager ) {
+static inline void doFree( void * addr ) with( heapManager ) {
 	#ifdef __CFA_DEBUG__
 	if ( unlikely( heapManager.heapBegin == 0p ) ) {
@@ -623,5 +629,5 @@
 		free_storage += size;
 		#endif // __STATISTICS__
-		#if defined( SPINLOCK )
+		#if BUCKETLOCK == SPINLOCK
 		lock( freeElem->lock __cfaabi_dbg_ctx2 );		// acquire spin lock
 		header->kind.real.next = freeElem->freeList;	// push on stack
@@ -629,6 +635,6 @@
 		unlock( freeElem->lock );						// release spin lock
 		#else
-		freeElem->freeList.push( *(HeapManager.Storage *)header );
-		#endif // SPINLOCK
+		push( freeElem->freeList, *(HeapManager.Storage *)header );
+		#endif // BUCKETLOCK
 	} // if
 
@@ -645,5 +651,5 @@
 
 
-size_t prtFree( HeapManager & manager ) with ( manager ) {
+size_t prtFree( HeapManager & manager ) with( manager ) {
 	size_t total = 0;
 	#ifdef __STATISTICS__
@@ -657,9 +663,11 @@
 		#endif // __STATISTICS__
 
-		#if defined( SPINLOCK )
+		#if BUCKETLOCK == SPINLOCK
 		for ( HeapManager.Storage * p = freeLists[i].freeList; p != 0p; p = p->header.kind.real.next ) {
 		#else
-		for ( HeapManager.Storage * p = freeLists[i].freeList.top(); p != 0p; p = p->header.kind.real.next.top ) {
-		#endif // SPINLOCK
+		for ( HeapManager.Storage * p = top( freeLists[i].freeList ); p != 0p; /* p = getNext( p )->top */) {
+			typeof(p) temp = getNext( p )->top;			// FIX ME: direct assignment fails, initialization works
+			p = temp;
+		#endif // BUCKETLOCK
 			total += size;
 			#ifdef __STATISTICS__
@@ -681,5 +689,5 @@
 
 
-static void ?{}( HeapManager & manager ) with ( manager ) {
+static void ?{}( HeapManager & manager ) with( manager ) {
 	pageSize = sysconf( _SC_PAGESIZE );
 
@@ -1095,5 +1103,5 @@
 			header = realHeader( header );				// backup from fake to real header
 		} // if
-		return header->size;
+		return header->kind.real.size;
 	} // malloc_size
 
@@ -1105,6 +1113,6 @@
 			header = realHeader( header );				// backup from fake to real header
 		} // if
-		size_t ret = header->size;
-		header->size = size;
+		size_t ret = header->kind.real.size;
+		header->kind.real.size = size;
 		return ret;
 	} // $malloc_size_set
