Index: benchmark/io/http/worker.hfa
===================================================================
--- benchmark/io/http/worker.hfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ benchmark/io/http/worker.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -2,5 +2,5 @@
 
 #include <iofwd.hfa>
-#include <queueLockFree.hfa>
+#include <containers/lockfree.hfa>
 #include <thread.hfa>
 
Index: doc/theses/mike_brooks_MMath/programs/hello-md.cfa
===================================================================
--- doc/theses/mike_brooks_MMath/programs/hello-md.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ doc/theses/mike_brooks_MMath/programs/hello-md.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -1,12 +1,3 @@
 #include "array.hfa"
-
-
-trait ix( C &, E &, ztype(N) ) {
-    E & ?[?]( C &, ptrdiff_t );
-    void __taglen( tag(C), tag(N) );
-};
-
-forall( ztype(Zn), ztype(S), Timmed &, Tbase & )
-void __taglen( tag(arpk(Zn, S, Timmed, Tbase)), tag(Zn) ) {}
 
 
@@ -38,8 +29,17 @@
 
 
-forall( ztype( N ) )
+
+
+
+
+
+
+
+
+
+forall( [N] )
 void print1d_cstyle( array(float, N) & c );
 
-forall( C &, ztype( N ) | ix( C, float, N ) )
+forall( [N], C & | ar( C, float, N ) )
 void print1d( C & c );
 
@@ -58,7 +58,7 @@
 
 
-forall( ztype( N ) )
+forall( [N] )
 void print1d_cstyle( array(float, N) & c ) {
-    for( i; z(N) ) {
+    for( i; N ) {
         printf("%.1f  ", c[i]);
     }
@@ -78,7 +78,7 @@
 
 
-forall( C &, ztype( N ) | ix( C, float, N ) )
+forall( [N], C & | ar( C, float, N ) )
 void print1d( C & c ) {
-    for( i; z(N) ) {
+    for( i; N ) {
         printf("%.1f  ", c[i]);
     }
@@ -99,9 +99,9 @@
 
 
-void fill( array(float, Z(5), Z(7)) & a ) {
+void fill( array(float, 5, 7) & a ) {
     for ( i; (ptrdiff_t) 5 ) {
         for ( j; 7 ) {
-            a[[i,j]] = 1.0 * i + 0.1 * j;
-            printf("%.1f  ", a[[i,j]]);
+            a[i,j] = 1.0 * i + 0.1 * j;
+            printf("%.1f  ", a[i,j]);
         }
         printf("\n");
@@ -118,5 +118,5 @@
 
 
-array( float, Z(5), Z(7) ) a;
+array( float, 5, 7 ) a;
 fill(a);
 /*
@@ -148,10 +148,10 @@
 
 
-print1d( a[[ 2, all ]] );  // 2.0  2.1  2.2  2.3  2.4  2.5  2.6
-print1d( a[[ all, 3 ]] );  // 0.3  1.3  2.3  3.3  4.3
+print1d( a[ 2, all ] );  // 2.0  2.1  2.2  2.3  2.4  2.5  2.6
+print1d( a[ all, 3 ] );  // 0.3  1.3  2.3  3.3  4.3
 
 
 
-print1d_cstyle( a[[ 2, all ]] );
+print1d_cstyle( a[ 2, all ] );
 
 
@@ -161,7 +161,7 @@
 
 
-#ifdef SHOWERR1
+#ifdef SHOW_ERROR_1
 
-print1d_cstyle( a[[ all, 2 ]] );  // bad
+print1d_cstyle( a[ all, 2 ] );  // bad
 
 #endif
Index: doc/theses/thierry_delisle_PhD/thesis/text/front.tex
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/text/front.tex	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ doc/theses/thierry_delisle_PhD/thesis/text/front.tex	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -161,5 +161,5 @@
 Thanks to Andrew Beach, Michael Brooks, Colby Parsons, Mubeen Zulfiqar, Fangren Yu and Jiada Liang for their work on the \CFA project as well as all the discussions which have helped me concretize the ideas in this thesis.
 
-Finally, I acknowledge that this has been possible thanks to the financial help offered by the David R. Cheriton School of Computer Science and the corporate partnership with Huawei Ltd.
+Finally, I acknowledge that this has been possible thanks to the financial help offered by the David R. Cheriton School of Computer Science, the corporate partnership with Huawei Ltd., and the Natural Sciences and Engineering Research Council.
 \cleardoublepage
 
Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/Makefile.am	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -62,6 +62,5 @@
 	containers/array.hfa \
 	containers/list.hfa \
-	containers/queueLockFree.hfa \
-	containers/stackLockFree.hfa \
+	containers/lockfree.hfa \
 	containers/string_sharectx.hfa \
 	containers/vector2.hfa \
@@ -112,4 +111,5 @@
 	concurrency/invoke.h \
 	concurrency/future.hfa \
+	concurrency/once.hfa \
 	concurrency/kernel/fwd.hfa \
 	concurrency/mutex_stmt.hfa
@@ -127,4 +127,5 @@
 
 thread_libsrc = ${inst_thread_headers_src} ${inst_thread_headers_src:.hfa=.cfa} \
+	interpose_thread.cfa \
 	bits/signal.hfa \
 	concurrency/clib/cfathread.cfa \
@@ -145,5 +146,6 @@
 	concurrency/stats.cfa \
 	concurrency/stats.hfa \
-	concurrency/stats.hfa
+	concurrency/stats.hfa \
+	concurrency/pthread.cfa
 
 else
Index: libcfa/src/bits/containers.hfa
===================================================================
--- libcfa/src/bits/containers.hfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/bits/containers.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -152,4 +152,5 @@
 
 		void append( __queue(T) & this, T * val ) with(this) {
+			verify(get_next( *val ) == 0p);
 			verify(this.tail != 0p);
 			verify(*this.tail == 1p);
Index: libcfa/src/bits/defs.hfa
===================================================================
--- libcfa/src/bits/defs.hfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/bits/defs.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -30,8 +30,6 @@
 #ifdef __cforall
 #define __cfa_anonymous_object(x) inline struct x
-#define __cfa_dlink(x) inline dlink(x)
 #else
 #define __cfa_anonymous_object(x) struct x __cfa_anonymous_object
-#define __cfa_dlink(x) struct { struct x * next; struct x * back; } __dlink_substitute
 #endif
 
Index: libcfa/src/concurrency/clib/cfathread.cfa
===================================================================
--- libcfa/src/concurrency/clib/cfathread.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/clib/cfathread.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -172,9 +172,9 @@
 
 		pthread_attr_t attr;
-		if (int ret = pthread_attr_init(&attr); 0 != ret) {
+		if (int ret = __cfaabi_pthread_attr_init(&attr); 0 != ret) {
 			abort | "failed to create master epoll thread attr: " | ret | strerror(ret);
 		}
 
-		if (int ret = pthread_create(&master_poller, &attr, master_epoll, 0p); 0 != ret) {
+		if (int ret = __cfaabi_pthread_create(&master_poller, &attr, master_epoll, 0p); 0 != ret) {
 			abort | "failed to create master epoll thread: " | ret | strerror(ret);
 		}
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/invoke.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -146,9 +146,20 @@
 
 	// Link lists fields
-	// instrusive link field for threads
+	// intrusive link field for threads in the ready-queue
 	struct __thread_desc_link {
 		struct thread$ * next;
 		volatile unsigned long long ts;
 	};
+
+	// Link list fields
+	// intrusive doubly-linked field, used for the user_link and cltr_link members of thread$
+	struct __thread_user_link {
+		#ifdef __cforall
+			inline dlink(thread$);  // Cforall view: embedded dlist link
+		#else
+			struct thread$ * next; struct thread$ * back;  // plain-C view: two raw pointers
+		#endif
+	};
+	_Static_assert(sizeof(struct __thread_user_link) == 2 * sizeof(struct thread$ *), "__thread_user_link should be consistent in C and Cforall");
 
 	struct thread$ {
@@ -159,5 +170,5 @@
 		// Link lists fields
 		// instrusive link field for threads
-		struct __thread_desc_link link;
+		struct __thread_desc_link rdy_link;
 
 		// current execution status for coroutine
@@ -195,11 +206,11 @@
 		struct __monitor_group_t monitors;
 
-		// used to put threads on dlist data structure
-		__cfa_dlink(thread$);
-
-		struct {
-			struct thread$ * next;
-			struct thread$ * prev;
-		} node;
+		// intrusive link fields, used for locks, monitors and any user defined data structure
+		// default link fields for dlist
+		struct __thread_user_link user_link;
+
+		// secondary intrusive link fields, used for global cluster list
+		// default link fields for dlist
+		struct __thread_user_link cltr_link;
 
 		// used to store state between clh lock/unlock
@@ -214,10 +225,9 @@
 
 		#if defined( __CFA_WITH_VERIFY__ )
+			struct processor * volatile executing;
 			void * canary;
 		#endif
 	};
-	#ifdef __cforall
-		P9_EMBEDDED( thread$, dlink(thread$) )
-	#endif
+
 	// Wrapper for gdb
 	struct cfathread_thread_t { struct thread$ debug; };
@@ -231,12 +241,26 @@
 	#ifdef __cforall
 	extern "Cforall" {
+		static inline thread$ * volatile & ?`next ( thread$ * this ) {  // postfix `next: reference to the thread's user_link.next pointer
+			return this->user_link.next;
+		}
 
 		static inline thread$ *& get_next( thread$ & this ) __attribute__((const)) {
-			return this.link.next;
-		}
-
-		static inline [thread$ *&, thread$ *& ] __get( thread$ & this ) __attribute__((const)) {
-			return this.node.[next, prev];
-		}
+			return this.user_link.next;
+		}
+
+		static inline tytagref( dlink(thread$), dlink(thread$) ) ?`inner( thread$ & this ) {  // dlink-tagged overload: exposes the user_link field
+			dlink(thread$) & b = this.user_link;
+			tytagref( dlink(thread$), dlink(thread$) ) result = { b };
+			return result;
+		}
+
+		static inline tytagref(struct __thread_user_link, dlink(thread$)) ?`inner( struct thread$ & this ) {  // __thread_user_link-tagged overload: exposes the cltr_link field
+			struct __thread_user_link & ib = this.cltr_link;
+			dlink(thread$) & b = ib`inner;  // unwrap to the dlink(thread$) held by the link struct
+			tytagref(struct __thread_user_link, dlink(thread$)) result = { b };
+			return result;
+		}
+
+		P9_EMBEDDED(struct __thread_user_link, dlink(thread$))
 
 		static inline void ?{}(__monitor_group_t & this) {
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/io.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -610,5 +610,5 @@
 		if( we ) {
 			sigval_t value = { PREEMPT_IO };
-			pthread_sigqueue(ctx->proc->kernel_thread, SIGUSR1, value);
+			__cfaabi_pthread_sigqueue(ctx->proc->kernel_thread, SIGUSR1, value);
 		}
 
@@ -639,91 +639,3 @@
 		}
 	}
-
-	#if defined(CFA_WITH_IO_URING_IDLE)
-		bool __kernel_read(struct processor * proc, io_future_t & future, iovec & iov, int fd) {
-			io_context$ * ctx = proc->io.ctx;
-			/* paranoid */ verify( ! __preemption_enabled() );
-			/* paranoid */ verify( proc == __cfaabi_tls.this_processor );
-			/* paranoid */ verify( ctx );
-
-			__u32 idx;
-			struct io_uring_sqe * sqe;
-
-			// We can proceed to the fast path
-			if( !__alloc(ctx, &idx, 1) ) {
-				/* paranoid */ verify( false ); // for now check if this happens, next time just abort the sleep.
-				return false;
-			}
-
-			// Allocation was successful
-			__fill( &sqe, 1, &idx, ctx );
-
-			sqe->user_data = (uintptr_t)&future;
-			sqe->flags = 0;
-			sqe->fd = fd;
-			sqe->off = 0;
-			sqe->ioprio = 0;
-			sqe->fsync_flags = 0;
-			sqe->__pad2[0] = 0;
-			sqe->__pad2[1] = 0;
-			sqe->__pad2[2] = 0;
-
-			#if defined(CFA_HAVE_IORING_OP_READ)
-				sqe->opcode = IORING_OP_READ;
-				sqe->addr = (uint64_t)iov.iov_base;
-				sqe->len = iov.iov_len;
-			#elif defined(CFA_HAVE_READV) && defined(CFA_HAVE_IORING_OP_READV)
-				sqe->opcode = IORING_OP_READV;
-				sqe->addr = (uintptr_t)&iov;
-				sqe->len = 1;
-			#else
-				#error CFA_WITH_IO_URING_IDLE but none of CFA_HAVE_READV, CFA_HAVE_IORING_OP_READV or CFA_HAVE_IORING_OP_READ defined
-			#endif
-
-			asm volatile("": : :"memory");
-
-			/* paranoid */ verify( sqe->user_data == (uintptr_t)&future );
-			__submit_only( ctx, &idx, 1 );
-
-			/* paranoid */ verify( proc == __cfaabi_tls.this_processor );
-			/* paranoid */ verify( ! __preemption_enabled() );
-
-			return true;
-		}
-
-		void __cfa_io_idle( struct processor * proc ) {
-			iovec iov;
-			__atomic_acquire( &proc->io.ctx->cq.lock );
-
-			__attribute__((used)) volatile bool was_reset = false;
-
-			with( proc->idle_wctx) {
-
-				// Do we already have a pending read
-				if(available(*ftr)) {
-					// There is no pending read, we need to add one
-					reset(*ftr);
-
-					iov.iov_base = rdbuf;
-					iov.iov_len  = sizeof(eventfd_t);
-					__kernel_read(proc, *ftr, iov, evfd );
-					ftr->result = 0xDEADDEAD;
-					*((eventfd_t *)rdbuf) = 0xDEADDEADDEADDEAD;
-					was_reset = true;
-				}
-			}
-
-			if( !__atomic_load_n( &proc->do_terminate, __ATOMIC_SEQ_CST ) ) {
-				__ioarbiter_flush( *proc->io.ctx );
-				proc->idle_wctx.sleep_time = rdtscl();
-				ioring_syscsll( *proc->io.ctx, 1, IORING_ENTER_GETEVENTS);
-			}
-
-			ready_schedule_lock();
-			__cfa_do_drain( proc->io.ctx, proc->cltr );
-			ready_schedule_unlock();
-
-			asm volatile ("" :: "m" (was_reset));
-		}
-	#endif
 #endif
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -34,5 +34,4 @@
 	bool __cfa_io_flush( processor * proc ) { return false; }
 	bool __cfa_io_drain( processor * proc ) __attribute__((nonnull (1))) { return false; }
-	void __cfa_io_idle ( processor * ) __attribute__((nonnull (1))) {}
 	void __cfa_io_stop ( processor * proc ) {}
 
@@ -317,45 +316,4 @@
 	}
 
-//=============================================================================================
-// I/O Context Sleep
-//=============================================================================================
-	// static inline void __epoll_ctl(io_context$ & ctx, int op, const char * error) {
-	// 	struct epoll_event ev;
-	// 	ev.events = EPOLLIN | EPOLLONESHOT;
-	// 	ev.data.u64 = (__u64)&ctx;
-	// 	int ret = epoll_ctl(iopoll.epollfd, op, ctx.efd, &ev);
-	// 	if (ret < 0) {
-	// 		abort( "KERNEL ERROR: EPOLL %s - (%d) %s\n", error, (int)errno, strerror(errno) );
-	// 	}
-	// }
-
-	// static void __epoll_register(io_context$ & ctx) {
-	// 	__epoll_ctl(ctx, EPOLL_CTL_ADD, "ADD");
-	// }
-
-	// static void __epoll_unregister(io_context$ & ctx) {
-	// 	// Read the current epoch so we know when to stop
-	// 	size_t curr = __atomic_load_n(&iopoll.epoch, __ATOMIC_SEQ_CST);
-
-	// 	// Remove the fd from the iopoller
-	// 	__epoll_ctl(ctx, EPOLL_CTL_DEL, "REMOVE");
-
-	// 	// Notify the io poller thread of the shutdown
-	// 	iopoll.run = false;
-	// 	sigval val = { 1 };
-	// 	pthread_sigqueue( iopoll.thrd, SIGUSR1, val );
-
-	// 	// Make sure all this is done
-	// 	__atomic_thread_fence(__ATOMIC_SEQ_CST);
-
-	// 	// Wait for the next epoch
-	// 	while(curr == iopoll.epoch && !iopoll.stopped) Pause();
-	// }
-
-	// void __ioctx_prepare_block(io_context$ & ctx) {
-	// 	__cfadbg_print_safe(io_core, "Kernel I/O - epoll : Re-arming io poller %d (%p)\n", ctx.fd, &ctx);
-	// 	__epoll_ctl(ctx, EPOLL_CTL_MOD, "REARM");
-	// }
-
 
 //=============================================================================================
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/kernel.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -138,9 +138,5 @@
 extern bool __cfa_io_drain( processor * proc ) __attribute__((nonnull (1)));
 extern bool __cfa_io_flush( processor * ) __attribute__((nonnull (1)));
-extern void __cfa_io_idle( processor * ) __attribute__((nonnull (1)));
-
-#if defined(CFA_WITH_IO_URING_IDLE)
-	extern bool __kernel_read(processor * proc, io_future_t & future, iovec &, int fd);
-#endif
+
 
 extern void __disable_interrupts_hard();
@@ -162,11 +158,4 @@
 	verify(this);
 
-	/* paranoid */ verify( this->idle_wctx.ftr   != 0p );
-	/* paranoid */ verify( this->idle_wctx.rdbuf != 0p );
-
-	// used for idle sleep when io_uring is present
-	// mark it as already fulfilled so we know if there is a pending request or not
-	this->idle_wctx.ftr->self.ptr = 1p;
-
 	__cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
 	#if !defined(__CFA_NO_STATISTICS__)
@@ -291,5 +280,5 @@
 	/* paranoid */ verify( ! __preemption_enabled() );
 	/* paranoid */ verifyf( thrd_dst->state == Ready || thrd_dst->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", thrd_dst->state, thrd_dst->preempted);
-	/* paranoid */ verifyf( thrd_dst->link.next == 0p, "Expected null got %p", thrd_dst->link.next );
+	/* paranoid */ verifyf( thrd_dst->rdy_link.next == 0p, "Expected null got %p", thrd_dst->rdy_link.next );
 	__builtin_prefetch( thrd_dst->context.SP );
 
@@ -321,4 +310,5 @@
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->curr_cor == proc_cor || thrd_dst->corctx_flag, "ERROR : Destination thread$ %p has been corrupted.\n StackPointer too small.\n", thrd_dst ); // add escape condition if we are setting up the processor
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->curr_cor == proc_cor || thrd_dst->corctx_flag, "ERROR : Destination thread$ %p has been corrupted.\n StackPointer too large.\n", thrd_dst ); // add escape condition if we are setting up the processor
+		/* paranoid */ verify( __atomic_exchange_n( &thrd_dst->executing, this, __ATOMIC_SEQ_CST) == 0p );
 		/* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_dst->canary );
 
@@ -332,8 +322,9 @@
 
 		/* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_dst->canary );
+		/* paranoid */ verify( __atomic_exchange_n( &thrd_dst->executing, 0p, __ATOMIC_SEQ_CST) == this );
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->corctx_flag, "ERROR : Destination thread$ %p has been corrupted.\n StackPointer too large.\n", thrd_dst );
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->corctx_flag, "ERROR : Destination thread$ %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
+		/* paranoid */ verify( thrd_dst->state != Halted );
 		/* paranoid */ verify( thrd_dst->context.SP );
-		/* paranoid */ verify( thrd_dst->curr_cluster == this->cltr );
 		/* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
 		/* paranoid */ verify( ! __preemption_enabled() );
@@ -452,5 +443,5 @@
 					"Error preempted thread marked as not currently running, state %d, preemption %d\n", thrd->state, thrd->preempted );
 	/* paranoid */ #endif
-	/* paranoid */ verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
+	/* paranoid */ verifyf( thrd->rdy_link.next == 0p, "Expected null got %p", thrd->rdy_link.next );
 	/* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd->canary );
 
@@ -600,8 +591,10 @@
 		/* paranoid */ verifyf( ((uintptr_t)thrd->context.SP) < ((uintptr_t)__get_stack(thrd->curr_cor)->base ), "ERROR : thread$ %p has been corrupted.\n StackPointer too small.\n", thrd );
 
-		thrd->state = Halting;
 		if( TICKET_RUNNING != thrd->ticket ) { abort( "Thread terminated with pending unpark" ); }
 		if( thrd != this->owner ) { abort( "Thread internal monitor has incorrect owner" ); }
 		if( this->recursion != 1) { abort( "Thread internal monitor has unbalanced recursion" ); }
+
+		thrd->state = Halting;
+		thrd->ticket = TICKET_DEAD;
 
 		// Leave the thread
@@ -624,5 +617,5 @@
 		// If that is the case, abandon the preemption.
 		bool preempted = false;
-		if(thrd->link.next == 0p) {
+		if(thrd->rdy_link.next == 0p) {
 			preempted = true;
 			thrd->preempted = reason;
@@ -726,38 +719,34 @@
 
 
-	#if !defined(CFA_WITH_IO_URING_IDLE)
-		#if !defined(__CFA_NO_STATISTICS__)
-			if(this->print_halts) {
-				__cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl());
+	#if !defined(__CFA_NO_STATISTICS__)
+		if(this->print_halts) {
+			__cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl());
+		}
+	#endif
+
+	__cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle_wctx.evfd);  // log the same fd the read below blocks on
+
+	{
+		eventfd_t val;
+		ssize_t ret = read( this->idle_wctx.evfd, &val, sizeof(val) );
+		if(ret < 0) {
+			switch((int)errno) {
+			case EAGAIN:
+			#if EAGAIN != EWOULDBLOCK
+				case EWOULDBLOCK:
+			#endif
+			case EINTR:
+				// No need to do anything special here, just assume it's a legitimate wake-up
+				break;
+			default:
+				abort( "KERNEL : internal error, read failure on idle eventfd, error(%d) %s.", (int)errno, strerror( (int)errno ) );
 			}
-		#endif
-
-		__cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle_fd);
-
-		{
-			eventfd_t val;
-			ssize_t ret = read( this->idle_wctx.evfd, &val, sizeof(val) );
-			if(ret < 0) {
-				switch((int)errno) {
-				case EAGAIN:
-				#if EAGAIN != EWOULDBLOCK
-					case EWOULDBLOCK:
-				#endif
-				case EINTR:
-					// No need to do anything special here, just assume it's a legitimate wake-up
-					break;
-				default:
-					abort( "KERNEL : internal error, read failure on idle eventfd, error(%d) %s.", (int)errno, strerror( (int)errno ) );
-				}
-			}
-		}
-
-		#if !defined(__CFA_NO_STATISTICS__)
-			if(this->print_halts) {
-				__cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl());
-			}
-		#endif
-	#else
-		__cfa_io_idle( this );
+		}
+	}
+
+	#if !defined(__CFA_NO_STATISTICS__)
+		if(this->print_halts) {
+			__cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl());
+		}
 	#endif
 }
@@ -775,4 +764,5 @@
 		insert_first(this.idles, proc);
 
+		// update the pointer to the head wait context, which should now point to this proc.
 		__atomic_store_n(&this.fdw, &proc.idle_wctx, __ATOMIC_SEQ_CST);
 	unlock( this );
@@ -791,4 +781,5 @@
 
 		{
+			// update the pointer to the head wait context
 			struct __fd_waitctx * wctx = 0;
 			if(!this.idles`isEmpty) wctx = &this.idles`first.idle_wctx;
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/kernel.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -64,4 +64,5 @@
 	// 1 - means the proc should wake-up immediately
 	// FD - means the proc is going asleep and should be woken by writing to the FD.
+	//      The FD value should always be the evfd field just below.
 	volatile int sem;
 
@@ -69,12 +70,5 @@
 	int evfd;
 
-	// buffer into which the proc will read from evfd
-	// unused if not using io_uring for idle sleep
-	void * rdbuf;
-
-	// future use to track the read of the eventfd
-	// unused if not using io_uring for idle sleep
-	io_future_t * ftr;
-
+	// Used for debugging, should be removed eventually.
 	volatile unsigned long long wake__time;
 	volatile unsigned long long sleep_time;
@@ -160,7 +154,7 @@
 // P9_EMBEDDED( processor, dlink(processor) )
 static inline tytagref( dlink(processor), dlink(processor) ) ?`inner( processor & this ) {
-    dlink(processor) & b = this.link;
-    tytagref( dlink(processor), dlink(processor) ) result = { b };
-    return result;
+	dlink(processor) & b = this.link;
+	tytagref( dlink(processor), dlink(processor) ) result = { b };
+	return result;
 }
 
@@ -256,5 +250,5 @@
 	// List of threads
 	__spinlock_t thread_list_lock;
-	__dllist_t(struct thread$) threads;
+	dlist(struct thread$, struct __thread_user_link) threads;
 	unsigned int nthreads;
 
@@ -269,4 +263,9 @@
 		io_context_params params;
 	} io;
+
+	struct {
+		struct processor ** procs;
+		unsigned cnt;
+	} managed;
 
 	#if !defined(__CFA_NO_STATISTICS__)
@@ -298,4 +297,8 @@
 static inline struct cluster   * active_cluster  () { return publicTLS_get( this_processor )->cltr; }
 
+// set the number of internal processors
+// these processors are in addition to any explicitly declared processors
+unsigned set_concurrency( cluster & this, unsigned new_count );
+
 #if !defined(__CFA_NO_STATISTICS__)
 	void print_stats_now( cluster & this, int flags );
Index: libcfa/src/concurrency/kernel/cluster.cfa
===================================================================
--- libcfa/src/concurrency/kernel/cluster.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/kernel/cluster.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -483,11 +483,11 @@
 
 	// We add a boat-load of assertions here because the anchor code is very fragile
-	/* paranoid */ _Static_assert( offsetof( thread$, link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
-	/* paranoid */ verify( offsetof( thread$, link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
-	/* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, link )) == (uintptr_t)(&this.l.anchor) );
-	/* paranoid */ verify( &mock_head(this)->link.next == &this.l.anchor.next );
-	/* paranoid */ verify( &mock_head(this)->link.ts   == &this.l.anchor.ts   );
-	/* paranoid */ verify( mock_head(this)->link.next == 0p );
-	/* paranoid */ verify( mock_head(this)->link.ts   == MAX );
+	/* paranoid */ _Static_assert( offsetof( thread$, rdy_link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
+	/* paranoid */ verify( offsetof( thread$, rdy_link ) == nested_offsetof(__intrusive_lane_t, l.anchor) );
+	/* paranoid */ verify( ((uintptr_t)( mock_head(this) ) + offsetof( thread$, rdy_link )) == (uintptr_t)(&this.l.anchor) );
+	/* paranoid */ verify( &mock_head(this)->rdy_link.next == &this.l.anchor.next );
+	/* paranoid */ verify( &mock_head(this)->rdy_link.ts   == &this.l.anchor.ts   );
+	/* paranoid */ verify( mock_head(this)->rdy_link.next == 0p );
+	/* paranoid */ verify( mock_head(this)->rdy_link.ts   == MAX );
 	/* paranoid */ verify( mock_head(this) == this.l.prev );
 	/* paranoid */ verify( __alignof__(__intrusive_lane_t) == 64 );
Index: libcfa/src/concurrency/kernel/private.hfa
===================================================================
--- libcfa/src/concurrency/kernel/private.hfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/kernel/private.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -20,4 +20,6 @@
 #endif
 
+#include <signal.h>
+
 #include "kernel.hfa"
 #include "thread.hfa"
@@ -39,14 +41,4 @@
 }
 
-// Defines whether or not we *want* to use io_uring_enter as the idle_sleep blocking call
-// #define CFA_WANT_IO_URING_IDLE
-
-// Defines whether or not we *can* use io_uring_enter as the idle_sleep blocking call
-#if defined(CFA_WANT_IO_URING_IDLE) && defined(CFA_HAVE_LINUX_IO_URING_H)
-	#if defined(CFA_HAVE_IORING_OP_READ) || (defined(CFA_HAVE_READV) && defined(CFA_HAVE_IORING_OP_READV))
-		#define CFA_WITH_IO_URING_IDLE
-	#endif
-#endif
-
 // #define READYQ_USE_LINEAR_AVG
 #define READYQ_USE_LOGDBL_AVG
@@ -63,4 +55,16 @@
 #endif
 
+extern "C" {  // runtime-internal pthread entry points, one per pthread_* call they mirror; NOTE(review): presumably forward to the real libc implementations (defined outside this header) — confirm
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_create(pthread_t *_thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_join(pthread_t _thread, void **retval);
+	__attribute__((visibility("protected"))) pthread_t __cfaabi_pthread_self(void);
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_init(pthread_attr_t *attr);
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_destroy(pthread_attr_t *attr);
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_setstack( pthread_attr_t *attr, void *stackaddr, size_t stacksize );
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_getstacksize( const pthread_attr_t *attr, size_t *stacksize );
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_sigqueue(pthread_t _thread, int sig, const union sigval value);
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_sigmask( int how, const sigset_t *set, sigset_t *oset);
+}
+
 //-----------------------------------------------------------------------------
 // Scheduler
@@ -153,4 +157,5 @@
 #define TICKET_RUNNING ( 0) // thread is running
 #define TICKET_UNBLOCK ( 1) // thread should ignore next block
+#define TICKET_DEAD    (0xDEAD) // thread should never be unparked
 
 //-----------------------------------------------------------------------------
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -16,4 +16,6 @@
 #define __cforall_thread__
 #define _GNU_SOURCE
+
+// #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
 
 // C Includes
@@ -113,7 +115,4 @@
 KERNEL_STORAGE(thread$,	             mainThread);
 KERNEL_STORAGE(__stack_t,            mainThreadCtx);
-// KERNEL_STORAGE(__scheduler_RWLock_t, __scheduler_lock);
-KERNEL_STORAGE(eventfd_t,            mainIdleEventFd);
-KERNEL_STORAGE(io_future_t,          mainIdleFuture);
 #if !defined(__CFA_NO_STATISTICS__)
 KERNEL_STORAGE(__stats_t, mainProcStats);
@@ -222,5 +221,5 @@
 		( this.runner ){};
 		init( this, "Main Processor", *mainCluster, 0p );
-		kernel_thread = pthread_self();
+		kernel_thread = __cfaabi_pthread_self();
 
 		runner{ &this };
@@ -232,8 +231,4 @@
 	mainProcessor = (processor *)&storage_mainProcessor;
 	(*mainProcessor){};
-
-	mainProcessor->idle_wctx.rdbuf = &storage_mainIdleEventFd;
-	mainProcessor->idle_wctx.ftr   = (io_future_t*)&storage_mainIdleFuture;
-	/* paranoid */ verify( sizeof(storage_mainIdleEventFd) == sizeof(eventfd_t) );
 
 	__cfa_io_start( mainProcessor );
@@ -283,6 +278,15 @@
 }
 
+extern "C"{
+	void pthread_delete_kernel_threads_();
+}
+
+
 static void __kernel_shutdown(void) {
 	if(!cfa_main_returned) return;
+
+	//delete kernel threads for pthread_concurrency
+	pthread_delete_kernel_threads_();
+
 	/* paranoid */ verify( __preemption_enabled() );
 	disable_interrupts();
@@ -327,5 +331,5 @@
 
 		/* paranoid */ verify( this.do_terminate == true );
-		__cfaabi_dbg_print_safe("Kernel : destroyed main processor context %p\n", &runner);
+		__cfadbg_print_safe(runtime_core, "Kernel : destroyed main processor context %p\n", &runner);
 	}
 
@@ -373,11 +377,4 @@
 	register_tls( proc );
 
-	// used for idle sleep when io_uring is present
-	io_future_t future;
-	eventfd_t idle_buf;
-	proc->idle_wctx.ftr = &future;
-	proc->idle_wctx.rdbuf = &idle_buf;
-
-
 	// SKULLDUGGERY: We want to create a context for the processor coroutine
 	// which is needed for the 2-step context switch. However, there is no reason
@@ -388,5 +385,5 @@
 	(proc->runner){ proc, &info };
 
-	__cfaabi_dbg_print_safe("Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.storage);
+	__cfadbg_print_safe(runtime_core, "Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.storage);
 
 	//Set global state
@@ -514,15 +511,14 @@
 	self_mon.recursion = 1;
 	self_mon_p = &self_mon;
-	link.next = 0p;
-	link.ts   = MAX;
+	rdy_link.next = 0p;
+	rdy_link.ts   = MAX;
 	preferred = ready_queue_new_preferred();
 	last_proc = 0p;
 	random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl();
 	#if defined( __CFA_WITH_VERIFY__ )
+		executing = 0p;
 		canary = 0x0D15EA5E0D15EA5Ep;
 	#endif
 
-	node.next = 0p;
-	node.prev = 0p;
 	doregister(curr_cluster, this);
 
@@ -647,8 +643,11 @@
 	#endif
 
-	threads{ __get };
+	threads{};
 
 	io.arbiter = create();
 	io.params = io_params;
+
+	managed.procs = 0p;
+	managed.cnt = 0;
 
 	doregister(this);
@@ -667,4 +666,6 @@
 
 void ^?{}(cluster & this) libcfa_public {
+	set_concurrency( this, 0 );
+
 	destroy(this.io.arbiter);
 
@@ -722,5 +723,5 @@
 	lock      (cltr->thread_list_lock __cfaabi_dbg_ctx2);
 	cltr->nthreads += 1;
-	push_front(cltr->threads, thrd);
+	insert_first(cltr->threads, thrd);
 	unlock    (cltr->thread_list_lock);
 }
@@ -728,6 +729,10 @@
 void unregister( cluster * cltr, thread$ & thrd ) {
 	lock  (cltr->thread_list_lock __cfaabi_dbg_ctx2);
-	remove(cltr->threads, thrd );
-	cltr->nthreads -= 1;
+	{
+		tytagref( dlink(thread$), dlink(thread$) ) ?`inner( thread$ & this ) = void;
+		with( DLINK_VIA( thread$, struct __thread_user_link ) )
+			remove( thrd );
+		cltr->nthreads -= 1;
+	}
 	unlock(cltr->thread_list_lock);
 }
@@ -777,5 +782,5 @@
 	pthread_attr_t attr;
 
-	check( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
+	check( __cfaabi_pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
 
 	size_t stacksize = max( PTHREAD_STACK_MIN, DEFAULT_STACK_SIZE );
@@ -804,11 +809,11 @@
 	#endif
 
-	check( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
-	check( pthread_create( pthread, &attr, start, arg ), "pthread_create" );
+	check( __cfaabi_pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
+	check( __cfaabi_pthread_create( pthread, &attr, start, arg ), "pthread_create" );
 	return stack;
 }
 
 void __destroy_pthread( pthread_t pthread, void * stack, void ** retval ) {
-	int err = pthread_join( pthread, retval );
+	int err = __cfaabi_pthread_join( pthread, retval );
 	if( err != 0 ) abort("KERNEL ERROR: joining pthread %p caused error %s\n", (void*)pthread, strerror(err));
 
@@ -816,9 +821,9 @@
 		pthread_attr_t attr;
 
-		check( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
+		check( __cfaabi_pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
 
 		size_t stacksize;
 		// default stack size, normally defined by shell limit
-		check( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
+		check( __cfaabi_pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
 		assert( stacksize >= PTHREAD_STACK_MIN );
 		stacksize += __page_size;
@@ -838,4 +843,29 @@
 }
 
+unsigned set_concurrency( cluster & this, unsigned new ) libcfa_public {	// resize the cluster's managed processor array to `new` entries; returns the previous count
+	unsigned old = this.managed.cnt;
+
+	__cfadbg_print_safe(runtime_core, "Kernel : resizing cluster from %u to %u\n", old, (unsigned)new);
+
+	// Shrinking: delete the processors in slots [new, old) before the array itself is resized
+	if(old > new) for(i; (unsigned)new ~ old) {
+		__cfadbg_print_safe(runtime_core, "Kernel : destroying %u\n", i);
+		delete( this.managed.procs[i] );
+	}
+
+	// Allocate new array (uses realloc and memcpies the data)
+	this.managed.procs = alloc( new, this.managed.procs`realloc );
+	this.managed.cnt = new;
+
+	// Growing: allocate a processor for each slot in [old, new) and construct it on this cluster
+	if(old < new) for(i; old ~ new) {
+		__cfadbg_print_safe(runtime_core, "Kernel : constructing %u\n", i);
+		(*(this.managed.procs[i] = alloc())){ this };
+	}
+
+	// return the old count
+	return old;
+}
+
 #if defined(__CFA_WITH_VERIFY__)
 static bool verify_fwd_bck_rng(void) {
Index: libcfa/src/concurrency/locks.hfa
===================================================================
--- libcfa/src/concurrency/locks.hfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/locks.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -21,5 +21,5 @@
 
 #include "bits/weakso_locks.hfa"
-#include "containers/queueLockFree.hfa"
+#include "containers/lockfree.hfa"
 #include "containers/list.hfa"
 
@@ -498,5 +498,5 @@
 }
 
-static inline size_t on_wait(simple_owner_lock & this) with(this) { 
+static inline size_t on_wait(simple_owner_lock & this) with(this) {
 	lock( lock __cfaabi_dbg_ctx2 );
 	/* paranoid */ verifyf( owner != 0p, "Attempt to release lock %p that isn't held", &this );
Index: libcfa/src/concurrency/monitor.cfa
===================================================================
--- libcfa/src/concurrency/monitor.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/monitor.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -122,7 +122,7 @@
 
 		// Some one else has the monitor, wait in line for it
-		/* paranoid */ verify( thrd->link.next == 0p );
+		/* paranoid */ verify( thrd->user_link.next == 0p );
 		append( this->entry_queue, thrd );
-		/* paranoid */ verify( thrd->link.next == 1p );
+		/* paranoid */ verify( thrd->user_link.next == 1p );
 
 		unlock( this->lock );
@@ -233,7 +233,7 @@
 
 		// Some one else has the monitor, wait in line for it
-		/* paranoid */ verify( thrd->link.next == 0p );
+		/* paranoid */ verify( thrd->user_link.next == 0p );
 		append( this->entry_queue, thrd );
-		/* paranoid */ verify( thrd->link.next == 1p );
+		/* paranoid */ verify( thrd->user_link.next == 1p );
 		unlock( this->lock );
 
@@ -791,5 +791,5 @@
 	thread$ * new_owner = pop_head( this->entry_queue );
 	/* paranoid */ verifyf( !this->owner || active_thread() == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", active_thread(), this->owner, this->recursion, this );
-	/* paranoid */ verify( !new_owner || new_owner->link.next == 0p );
+	/* paranoid */ verify( !new_owner || new_owner->user_link.next == 0p );
 	__set_owner( this, new_owner );
 
@@ -935,9 +935,17 @@
 	__queue_t(thread$) & entry_queue = monitors[0]->entry_queue;
 
+	#if defined( __CFA_WITH_VERIFY__ )
+		thread$ * last = 0p;
+	#endif
 	// For each thread in the entry-queue
 	for(	thread$ ** thrd_it = &entry_queue.head;
 		(*thrd_it) != 1p;
-		thrd_it = &(*thrd_it)->link.next
+		thrd_it = &get_next(**thrd_it)
 	) {
+		thread$ * curr = *thrd_it;
+
+		/* paranoid */ verifyf( !last || last->user_link.next == curr, "search not making progress, from %p (%p) to %p", last, last->user_link.next, curr );
+		/* paranoid */ verifyf( curr != last, "search not making progress, from %p to %p", last, curr );
+
 		// For each acceptable check if it matches
 		int i = 0;
@@ -946,5 +954,5 @@
 		for( __acceptable_t * it = begin; it != end; it++, i++ ) {
 			// Check if we have a match
-			if( *it == (*thrd_it)->monitors ) {
+			if( *it == curr->monitors ) {
 
 				// If we have a match return it
@@ -953,4 +961,8 @@
 			}
 		}
+
+		#if defined( __CFA_WITH_VERIFY__ )
+			last = curr;
+		#endif
 	}
 
@@ -1025,7 +1037,7 @@
 
 		// Some one else has the monitor, wait in line for it
-		/* paranoid */ verify( thrd->link.next == 0p );
+		/* paranoid */ verify( thrd->user_link.next == 0p );
 		append( this->entry_queue, thrd );
-		/* paranoid */ verify( thrd->link.next == 1p );
+		/* paranoid */ verify( thrd->user_link.next == 1p );
 
 		unlock( this->lock );
Index: libcfa/src/concurrency/once.hfa
===================================================================
--- libcfa/src/concurrency/once.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ libcfa/src/concurrency/once.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,106 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// once.hfa -- Algorithms to prevent concurrent calls to cause duplicate calls
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Oct 11:40:47 2022
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#pragma once
+
+#include "containers/lockfree.hfa"
+#include "kernel/fwd.hfa"
+
+enum once_state {
+	ARMED = 0,
+	IN_PROGRESS,
+	READY
+};
+
+struct once_flag {
+	volatile int state;
+	poison_list( thread$ ) waiters;
+};
+
+static inline {
+	void ?{}(once_flag & this) { this.state = ARMED; }
+
+	void once_wait$(once_flag & this) {	// wait for the in-flight once-call: park on the waiter list, or return at once if the list is poisoned
+		// try to enqueue the calling thread; push reports whether it was accepted
+		if(push( this.waiters, active_thread() )) {
+			// the list wasn't poisoned, push was successful, just park.
+			park();
+		}
+	}
+
+	void once_call$( once_flag & this, void (*func)(void) ) {	// winner of the ARMED -> IN_PROGRESS race: run func, publish READY, then release every parked waiter
+		/* paranoid */ verify( once_state.IN_PROGRESS == __atomic_load_n(&this.state, __ATOMIC_RELAXED) );
+		/* paranoid */ verify( ! is_poisoned(this.waiters) );
+
+		// call the thing we are here for!
+		func();
+
+		/* paranoid */ verify( ! is_poisoned(this.waiters) );
+		/* paranoid */ verify( once_state.IN_PROGRESS == __atomic_load_n(&this.state, __ATOMIC_RELAXED) );
+
+		// Mark the call as being done: READY is what the fast path in call_once tests for.
+		__atomic_store_n( &this.state, (int)once_state.READY, __ATOMIC_SEQ_CST );
+
+		// wake up the sleepers and make sure no new sleeper arrives
+		thread$ * sleeper = poison( this.waiters );
+
+		/* paranoid */ verify( is_poisoned(this.waiters) );	// the list was just poisoned; call_once asserts the same on return
+		/* paranoid */ verify( once_state.READY == __atomic_load_n(&this.state, __ATOMIC_RELAXED) );
+
+		while(sleeper != 0p) {
+			// find the next thread now because unpark invalidates the pointer
+			thread$ * next = advance(sleeper);
+
+			// wake-up the thread, invalidates pointer
+			unpark( sleeper );
+
+			// update the current
+			sleeper = next;
+		}
+	}
+
+	bool call_once( once_flag & this, void (*func)(void) ) {	// run func at most once per flag; returns true only to the caller that actually ran it
+		// is the call already done?
+		if(likely(once_state.READY == __atomic_load_n(&this.state, __ATOMIC_RELAXED))) {
+			/* paranoid */ verify( is_poisoned(this.waiters) );
+			return false;
+		}
+
+		// Try to CAS ourself as the thread that will actually call the function
+		int expected = ARMED;
+		if( __atomic_compare_exchange_n( &this.state, &expected, (int)once_state.IN_PROGRESS, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ) {
+
+			// we won the race, call the function
+			once_call$( this, func );
+
+			/* paranoid */ verify( is_poisoned(this.waiters) );
+			/* paranoid */ verify( once_state.READY == __atomic_load_n(&this.state, __ATOMIC_RELAXED) );
+
+			// in case someone cares, this call did do the underlying call
+			return true;
+		}
+		else {
+
+			// state was not ARMED: someone else is doing (or has finished) the call; once_wait$ parks unless the waiter list is already poisoned
+			once_wait$( this );
+
+			/* paranoid */ verify( is_poisoned(this.waiters) );
+			/* paranoid */ verify( once_state.READY == __atomic_load_n(&this.state, __ATOMIC_RELAXED) );
+
+			// in case someone cares, someone else did the call
+			return false;
+		}
+	}
+}
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/preemption.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -352,5 +352,5 @@
 	sigset_t oldset;
 	int ret;
-	ret = pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
+	ret = __cfaabi_pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
 	if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
 
@@ -385,5 +385,5 @@
 	sigaddset( &mask, sig );
 
-	if ( pthread_sigmask( SIG_UNBLOCK, &mask, 0p ) == -1 ) {
+	if ( __cfaabi_pthread_sigmask( SIG_UNBLOCK, &mask, 0p ) == -1 ) {
 	    abort( "internal error, pthread_sigmask" );
 	}
@@ -396,5 +396,5 @@
 	sigaddset( &mask, sig );
 
-	if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
+	if ( __cfaabi_pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
 		abort( "internal error, pthread_sigmask" );
 	}
@@ -404,5 +404,5 @@
 static void preempt( processor * this ) {
 	sigval_t value = { PREEMPT_NORMAL };
-	pthread_sigqueue( this->kernel_thread, SIGUSR1, value );
+	__cfaabi_pthread_sigqueue( this->kernel_thread, SIGUSR1, value );
 }
 
@@ -415,5 +415,5 @@
 	sigset_t oldset;
 	int ret;
-	ret = pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
+	ret = __cfaabi_pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
 	if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
 
@@ -434,5 +434,5 @@
 	sigset_t oldset;
 	int ret;
-	ret = pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
+	ret = __cfaabi_pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
 	if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
 
@@ -505,5 +505,5 @@
 	sigval val;
 	val.sival_int = 0;
-	pthread_sigqueue( alarm_thread, SIGALRM, val );
+	__cfaabi_pthread_sigqueue( alarm_thread, SIGALRM, val );
 
 	// Wait for the preemption thread to finish
@@ -579,5 +579,5 @@
 	static_assert( sizeof( sigset_t ) == sizeof( cxt->uc_sigmask ), "Expected cxt->uc_sigmask to be of sigset_t" );
 	#endif
-	if ( pthread_sigmask( SIG_SETMASK, (sigset_t *)&(cxt->uc_sigmask), 0p ) == -1 ) {
+	if ( __cfaabi_pthread_sigmask( SIG_SETMASK, (sigset_t *)&(cxt->uc_sigmask), 0p ) == -1 ) {
 		abort( "internal error, sigprocmask" );
 	}
@@ -607,5 +607,5 @@
 	sigset_t mask;
 	sigfillset(&mask);
-	if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
+	if ( __cfaabi_pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
 	    abort( "internal error, pthread_sigmask" );
 	}
Index: libcfa/src/concurrency/pthread.cfa
===================================================================
--- libcfa/src/concurrency/pthread.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ libcfa/src/concurrency/pthread.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,920 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2019 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// pthread.cfa --
+//
+// Author           : Zhenyan Zhu
+// Created On       : Sat Aug 6 16:29:18 2022
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#define __cforall_thread__
+#define _GNU_SOURCE
+
+#include <signal.h>
+#include <pthread.h>
+#include <errno.h>
+#include "locks.hfa"
+#include "bits/stack.hfa"
+
+
+#define check_nonnull(x) asm("": "+rm"(x)); if( x == 0p ) return EINVAL;
+
+/* pthread key, pthread once inner routine mutual exclusion */
+static simple_owner_lock once_lock,key_lock,magic_mutex_check, concurrency_lock;
+
+//######################### Local Storage Helpers #########################
+
+enum { PTHREAD_KEYS_MAX = 1024 };
+
+struct pthread_values{
+	inline Seqable;
+	void* value;
+	bool in_use;
+};
+
+static inline {
+	pthread_values *& Back( pthread_values * n ) {
+		return (pthread_values *)Back( (Seqable *)n );
+	}
+
+	pthread_values *& Next( pthread_values * n ) {
+		return (pthread_values *)Next( (Colable *)n );
+	}
+}
+
+struct pthread_keys {
+	bool in_use;
+	void (*destructor)( void * );
+	Sequence(pthread_values) threads;
+};
+
+static void ?{}(pthread_keys& k){
+	k.threads{};
+}
+
+// Create storage separately to ensure no constructors are called.
+static pthread_keys cfa_pthread_keys_storage[PTHREAD_KEYS_MAX] __attribute__((aligned (16)));
+
+static void init_pthread_storage(){
+	for (int i = 0; i < PTHREAD_KEYS_MAX; i++){
+		cfa_pthread_keys_storage[i]{};
+	}
+}
+
+#define cfa_pthread_keys ((pthread_keys *)cfa_pthread_keys_storage)
+
+/* Controlling the iterations of destructors for thread-specific data.  */
+#define _POSIX_THREAD_DESTRUCTOR_ITERATIONS	4
+/* Number of iterations this implementation does.  */
+#define PTHREAD_DESTRUCTOR_ITERATIONS	_POSIX_THREAD_DESTRUCTOR_ITERATIONS
+
+//######################### Parallelism Helpers #########################
+
+struct Pthread_kernel_threads{
+	inline Colable;
+	processor p;
+};
+
+Pthread_kernel_threads *& Next( Pthread_kernel_threads * n ) {
+	return (Pthread_kernel_threads *)Next( (Colable *)n );
+}
+
+static Stack(Pthread_kernel_threads) cfa_pthreads_kernel_threads;
+static bool cfa_pthreads_kernel_threads_zero = false;	// set to zero ?
+static int cfa_pthreads_no_kernel_threads = 1;	// number of kernel threads
+
+
+//######################### Cond Helpers #########################
+
+typedef pthread_cond_var(simple_owner_lock) cfa2pthr_cond_var_t;
+
+/* condvar helper routines */
+static void init(pthread_cond_t* pcond){	// default-construct a cfa2pthr_cond_var_t in place inside the caller's pthread_cond_t storage
+	static_assert(sizeof(pthread_cond_t) >= sizeof(cfa2pthr_cond_var_t),"sizeof(pthread_cond_t) < sizeof(cfa2pthr_cond_var_t)");
+	cfa2pthr_cond_var_t* _cond = (cfa2pthr_cond_var_t*)pcond;
+	?{}(*_cond);
+}
+
+static cfa2pthr_cond_var_t* get(pthread_cond_t* pcond){	// reinterpret the pthread_cond_t storage as the CFA condition variable kept inside it
+	static_assert(sizeof(pthread_cond_t) >= sizeof(cfa2pthr_cond_var_t),"sizeof(pthread_cond_t) < sizeof(cfa2pthr_cond_var_t)");
+	return (cfa2pthr_cond_var_t*)pcond;
+}
+
+static void destroy(pthread_cond_t* cond){	// run the destructor of the CFA condition variable kept inside the pthread_cond_t storage
+	static_assert(sizeof(pthread_cond_t) >= sizeof(cfa2pthr_cond_var_t),"sizeof(pthread_cond_t) < sizeof(cfa2pthr_cond_var_t)");
+	^?{}(*get(cond));
+}
+
+
+//######################### Mutex Helper #########################
+
+/* mutex helper routines */
+static void mutex_check(pthread_mutex_t* t){	// initialize *t on first use when its __lock word still reads 0
+	// Use double check to improve performance.
+	// Check is safe on x86; volatile prevents compiler reordering
+	volatile pthread_mutex_t *const mutex_ = t;
+
+	// SKULLDUGGERY: not a portable way to access the __lock field, /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h
+	int _lock_val = ((pthread_mutex_t *)mutex_)->__data.__lock;
+
+	// if pthread_mutex_t is initialized by PTHREAD_MUTEX_INITIALIZER, _lock_val should be 0
+	if ( _lock_val == 0 ) {
+		lock(magic_mutex_check);
+		_lock_val = ((pthread_mutex_t *)mutex_)->__data.__lock;	// second read, now while holding magic_mutex_check
+		if ( _lock_val == 0 ) {
+			pthread_mutex_init( t, NULL );
+		}
+		unlock(magic_mutex_check);
+	}
+} // mutex_check
+
+
+static void init(pthread_mutex_t* plock){
+	static_assert(sizeof(pthread_mutex_t) >= sizeof(simple_owner_lock),"sizeof(pthread_mutex_t) < sizeof(simple_owner_lock)");
+	simple_owner_lock* _lock = (simple_owner_lock*)plock;
+	?{}(*_lock);
+}
+
+static simple_owner_lock* get(pthread_mutex_t* plock){
+	static_assert(sizeof(pthread_mutex_t) >= sizeof(simple_owner_lock),"sizeof(pthread_mutex_t) < sizeof(simple_owner_lock)");
+	return (simple_owner_lock*)plock;
+}
+
+static void destroy(pthread_mutex_t* plock){
+	static_assert(sizeof(pthread_mutex_t) >= sizeof(simple_owner_lock),"sizeof(pthread_mutex_t) < sizeof(simple_owner_lock)");
+	^?{}(*get(plock));
+}
+
+//######################### Attr helpers #########################
+struct cfaPthread_attr_t {								// thread attributes
+		int contentionscope;
+		int detachstate;
+		size_t stacksize;
+		void *stackaddr;
+		int policy;
+		int inheritsched;
+		struct sched_param param;
+} typedef cfaPthread_attr_t;
+
+static const cfaPthread_attr_t default_attrs{
+	0,
+	0,
+	(size_t)65000,
+	(void *)NULL,
+	0,
+	0,
+	{0}
+};
+
+static cfaPthread_attr_t* get(const pthread_attr_t* attr){
+	static_assert(sizeof(pthread_attr_t) >= sizeof(cfaPthread_attr_t),"sizeof(pthread_attr_t) < sizeof(cfaPthread_attr_t)");
+	return (cfaPthread_attr_t*)attr;
+}
+
+
+//######################### Threads Helper #########################
+
+// exception for cancel_stack in pthread_exit
+exception pthread_exit_exp {};
+static vtable(pthread_exit_exp) exp_vt;
+
+thread cfaPthread{
+	cfaPthread_attr_t attr;
+	pthread_t pthreadId;
+
+	// pthreads return value
+	void *joinval;
+
+	// pthread attributes
+	pthread_attr_t pthread_attr;
+
+	void *(*start_routine)(void *);
+	void *start_arg;
+
+	// thread local data
+	pthread_values* pthreadData;
+
+	// flag used for tryjoin
+	bool isTerminated;
+};
+
+/* thread part routines */
+//  cfaPthread entry point
+void main(cfaPthread& _thread) with(_thread){
+	joinval =  start_routine(start_arg);
+	isTerminated = true;
+}
+
+static cfaPthread *lookup( pthread_t p ){
+	static_assert(sizeof(pthread_t) >= sizeof(cfaPthread*),"sizeof(pthread_t) < sizeof(cfaPthread*)");
+	return (cfaPthread*)p;
+}
+
+static void pthread_deletespecific_( pthread_values* values )  { // see uMachContext::invokeTask
+	pthread_values* value;
+	pthread_keys* key;
+	bool destcalled = true;
+	if (values != NULL){
+		for ( int attempts = 0; attempts < PTHREAD_DESTRUCTOR_ITERATIONS && destcalled ; attempts += 1 ) {
+			destcalled = false;
+			lock(key_lock);
+			for (int i = 0; i < PTHREAD_KEYS_MAX; i++){
+				// for each valid key
+				if ( values[i].in_use){
+					value = &values[i];
+					key = &cfa_pthread_keys[i];
+					value->in_use = false;
+					remove(key->threads, *value);
+					// if  a  key  value  has  a  non-NULL  destructor pointer,  and  the  thread  has  a  non-NULL  value associated with that key,
+					// the value of the key is set to NULL, and then the function pointed to is called with the previously associated value as its sole argument.
+					if (value->value != NULL && key->destructor != NULL){
+						unlock(key_lock);
+						key->destructor(value->value); // run destructor
+						lock(key_lock);
+						destcalled = true;
+					}   // if
+					value->value = NULL;
+				}   // if
+			}   // for
+			unlock(key_lock);
+		}   // for
+		free(values);
+	}   // if
+}
+
+static void ^?{}(cfaPthread & mutex t){
+	// delete pthread local storage
+	pthread_values * values = t.pthreadData;
+	pthread_deletespecific_(values);
+}
+
+static void ?{}(cfaPthread &t, pthread_t* _thread, const pthread_attr_t * _attr,void *(*start_routine)(void *), void * arg) {
+	// Build the user-level thread behind a pthread: size its stack from _attr (or the default), publish its address through *_thread, record the start routine.
+	static_assert(sizeof(pthread_t) >= sizeof(cfaPthread*), "pthread_t too small to hold a pointer: sizeof(pthread_t) < sizeof(cfaPthread*)");
+	// set up user thread stackSize
+	cfaPthread_attr_t * attr = get(_attr);
+	((thread&)t){ attr ? attr->stacksize: DEFAULT_STACK_SIZE };
+
+	// initialize _thread & cfaPthread id
+	*_thread = t.pthreadId = (pthread_t)(&t);
+	// if attr null, self attr will be set as default_attrs; else set to attr
+	t.attr = (attr != NULL ? *attr : default_attrs);
+
+	// init start routine, arguments and per-thread state
+	t.start_routine = start_routine;
+	t.start_arg = arg;
+	t.pthreadData = NULL;
+	t.isTerminated = false;	// pthread_tryjoin_np reads this before the thread finishes; nothing else sets it before then
+}
+
+
+extern "C"{
+	//######################### Pthread Attrs #########################
+
+	int pthread_attr_init(pthread_attr_t *attr) libcfa_public __THROW {
+		cfaPthread_attr_t* _attr = get(attr);
+		?{}(*_attr, default_attrs);
+		return 0;
+	}
+	int pthread_attr_destroy(pthread_attr_t *attr) libcfa_public __THROW {
+		^?{}(*get(attr));
+		return 0;
+	}
+
+	int pthread_attr_setscope( pthread_attr_t *attr, int contentionscope ) libcfa_public __THROW {
+		get( attr )->contentionscope = contentionscope;
+		return 0;
+	} // pthread_attr_setscope
+
+	int pthread_attr_getscope( const pthread_attr_t *attr, int *contentionscope ) libcfa_public __THROW {
+		*contentionscope = get( attr )->contentionscope;
+		return 0;
+	} // pthread_attr_getscope
+
+	int pthread_attr_setdetachstate( pthread_attr_t *attr, int detachstate ) libcfa_public __THROW {
+		get( attr )->detachstate = detachstate;
+		return 0;
+	} // pthread_attr_setdetachstate
+
+	int pthread_attr_getdetachstate( const pthread_attr_t *attr, int *detachstate ) libcfa_public __THROW {
+		*detachstate = get( attr )->detachstate;
+		return 0;
+	} // pthread_attr_getdetachstate
+
+	int pthread_attr_setstacksize( pthread_attr_t *attr, size_t stacksize ) libcfa_public __THROW {
+		get( attr )->stacksize = stacksize;
+		return 0;
+	} // pthread_attr_setstacksize
+
+	int pthread_attr_getstacksize( const pthread_attr_t *attr, size_t *stacksize ) libcfa_public __THROW {
+		*stacksize = get( attr )->stacksize;
+		return 0;
+	} // pthread_attr_getstacksize
+
+	int pthread_attr_getguardsize( const pthread_attr_t * /* attr */, size_t * /* guardsize */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_attr_getguardsize
+
+	int pthread_attr_setguardsize( pthread_attr_t * /* attr */, size_t /* guardsize */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_attr_setguardsize
+
+	int pthread_attr_setstackaddr( pthread_attr_t *attr, void *stackaddr ) libcfa_public __THROW {
+		get( attr )->stackaddr = stackaddr;
+		return 0;
+	} // pthread_attr_setstackaddr
+
+	int pthread_attr_getstackaddr( const pthread_attr_t *attr, void **stackaddr ) libcfa_public __THROW {
+		*stackaddr = get( attr )->stackaddr;
+		return 0;
+	} // pthread_attr_getstackaddr
+
+	int pthread_attr_setstack( pthread_attr_t *attr, void *stackaddr, size_t stacksize ) libcfa_public __THROW {
+		get( attr )->stackaddr = stackaddr;
+		get( attr )->stacksize = stacksize;
+		return 0;
+	} // pthread_attr_setstack
+
+	int pthread_attr_getstack( const pthread_attr_t *attr, void **stackaddr, size_t *stacksize ) libcfa_public __THROW {
+		*stackaddr = get( attr )->stackaddr;
+		*stacksize = get( attr )->stacksize;
+		return 0;
+	} // pthread_attr_getstack
+
+	// Initialize thread attribute *attr with attributes corresponding to the
+	// already running thread threadID. It shall be called on unitialized attr
+	// and destroyed with pthread_attr_destroy when no longer needed.
+	int pthread_getattr_np( pthread_t threadID, pthread_attr_t *attr ) libcfa_public __THROW { // GNU extension
+		check_nonnull(attr);
+
+		// copy all fields
+		*get(attr) = lookup( threadID )->attr;
+
+		return 0;
+	} // pthread_getattr_np
+
+
+	//######################### Threads #########################
+
+	int pthread_create(pthread_t * _thread, const pthread_attr_t * attr, void *(*start_routine)(void *), void * arg) libcfa_public __THROW {
+		cfaPthread *t = alloc();
+		(*t){_thread, attr, start_routine, arg};
+		return 0;
+	}
+
+
+	int pthread_join(pthread_t _thread, void **value_ptr) libcfa_public __THROW {
+		// if thread is invalid
+		if (_thread == NULL) return EINVAL;
+		if (_thread == pthread_self()) return EDEADLK;
+
+		// get user thr pointer
+		cfaPthread* p = lookup(_thread);
+		try {
+			join(*p);
+		}
+		// if thread called pthread_exit
+		catchResume (ThreadCancelled(cfaPthread) * cancel) {}
+
+		// fetch result
+		if (value_ptr != NULL ) *value_ptr = p->joinval;
+		delete(p);
+		return 0;
+	}
+
+	int pthread_tryjoin_np(pthread_t _thread, void **value_ptr) libcfa_public __THROW {
+		// if thread is invalid
+		if (_thread == NULL) return EINVAL;
+		if (_thread == pthread_self()) return EDEADLK;
+
+		cfaPthread* p = lookup(_thread);
+
+		// thread not finished ?
+		if (!p->isTerminated) return EBUSY;
+
+		join( *p );
+
+		if (value_ptr != NULL ) *value_ptr = p->joinval;
+		delete(p);
+		return 0;
+	}
+
+	pthread_t pthread_self(void) libcfa_public __THROW {	// map the running thread$ back to the cfaPthread address that serves as its pthread_t
+		return (pthread_t)((uintptr_t)active_thread() - (sizeof(cfaPthread) - sizeof(thread$)));	// NOTE(review): assumes the thread$ descriptor occupies the last sizeof(thread$) bytes of cfaPthread, and that the caller is a cfaPthread -- confirm
+	}
+
+	void pthread_exit(void * status) libcfa_public __THROW {
+		pthread_t pid = pthread_self();
+		cfaPthread* _thread = (cfaPthread*)pid;
+		_thread->joinval = status;  // set return value
+		_thread->isTerminated = 1;  // set terminated flag
+		cancel_stack((pthread_exit_exp){&exp_vt});
+	}   //pthread_exit_
+
+	int pthread_yield( void ) __THROW {			// GNU extension
+		yield();
+		return 0;
+	}
+
+
+	//######################### Mutex #########################
+
+	int pthread_mutex_init(pthread_mutex_t *_mutex, const pthread_mutexattr_t *attr) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		init(_mutex);
+		return 0;
+	}   //pthread_mutex_init_
+
+
+	int pthread_mutex_destroy(pthread_mutex_t *_mutex) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		simple_owner_lock* _lock = get(_mutex);
+		if (_lock->owner != NULL){
+			return EBUSY;
+		}
+		destroy(_mutex);
+		return 0;
+	}   //pthread_mutex_destroy_
+
+	int pthread_mutex_lock(pthread_mutex_t *_mutex) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		mutex_check(_mutex);
+		simple_owner_lock* _lock = get(_mutex);
+		lock(*_lock);
+		return 0;
+	}   //pthread_mutex_lock_
+
+	int pthread_mutex_unlock(pthread_mutex_t *_mutex) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		simple_owner_lock* _lock = get(_mutex);
+		if (_lock->owner != active_thread()){
+			return EPERM;
+		} // current thread does not hold the mutex
+		unlock(*_lock);
+		return 0;
+	}   //pthread_mutex_unlock_
+
+	int pthread_mutex_trylock(pthread_mutex_t *_mutex) libcfa_public __THROW {	// NOTE(review): unlike pthread_mutex_lock, mutex_check is not called here
+		check_nonnull(_mutex);
+		simple_owner_lock* _lock = get(_mutex);
+		if (_lock->owner != active_thread() && _lock->owner != NULL){	// owned by some other thread: report EBUSY instead of waiting
+			return EBUSY;
+		}   // if mutex is owned
+		lock(*_lock);	// NOTE(review): the owner test above and this lock() are separate steps; if another thread acquires in between, this presumably blocks -- confirm
+		return 0;
+	}   //pthread_mutex_trylock_
+
+	//######################### Conditional Variable #########################
+
+	/* conditional variable routines */
+	int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) libcfa_public __THROW {
+		check_nonnull(cond);
+		init(cond);
+		return 0;
+	}  //pthread_cond_init
+
+	int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *_mutex) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		check_nonnull(cond);
+		wait(*get(cond), *get(_mutex));
+		return 0;
+	} // pthread_cond_wait
+
+	int pthread_cond_timedwait(pthread_cond_t * cond, pthread_mutex_t * _mutex, const struct timespec * abstime) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		check_nonnull(cond);
+		wait(*get(cond), *get(_mutex), *abstime);	// NOTE(review): abstime is dereferenced without a null check, and any result of wait is ignored
+		return 0;	// NOTE(review): always 0, so an expired timeout is never reported as ETIMEDOUT -- confirm this is intended
+	} // pthread_cond_timedwait
+
+	int pthread_cond_signal(pthread_cond_t *cond) libcfa_public __THROW {
+		check_nonnull(cond);
+		return notify_one(*get(cond));
+	} // pthread_cond_signal
+
+	int pthread_cond_broadcast(pthread_cond_t *cond) libcfa_public __THROW {
+		check_nonnull(cond);
+		return notify_all(*get(cond));
+	} // pthread_cond_broadcast
+
+	int pthread_cond_destroy(pthread_cond_t *cond) libcfa_public __THROW {
+		check_nonnull(cond);
+		destroy(cond);
+		return 0;
+	} // pthread_cond_destroy
+
+
+
+	//######################### Local storage #########################
+
+	int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) libcfa_public __THROW {	// run init_routine the first time *once_control is seen as 0
+		static_assert(sizeof(pthread_once_t) >= sizeof(int),"sizeof(pthread_once_t) < sizeof(int)");
+		check_nonnull(once_control);
+		check_nonnull(init_routine);
+		lock(once_lock);	// one file-level lock is shared by every once_control
+		if ( *((int *)once_control) == 0 ) {
+			init_routine();	// called while once_lock is still held
+			*((int *)once_control) = 1;	// mark as done only after init_routine has returned
+		} // if
+		unlock(once_lock);
+		return 0;
+	} // pthread_once
+
+	int pthread_key_create( pthread_key_t *key, void (*destructor)( void * ) ) libcfa_public __THROW {
+		lock(key_lock);
+		for ( int i = 0; i < PTHREAD_KEYS_MAX; i += 1 ) {
+			if ( ! cfa_pthread_keys[i].in_use ) {
+				cfa_pthread_keys[i].in_use = true;
+				cfa_pthread_keys[i].destructor = destructor;
+				unlock( key_lock );
+				*key = i;
+				return 0;
+			} // if
+		} // for
+		unlock(key_lock);
+		return EAGAIN;
+	}   // pthread_key_create
+
+	int pthread_key_delete( pthread_key_t key ) libcfa_public __THROW {
+		lock(key_lock);
+		if ( key >= PTHREAD_KEYS_MAX || ! cfa_pthread_keys[key].in_use ) {	// out of range, or slot not currently allocated
+			unlock( key_lock );
+			return EINVAL;
+		} // if
+		cfa_pthread_keys[key].in_use = false;	// slot can be handed out by pthread_key_create again
+		cfa_pthread_keys[key].destructor = NULL;	// the destructor is cleared here, not called
+
+		// Remove key from all threads with a value.
+		pthread_values& p;
+		Sequence(pthread_values)& head = cfa_pthread_keys[key].threads;
+		for ( SeqIter(pthread_values) iter = { head }; iter | p; ) {
+			remove(head, p);
+			p.in_use = false;	// that thread's entry no longer counts as set
+		}
+		unlock(key_lock);
+		return 0;
+	}   // pthread_key_delete
+
+	int pthread_setspecific( pthread_key_t key, const void *value ) libcfa_public __THROW {
+		// look up the calling thread's descriptor
+		cfaPthread* t = lookup(pthread_self());
+		// lazily create the calling thread's value table (one entry per possible key) on first use
+		pthread_values* values;
+		if (t->pthreadData == NULL){
+			values = anew( PTHREAD_KEYS_MAX);
+			t->pthreadData = values;
+			for (int i = 0;i < PTHREAD_KEYS_MAX; i++){
+				t->pthreadData[i].in_use = false;	// no key has a value in a fresh table
+			}   // for
+		}   else {
+			values = t->pthreadData;
+		}   // if
+		// validate the key and store the value under key_lock
+		lock(key_lock);
+		// reject a key that is out of range or not currently allocated; a table allocated above is kept
+		if ( key >= PTHREAD_KEYS_MAX || ! cfa_pthread_keys[key].in_use ) {
+			unlock( key_lock );
+			return EINVAL;
+		} // if
+		pthread_values &entry = values[key];
+		if ( ! entry.in_use ) {	// first value this thread stores for this key
+			entry.in_use = true;
+			add(cfa_pthread_keys[key].threads, entry);	// lets pthread_key_delete find and clear this entry
+		} // if
+		entry.value = (void *)value;	// const is dropped: the value is handed back as void * by pthread_getspecific
+		unlock(key_lock);
+		return 0;
+	} //pthread_setspecific
+
+	void* pthread_getspecific(pthread_key_t key) libcfa_public __THROW {	// value the calling thread stored under key, or NULL when there is none
+		if (key >= PTHREAD_KEYS_MAX) return NULL;	// range check only; the key's in_use flag is examined under key_lock below
+
+		// get current thread
+		cfaPthread* t = lookup(pthread_self());
+		if (t->pthreadData == NULL) return NULL;	// no value table has been allocated for this thread
+		lock(key_lock);
+		pthread_values &entry = ((pthread_values *)t->pthreadData)[key];
+		if ( ! cfa_pthread_keys[key].in_use || ! entry.in_use ) {	// key not allocated, or this thread has no value for it
+			unlock( key_lock );
+			return NULL;
+		} // if
+		void *value = entry.value;
+		unlock(key_lock);
+
+		return value;
+	}   // pthread_getspecific
+
+	//######################### Parallelism #########################
+	void pthread_delete_kernel_threads_() __THROW {	// see uMain::~uMain
+		Pthread_kernel_threads& p;
+		for ( StackIter(Pthread_kernel_threads) iter = {cfa_pthreads_kernel_threads}; iter | p; ) {	// visit every element recorded by pthread_setconcurrency
+			delete(&p);	// NOTE(review): concurrency_lock is not taken and cfa_pthreads_no_kernel_threads is not reset here
+		} // for
+	} // pthread_delete_kernel_threads_
+
+	int pthread_getconcurrency( void ) libcfa_public __THROW {	// XOPEN extension; exported like pthread_setconcurrency
+		return cfa_pthreads_kernel_threads_zero ? 0 : cfa_pthreads_no_kernel_threads;	// 0 when the last request was level 0, otherwise the current count
+	} // pthread_getconcurrency
+
+	int pthread_setconcurrency( int new_level ) libcfa_public __THROW { // XOPEN extension
+		if ( new_level < 0 ) return EINVAL;
+		if ( new_level == 0 ) {
+			cfa_pthreads_kernel_threads_zero = true;	// remember set to zero, but ignore
+			return 0;					// do not do kernel thread management
+		} // if
+		cfa_pthreads_kernel_threads_zero = false;
+		lock( concurrency_lock );
+		for ( ; new_level > cfa_pthreads_no_kernel_threads; cfa_pthreads_no_kernel_threads += 1 ) { // grow: push one new element per missing level
+			push(cfa_pthreads_kernel_threads, *new() );
+		} // for
+		for ( ; new_level < cfa_pthreads_no_kernel_threads; cfa_pthreads_no_kernel_threads -= 1 ) { // shrink: pop and delete one element per surplus level
+			delete(&pop(cfa_pthreads_kernel_threads));
+		} // for
+		unlock( concurrency_lock );
+		return 0;
+	} // pthread_setconcurrency
+
+	//######################### Signal #########################
+	// Signals are not supported: pthread_sigmask and pthread_sigqueue always abort; pthread_kill aborts unless sig is 0.
+
+	 int pthread_sigmask( int /* how */, const sigset_t * /* set */, sigset_t * /* oset */ ) libcfa_public __THROW {
+		abort( "pthread_sigmask : not implemented" );
+		return 0;
+	 } // pthread_sigmask
+
+	int pthread_kill( pthread_t _thread __attribute__(( unused )), int sig ) libcfa_public __THROW {
+		if ( sig == 0 ) {
+			return 0;	// sig 0 is reported as success without looking at _thread
+		} else {
+			abort( "pthread_kill : not implemented" );
+		} // if
+		return 0;
+	} // pthread_kill
+
+	int pthread_sigqueue(pthread_t , int sig, const union sigval) libcfa_public __THROW {
+		abort( "pthread_sigqueue : not implemented" );
+		return 0;
+	} // pthread_sigqueue
+
+	//######################### Scheduling #########################
+	int pthread_detach( pthread_t threadID ) libcfa_public __THROW {	// exported like the other stubs in this file
+		abort( "pthread_detach : not implemented" );	// detaching is unsupported: always aborts
+		return 0;	// only reached if abort returns
+	} // pthread_detach
+
+	int pthread_setschedparam( pthread_t /* thread */, int /* policy */, const struct sched_param * /* param */ ) libcfa_public __THROW {
+		abort( "pthread_setschedparam : not implemented" );	// unsupported: always aborts, arguments are unused
+		return 0;
+	} // pthread_setschedparam
+
+	int pthread_getschedparam( pthread_t /* thread */, int */* policy */, struct sched_param * /* param */ ) libcfa_public __THROW {
+		abort( "pthread_getschedparam : not implemented" );	// unsupported: always aborts, output arguments are never written
+		return 0;
+	} // pthread_getschedparam
+
+	 //######################### Mutex Attr #########################
+	// Mutex attributes are accepted and ignored: every routine below returns 0 and touches none of its arguments (the get* routines leave their output pointer unwritten).
+	int pthread_mutexattr_init( pthread_mutexattr_t * /* attr */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutexattr_init
+
+	int pthread_mutexattr_destroy( pthread_mutexattr_t * /* attr */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutexattr_destroy
+
+	int pthread_mutexattr_setpshared( pthread_mutexattr_t * /* attr */, int /* pshared */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutexattr_setpshared
+
+	int pthread_mutexattr_getpshared( const pthread_mutexattr_t * /* attr */, int * /* pshared */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutexattr_getpshared
+
+	int pthread_mutexattr_setprotocol( pthread_mutexattr_t * /* attr */, int /* protocol */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutexattr_setprotocol
+
+	int pthread_mutexattr_getprotocol( const pthread_mutexattr_t * /* attr */, int * /* protocol */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutexattr_getprotocol
+
+	int pthread_mutexattr_setprioceiling( pthread_mutexattr_t * /* attr */, int /* prioceiling */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutexattr_setprioceiling
+
+	int pthread_mutexattr_getprioceiling( const pthread_mutexattr_t * /* attr */, int * /* ceiling */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutexattr_getprioceiling
+
+	int pthread_mutex_setprioceiling( pthread_mutex_t * /* mutex */, int /* prioceiling */, int * /* old_ceiling */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutex_setprioceiling
+
+	int pthread_mutex_getprioceiling( const pthread_mutex_t * /* mutex */, int * /* ceiling */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutex_getprioceiling
+
+	int pthread_mutexattr_gettype( __const pthread_mutexattr_t * __restrict /* __attr */, int * __restrict /* __kind */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutexattr_gettype
+
+	int pthread_mutexattr_settype( pthread_mutexattr_t * /* __attr */, int /* __kind */ ) libcfa_public __THROW {
+		return 0;
+	} // pthread_mutexattr_settype
+
+	//######################### Mutex #########################
+	// Timed locking is unsupported: the call aborts; no return statement follows, so the function relies on abort not returning.
+	int pthread_mutex_timedlock( pthread_mutex_t *__restrict /* __mutex */, __const struct timespec *__restrict /* __abstime */ ) libcfa_public __THROW {
+		abort( "pthread_mutex_timedlock" );
+	} // pthread_mutex_timedlock
+
+	//######################### Condition #########################
+	// Condition-variable clock attributes are unsupported: both routines abort with their own name as the message.
+	int pthread_condattr_getclock( __const pthread_condattr_t * __restrict /* __attr */, __clockid_t *__restrict /* __clock_id */ ) libcfa_public __THROW {
+		abort( "pthread_condattr_getclock" );
+	} // pthread_condattr_getclock
+
+	int pthread_condattr_setclock( pthread_condattr_t * /* __attr */, __clockid_t /* __clock_id */ ) libcfa_public __THROW {
+		abort( "pthread_condattr_setclock" );
+	} // pthread_condattr_setclock
+
+	//######################### Spinlock #########################
+	// Spinlocks are unsupported: every routine below aborts with its own name as the message and never uses its arguments.
+	int pthread_spin_init( pthread_spinlock_t * /* __lock */, int /*__pshared */ ) libcfa_public __THROW {
+		abort( "pthread_spin_init" );
+	} // pthread_spin_init
+
+	int pthread_spin_destroy( pthread_spinlock_t * /* __lock */ ) libcfa_public __THROW {
+		abort( "pthread_spin_destroy" );
+	} // pthread_spin_destroy
+
+	int pthread_spin_lock( pthread_spinlock_t * /* __lock */ ) libcfa_public __THROW {
+		abort( "pthread_spin_lock" );
+	} // pthread_spin_lock
+
+	int pthread_spin_trylock( pthread_spinlock_t * /* __lock */ ) libcfa_public __THROW {
+		abort( "pthread_spin_trylock" );
+	} // pthread_spin_trylock
+
+	int pthread_spin_unlock( pthread_spinlock_t * /* __lock */ ) libcfa_public __THROW {
+		abort( "pthread_spin_unlock" );
+	} // pthread_spin_unlock
+
+	//######################### Barrier #########################
+	// Barriers and barrier attributes are unsupported: every routine below aborts with its own name as the message.
+	int pthread_barrier_init( pthread_barrier_t *__restrict /* __barrier */, __const pthread_barrierattr_t *__restrict /* __attr */, unsigned int /* __count */ ) libcfa_public __THROW {
+		abort( "pthread_barrier_init" );
+	} // pthread_barrier_init
+
+	int pthread_barrier_destroy( pthread_barrier_t * /* __barrier */ ) libcfa_public  __THROW {
+		abort( "pthread_barrier_destroy" );
+	} // pthread_barrier_destroy
+
+	int pthread_barrier_wait( pthread_barrier_t * /* __barrier */ ) libcfa_public __THROW {
+		abort( "pthread_barrier_wait" );
+	} // pthread_barrier_wait
+
+	int pthread_barrierattr_init( pthread_barrierattr_t * /* __attr */ ) libcfa_public __THROW {
+		abort( "pthread_barrierattr_init" );
+	} // pthread_barrierattr_init
+
+	int pthread_barrierattr_destroy( pthread_barrierattr_t * /* __attr */ ) libcfa_public __THROW {
+		abort( "pthread_barrierattr_destroy" );
+	} // pthread_barrierattr_destroy
+
+	int pthread_barrierattr_getpshared( __const pthread_barrierattr_t * __restrict /* __attr */, int *__restrict /* __pshared */ ) libcfa_public __THROW {
+		abort( "pthread_barrierattr_getpshared" );
+	} // pthread_barrierattr_getpshared
+
+	int pthread_barrierattr_setpshared( pthread_barrierattr_t * /* __attr */, int /* __pshared */ ) libcfa_public __THROW {
+		abort( "pthread_barrierattr_setpshared" );
+	} // pthread_barrierattr_setpshared
+
+	//######################### Clock #########################
+	// Per-thread CPU clocks are unsupported: the call aborts and never writes its output argument.
+	int pthread_getcpuclockid( pthread_t /* __thread_id */, __clockid_t * /* __clock_id */ ) libcfa_public __THROW {
+		abort( "pthread_getcpuclockid" );
+	} // pthread_getcpuclockid
+
+	// pthread_atfork()
+
+// UNIX98
+
+	//######################### Read/Write #########################
+	// Reader/writer locks and their attributes are unsupported: every routine below aborts with its own name as the message.
+	int pthread_rwlock_init( pthread_rwlock_t *__restrict /* __rwlock */, __const pthread_rwlockattr_t *__restrict /* __attr */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_init" );
+	} // pthread_rwlock_init
+
+	int pthread_rwlock_destroy( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_destroy" );
+	} // pthread_rwlock_destroy
+
+	int pthread_rwlock_rdlock( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_rdlock" );
+	} // pthread_rwlock_rdlock
+
+	int pthread_rwlock_tryrdlock( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_tryrdlock" );
+	} // pthread_rwlock_tryrdlock
+
+	int pthread_rwlock_wrlock( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_wrlock" );
+	} // pthread_rwlock_wrlock
+
+	int pthread_rwlock_trywrlock( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_trywrlock" );
+	} // pthread_rwlock_trywrlock
+
+	int pthread_rwlock_unlock( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_unlock" );
+	} // pthread_rwlock_unlock
+
+	int pthread_rwlockattr_init( pthread_rwlockattr_t * /* __attr */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_init" );
+	} // pthread_rwlockattr_init
+
+	int pthread_rwlockattr_destroy( pthread_rwlockattr_t * /*__attr */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_destroy" );
+	} // pthread_rwlockattr_destroy
+
+	int pthread_rwlockattr_getpshared( __const pthread_rwlockattr_t * __restrict /* __attr */, int *__restrict /* __pshared */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_getpshared" );
+	} // pthread_rwlockattr_getpshared
+
+	int pthread_rwlockattr_setpshared( pthread_rwlockattr_t * /* __attr */, int /* __pshared */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_setpshared" );
+	} // pthread_rwlockattr_setpshared
+
+	int pthread_rwlockattr_getkind_np( __const pthread_rwlockattr_t * /* __attr */, int * /* __pref */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_getkind_np" );
+	} // pthread_rwlockattr_getkind_np
+
+	int pthread_rwlockattr_setkind_np( pthread_rwlockattr_t * /* __attr */, int /* __pref */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_setkind_np" );
+	} // pthread_rwlockattr_setkind_np
+
+// UNIX98 + XOPEN
+	// Timed reader/writer locking is unsupported: both routines abort with their own name as the message.
+	int pthread_rwlock_timedrdlock( pthread_rwlock_t *__restrict  /* __rwlock */, __const struct timespec *__restrict /* __abstime */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_timedrdlock" );
+	} // pthread_rwlock_timedrdlock
+
+	int pthread_rwlock_timedwrlock( pthread_rwlock_t *__restrict  /* __rwlock */, __const struct timespec *__restrict /* __abstime */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_timedwrlock" );
+	} // pthread_rwlock_timedwrlock
+
+// GNU
+
+	//######################### Parallelism #########################
+	// CPU-affinity control is unsupported: every routine below aborts with its own name as the message.
+	int pthread_setaffinity_np( pthread_t /* __th */, size_t /* __cpusetsize */, __const cpu_set_t * /* __cpuset */ ) libcfa_public __THROW {
+		abort( "pthread_setaffinity_np" );
+	} // pthread_setaffinity_np
+
+	int pthread_getaffinity_np( pthread_t /* __th */, size_t /* __cpusetsize */, cpu_set_t * /* __cpuset */ ) libcfa_public __THROW {
+		abort( "pthread_getaffinity_np" );
+	} // pthread_getaffinity_np
+
+	int pthread_attr_setaffinity_np( pthread_attr_t * /* __attr */, size_t /* __cpusetsize */, __const cpu_set_t * /* __cpuset */ ) libcfa_public __THROW {
+		abort( "pthread_attr_setaffinity_np" );
+	} // pthread_attr_setaffinity_np
+
+	int pthread_attr_getaffinity_np( __const pthread_attr_t * /* __attr */, size_t /* __cpusetsize */, cpu_set_t * /* __cpuset */ ) libcfa_public __THROW {
+		abort( "pthread_attr_getaffinity_np" );
+	} // pthread_attr_getaffinity_np
+
+	//######################### Cancellation #########################
+	// Cancellation and deferred cleanup handlers are unsupported: every routine below aborts; none of the arguments is used.
+	void _pthread_cleanup_push_defer( struct _pthread_cleanup_buffer * /* __buffer */, void( * /* __routine */ )( void * ), void * /* __arg */ ) libcfa_public __THROW {
+		abort( "_pthread_cleanup_push_defer" );
+	} // _pthread_cleanup_push_defer
+
+	void _pthread_cleanup_pop_restore( struct _pthread_cleanup_buffer * /* __buffer */, int /* __execute */ ) libcfa_public __THROW {
+		abort( "_pthread_cleanup_pop_restore" );
+	} // _pthread_cleanup_pop_restore
+
+	int pthread_cancel( pthread_t threadID ) libcfa_public __THROW {
+		abort("pthread cancel not implemented");
+		return 0;
+	} // pthread_cancel
+
+	int pthread_setcancelstate( int state, int *oldstate ) libcfa_public __THROW {
+		abort("pthread_setcancelstate not implemented");
+		return 0;
+	} // pthread_setcancelstate
+
+	int pthread_setcanceltype( int type, int *oldtype ) libcfa_public __THROW {
+		abort("pthread_setcanceltype not implemented");
+		return 0;
+	} // pthread_setcanceltype
+} // extern "C"
+
+#pragma GCC diagnostic pop
+
Index: libcfa/src/concurrency/ready_subqueue.hfa
===================================================================
--- libcfa/src/concurrency/ready_subqueue.hfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/ready_subqueue.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -25,5 +25,5 @@
 static inline thread$ * mock_head(const __intrusive_lane_t & this) {
 	thread$ * rhead = (thread$ *)(
-		(uintptr_t)( &this.l.anchor ) - __builtin_offsetof( thread$, link )
+		(uintptr_t)( &this.l.anchor ) - __builtin_offsetof( thread$, rdy_link )
 	);
 	return rhead;
@@ -34,8 +34,8 @@
 static inline void push( __intrusive_lane_t & this, thread$ * node ) {
 	/* paranoid */ verify( this.l.lock );
-	/* paranoid */ verify( node->link.next == 0p );
-	/* paranoid */ verify( __atomic_load_n(&node->link.ts, __ATOMIC_RELAXED) == MAX  );
-	/* paranoid */ verify( this.l.prev->link.next == 0p );
-	/* paranoid */ verify( __atomic_load_n(&this.l.prev->link.ts, __ATOMIC_RELAXED)   == MAX  );
+	/* paranoid */ verify( node->rdy_link.next == 0p );
+	/* paranoid */ verify( __atomic_load_n(&node->rdy_link.ts, __ATOMIC_RELAXED) == MAX  );
+	/* paranoid */ verify( this.l.prev->rdy_link.next == 0p );
+	/* paranoid */ verify( __atomic_load_n(&this.l.prev->rdy_link.ts, __ATOMIC_RELAXED)   == MAX  );
 	if( this.l.anchor.next == 0p ) {
 		/* paranoid */ verify( this.l.anchor.next == 0p );
@@ -51,6 +51,6 @@
 
 	// Get the relevant nodes locally
-	this.l.prev->link.next = node;
-	__atomic_store_n(&this.l.prev->link.ts, rdtscl(), __ATOMIC_RELAXED);
+	this.l.prev->rdy_link.next = node;
+	__atomic_store_n(&this.l.prev->rdy_link.ts, rdtscl(), __ATOMIC_RELAXED);
 	this.l.prev = node;
 	#if !defined(__CFA_NO_STATISTICS__)
@@ -70,9 +70,9 @@
 	// Get the relevant nodes locally
 	thread$ * node = this.l.anchor.next;
-	this.l.anchor.next = node->link.next;
-	__atomic_store_n(&this.l.anchor.ts, __atomic_load_n(&node->link.ts, __ATOMIC_RELAXED), __ATOMIC_RELAXED);
+	this.l.anchor.next = node->rdy_link.next;
+	__atomic_store_n(&this.l.anchor.ts, __atomic_load_n(&node->rdy_link.ts, __ATOMIC_RELAXED), __ATOMIC_RELAXED);
 	bool is_empty = this.l.anchor.next == 0p;
-	node->link.next = 0p;
-	__atomic_store_n(&node->link.ts, ULLONG_MAX, __ATOMIC_RELAXED);
+	node->rdy_link.next = 0p;
+	__atomic_store_n(&node->rdy_link.ts, ULLONG_MAX, __ATOMIC_RELAXED);
 	#if !defined(__CFA_NO_STATISTICS__)
 		this.l.cnt--;
@@ -83,7 +83,7 @@
 
 	unsigned long long ats = __atomic_load_n(&this.l.anchor.ts, __ATOMIC_RELAXED);
-	/* paranoid */ verify( node->link.next == 0p );
-	/* paranoid */ verify( __atomic_load_n(&node->link.ts , __ATOMIC_RELAXED) == MAX );
-	/* paranoid */ verify( __atomic_load_n(&node->link.ts , __ATOMIC_RELAXED) != 0   );
+	/* paranoid */ verify( node->rdy_link.next == 0p );
+	/* paranoid */ verify( __atomic_load_n(&node->rdy_link.ts , __ATOMIC_RELAXED) == MAX );
+	/* paranoid */ verify( __atomic_load_n(&node->rdy_link.ts , __ATOMIC_RELAXED) != 0   );
 	/* paranoid */ verify( ats != 0 );
 	/* paranoid */ verify( (ats == MAX) == is_empty );
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/thread.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -44,15 +44,13 @@
 	self_mon_p = &self_mon;
 	curr_cluster = &cl;
-	link.next = 0p;
-	link.ts   = MAX;
+	rdy_link.next = 0p;
+	rdy_link.ts   = MAX;
 	preferred = ready_queue_new_preferred();
 	last_proc = 0p;
 	random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl();
 	#if defined( __CFA_WITH_VERIFY__ )
+		executing = 0p;
 		canary = 0x0D15EA5E0D15EA5Ep;
 	#endif
-
-	node.next = 0p;
-	node.prev = 0p;
 
 	clh_node = malloc( );
@@ -177,4 +175,50 @@
 
 //-----------------------------------------------------------------------------
+bool migrate( thread$ * thrd, struct cluster & cl ) {
+	// Move thrd onto cluster cl.  Returns false when thrd is already on cl, true after it has been re-registered there.
+	monitor$ * tmon = get_monitor(thrd);
+	monitor$ * __monitors[] = { tmon };
+	monitor_guard_t __guard = { __monitors, 1 };		// guard over thrd's own monitor, in scope for the rest of the function
+
+
+	{
+		// if nothing needs to be done, return false
+		if( thrd->curr_cluster == &cl ) return false;
+
+		// are we migrating ourself?
+		const bool local = thrd == active_thread();
+
+		/* paranoid */ verify( !local || &cl != active_cluster() );
+		/* paranoid */ verify( !local || thrd->curr_cluster == active_cluster() );
+		/* paranoid */ verify( !local || thrd->curr_cluster == active_processor()->cltr );
+		/* paranoid */ verify( local || tmon->signal_stack.top );	// check for 0p before the dereference on the next line
+		/* paranoid */ verify( local || tmon->signal_stack.top->owner->waiting_thread == thrd );
+
+		// make sure we aren't interrupted while doing this
+		// not as important if we aren't local
+		disable_interrupts();
+
+		// actually move the thread
+		unregister( thrd->curr_cluster, *thrd );
+		thrd->curr_cluster = &cl;
+		doregister( thrd->curr_cluster, *thrd );
+
+		// restore interrupts
+		enable_interrupts();
+
+		// if this is the local thread, we are still running on the old cluster
+		if(local) yield();
+
+		/* paranoid */ verify( !local || &cl == active_cluster() );
+		/* paranoid */ verify( !local || thrd->curr_cluster == active_cluster() );
+		/* paranoid */ verify( !local || thrd->curr_cluster == active_processor()->cltr );
+		/* paranoid */ verify(  local || tmon->signal_stack.top );
+		/* paranoid */ verify(  local || tmon->signal_stack.top->owner->waiting_thread == thrd );
+
+		return true;
+	}
+}
+
+//-----------------------------------------------------------------------------
 #define GENERATOR LCG
 
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/concurrency/thread.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -132,4 +132,12 @@
 
 //----------
+// misc: moving a thread to another cluster
+bool migrate( thread$ * thrd, struct cluster & cl );	// returns false when thrd is already on cl, true once it has been moved
+
+forall( T & | is_thread(T) )
+static inline bool migrate( T & mutex thrd, struct cluster & cl ) { return migrate( &(thread&)thrd, cl ); }	// overload for any is_thread type: forwards to the thread$ version above
+
+
+//----------
 // prng
 static inline {
Index: libcfa/src/containers/array.hfa
===================================================================
--- libcfa/src/containers/array.hfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/containers/array.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -1,2 +1,4 @@
+#pragma once
+
 #include <assert.h>
 
@@ -18,7 +20,12 @@
     // About the choice of integral types offered as subscript overloads:
     // Intent is to cover these use cases:
+    //    a[0]                                                // i : zero_t
+    //    a[1]                                                // i : one_t
+    //    a[2]                                                // i : int
     //    float foo( ptrdiff_t i ) { return a[i]; }           // i : ptrdiff_t
+    //    float foo( size_t i ) { return a[i]; }              // i : size_t
     //    forall( [N] ) ... for( i; N ) { total += a[i]; }    // i : typeof( sizeof(42) )
     //    for( i; 5 ) { total += a[i]; }                      // i : int
+    //
     // It gets complicated by:
     // -  CFA does overloading on concrete types, like int and unsigned int, not on typedefed
@@ -28,9 +35,28 @@
     //    should give them type size_t.
     //
-    //                          gcc -m32         cfa -m32 given bug         gcc -m64
+    //                          gcc -m32         cfa -m32 given bug         gcc -m64 (and cfa)
     // ptrdiff_t                int              int                        long int
     // size_t                   unsigned int     unsigned int               unsigned long int
     // typeof( sizeof(42) )     unsigned int     unsigned long int          unsigned long int
     // int                      int              int                        int
+    //
+    // So the solution must support types {zero_t, one_t, int, unsigned int, long int, unsigned long int}
+    //
+    // The solution cannot rely on implicit conversions (e.g. just have one overload for ptrdiff_t)
+    // because assertion satisfaction requires types to match exactly.  Both higher-dimensional
+    // subscripting and operations on slices use asserted subscript operators.  The test case
+    // array-container/array-sbscr-cases covers the combinations.  Mike believes that commenting out
+    // any of the current overloads leads to one of those cases failing, either on 64- or 32-bit.
+    // Mike is open to being shown a smaller set of overloads that still passes the test.
+
+    static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, zero_t ) {	// subscript overload selected for the literal 0
+        assert( 0 < N );	// element 0 exists only when the dimension is non-empty
+        return (Timmed &) a.strides[0];
+    }
+
+    static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, one_t ) {	// subscript overload selected for the literal 1
+        assert( 1 < N );	// element 1 exists only when the dimension has at least two elements
+        return (Timmed &) a.strides[1];
+    }
 
     static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, int i ) {
@@ -77,4 +103,6 @@
         return N;
     }
+
+    static inline void __taglen( tag(arpk(N, S, Timmed, Tbase)), tag(N) ) {}	// empty body: exists so an assertion of the form `void __taglen( tag(A), tag(N) )` can be satisfied for arpk
 
     // workaround #226 (and array relevance thereof demonstrated in mike102/otype-slow-ndims.cfa)
@@ -156,4 +184,11 @@
 #endif
 
+// Available for users to work around Trac #265
+// If `a[...0...]` isn't working, try `a[...ix0...]` instead.
+// ix0 is the constant zero cast to ptrdiff_t, i.e. an ordinary integer subscript rather than the literal 0.
+#define ix0 ((ptrdiff_t)0)
+
+
+
 //
 // Rotation
@@ -185,6 +220,24 @@
 //
 
-trait ar(A &, Tv &) {
-    Tv& ?[?]( A&, ptrdiff_t );
-    size_t ?`len( A& );
-};
+// desired:
+// trait ar(A &, Tv &, [N]) {
+//     Tv& ?[?]( A&, zero_t );
+//     Tv& ?[?]( A&, one_t  );
+//     Tv& ?[?]( A&, int    );
+//                   ...
+//     size_t ?`len( A& );
+//     void __taglen( tag(A), tag(N) );
+// };
+
+// working around N's not being accepted as arguments to traits:
+// ar(A, Tv, N) expands to a braced assertion list, written after `|` in a forall clause, e.g. forall( [N], C & | ar( C, float, N ) )
+#define ar(A, Tv, N) {                 \
+    Tv& ?[?]( A&, zero_t );            \
+    Tv& ?[?]( A&, one_t );             \
+    Tv& ?[?]( A&, int );               \
+    Tv& ?[?]( A&, unsigned int );      \
+    Tv& ?[?]( A&, long int );          \
+    Tv& ?[?]( A&, unsigned long int ); \
+    size_t ?`len( A& );                \
+    void __taglen( tag(A), tag(N) );   \
+}
Index: libcfa/src/containers/lockfree.hfa
===================================================================
--- libcfa/src/containers/lockfree.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ libcfa/src/containers/lockfree.hfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,252 @@
+#pragma once
+
+#include <assert.h>
+
+#include <stdint.h>
+#include <bits/defs.hfa>
+
+forall( T &) {
+	//------------------------------------------------------------
+	// Queue based on the MCS lock
+	// It is a Multi-Producer/Single-Consumer queue; threads pushing
+	// elements must hold on to the elements they push.
+	// Not appropriate for an async message queue, for example.
+	struct mcs_queue {
+		T * volatile tail;	// most recently pushed element, 0p when the queue is empty
+	};
+
+	static inline void ?{}(mcs_queue(T) & this) { this.tail = 0p; }	// constructor: start empty
+	static inline bool empty(const mcs_queue(T) & this) { return !this.tail; }	// true while tail is 0p
+
+ 	static inline forall(| { T * volatile & ?`next ( T * ); })
+	{
+		// Adds an element to the list and returns the previous tail (0p if the queue was empty)
+		// Multi-Thread Safe, Lock-Free
+		T * push(mcs_queue(T) & this, T * elem) __attribute__((artificial));
+		T * push(mcs_queue(T) & this, T * elem) {
+			/* paranoid */ verify(!(elem`next));
+			// Race to add to the tail
+			T * prev = __atomic_exchange_n(&this.tail, elem, __ATOMIC_SEQ_CST);
+			// If we aren't the first, we need to tell the person before us
+			// by linking ourselves in as its successor; when prev is 0p there is nothing to link
+			if (prev) prev`next = elem;
+			return prev;
+		}
+
+		// Advances the head of the list, dropping the element given; returns the new head, or 0p if elem was the last item.
+		// Passing an element that is not the head is undefined behavior
+		// NOT Multi-Thread Safe, concurrent pushes are safe
+		T * advance(mcs_queue(T) & this, T * elem) __attribute__((artificial));
+		T * advance(mcs_queue(T) & this, T * elem) {
+			T * expected = elem;
+			// Check if this is already the last item: if tail is still elem, swing it to 0p and the queue is empty
+			if (__atomic_compare_exchange_n(&this.tail, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) return 0p;
+
+			// If not, a push has already replaced the tail; wait for it to fill in elem's next link
+			while (!(elem`next)) Pause();
+
+			// remember the successor before the link is cleared; it is the value returned
+			T * ret = elem`next;
+
+			// invalidate link to reset to initial state
+			elem`next = 0p;
+			return ret;
+		}
+	}
+
+	//------------------------------------------------------------
+	// Queue based on the MCS lock
+	// Extension of the above queue which supports 'blind' pops,
+	// i.e., popping a value from the head without knowing what the head is.
+	// It has no extra guarantees beyond the mcs_queue.
+	struct mpsc_queue {
+		inline mcs_queue(T);	// embeds the tail-only queue
+		T * volatile head;	// element the next pop removes, 0p when none is known
+	};
+
+	static inline void ?{}(mpsc_queue(T) & this) {
+		((mcs_queue(T)&)this){};	// construct the embedded mcs_queue (tail = 0p)
+		this.head = 0p;
+	}
+
+	static inline forall(| { T * volatile & ?`next ( T * ); })
+	{
+		// Adds a new element to the queue and returns the previous tail (0p if the queue was empty)
+		// Multi-Thread Safe, Lock-Free
+		T * push(mpsc_queue(T) & this, T * elem) __attribute__((artificial));
+		T * push(mpsc_queue(T) & this, T * elem) {
+			T * prev = push((mcs_queue(T)&)this, elem);
+			if (!prev) this.head = elem;	// queue was empty: the new element is also the head
+			return prev;
+		}
+
+		// Pop an element from the queue
+		// return the element that was removed, or 0p when head is 0p (next is then left untouched)
+		// next is set to the new head of the queue
+		// NOT Multi-Thread Safe
+		T * pop(mpsc_queue(T) & this, T *& next) __attribute__((artificial));
+		T * pop(mpsc_queue(T) & this, T *& next) {
+			T * elem = this.head;
+			// If head is empty just return
+			if (!elem) return 0p;
+
+			// If there is already someone in the list, then it's easy
+			if (elem`next) {
+				this.head = next = elem`next;
+				// force memory sync
+				__atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+				// invalidate link to reset to initial state
+				elem`next = 0p;
+			}
+			// Otherwise, there might be a race where the list only looks like it ends here but someone is still enqueuing
+			else {
+				// null out head here, because we linearize with push
+				// at the CAS in advance and therefore can write to head
+				// after that point, it could overwrite the write in push
+				this.head = 0p;
+				next = advance((mcs_queue(T)&)this, elem);
+
+				// Only write to the head if there is a next element
+				// it is the only way we can guarantee we are not overwriting
+				// a write made in push
+				if (next) this.head = next;
+			}
+
+			// return removed element
+			return elem;
+		}
+
+		// Same as previous function, for callers that do not need the new head
+		T * pop(mpsc_queue(T) & this) {
+			T * _ = 0p;	// throw-away receiver for the `next` out-parameter
+			return pop(this, _);
+		}
+	}
+
+	//------------------------------------------------------------
+	// List based on the MCS lock with poisoning
+	// Multiple threads may push concurrently; threads pushing
+	// elements must hold on to the elements they push.
+	// Not appropriate for an async message queue, for example.
+	// Poisoning the list prevents any new elements from being pushed.
+	// enum(void*) poison_state {
+	// 	EMPTY = 0p,
+	// 	POISON = 1p,
+	// 	IN_PROGRESS = 1p
+	// };
+
+	struct poison_list {
+		T * volatile head;	// most recently pushed element; 0p when empty, 1p once poisoned
+	};
+
+	static inline void ?{}(poison_list(T) & this) { this.head = 0p; }	// constructor: empty and not poisoned
+	static inline bool is_poisoned( const poison_list(T) & this ) { return 1p == this.head; }	// 1p is the poison marker
+
+ 	static inline forall(| { T * volatile & ?`next ( T * ); })
+	{
+		// Adds an element to the list; returns false, without adding it, when the list is poisoned
+		// Multi-Thread Safe, Lock-Free
+		bool push(poison_list(T) & this, T * elem) __attribute__((artificial));
+		bool push(poison_list(T) & this, T * elem) {
+			/* paranoid */ verify(0p == (elem`next));
+			__atomic_store_n( &elem`next, (T*)1p, __ATOMIC_RELAXED );	// 1p placeholder: the real link is not known yet (not reset if the push is refused)
+
+			// read the head up-front
+			T * expected = this.head;
+			for() {
+				// check if it's poisoned
+				if(expected == 1p) return false;
+
+				// try to CAS the elem in; on failure expected is refreshed with the current head
+				if(__atomic_compare_exchange_n(&this.head, &expected, elem, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) {
+					// We managed to exchange in, we are done
+
+					// We should never succeed the CAS if it's poisoned and the elem should be 1p.
+					/* paranoid */ verify( expected  != 1p );
+					/* paranoid */ verify( elem`next == 1p );
+
+					// Publish the real link: replace the 1p placeholder with the previous head
+					// (0p when the list was empty); advance() spins until this store is visible
+					elem`next = expected;
+					return true;
+				}
+			}
+		}
+
+		// Returns the element linked after elem (0p at the end of the list).
+		// Does not modify elem or any list; it only reads elem's next link.
+		// Spins while that link still holds the 1p placeholder written by a push in progress.
+		T * advance(T * elem) __attribute__((artificial));
+		T * advance(T * elem) {
+			T * ret;
+
+			// Wait for next item to show-up, filled by push
+			while (1p == (ret = __atomic_load_n(&elem`next, __ATOMIC_RELAXED))) Pause();
+
+			return ret;
+		}
+
+		// Poison the list, preventing new pushes and returning the head it had (0p if it was empty)
+		T * poison(poison_list(T) & this) __attribute__((artificial));
+		T * poison(poison_list(T) & this) {
+			T * ret = __atomic_exchange_n( &this.head, (T*)1p, __ATOMIC_SEQ_CST );	// install the poison marker and fetch the old head in one step
+			/* paranoid */ verifyf( ret != (T*)1p, "Poison list %p poisoned more than once!", &this );
+			return ret;
+		}
+	}
+}
+
+forall( T & )	// stack-top pointer plus push counter, overlaid with one integer (atom) so both can be compare-exchanged together
+union Link {
+	struct {											// 32/64-bit x 2
+		T * volatile top;								// pointer to stack top
+		uintptr_t count;								// count each push
+	};
+	#if __SIZEOF_INT128__ == 16
+	__int128											// gcc, 128-bit integer
+	#else
+	uint64_t											// 64-bit integer; NOTE(review): covers both fields only when pointers are 32 bits wide
+	#endif // __SIZEOF_INT128__ == 16
+	atom;
+}; // Link
+
+forall( T | sized(T) | { Link(T) * ?`next( T * ); } ) {	// T must expose, via `next, the Link stored inside each node
+	struct StackLF {
+		Link(T) stack;	// current top pointer and push counter
+	}; // StackLF
+
+	static inline {
+		void ?{}( StackLF(T) & this ) with(this) { stack.atom = 0; }	// constructor: clears the whole Link through atom
+
+		T * top( StackLF(T) & this ) with(this) { return stack.top; }	// current top node, 0p when empty
+
+		void push( StackLF(T) & this, T & n ) with(this) {
+			*( &n )`next = stack;						// atomic assignment unnecessary, or use CAA
+			for () {									// busy wait
+			  if ( __atomic_compare_exchange_n( &stack.atom, &( &n )`next->atom, (Link(T))@{ {&n, ( &n )`next->count + 1} }.atom, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) break; // attempt to update top node
+			} // for
+		} // push
+
+		T * pop( StackLF(T) & this ) with(this) {
+			Link(T) t @= stack;							// atomic assignment unnecessary, or use CAA
+			for () {									// busy wait
+			  if ( t.top == 0p ) return 0p;				// empty stack ?
+			  if ( __atomic_compare_exchange_n( &stack.atom, &t.atom, (Link(T))@{ {( t.top )`next->top, t.count} }.atom, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) return t.top; // attempt to update top node
+			} // for
+		} // pop
+
+		bool unsafe_remove( StackLF(T) & this, T * node ) with(this) {	// unlinks node wherever it sits in the stack; uses plain, non-atomic accesses
+			Link(T) * link = &stack;
+			for() {
+				T * next = link->top;
+				if( next == node ) {
+					link->top = ( node )`next->top;	// bypass node
+					return true;
+				}
+				if( next == 0p ) return false;	// reached the bottom without finding node
+				link = ( next )`next;
+			}
+		}
+	} // distribution
+} // distribution
Index: libcfa/src/containers/queueLockFree.hfa
===================================================================
--- libcfa/src/containers/queueLockFree.hfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ 	(revision )
@@ -1,125 +1,0 @@
-#pragma once
-
-#include <assert.h>
-
-#include <bits/defs.hfa>
-
-forall( T &) {
-	//------------------------------------------------------------
-	// Queue based on the MCS lock
-	// It is a Multi-Producer/Single-Consumer queue threads pushing
-	// elements must hold on to the elements they push
-	// Not appropriate for an async message queue for example,
-	struct mcs_queue {
-		T * volatile tail;
-	};
-
-	static inline void ?{}(mcs_queue(T) & this) { this.tail = 0p; }
-	static inline bool empty(const mcs_queue(T) & this) { return !this.tail; }
-
- 	static inline forall(| { T * volatile & ?`next ( T * ); })
-	{
-		// Adds an element to the list
-		// Multi-Thread Safe, Lock-Free
-		T * push(mcs_queue(T) & this, T * elem) __attribute__((artificial));
-		T * push(mcs_queue(T) & this, T * elem) {
-			/* paranoid */ verify(!(elem`next));
-			// Race to add to the tail
-			T * prev = __atomic_exchange_n(&this.tail, elem, __ATOMIC_SEQ_CST);
-			// If we aren't the first, we need to tell the person before us
-			// No need to
-			if (prev) prev`next = elem;
-			return prev;
-		}
-
-		// Advances the head of the list, dropping the element given.
-		// Passing an element that is not the head is undefined behavior
-		// NOT Multi-Thread Safe, concurrent pushes are safe
-		T * advance(mcs_queue(T) & this, T * elem) __attribute__((artificial));
-		T * advance(mcs_queue(T) & this, T * elem) {
-			T * expected = elem;
-			// Check if this is already the last item
-			if (__atomic_compare_exchange_n(&this.tail, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) return 0p;
-
-			// If not wait for next item to show-up, filled by push
-			while (!(elem`next)) Pause();
-
-			// we need to return if the next link was empty
-			T * ret = elem`next;
-
-			// invalidate link to reset to initial state
-			elem`next = 0p;
-			return ret;
-		}
-	}
-
-	//------------------------------------------------------------
-	// Queue based on the MCS lock
-	// Extension of the above lock which supports 'blind' pops.
-	// i.e., popping a value from the head without knowing what the head is
-	// has no extra guarantees beyond the mcs_queue
-	struct mpsc_queue {
-		inline mcs_queue(T);
-		T * volatile head;
-	};
-
-	static inline void ?{}(mpsc_queue(T) & this) {
-		((mcs_queue(T)&)this){};
-		this.head = 0p;
-	}
-
-	static inline forall(| { T * volatile & ?`next ( T * ); })
-	{
-		// Added a new element to the queue
-		// Multi-Thread Safe, Lock-Free
-		T * push(mpsc_queue(T) & this, T * elem) __attribute__((artificial));
-		T * push(mpsc_queue(T) & this, T * elem) {
-			T * prev = push((mcs_queue(T)&)this, elem);
-			if (!prev) this.head = elem;
-			return prev;
-		}
-
-		// Pop an element from the queue
-		// return the element that was removed
-		// next is set to the new head of the queue
-		// NOT Multi-Thread Safe
-		T * pop(mpsc_queue(T) & this, T *& next) __attribute__((artificial));
-		T * pop(mpsc_queue(T) & this, T *& next) {
-			T * elem = this.head;
-			// If head is empty just return
-			if (!elem) return 0p;
-
-			// If there is already someone in the list, then it's easy
-			if (elem`next) {
-				this.head = next = elem`next;
-				// force memory sync
-				__atomic_thread_fence(__ATOMIC_SEQ_CST);
-
-				// invalidate link to reset to initial state
-				elem`next = 0p;
-			}
-			// Otherwise, there might be a race where it only looks but someone is enqueuing
-			else {
-				// null out head here, because we linearize with push
-				// at the CAS in advance and therefore can write to head
-				// after that point, it could overwrite the write in push
-				this.head = 0p;
-				next = advance((mcs_queue(T)&)this, elem);
-
-				// Only write to the head if there is a next element
-				// it is the only way we can guarantee we are not overwriting
-				// a write made in push
-				if (next) this.head = next;
-			}
-
-			// return removed element
-			return elem;
-		}
-
-		// Same as previous function
-		T * pop(mpsc_queue(T) & this) {
-			T * _ = 0p;
-			return pop(this, _);
-		}
-	}
-}
Index: libcfa/src/containers/stackLockFree.hfa
===================================================================
--- libcfa/src/containers/stackLockFree.hfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ 	(revision )
@@ -1,76 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// stackLockFree.hfa --
-//
-// Author           : Peter A. Buhr
-// Created On       : Wed May 13 20:58:58 2020
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Jan 20 20:40:03 2021
-// Update Count     : 67
-//
-
-#pragma once
-
-#include <stdint.h>
-
-forall( T & )
-union Link {
-	struct {											// 32/64-bit x 2
-		T * volatile top;								// pointer to stack top
-		uintptr_t count;								// count each push
-	};
-	#if __SIZEOF_INT128__ == 16
-	__int128											// gcc, 128-bit integer
-	#else
-	uint64_t											// 64-bit integer
-	#endif // __SIZEOF_INT128__ == 16
-	atom;
-}; // Link
-
-forall( T | sized(T) | { Link(T) * ?`next( T * ); } ) {
-	struct StackLF {
-		Link(T) stack;
-	}; // StackLF
-
-	static inline {
-		void ?{}( StackLF(T) & this ) with(this) { stack.atom = 0; }
-
-		T * top( StackLF(T) & this ) with(this) { return stack.top; }
-
-		void push( StackLF(T) & this, T & n ) with(this) {
-			*( &n )`next = stack;						// atomic assignment unnecessary, or use CAA
-			for () {									// busy wait
-			  if ( __atomic_compare_exchange_n( &stack.atom, &( &n )`next->atom, (Link(T))@{ {&n, ( &n )`next->count + 1} }.atom, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) break; // attempt to update top node
-			} // for
-		} // push
-
-		T * pop( StackLF(T) & this ) with(this) {
-			Link(T) t @= stack;							// atomic assignment unnecessary, or use CAA
-			for () {									// busy wait
-			  if ( t.top == 0p ) return 0p;				// empty stack ?
-			  if ( __atomic_compare_exchange_n( &stack.atom, &t.atom, (Link(T))@{ {( t.top )`next->top, t.count} }.atom, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) return t.top; // attempt to update top node
-			} // for
-		} // pop
-
-		bool unsafe_remove( StackLF(T) & this, T * node ) with(this) {
-			Link(T) * link = &stack;
-			for() {
-				T * next = link->top;
-				if( next == node ) {
-					link->top = ( node )`next->top;
-					return true;
-				}
-				if( next == 0p ) return false;
-				link = ( next )`next;
-			}
-		}
-	} // distribution
-} // distribution
-
-
-// Local Variables: //
-// tab-width: 4 //
-// End: //
Index: libcfa/src/heap.cfa
===================================================================
--- libcfa/src/heap.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/heap.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -10,6 +10,6 @@
 // Created On       : Tue Dec 19 21:58:35 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Oct 13 22:21:52 2022
-// Update Count     : 1557
+// Last Modified On : Sun Oct 30 20:56:20 2022
+// Update Count     : 1584
 //
 
@@ -43,6 +43,9 @@
 
 #define FASTLOOKUP										// use O(1) table lookup from allocation size to bucket size
-#define RETURNSPIN										// toggle spinlock / lockfree stack
 #define OWNERSHIP										// return freed memory to owner thread
+#define RETURNSPIN										// toggle spinlock / lockfree queue
+#if ! defined( OWNERSHIP ) && defined( RETURNSPIN )
+#warning "RETURNSPIN is ignored without OWNERSHIP; suggest commenting out RETURNSPIN"
+#endif // ! OWNERSHIP && RETURNSPIN
 
 #define CACHE_ALIGN 64
@@ -109,5 +112,21 @@
 
 
-//######################### Spin Lock #########################
+//######################### Helpers #########################
+
+
+// generic Bsearchl does not inline, so substitute with hand-coded binary-search.
+inline __attribute__((always_inline))
+static size_t Bsearchl( unsigned int key, const unsigned int vals[], size_t dim ) { // index of first vals[i] >= key, or dim if none; assumes vals is ascending
+	size_t l = 0, m, h = dim;							// search window is [l, h)
+	while ( l < h ) {
+		m = (l + h) / 2;
+		if ( (unsigned int &)(vals[m]) < key ) {		// cast away const
+			l = m + 1;									// vals[m] too small, discard lower half including m
+		} else {
+			h = m;										// vals[m] >= key, keep m as a candidate
+		} // if
+	} // while
+	return l;
+} // Bsearchl
 
 
@@ -206,14 +225,4 @@
 
 
-#define SPINLOCK 0
-#define LOCKFREE 1
-#define BUCKETLOCK SPINLOCK
-#if BUCKETLOCK == SPINLOCK
-#elif BUCKETLOCK == LOCKFREE
-#include <stackLockFree.hfa>
-#else
-	#error undefined lock type for bucket lock
-#endif // LOCKFREE
-
 // Recursive definitions: HeapManager needs size of bucket array and bucket area needs sizeof HeapManager storage.
 // Break recursion by hardcoding number of buckets and statically checking number is correct after bucket array defined.
@@ -232,13 +241,8 @@
 								void * home;			// allocated block points back to home locations (must overlay alignment)
 								size_t blockSize;		// size for munmap (must overlay alignment)
-								#if BUCKETLOCK == SPINLOCK
 								Storage * next;			// freed block points to next freed block of same size
-								#endif // SPINLOCK
 							};
 							size_t size;				// allocation size in bytes
 						};
-						#if BUCKETLOCK == LOCKFREE
-						Link(Storage) next;				// freed block points next freed block of same size (double-wide)
-						#endif // LOCKFREE
 					};
 				} real; // RealHeader
@@ -259,5 +263,4 @@
 	struct __attribute__(( aligned (8) )) FreeHeader {
 		size_t blockSize __attribute__(( aligned(8) )); // size of allocations on this list
-		#if BUCKETLOCK == SPINLOCK
 		#ifdef OWNERSHIP
 		#ifdef RETURNSPIN
@@ -266,8 +269,6 @@
 		Storage * returnList;							// other thread return list
 		#endif // OWNERSHIP
+
 		Storage * freeList;								// thread free list
-		#else
-		StackLF(Storage) freeList;
-		#endif // BUCKETLOCK
 		Heap * homeManager;								// heap owner (free storage to bucket, from bucket to heap)
 	}; // FreeHeader
@@ -290,13 +291,4 @@
 	#endif // __STATISTICS__
 }; // Heap
-
-#if BUCKETLOCK == LOCKFREE
-inline __attribute__((always_inline))
-static {
-	Link(Heap.Storage) * ?`next( Heap.Storage * this ) { return &this->header.kind.real.next; }
-	void ?{}( Heap.FreeHeader & ) {}
-	void ^?{}( Heap.FreeHeader & ) {}
-} // distribution
-#endif // LOCKFREE
 
 
@@ -385,20 +377,4 @@
 
 
-// generic Bsearchl does not inline, so substitute with hand-coded binary-search.
-inline __attribute__((always_inline))
-static size_t Bsearchl( unsigned int key, const unsigned int vals[], size_t dim ) {
-	size_t l = 0, m, h = dim;
-	while ( l < h ) {
-		m = (l + h) / 2;
-		if ( (unsigned int &)(vals[m]) < key ) {		// cast away const
-			l = m + 1;
-		} else {
-			h = m;
-		} // if
-	} // while
-	return l;
-} // Bsearchl
-
-
 void heapMasterCtor() with( heapMaster ) {
 	// Singleton pattern to initialize heap master
@@ -409,6 +385,6 @@
 	__map_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
 
-	?{}( extLock );
-	?{}( mgrLock );
+	extLock = 0;
+	mgrLock = 0;
 
 	char * end = (char *)sbrk( 0 );
@@ -497,13 +473,14 @@
 				#ifdef OWNERSHIP
 				#ifdef RETURNSPIN
-				?{}( freeLists[j].returnLock );
+				freeLists[j].returnLock = 0;
+				freeLists[j].returnList = 0p;
 				#endif // RETURNSPIN
-				freeLists[j].returnList = 0p;
 				#endif // OWNERSHIP
+
 				freeLists[j].freeList = 0p;
 				freeLists[j].homeManager = heap;
 				freeLists[j].blockSize = bucketSizes[j];
 			} // for
-	
+
 			heapBuffer = 0p;
 			heapReserve = 0;
@@ -522,5 +499,5 @@
 	if ( unlikely( ! heapMasterBootFlag ) ) heapMasterCtor();
 
-	lock( heapMaster.mgrLock );		// protect heapMaster counters
+	lock( heapMaster.mgrLock );							// protect heapMaster counters
 
 	// get storage for heap manager
@@ -710,4 +687,5 @@
 	// find the closest bucket size less than or equal to the mmapStart size
 	maxBucketsUsed = Bsearchl( mmapStart, bucketSizes, NoBucketSizes ); // binary search
+
 	verify( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
 	verify( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
@@ -832,8 +810,7 @@
 
 		size_t increase = ceiling2( size > heapExpand ? size : heapExpand, libAlign() );
-		// Do not call abort or strerror( errno ) as they may call malloc.
 		if ( unlikely( sbrk( increase ) == (void *)-1 ) ) {	// failed, no memory ?
 			unlock( extLock );
-			abort( NO_MEMORY_MSG, size );				// no memory
+			abort( NO_MEMORY_MSG, size );				// give up
 		} // if
 
@@ -971,26 +948,28 @@
 		#endif // __STATISTICS__
 
-		// Spin until the lock is acquired for this particular size of block.
-
-		#if BUCKETLOCK == SPINLOCK
 		block = freeHead->freeList;						// remove node from stack
-		#else
-		block = pop( freeHead->freeList );
-		#endif // BUCKETLOCK
 		if ( unlikely( block == 0p ) ) {				// no free block ?
+			// Freelist for this size is empty, so check return list (OWNERSHIP), carve it out of the heap, if there
+			// is enough left, or get some more heap storage and carve it off.
 			#ifdef OWNERSHIP
-			// Freelist for that size is empty, so carve it out of the heap, if there is enough left, or get some more
-			// and then carve it off.
-			#ifdef RETURNSPIN
-			#if BUCKETLOCK == SPINLOCK
-			lock( freeHead->returnLock );
-			block = freeHead->returnList;
-			freeHead->returnList = 0p;
-			unlock( freeHead->returnLock );
-			#else
-			block = __atomic_exchange_n( &freeHead->returnList, nullptr, __ATOMIC_SEQ_CST );
-			#endif // RETURNSPIN
-
-			if ( likely( block == 0p ) ) {			// return list also empty?
+			if ( unlikely( freeHead->returnList ) ) {	// race, get next time if lose race
+				#ifdef RETURNSPIN
+				lock( freeHead->returnLock );
+				block = freeHead->returnList;
+				freeHead->returnList = 0p;
+				unlock( freeHead->returnLock );
+				#else
+				block = __atomic_exchange_n( &freeHead->returnList, 0p, __ATOMIC_SEQ_CST );
+				#endif // RETURNSPIN
+
+				verify( block );
+				#ifdef __STATISTICS__
+				stats.return_pulls += 1;
+				#endif // __STATISTICS__
+
+				// OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED.
+
+				freeHead->freeList = block->header.kind.real.next; // merge returnList into freeHead
+			} else {
 			#endif // OWNERSHIP
 				// Do not leave kernel thread as manager_extend accesses heapManager.
@@ -1002,17 +981,8 @@
 
 				#ifdef __CFA_DEBUG__
-				// Scrub new memory so subsequent uninitialized usages might fail. Only scrub the first 1024 bytes.
+				// Scrub new memory so subsequent uninitialized usages might fail. Only scrub the first SCRUB_SIZE bytes.
 				memset( block->data, SCRUB, min( SCRUB_SIZE, tsize - sizeof(Heap.Storage) ) );
 				#endif // __CFA_DEBUG__
-			#endif // BUCKETLOCK
 			#ifdef OWNERSHIP
-			} else {									// merge returnList into freeHead
-				#ifdef __STATISTICS__
-				stats.return_pulls += 1;
-				#endif // __STATISTICS__
-
-				// OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED.
-
-				freeHead->freeList = block->header.kind.real.next;
 			} // if
 			#endif // OWNERSHIP
@@ -1026,4 +996,5 @@
   if ( unlikely( size > ULONG_MAX - __page_size ) ) return 0p;
 		tsize = ceiling2( tsize, __page_size );			// must be multiple of page size
+
 		#ifdef __STATISTICS__
 		stats.counters[STAT_NAME].alloc += tsize;
@@ -1042,11 +1013,12 @@
 			if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, tsize ); // no memory
 			// Do not call strerror( errno ) as it may call malloc.
-			abort( "**** Error **** attempt to allocate large object (> %zu) of size %zu bytes and mmap failed with errno %d.", size, heapMaster.mmapStart, errno );
+			abort( "**** Error **** attempt to allocate large object (> %zu) of size %zu bytes and mmap failed with errno %d.",
+				   heapMaster.mmapStart, size, errno );	// argument order follows the message: mmap threshold, then request size
 		} // if
 		block->header.kind.real.blockSize = MarkMmappedBit( tsize ); // storage size for munmap
 
 		#ifdef __CFA_DEBUG__
-		// Scrub new memory so subsequent uninitialized usages might fail. Only scrub the first 1024 bytes.  The rest of
-		// the storage set to 0 by mmap.
+		// Scrub new memory so subsequent uninitialized usages might fail. Only scrub the first SCRUB_SIZE bytes. The
+		// rest of the storage set to 0 by mmap.
 		memset( block->data, SCRUB, min( SCRUB_SIZE, tsize - sizeof(Heap.Storage) ) );
 		#endif // __CFA_DEBUG__
@@ -1126,4 +1098,5 @@
 		#endif // __CFA_DEBUG__
 
+		#ifdef OWNERSHIP
 		if ( likely( heapManager == freeHead->homeManager ) ) { // belongs to this thread
 			header->kind.real.next = freeHead->freeList; // push on stack
@@ -1132,5 +1105,4 @@
 			verify( heapManager );
 
-			#ifdef OWNERSHIP
 			#ifdef RETURNSPIN
 			lock( freeHead->returnLock );
@@ -1141,23 +1113,24 @@
 			header->kind.real.next = freeHead->returnList; // link new node to top node
 			// CAS resets header->kind.real.next = freeHead->returnList on failure
-			while ( ! __atomic_compare_exchange_n( &freeHead->returnList, &header->kind.real.next, header,
+			while ( ! __atomic_compare_exchange_n( &freeHead->returnList, &header->kind.real.next, (Heap.Storage *)header,
 												   false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) );
 			#endif // RETURNSPIN
-
-			#else										// no OWNERSHIP
-
-			freeHead = &heap->freeLists[ClearStickyBits( header->kind.real.home ) - &freeHead->homeManager->freeLists[0]];
-			header->kind.real.next = freeHead->freeList; // push on stack
-			freeHead->freeList = (Heap.Storage *)header;
-			#endif // ! OWNERSHIP
-
-			#ifdef __U_STATISTICS__
-			stats.return_pushes += 1;
-			stats.return_storage_request += rsize;
-			stats.return_storage_alloc += size;
-			#endif // __U_STATISTICS__
-
-			// OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED.
-		} // if
+		} // if
+
+		#else											// no OWNERSHIP
+
+		// kind.real.home is address in owner thread's freeLists, so compute the equivalent position in this thread's freeList.
+		freeHead = &freeLists[ClearStickyBits( (Heap.FreeHeader *)(header->kind.real.home) ) - &freeHead->homeManager->freeLists[0]];
+		header->kind.real.next = freeHead->freeList;	// push on stack
+		freeHead->freeList = (Heap.Storage *)header;
+		#endif // ! OWNERSHIP
+
+		#ifdef __U_STATISTICS__
+		stats.return_pushes += 1;
+		stats.return_storage_request += rsize;
+		stats.return_storage_alloc += size;
+		#endif // __U_STATISTICS__
+
+		// OK TO BE PREEMPTED HERE AS heapManager IS NO LONGER ACCESSED.
 	} // if
 
@@ -1186,14 +1159,5 @@
 		#endif // __STATISTICS__
 
-		#if BUCKETLOCK == SPINLOCK
 		for ( Heap.Storage * p = freeLists[i].freeList; p != 0p; p = p->header.kind.real.next ) {
-		#else
-			for(;;) {
-//		for ( Heap.Storage * p = top( freeLists[i].freeList ); p != 0p; p = (p)`next->top ) {
-//		for ( Heap.Storage * p = top( freeLists[i].freeList ); p != 0p; /* p = getNext( p )->top */) {
-//			Heap.Storage * temp = p->header.kind.real.next.top; // FIX ME: direct assignent fails, initialization works`
-//			typeof(p) temp = (( p )`next)->top;			// FIX ME: direct assignent fails, initialization works`
-//			p = temp;
-		#endif // BUCKETLOCK
 			total += size;
 			#ifdef __STATISTICS__
Index: libcfa/src/interpose.cfa
===================================================================
--- libcfa/src/interpose.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ libcfa/src/interpose.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -42,8 +42,30 @@
 
 typedef void (* generic_fptr_t)(void);
+static generic_fptr_t do_interpose_symbol( void * library, const char symbol[], const char version[] ) { // look up symbol in library, abort on failure
+	const char * error;
+
+	union { generic_fptr_t fptr; void * ptr; } originalFunc; // dlsym yields void *, callers want a function pointer
+
+	#if defined( _GNU_SOURCE )
+		if ( version ) {
+			originalFunc.ptr = dlvsym( library, symbol, version ); // versioned lookup
+		} else {
+			originalFunc.ptr = dlsym( library, symbol );
+		}
+	#else
+		originalFunc.ptr = dlsym( library, symbol );	// version argument is ignored on this path
+	#endif // _GNU_SOURCE
+
+	error = dlerror();									// non-null when a dl* error is pending
+	if ( error ) abort( "interpose_symbol : internal error, %s\n", error );
+
+	return originalFunc.fptr;
+}
+
 static generic_fptr_t interpose_symbol( const char symbol[], const char version[] ) {
 	const char * error;
 
 	static void * library;
+	static void * pthread_library;
 	if ( ! library ) {
 		#if defined( RTLD_NEXT )
@@ -58,21 +80,18 @@
 		#endif
 	} // if
-
-	union { generic_fptr_t fptr; void * ptr; } originalFunc;
-
-	#if defined( _GNU_SOURCE )
-		if ( version ) {
-			originalFunc.ptr = dlvsym( library, symbol, version );
-		} else {
-			originalFunc.ptr = dlsym( library, symbol );
-		}
-	#else
-		originalFunc.ptr = dlsym( library, symbol );
-	#endif // _GNU_SOURCE
-
-	error = dlerror();
-	if ( error ) abort( "interpose_symbol : internal error, %s\n", error );
-
-	return originalFunc.fptr;
+	if ( ! pthread_library ) {							// NOTE(review): handle is set here but the lookup after this block still passes `library`
+		#if defined( RTLD_NEXT )
+			pthread_library = RTLD_NEXT;
+		#else
+			// missing RTLD_NEXT => must hard-code library name, assuming libpthread
+			pthread_library = dlopen( "libpthread.so", RTLD_LAZY );
+			error = dlerror();
+			if ( error ) {
+				abort( "interpose_symbol : failed to open libpthread, %s\n", error );
+			}
+		#endif
+	} // if
+
+	return do_interpose_symbol(library, symbol, version);
 }
 
@@ -97,4 +116,5 @@
 
 extern "C" {
+	void __cfathreadabi_interpose_startup( generic_fptr_t (*do_interpose_symbol)( void * library, const char symbol[], const char version[] ) ) __attribute__((weak));
 	void __cfaabi_interpose_startup( void ) {
 		const char *version = 0p;
@@ -108,4 +128,6 @@
 		INTERPOSE_LIBC( exit , version );
 #pragma GCC diagnostic pop
+
+		if(__cfathreadabi_interpose_startup) __cfathreadabi_interpose_startup( do_interpose_symbol );
 
 		// As a precaution (and necessity), errors that result in termination are delivered on a separate stack because
Index: libcfa/src/interpose_thread.cfa
===================================================================
--- libcfa/src/interpose_thread.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ libcfa/src/interpose_thread.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,137 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// interpose_thread.c --
+//
+// Author           : Thierry Delisle
+// Created On       : Wed Sep 21 11:55:16 2022
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include <stdarg.h>										// va_start, va_end
+#include <stdio.h>
+#include <string.h>										// strlen
+#include <signal.h>
+#include <pthread.h>
+extern "C" {
+#include <dlfcn.h>										// dlopen, dlsym
+#include <execinfo.h>									// backtrace, messages
+}
+
+#include "bits/debug.hfa"
+#include "bits/defs.hfa"
+#include <assert.h>
+
+//=============================================================================================
+// Interposing helpers
+//=============================================================================================
+
+typedef void (* generic_fptr_t)(void);
+
+generic_fptr_t interpose_symbol(						// resolve symbol through the supplied lookup routine
+	generic_fptr_t (*do_interpose_symbol)( void * library, const char symbol[], const char version[] ),
+	const char symbol[],
+	const char version[]
+) libcfa_public {
+	const char * error;
+
+	static void * library;								// handle cached across calls, set on first use
+	if ( ! library ) {
+		#if defined( RTLD_NEXT )
+			library = RTLD_NEXT;
+		#else
+			// missing RTLD_NEXT => must hard-code library name, assuming libpthread
+			library = dlopen( "libpthread.so", RTLD_LAZY );
+			error = dlerror();
+			if ( error ) {
+				abort( "interpose_symbol : failed to open libpthread, %s\n", error );
+			}
+		#endif
+	} // if
+
+	return do_interpose_symbol(library, symbol, version); // actual dlsym/dlvsym work is delegated to the callback
+}
+
+#define INTERPOSE( x, ver ) __cabi_libpthread.x = (typeof(__cabi_libpthread.x))interpose_symbol( do_interpose_symbol, #x, ver )
+
+//=============================================================================================
+// Interposition Startup logic
+//=============================================================================================
+
+static struct {											// table of looked-up pthread routines, filled by INTERPOSE
+	int (*pthread_create)(pthread_t *_thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
+	int (*pthread_join)(pthread_t _thread, void **retval);
+	pthread_t (*pthread_self)(void);
+	int (*pthread_attr_init)(pthread_attr_t *attr);
+	int (*pthread_attr_destroy)(pthread_attr_t *attr);
+	int (*pthread_attr_setstack)( pthread_attr_t *attr, void *stackaddr, size_t stacksize );
+	int (*pthread_attr_getstacksize)( const pthread_attr_t *attr, size_t *stacksize );
+	int (*pthread_sigmask)(int how, const sigset_t *set, sigset_t *oldset);
+	int (*pthread_sigqueue)(pthread_t _thread, int sig, const union sigval value);
+	int (*pthread_once)(pthread_once_t *once_control, void (*init_routine)(void));
+} __cabi_libpthread;									// one member per symbol interposed at startup
+
+extern "C" {
+	void __cfathreadabi_interpose_startup( generic_fptr_t (*do_interpose_symbol)( void * library, const char symbol[], const char version[] ) ) libcfa_public { // fill __cabi_libpthread using the caller's lookup routine
+		const char *version = 0p;						// no symbol version requested
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdiscarded-qualifiers"
+		INTERPOSE( pthread_create , version );
+		INTERPOSE( pthread_join , version );
+		INTERPOSE( pthread_self , version );
+		INTERPOSE( pthread_attr_init , version );
+		INTERPOSE( pthread_attr_destroy , version );
+		INTERPOSE( pthread_attr_setstack , version );
+		INTERPOSE( pthread_attr_getstacksize , version );
+		INTERPOSE( pthread_sigmask , version );
+		INTERPOSE( pthread_sigqueue , version );
+		INTERPOSE( pthread_once , version );
+#pragma GCC diagnostic pop
+	}
+
+	int __cfaabi_pthread_create(pthread_t *_thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg){ // each wrapper below forwards to the matching __cabi_libpthread entry
+		return __cabi_libpthread.pthread_create(_thread, attr, start_routine, arg);
+	}
+
+	int __cfaabi_pthread_join(pthread_t _thread, void **retval){
+		return __cabi_libpthread.pthread_join(_thread, retval);
+	}
+
+	pthread_t __cfaabi_pthread_self(void){
+		return __cabi_libpthread.pthread_self();
+	}
+
+	int __cfaabi_pthread_attr_init(pthread_attr_t *attr){
+		return __cabi_libpthread.pthread_attr_init(attr);
+	}
+
+	int __cfaabi_pthread_attr_destroy(pthread_attr_t *attr){
+		return __cabi_libpthread.pthread_attr_destroy(attr);
+	}
+
+	int __cfaabi_pthread_attr_setstack( pthread_attr_t *attr, void *stackaddr, size_t stacksize ){
+		return __cabi_libpthread.pthread_attr_setstack(attr, stackaddr, stacksize);
+	}
+
+	int read_pthread_attr_getstacksize( const pthread_attr_t *attr, size_t *stacksize ){ // NOTE(review): lacks the __cfaabi_ prefix of its siblings; confirm against callers before renaming
+		return __cabi_libpthread.pthread_attr_getstacksize(attr, stacksize);
+	}
+
+	int __cfaabi_pthread_sigmask(int how, const sigset_t *set, sigset_t *oldset){
+		return __cabi_libpthread.pthread_sigmask(how, set, oldset);
+	}
+
+	int __cfaabi_pthread_sigqueue(pthread_t _thread, int sig, const union sigval value) {
+		return __cabi_libpthread.pthread_sigqueue(_thread, sig, value);
+	}
+
+	int __cfaabi_pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) {
+		return __cabi_libpthread.pthread_once(once_control, init_routine);
+	}
+}
Index: src/AST/Convert.cpp
===================================================================
--- src/AST/Convert.cpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/AST/Convert.cpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -234,4 +234,11 @@
 		}
 		return declWithTypePostamble( decl, node );
+	}
+
+	// InlineMemberDecl vanishes after the EnumAndPointerDecay pass, so there is no need to implement NewToOld.
+	const ast::DeclWithType * visit( const ast::InlineMemberDecl * node ) override final {	
+		assert( false );	// this overload is not expected to be called
+		(void) node;		// mark the parameter as used
+		return nullptr;
 	}
 
@@ -1614,5 +1621,4 @@
 			{ old->get_funcSpec().val }
 		);
-		decl->enumInLine = old->enumInLine;
 		cache.emplace(old, decl);
 		assert(cache.find( old ) != cache.end());
@@ -1859,4 +1865,32 @@
 		decl->uniqueId   = old->uniqueId;
 		decl->storage    = { old->storageClasses.val };
+
+		this->node = decl;
+	}
+
+	virtual void visit( const InlineMemberDecl * old ) override final {
+		if ( inCache( old ) ) {
+			return;
+		}
+		auto&& type = GET_ACCEPT_1(type, Type);
+		auto&& attr = GET_ACCEPT_V(attributes, Attribute);
+
+		auto decl = new ast::InlineMemberDecl(
+			old->location,
+			old->name,
+			type,
+			{ old->get_storageClasses().val },
+			{ old->linkage.val },
+			std::move(attr),
+			{ old->get_funcSpec().val }
+		);
+		cache.emplace(old, decl);
+		assert(cache.find( old ) != cache.end());
+		decl->scopeLevel = old->scopeLevel;
+		decl->mangleName = old->mangleName;
+		decl->isDeleted  = old->isDeleted;
+		decl->asmName    = GET_ACCEPT_1(asmName, Expr);
+		decl->uniqueId   = old->uniqueId;
+		decl->extension  = old->extension;
 
 		this->node = decl;
Index: src/AST/Decl.hpp
===================================================================
--- src/AST/Decl.hpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/AST/Decl.hpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -105,7 +105,4 @@
 	ptr<Init> init;
 	ptr<Expr> bitfieldWidth;
-	bool enumInLine = false; // enum inline is not a real object declaration. 
-	// It is a place holder for a set of enum value (ObjectDecl)
-	bool importValue = false; // if the value copied from somewhere else
 
 	ObjectDecl( const CodeLocation & loc, const std::string & name, const Type * type,
@@ -400,4 +397,5 @@
 };
 
+/// Static Assertion `_Static_assert( ... , ... );`
 class StaticAssertDecl : public Decl {
 public:
@@ -411,4 +409,23 @@
 private:
 	StaticAssertDecl * clone() const override { return new StaticAssertDecl( *this ); }
+	MUTATE_FRIEND
+};
+
+/// Inline Member Declaration `inline TypeName;`
+class InlineMemberDecl final : public DeclWithType {
+public:
+	ptr<Type> type;	// type given in the declaration
+
+	InlineMemberDecl( const CodeLocation & loc, const std::string & name, const Type * type,
+		Storage::Classes storage = {}, Linkage::Spec linkage = Linkage::Cforall,
+		std::vector< ptr<Attribute> > && attrs = {}, Function::Specs fs = {} )
+	: DeclWithType( loc, name, storage, linkage, std::move(attrs), fs ), type( type ) {}
+
+	const Type * get_type() const override { return type; }
+	void set_type( const Type * ty ) override { type = ty; }
+
+	const DeclWithType * accept( Visitor& v ) const override { return v.visit( this ); }	// dispatch to the visitor's InlineMemberDecl overload
+private:
+	InlineMemberDecl * clone() const override { return new InlineMemberDecl{ *this }; }	// copy-construct a new node
 	MUTATE_FRIEND
 };
Index: src/AST/Fwd.hpp
===================================================================
--- src/AST/Fwd.hpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/AST/Fwd.hpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -37,4 +37,5 @@
 class DirectiveDecl;
 class StaticAssertDecl;
+class InlineMemberDecl;
 
 class Stmt;
Index: src/AST/Pass.hpp
===================================================================
--- src/AST/Pass.hpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/AST/Pass.hpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -141,4 +141,5 @@
 	const ast::DirectiveDecl *    visit( const ast::DirectiveDecl        * ) override final;
 	const ast::StaticAssertDecl * visit( const ast::StaticAssertDecl     * ) override final;
+	const ast::DeclWithType	*     visit( const ast::InlineMemberDecl     * ) override final;
 	const ast::CompoundStmt *     visit( const ast::CompoundStmt         * ) override final;
 	const ast::Stmt *             visit( const ast::ExprStmt             * ) override final;
Index: src/AST/Pass.impl.hpp
===================================================================
--- src/AST/Pass.impl.hpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/AST/Pass.impl.hpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -617,4 +617,5 @@
 				maybe_accept( node, &FunctionDecl::returns );
 				maybe_accept( node, &FunctionDecl::type );
+				maybe_accept( node, &FunctionDecl::attributes );
 				// First remember that we are now within a function.
 				ValueGuard< bool > oldInFunction( inFunction );
@@ -625,5 +626,4 @@
 				atFunctionTop = true;
 				maybe_accept( node, &FunctionDecl::stmts );
-				maybe_accept( node, &FunctionDecl::attributes );
 			}
 		}
@@ -800,4 +800,20 @@
 
 	VISIT_END( StaticAssertDecl, node );
+}
+
+//--------------------------------------------------------------------------
+// InlineMemberDecl: the `type` member is the only child visited here.
+template< typename core_t >
+const ast::DeclWithType * ast::Pass< core_t >::visit( const ast::InlineMemberDecl * node ) {
+	VISIT_START( node );
+
+	if ( __visit_children() ) {
+		{
+			guard_symtab guard { *this }; // NOTE(review): presumably scopes symbol-table changes to this block - confirm
+			maybe_accept( node, &InlineMemberDecl::type );
+		}
+	}
+
+	VISIT_END( DeclWithType, node );
 }
 
Index: src/AST/Print.cpp
===================================================================
--- src/AST/Print.cpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/AST/Print.cpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -398,4 +398,11 @@
 	virtual const ast::Decl * visit( const ast::StructDecl * node ) override final {
 		print(node);
+		return node;
+	}
+
+	virtual const ast::DeclWithType * visit( const ast::InlineMemberDecl * node ) override final {
+		os << "inline "; // keyword first
+		if ( ! node->name.empty() ) os << node->name; // then the name, when there is one
+
 		return node;
 	}
Index: src/AST/Type.cpp
===================================================================
--- src/AST/Type.cpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/AST/Type.cpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -147,4 +147,10 @@
 // --- TypeInstType
 
+bool TypeInstType::operator==( const TypeInstType & other ) const {
+	// Equal only when the declaration, formal usage and expression id all agree.
+	if ( !( base == other.base ) ) return false;
+	return formal_usage == other.formal_usage && expr_id == other.expr_id;
+}
+
 TypeInstType::TypeInstType( const TypeDecl * b,
 	CV::Qualifiers q, std::vector<ptr<Attribute>> && as )
@@ -157,4 +163,33 @@
 
 bool TypeInstType::isComplete() const { return base->sized; }
+
+std::string TypeInstType::TypeEnvKey::typeString() const {
+	const std::string ids = "_" + std::to_string(formal_usage) + "_" + std::to_string(expr_id);
+	return ids + "_" + base->name; // "_<formal_usage>_<expr_id>_<name>"
+}
+
+bool TypeInstType::TypeEnvKey::operator==(
+		const TypeInstType::TypeEnvKey & other ) const {
+	// Field-by-field comparison that stops at the first mismatch.
+	if ( !( base == other.base && formal_usage == other.formal_usage ) ) return false;
+	return expr_id == other.expr_id;
+}
+
+bool TypeInstType::TypeEnvKey::operator<(
+		const TypeInstType::TypeEnvKey & other ) const {
+	// Lexicographic comparison over ( base, formal_usage, expr_id ).
+	// The order itself carries no meaning; it only needs to be a
+	// consistent total order so that keys can be sorted.
+
+	// `base` is the most significant field.
+	if ( base < other.base ) return true;
+	if ( other.base < base ) return false;
+	// Same `base`: the formal usage decides next.
+	if ( formal_usage < other.formal_usage ) return true;
+	if ( other.formal_usage < formal_usage ) return false;
+
+	// Everything else is equal, so the expression id breaks the tie.
+	return expr_id < other.expr_id;
+}
 
 // --- TupleType
Index: src/AST/Type.hpp
===================================================================
--- src/AST/Type.hpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/AST/Type.hpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -408,11 +408,14 @@
 
 		TypeEnvKey() = default;
-		TypeEnvKey(const TypeDecl * base, int formal_usage = 0, int expr_id = 0): base(base), formal_usage(formal_usage), expr_id(expr_id) {}
-		TypeEnvKey(const TypeInstType & inst): base(inst.base), formal_usage(inst.formal_usage), expr_id(inst.expr_id) {}
-		std::string typeString() const { return std::string("_") + std::to_string(formal_usage) + "_" + std::to_string(expr_id) + "_" + base->name; }
-		bool operator==(const TypeEnvKey & other) const { return base == other.base && formal_usage == other.formal_usage && expr_id == other.expr_id; }
+		TypeEnvKey(const TypeDecl * base, int formal_usage = 0, int expr_id = 0)
+		: base(base), formal_usage(formal_usage), expr_id(expr_id) {}
+		TypeEnvKey(const TypeInstType & inst)
+		: base(inst.base), formal_usage(inst.formal_usage), expr_id(inst.expr_id) {}
+		std::string typeString() const;
+		bool operator==(const TypeEnvKey & other) const;
+		bool operator<(const TypeEnvKey & other) const;
 	};
 
-	bool operator==(const TypeInstType & other) const { return base == other.base && formal_usage == other.formal_usage && expr_id == other.expr_id; }
+	bool operator==(const TypeInstType & other) const;
 
 	TypeInstType(
Index: src/AST/Visitor.hpp
===================================================================
--- src/AST/Visitor.hpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/AST/Visitor.hpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -33,4 +33,5 @@
     virtual const ast::DirectiveDecl *    visit( const ast::DirectiveDecl        * ) = 0;
     virtual const ast::StaticAssertDecl * visit( const ast::StaticAssertDecl     * ) = 0;
+    virtual const ast::DeclWithType *     visit( const ast::InlineMemberDecl     * ) = 0;
     virtual const ast::CompoundStmt *     visit( const ast::CompoundStmt         * ) = 0;
     virtual const ast::Stmt *             visit( const ast::ExprStmt             * ) = 0;
Index: src/Common/CodeLocationTools.cpp
===================================================================
--- src/Common/CodeLocationTools.cpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Common/CodeLocationTools.cpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -111,4 +111,5 @@
     macro(DirectiveDecl, DirectiveDecl) \
     macro(StaticAssertDecl, StaticAssertDecl) \
+    macro(InlineMemberDecl, DeclWithType) \
     macro(CompoundStmt, CompoundStmt) \
     macro(ExprStmt, Stmt) \
Index: src/Common/PassVisitor.h
===================================================================
--- src/Common/PassVisitor.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Common/PassVisitor.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -81,4 +81,6 @@
 	virtual void visit( StaticAssertDecl * assertDecl ) override final;
 	virtual void visit( const StaticAssertDecl * assertDecl ) override final;
+	virtual void visit( InlineMemberDecl * valueDecl ) override final;
+	virtual void visit( const InlineMemberDecl * valueDecl ) override final;
 
 	virtual void visit( CompoundStmt * compoundStmt ) override final;
@@ -273,4 +275,5 @@
 	virtual DirectiveDecl * mutate( DirectiveDecl * directiveDecl ) override final;
 	virtual StaticAssertDecl * mutate( StaticAssertDecl * assertDecl ) override final;
+	virtual DeclarationWithType * mutate( InlineMemberDecl * valueDecl ) override final;
 
 	virtual CompoundStmt * mutate( CompoundStmt * compoundStmt ) override final;
Index: src/Common/PassVisitor.impl.h
===================================================================
--- src/Common/PassVisitor.impl.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Common/PassVisitor.impl.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -607,4 +607,5 @@
 			indexerAddId( &func );
 			maybeMutate_impl( node->type, *this );
+			maybeMutate_impl( node->attributes, *this );
 			// First remember that we are now within a function.
 			ValueGuard< bool > oldInFunction( inFunction );
@@ -615,5 +616,4 @@
 			atFunctionTop = true;
 			maybeMutate_impl( node->statements, *this );
-			maybeMutate_impl( node->attributes, *this );
 		}
 	}
@@ -1044,4 +1044,33 @@
 
 	MUTATE_END( StaticAssertDecl, node );
+}
+
+//--------------------------------------------------------------------------
+// InlineMemberDecl: the node's `type` is the only child handled by these passes.
+template< typename pass_type >
+void PassVisitor< pass_type >::visit( InlineMemberDecl * node ) {
+	VISIT_START( node );
+
+	maybeAccept_impl( node->type, *this ); // the type is the only child visited
+
+	VISIT_END( node );
+}
+
+template< typename pass_type >
+void PassVisitor< pass_type >::visit( const InlineMemberDecl * node ) { // const overload, same body as the non-const visit
+	VISIT_START( node );
+
+	maybeAccept_impl( node->type, *this ); // the type is the only child visited
+
+	VISIT_END( node );
+}
+
+template< typename pass_type >
+DeclarationWithType * PassVisitor< pass_type >::mutate( InlineMemberDecl * node ) { // mutating counterpart of visit
+	MUTATE_START( node );
+
+	maybeMutate_impl( node->type, *this ); // the type is the only child mutated
+
+	MUTATE_END( DeclarationWithType, node );
 }
 
Index: src/Common/utility.h
===================================================================
--- src/Common/utility.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Common/utility.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -452,47 +452,81 @@
 
 // -----------------------------------------------------------------------------
-// Helper struct and function to support
-// for ( val : group_iterate( container1, container2, ... ) ) {}
-// syntax to have a for each that iterates multiple containers of the same length
-// TODO: update to use variadic arguments
-
-template< typename T1, typename T2 >
-struct group_iterate_t {
-private:
-	std::tuple<T1, T2> args;
+// Helper struct and function to support:
+// for ( auto val : group_iterate( container1, container2, ... ) ) { ... }
+// This iterates through multiple containers of the same size.
+
+template<typename... Args>
+class group_iterate_t {
+	using Iterables = std::tuple<Args...>;
+	Iterables iterables;
+
+	// Getting the iterator and value types this way preserves const.
+	template<size_t I> using Iter = decltype(std::get<I>(iterables).begin());
+	template<size_t I> using Data = decltype(*std::get<I>(iterables).begin());
+	template<typename> struct base_iterator;
+
+	// This inner template puts the sequence of `0, 1, ... sizeof...(Args)-1`
+	// into a pack. These are the indexes into the tuples, so unpacking can
+	// go over each element of the tuple.
+	// The std::integer_sequence is just used to build that sequence.
+	// A library reference will probably explain it better than I can.
+	template<std::size_t... Indices>
+	struct base_iterator<std::integer_sequence<std::size_t, Indices...>> {
+		using value_type = std::tuple< Data<Indices>... >;
+		std::tuple<Iter<Indices>...> iterators;
+
+		base_iterator( Iter<Indices>... is ) : iterators( is... ) {}
+		base_iterator operator++() {
+			return base_iterator( ++std::get<Indices>( iterators )... );
+		}
+		bool operator!=( const base_iterator& other ) const {
+			return iterators != other.iterators;
+		}
+		value_type operator*() const {
+			return std::tie( *std::get<Indices>( iterators )... );
+		}
+
+		static base_iterator make_begin( Iterables & data ) {
+			return base_iterator( std::get<Indices>( data ).begin()... );
+		}
+		static base_iterator make_end( Iterables & data ) {
+			return base_iterator( std::get<Indices>( data ).end()... );
+		}
+	};
+
 public:
-	group_iterate_t( bool skipBoundsCheck, const T1 & v1, const T2 & v2 ) : args(v1, v2) {
-		assertf(skipBoundsCheck || v1.size() == v2.size(), "group iteration requires containers of the same size: <%zd, %zd>.", v1.size(), v2.size());
-	};
-
-	typedef std::tuple<decltype(*std::get<0>(args).begin()), decltype(*std::get<1>(args).begin())> value_type;
-	typedef decltype(std::get<0>(args).begin()) T1Iter;
-	typedef decltype(std::get<1>(args).begin()) T2Iter;
-
-	struct iterator {
-		typedef std::tuple<T1Iter, T2Iter> IterTuple;
-		IterTuple it;
-		iterator( T1Iter i1, T2Iter i2 ) : it( i1, i2 ) {}
-		iterator operator++() {
-			return iterator( ++std::get<0>(it), ++std::get<1>(it) );
-		}
-		bool operator!=( const iterator &other ) const { return it != other.it; }
-		value_type operator*() const { return std::tie( *std::get<0>(it), *std::get<1>(it) ); }
-	};
-
-	iterator begin() { return iterator( std::get<0>(args).begin(), std::get<1>(args).begin() ); }
-	iterator end() { return iterator( std::get<0>(args).end(), std::get<1>(args).end() ); }
-};
-
-/// performs bounds check to ensure that all arguments are of the same length.
+	group_iterate_t( const Args &... args ) : iterables( args... ) {}
+
+	using iterator = base_iterator<decltype(
+		std::make_integer_sequence<std::size_t, sizeof...(Args)>())>;
+
+	iterator begin() { return iterator::make_begin( iterables ); }
+	iterator end() { return iterator::make_end( iterables ); }
+};
+
+// Helpers for the bounds checks (the non-variadic part of group_iterate); each asserts that all of the sizes passed in are equal:
+static inline void runGroupBoundsCheck(size_t size0, size_t size1) {
+	assertf( size0 == size1,
+		"group iteration requires containers of the same size: <%zu, %zu>.", // %zu: the arguments are size_t (unsigned)
+		size0, size1 );
+}
+
+static inline void runGroupBoundsCheck(size_t size0, size_t size1, size_t size2) {
+	assertf( size0 == size1 && size1 == size2,
+		"group iteration requires containers of the same size: <%zu, %zu, %zu>.", // %zu: the arguments are size_t (unsigned)
+		size0, size1, size2 );
+}
+
+/// Performs bounds check to ensure that all arguments are of the same length.
 template< typename... Args >
 group_iterate_t<Args...> group_iterate( Args &&... args ) {
-	return group_iterate_t<Args...>(false, std::forward<Args>( args )...);
-}
-
-/// does not perform a bounds check - requires user to ensure that iteration terminates when appropriate.
+	runGroupBoundsCheck( args.size()... );
+	return group_iterate_t<Args...>( std::forward<Args>( args )... );
+}
+
+/// Does not perform a bounds check - requires user to ensure that iteration terminates when appropriate.
 template< typename... Args >
 group_iterate_t<Args...> unsafe_group_iterate( Args &&... args ) {
-	return group_iterate_t<Args...>(true, std::forward<Args>( args )...);
+	return group_iterate_t<Args...>( std::forward<Args>( args )... );
 }
 
Index: src/GenPoly/Box.cc
===================================================================
--- src/GenPoly/Box.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/GenPoly/Box.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -58,5 +58,5 @@
 namespace GenPoly {
 	namespace {
-		FunctionType *makeAdapterType( FunctionType *adaptee, const TyVarMap &tyVars );
+		FunctionType *makeAdapterType( FunctionType const *adaptee, const TyVarMap &tyVars );
 
 		class BoxPass {
@@ -68,8 +68,5 @@
 		/// Adds layout-generation functions to polymorphic types.
 		class LayoutFunctionBuilder final : public WithDeclsToAdd, public WithVisitorRef<LayoutFunctionBuilder>, public WithShortCircuiting {
-			// Current level of nested functions:
-			unsigned int functionNesting = 0;
 		public:
-			void previsit( FunctionDecl *functionDecl );
 			void previsit( StructDecl *structDecl );
 			void previsit( UnionDecl *unionDecl );
@@ -100,21 +97,32 @@
 			void passArgTypeVars( ApplicationExpr *appExpr, Type *parmType, Type *argBaseType, std::list< Expression *>::iterator &arg, const TyVarMap &exprTyVars, std::set< std::string > &seenTypes );
 			/// passes extra type parameters into a polymorphic function application
-			void passTypeVars( ApplicationExpr *appExpr, Type *polyRetType, std::list< Expression *>::iterator &arg, const TyVarMap &exprTyVars );
+			/// Returns an iterator to the first argument after the added
+			/// arguments, which are added at the beginning.
+			std::list< Expression *>::iterator passTypeVars( ApplicationExpr *appExpr, Type *polyRetType, const TyVarMap &exprTyVars );
 			/// wraps a function application with a new temporary for the out-parameter return value
-			Expression *addRetParam( ApplicationExpr *appExpr, Type *retType, std::list< Expression *>::iterator &arg );
-			/// Replaces all the type parameters of a generic type with their concrete equivalents under the current environment
-			void replaceParametersWithConcrete( ApplicationExpr *appExpr, std::list< Expression* >& params );
-			/// Replaces a polymorphic type with its concrete equivalant under the current environment (returns itself if concrete).
-			/// If `doClone` is set to false, will not clone interior types
-			Type *replaceWithConcrete( ApplicationExpr *appExpr, Type *type, bool doClone = true );
+			/// The new out-parameter is the new first parameter.
+			Expression *addRetParam( ApplicationExpr *appExpr, Type *retType );
 			/// wraps a function application returning a polymorphic type with a new temporary for the out-parameter return value
-			Expression *addDynRetParam( ApplicationExpr *appExpr, Type *polyType, std::list< Expression *>::iterator &arg );
-			Expression *applyAdapter( ApplicationExpr *appExpr, FunctionType *function, std::list< Expression *>::iterator &arg, const TyVarMap &exprTyVars );
-			void boxParam( Type *formal, Expression *&arg, const TyVarMap &exprTyVars );
-			void boxParams( ApplicationExpr *appExpr, FunctionType *function, std::list< Expression *>::iterator &arg, const TyVarMap &exprTyVars );
-			void addInferredParams( ApplicationExpr *appExpr, FunctionType *functionType, std::list< Expression *>::iterator &arg, const TyVarMap &tyVars );
+			Expression *addDynRetParam( ApplicationExpr *appExpr, Type *polyType );
+			/// Converts a function call into a call of the adapter with the
+			/// original function as the first argument (all other arguments
+			/// are pushed back). May adjust return value.
+			Expression *applyAdapter( ApplicationExpr *appExpr, FunctionType *function );
+			/// Modifies the `arg`, replacing it with a boxed expression
+			/// that matches `formal` under the current TyVarMap.
+			void boxParam( Expression *&arg, Type *formal, const TyVarMap &exprTyVars );
+			/// Box an argument of `appExpr` for each parameter in `function`
+			/// starting at `arg`.
+			/// `exprTyVars` is the function's type variables.
+			void boxParams( ApplicationExpr *appExpr, std::list< Expression *>::iterator arg, FunctionType *function, const TyVarMap &exprTyVars );
+			/// Boxes each assertion and inserts them into `appExpr` at
+			/// `arg`. `exprTyVars` is the function's type variables.
+			void addInferredParams( ApplicationExpr *appExpr, std::list< Expression *>::iterator arg, FunctionType *functionType, const TyVarMap &tyVars );
 			/// Stores assignment operators from assertion list in local map of assignment operations
 			void passAdapters( ApplicationExpr *appExpr, FunctionType *functionType, const TyVarMap &exprTyVars );
-			FunctionDecl *makeAdapter( FunctionType *adaptee, FunctionType *realType, const std::string &mangleName, const TyVarMap &tyVars );
+			/// Creates an adapter definition from `adaptee` to `realType`, using
+			/// `mangleName` as the base name for the adapter. `tyVars` is the map of
+			/// type variables for the function type of the adapted expression.
+			FunctionDecl *makeAdapter( FunctionType const *adaptee, FunctionType *realType, const std::string &mangleName, const TyVarMap &tyVars );
 			/// Replaces intrinsic operator functions with their arithmetic desugaring
 			Expression *handleIntrinsics( ApplicationExpr *appExpr );
@@ -182,5 +190,5 @@
 			ObjectDecl *makeVar( const std::string &name, Type *type, Initializer *init = 0 );
 			/// returns true if the type has a dynamic layout; such a layout will be stored in appropriately-named local variables when the function returns
-			bool findGeneric( Type *ty );
+			bool findGeneric( Type const *ty );
 			/// adds type parameters to the layout call; will generate the appropriate parameters if needed
 			void addOtypeParamsToLayoutCall( UntypedExpr *layoutCall, const std::list< Type* > &otypeParams );
@@ -221,30 +229,4 @@
 	} // anonymous namespace
 
-	/// version of mutateAll with special handling for translation unit so you can check the end of the prelude when debugging
-	template< typename MutatorType >
-	inline void mutateTranslationUnit( std::list< Declaration* > &translationUnit, MutatorType &mutator ) {
-		bool seenIntrinsic = false;
-		SemanticErrorException errors;
-		for ( typename std::list< Declaration* >::iterator i = translationUnit.begin(); i != translationUnit.end(); ++i ) {
-			try {
-				if ( *i ) {
-					if ( (*i)->get_linkage() == LinkageSpec::Intrinsic ) {
-						seenIntrinsic = true;
-					} else if ( seenIntrinsic ) {
-						seenIntrinsic = false; // break on this line when debugging for end of prelude
-					}
-
-					*i = dynamic_cast< Declaration* >( (*i)->acceptMutator( mutator ) );
-					assert( *i );
-				} // if
-			} catch( SemanticErrorException &e ) {
-				errors.append( e );
-			} // try
-		} // for
-		if ( ! errors.isEmpty() ) {
-			throw errors;
-		} // if
-	}
-
 	void box( std::list< Declaration *>& translationUnit ) {
 		PassVisitor<LayoutFunctionBuilder> layoutBuilder;
@@ -263,19 +245,11 @@
 	////////////////////////////////// LayoutFunctionBuilder ////////////////////////////////////////////
 
-	void LayoutFunctionBuilder::previsit( FunctionDecl *functionDecl ) {
-		visit_children = false;
-		maybeAccept( functionDecl->get_functionType(), *visitor );
-		++functionNesting;
-		maybeAccept( functionDecl->get_statements(), *visitor );
-		--functionNesting;
-	}
-
 	/// Get a list of type declarations that will affect a layout function
 	std::list< TypeDecl* > takeOtypeOnly( std::list< TypeDecl* > &decls ) {
 		std::list< TypeDecl * > otypeDecls;
 
-		for ( std::list< TypeDecl* >::const_iterator decl = decls.begin(); decl != decls.end(); ++decl ) {
-			if ( (*decl)->isComplete() ) {
-				otypeDecls.push_back( *decl );
+		for ( TypeDecl * const decl : decls ) {
+			if ( decl->isComplete() ) {
+				otypeDecls.push_back( decl );
 			}
 		}
@@ -288,6 +262,6 @@
 		BasicType sizeAlignType( Type::Qualifiers(), BasicType::LongUnsignedInt );
 
-		for ( std::list< TypeDecl* >::const_iterator param = otypeParams.begin(); param != otypeParams.end(); ++param ) {
-			TypeInstType paramType( Type::Qualifiers(), (*param)->get_name(), *param );
+		for ( TypeDecl * const param : otypeParams ) {
+			TypeInstType paramType( Type::Qualifiers(), param->get_name(), param );
 			std::string paramName = mangleType( &paramType );
 			layoutFnType->get_parameters().push_back( new ObjectDecl( sizeofName( paramName ), Type::StorageClasses(), LinkageSpec::Cforall, 0, sizeAlignType.clone(), 0 ) );
@@ -297,20 +271,13 @@
 
 	/// Builds a layout function declaration
-	FunctionDecl *buildLayoutFunctionDecl( AggregateDecl *typeDecl, unsigned int functionNesting, FunctionType *layoutFnType ) {
+	FunctionDecl *buildLayoutFunctionDecl( AggregateDecl *typeDecl, bool isInFunction, FunctionType *layoutFnType ) {
 		// Routines at global scope marked "static" to prevent multiple definitions is separate translation units
 		// because each unit generates copies of the default routines for each aggregate.
 		FunctionDecl *layoutDecl = new FunctionDecl( layoutofName( typeDecl ),
-													 functionNesting > 0 ? Type::StorageClasses() : Type::StorageClasses( Type::Static ),
+													 isInFunction ? Type::StorageClasses() : Type::StorageClasses( Type::Static ),
 													 LinkageSpec::AutoGen, layoutFnType, new CompoundStmt(),
 													 std::list< Attribute * >(), Type::FuncSpecifiers( Type::Inline ) );
 		layoutDecl->fixUniqueId();
 		return layoutDecl;
-	}
-
-	/// Makes a unary operation
-	Expression *makeOp( const std::string &name, Expression *arg ) {
-		UntypedExpr *expr = new UntypedExpr( new NameExpr( name ) );
-		expr->args.push_back( arg );
-		return expr;
 	}
 
@@ -380,5 +347,5 @@
 
 		// build function decl
-		FunctionDecl *layoutDecl = buildLayoutFunctionDecl( structDecl, functionNesting, layoutFnType );
+		FunctionDecl *layoutDecl = buildLayoutFunctionDecl( structDecl, isInFunction(), layoutFnType );
 
 		// calculate struct layout in function body
@@ -387,14 +354,10 @@
 		addExpr( layoutDecl->get_statements(), makeOp( "?=?", derefVar( sizeParam ), new ConstantExpr( Constant::from_ulong( 0 ) ) ) );
 		addExpr( layoutDecl->get_statements(), makeOp( "?=?", derefVar( alignParam ), new ConstantExpr( Constant::from_ulong( 1 ) ) ) );
-		unsigned long n_members = 0;
-		bool firstMember = true;
-		for ( Declaration* member : structDecl->get_members() ) {
-			DeclarationWithType *dwt = dynamic_cast< DeclarationWithType * >( member );
+		for ( auto index_member : enumerate( structDecl->members ) ) {
+			DeclarationWithType *dwt = dynamic_cast< DeclarationWithType * >( index_member.val );
 			assert( dwt );
 			Type *memberType = dwt->get_type();
 
-			if ( firstMember ) {
-				firstMember = false;
-			} else {
+			if ( 0 < index_member.idx ) {
 				// make sure all members after the first (automatically aligned at 0) are properly padded for alignment
 				addStmt( layoutDecl->get_statements(), makeAlignTo( derefVar( sizeParam ), new AlignofExpr( memberType->clone() ) ) );
@@ -402,7 +365,6 @@
 
 			// place current size in the current offset index
-			addExpr( layoutDecl->get_statements(), makeOp( "?=?", makeOp( "?[?]", new VariableExpr( offsetParam ), new ConstantExpr( Constant::from_ulong( n_members ) ) ),
+			addExpr( layoutDecl->get_statements(), makeOp( "?=?", makeOp( "?[?]", new VariableExpr( offsetParam ), new ConstantExpr( Constant::from_ulong( index_member.idx ) ) ),
 			                                                      derefVar( sizeParam ) ) );
-			++n_members;
 
 			// add member size to current size
@@ -439,11 +401,11 @@
 
 		// build function decl
-		FunctionDecl *layoutDecl = buildLayoutFunctionDecl( unionDecl, functionNesting, layoutFnType );
+		FunctionDecl *layoutDecl = buildLayoutFunctionDecl( unionDecl, isInFunction(), layoutFnType );
 
 		// calculate union layout in function body
 		addExpr( layoutDecl->get_statements(), makeOp( "?=?", derefVar( sizeParam ), new ConstantExpr( Constant::from_ulong( 1 ) ) ) );
 		addExpr( layoutDecl->get_statements(), makeOp( "?=?", derefVar( alignParam ), new ConstantExpr( Constant::from_ulong( 1 ) ) ) );
-		for ( std::list< Declaration* >::const_iterator member = unionDecl->get_members().begin(); member != unionDecl->get_members().end(); ++member ) {
-			DeclarationWithType *dwt = dynamic_cast< DeclarationWithType * >( *member );
+		for ( Declaration * const member : unionDecl->members ) {
+			DeclarationWithType *dwt = dynamic_cast< DeclarationWithType * >( member );
 			assert( dwt );
 			Type *memberType = dwt->get_type();
@@ -464,5 +426,5 @@
 
 	namespace {
-		std::string makePolyMonoSuffix( FunctionType * function, const TyVarMap &tyVars ) {
+		std::string makePolyMonoSuffix( FunctionType const * function, const TyVarMap &tyVars ) {
 			std::stringstream name;
 
@@ -473,6 +435,6 @@
 			// to take those polymorphic types as pointers. Therefore, there can be two different functions
 			// with the same mangled name, so we need to further mangle the names.
-			for ( std::list< DeclarationWithType *>::iterator retval = function->get_returnVals().begin(); retval != function->get_returnVals().end(); ++retval ) {
-				if ( isPolyType( (*retval)->get_type(), tyVars ) ) {
+			for ( DeclarationWithType const * const ret : function->returnVals ) {
+				if ( isPolyType( ret->get_type(), tyVars ) ) {
 					name << "P";
 				} else {
@@ -481,7 +443,6 @@
 			}
 			name << "_";
-			std::list< DeclarationWithType *> &paramList = function->get_parameters();
-			for ( std::list< DeclarationWithType *>::iterator arg = paramList.begin(); arg != paramList.end(); ++arg ) {
-				if ( isPolyType( (*arg)->get_type(), tyVars ) ) {
+			for ( DeclarationWithType const * const arg : function->parameters ) {
+				if ( isPolyType( arg->get_type(), tyVars ) ) {
 					name << "P";
 				} else {
@@ -492,5 +453,5 @@
 		}
 
-		std::string mangleAdapterName( FunctionType * function, const TyVarMap &tyVars ) {
+		std::string mangleAdapterName( FunctionType const * function, const TyVarMap &tyVars ) {
 			return SymTab::Mangler::mangle( function ) + makePolyMonoSuffix( function, tyVars );
 		}
@@ -499,4 +460,8 @@
 			return "_adapter" + mangleName;
 		}
+
+		/// Replaces a polymorphic type with its concrete equivalent under the current environment (returns itself if concrete).
+		/// If `doClone` is set to false, will not clone interior types
+		Type *replaceWithConcrete( Type *type, TypeSubstitution const * env, bool doClone = true );
 
 		Pass1::Pass1() : tempNamer( "_temp" ) {}
@@ -524,19 +489,19 @@
 
 				std::list< DeclarationWithType *> &paramList = functionType->parameters;
-				std::list< FunctionType *> functions;
-				for ( Type::ForallList::iterator tyVar = functionType->forall.begin(); tyVar != functionType->forall.end(); ++tyVar ) {
-					for ( std::list< DeclarationWithType *>::iterator assert = (*tyVar)->assertions.begin(); assert != (*tyVar)->assertions.end(); ++assert ) {
-						findFunction( (*assert)->get_type(), functions, scopeTyVars, needsAdapter );
+				std::list< FunctionType const *> functions;
+				for ( TypeDecl * const tyVar : functionType->forall ) {
+					for ( DeclarationWithType * const assert : tyVar->assertions ) {
+						findFunction( assert->get_type(), functions, scopeTyVars, needsAdapter );
 					} // for
 				} // for
-				for ( std::list< DeclarationWithType *>::iterator arg = paramList.begin(); arg != paramList.end(); ++arg ) {
-					findFunction( (*arg)->get_type(), functions, scopeTyVars, needsAdapter );
+				for ( DeclarationWithType * const arg : paramList ) {
+					findFunction( arg->get_type(), functions, scopeTyVars, needsAdapter );
 				} // for
 
-				for ( std::list< FunctionType *>::iterator funType = functions.begin(); funType != functions.end(); ++funType ) {
-					std::string mangleName = mangleAdapterName( *funType, scopeTyVars );
+				for ( FunctionType const * const funType : functions ) {
+					std::string mangleName = mangleAdapterName( funType, scopeTyVars );
 					if ( adapters.find( mangleName ) == adapters.end() ) {
 						std::string adapterName = makeAdapterName( mangleName );
-						adapters.insert( std::pair< std::string, DeclarationWithType *>( mangleName, new ObjectDecl( adapterName, Type::StorageClasses(), LinkageSpec::C, nullptr, new PointerType( Type::Qualifiers(), makeAdapterType( *funType, scopeTyVars ) ), nullptr ) ) );
+						adapters.insert( std::pair< std::string, DeclarationWithType *>( mangleName, new ObjectDecl( adapterName, Type::StorageClasses(), LinkageSpec::C, nullptr, new PointerType( Type::Qualifiers(), makeAdapterType( funType, scopeTyVars ) ), nullptr ) ) );
 					} // if
 				} // for
@@ -593,28 +558,29 @@
 		}
 
-		void Pass1::passTypeVars( ApplicationExpr *appExpr, Type *polyRetType, std::list< Expression *>::iterator &arg, const TyVarMap &exprTyVars ) {
+		std::list< Expression *>::iterator Pass1::passTypeVars( ApplicationExpr *appExpr, Type *polyRetType, const TyVarMap &exprTyVars ) {
+			assert( env );
+			std::list< Expression *>::iterator arg = appExpr->args.begin();
 			// pass size/align for type variables
-			for ( TyVarMap::const_iterator tyParm = exprTyVars.begin(); tyParm != exprTyVars.end(); ++tyParm ) {
+			for ( std::pair<std::string, TypeDecl::Data> const & tyParam : exprTyVars ) {
 				ResolvExpr::EqvClass eqvClass;
-				assert( env );
-				if ( tyParm->second.isComplete ) {
-					Type *concrete = env->lookup( tyParm->first );
-					if ( concrete ) {
-						arg = appExpr->get_args().insert( arg, new SizeofExpr( concrete->clone() ) );
-						arg++;
-						arg = appExpr->get_args().insert( arg, new AlignofExpr( concrete->clone() ) );
-						arg++;
-					} else {
-						// xxx - should this be an assertion?
-						SemanticError( appExpr, toString( *env, "\nunbound type variable: ", tyParm->first, " in application " ) );
-					} // if
+				if ( tyParam.second.isComplete ) {
+					Type *concrete = env->lookup( tyParam.first );
+					// If there is an unbound type variable, it should have been detected already.
+					assertf( concrete, "Unbound type variable: %s in: %s",
+						toCString( tyParam.first ), toCString( *env ) );
+
+					arg = appExpr->get_args().insert( arg, new SizeofExpr( concrete->clone() ) );
+					arg++;
+					arg = appExpr->get_args().insert( arg, new AlignofExpr( concrete->clone() ) );
+					arg++;
 				} // if
 			} // for
 
 			// add size/align for generic types to parameter list
-			if ( ! appExpr->get_function()->result ) return;
+			if ( ! appExpr->get_function()->result ) return arg;
 			FunctionType *funcType = getFunctionType( appExpr->get_function()->get_result() );
 			assert( funcType );
 
+			// These iterators don't advance in unison.
 			std::list< DeclarationWithType* >::const_iterator fnParm = funcType->get_parameters().begin();
 			std::list< Expression* >::const_iterator fnArg = arg;
@@ -623,5 +589,5 @@
 			// a polymorphic return type may need to be added to the argument list
 			if ( polyRetType ) {
-				Type *concRetType = replaceWithConcrete( appExpr, polyRetType );
+				Type *concRetType = replaceWithConcrete( polyRetType, env );
 				passArgTypeVars( appExpr, polyRetType, concRetType, arg, exprTyVars, seenTypes );
 				++fnArg; // skip the return parameter in the argument list
@@ -634,4 +600,5 @@
 				passArgTypeVars( appExpr, (*fnParm)->get_type(), argType, arg, exprTyVars, seenTypes );
 			}
+			return arg;
 		}
 
@@ -642,5 +609,5 @@
 		}
 
-		Expression *Pass1::addRetParam( ApplicationExpr *appExpr, Type *retType, std::list< Expression *>::iterator &arg ) {
+		Expression *Pass1::addRetParam( ApplicationExpr *appExpr, Type *retType ) {
 			// Create temporary to hold return value of polymorphic function and produce that temporary as a result
 			// using a comma expression.
@@ -662,6 +629,6 @@
 				paramExpr = new AddressExpr( paramExpr );
 			} // if
-			arg = appExpr->args.insert( arg, paramExpr ); // add argument to function call
-			arg++;
+			// Add argument to function call.
+			appExpr->args.push_front( paramExpr );
 			// Build a comma expression to call the function and emulate a normal return.
 			CommaExpr *commaExpr = new CommaExpr( appExpr, retExpr );
@@ -671,13 +638,16 @@
 		}
 
-		void Pass1::replaceParametersWithConcrete( ApplicationExpr *appExpr, std::list< Expression* >& params ) {
-			for ( std::list< Expression* >::iterator param = params.begin(); param != params.end(); ++param ) {
-				TypeExpr *paramType = dynamic_cast< TypeExpr* >( *param );
+		/// Replaces all the type parameters of a generic type with their concrete equivalents under the current environment
+		void replaceParametersWithConcrete( std::list< Expression* >& params, TypeSubstitution const * env ) {
+			for ( Expression * const param : params ) {
+				TypeExpr *paramType = dynamic_cast< TypeExpr* >( param );
 				assertf(paramType, "Aggregate parameters should be type expressions");
-				paramType->set_type( replaceWithConcrete( appExpr, paramType->get_type(), false ) );
-			}
-		}
-
-		Type *Pass1::replaceWithConcrete( ApplicationExpr *appExpr, Type *type, bool doClone ) {
+				paramType->set_type( replaceWithConcrete( paramType->get_type(), env, false ) );
+			}
+		}
+
+		// See forward definition.
+		Type *replaceWithConcrete( Type *type, TypeSubstitution const * env, bool doClone ) {
+			assert( env );
 			if ( TypeInstType *typeInst = dynamic_cast< TypeInstType * >( type ) ) {
 				Type *concrete = env->lookup( typeInst->get_name() );
@@ -690,5 +660,5 @@
 					structType = structType->clone();
 				}
-				replaceParametersWithConcrete( appExpr, structType->get_parameters() );
+				replaceParametersWithConcrete( structType->get_parameters(), env );
 				return structType;
 			} else if ( UnionInstType *unionType = dynamic_cast< UnionInstType* >( type ) ) {
@@ -696,5 +666,5 @@
 					unionType = unionType->clone();
 				}
-				replaceParametersWithConcrete( appExpr, unionType->get_parameters() );
+				replaceParametersWithConcrete( unionType->get_parameters(), env );
 				return unionType;
 			}
@@ -702,18 +672,17 @@
 		}
 
-		Expression *Pass1::addDynRetParam( ApplicationExpr *appExpr, Type *dynType, std::list< Expression *>::iterator &arg ) {
-			assert( env );
-			Type *concrete = replaceWithConcrete( appExpr, dynType );
+		Expression *Pass1::addDynRetParam( ApplicationExpr *appExpr, Type *dynType ) {
+			Type *concrete = replaceWithConcrete( dynType, env );
 			// add out-parameter for return value
-			return addRetParam( appExpr, concrete, arg );
-		}
-
-		Expression *Pass1::applyAdapter( ApplicationExpr *appExpr, FunctionType *function, std::list< Expression *>::iterator &arg, const TyVarMap &tyVars ) {
+			return addRetParam( appExpr, concrete );
+		}
+
+		Expression *Pass1::applyAdapter( ApplicationExpr *appExpr, FunctionType *function ) {
 			Expression *ret = appExpr;
 //			if ( ! function->get_returnVals().empty() && isPolyType( function->get_returnVals().front()->get_type(), tyVars ) ) {
-			if ( isDynRet( function, tyVars ) ) {
-				ret = addRetParam( appExpr, function->returnVals.front()->get_type(), arg );
+			if ( isDynRet( function, scopeTyVars ) ) {
+				ret = addRetParam( appExpr, function->returnVals.front()->get_type() );
 			} // if
-			std::string mangleName = mangleAdapterName( function, tyVars );
+			std::string mangleName = mangleAdapterName( function, scopeTyVars );
 			std::string adapterName = makeAdapterName( mangleName );
 
@@ -724,36 +693,4 @@
 
 			return ret;
-		}
-
-		void Pass1::boxParam( Type *param, Expression *&arg, const TyVarMap &exprTyVars ) {
-			assertf( arg->result, "arg does not have result: %s", toString( arg ).c_str() );
-			if ( ! needsBoxing( param, arg->result, exprTyVars, env ) ) return;
-
-			if ( arg->get_lvalue() ) {
-				// argument expression may be CFA lvalue, but not C lvalue -- apply generalizedLvalue transformations.
-				// if ( VariableExpr * varExpr = dynamic_cast< VariableExpr * >( arg ) ) {
-				// 	if ( dynamic_cast<ArrayType *>( varExpr->var->get_type() ) ){
-				// 		// temporary hack - don't box arrays, because &arr is not the same as &arr[0]
-				// 		return;
-				// 	}
-				// }
-				arg =  generalizedLvalue( new AddressExpr( arg ) );
-				if ( ! ResolvExpr::typesCompatible( param, arg->get_result(), SymTab::Indexer() ) ) {
-					// silence warnings by casting boxed parameters when the actual type does not match up with the formal type.
-					arg = new CastExpr( arg, param->clone() );
-				}
-			} else {
-				// use type computed in unification to declare boxed variables
-				Type * newType = param->clone();
-				if ( env ) env->apply( newType );
-				ObjectDecl *newObj = ObjectDecl::newObject( tempNamer.newName(), newType, nullptr );
-				newObj->get_type()->get_qualifiers() = Type::Qualifiers(); // TODO: is this right???
-				stmtsToAddBefore.push_back( new DeclStmt( newObj ) );
-				UntypedExpr *assign = new UntypedExpr( new NameExpr( "?=?" ) ); // TODO: why doesn't this just use initialization syntax?
-				assign->get_args().push_back( new VariableExpr( newObj ) );
-				assign->get_args().push_back( arg );
-				stmtsToAddBefore.push_back( new ExprStmt( assign ) );
-				arg = new AddressExpr( new VariableExpr( newObj ) );
-			} // if
 		}
 
@@ -791,21 +728,55 @@
 		}
 
-		void Pass1::boxParams( ApplicationExpr *appExpr, FunctionType *function, std::list< Expression *>::iterator &arg, const TyVarMap &exprTyVars ) {
-			for ( std::list< DeclarationWithType *>::const_iterator param = function->get_parameters().begin(); param != function->parameters.end(); ++param, ++arg ) {
-				assertf( arg != appExpr->args.end(), "boxParams: missing argument for param %s to %s in %s", toString( *param ).c_str(), toString( function ).c_str(), toString( appExpr ).c_str() );
-				addCast( *arg, (*param)->get_type(), exprTyVars );
-				boxParam( (*param)->get_type(), *arg, exprTyVars );
+		void Pass1::boxParam( Expression *&arg, Type *param, const TyVarMap &exprTyVars ) {
+			assertf( arg->result, "arg does not have result: %s", toString( arg ).c_str() );
+			addCast( arg, param, exprTyVars );
+			if ( ! needsBoxing( param, arg->result, exprTyVars, env ) ) return;
+
+			if ( arg->get_lvalue() ) {
+				// argument expression may be CFA lvalue, but not C lvalue -- apply generalizedLvalue transformations.
+				// if ( VariableExpr * varExpr = dynamic_cast< VariableExpr * >( arg ) ) {
+				// 	if ( dynamic_cast<ArrayType *>( varExpr->var->get_type() ) ){
+				// 		// temporary hack - don't box arrays, because &arr is not the same as &arr[0]
+				// 		return;
+				// 	}
+				// }
+				arg = generalizedLvalue( new AddressExpr( arg ) );
+				if ( ! ResolvExpr::typesCompatible( param, arg->get_result(), SymTab::Indexer() ) ) {
+					// silence warnings by casting boxed parameters when the actual type does not match up with the formal type.
+					arg = new CastExpr( arg, param->clone() );
+				}
+			} else {
+				// use type computed in unification to declare boxed variables
+				Type * newType = param->clone();
+				if ( env ) env->apply( newType );
+				ObjectDecl *newObj = makeTemporary( newType );
+				// TODO: is this right??? (Why wouldn't it be?)
+				newObj->get_type()->get_qualifiers() = Type::Qualifiers();
+				// TODO: why doesn't this just use initialization syntax?
+				// (Possibly to ensure code is run at the right time.)
+				UntypedExpr *assign = new UntypedExpr( new NameExpr( "?=?" ) );
+				assign->get_args().push_back( new VariableExpr( newObj ) );
+				assign->get_args().push_back( arg );
+				stmtsToAddBefore.push_back( new ExprStmt( assign ) );
+				arg = new AddressExpr( new VariableExpr( newObj ) );
+			} // if
+		}
+
+		void Pass1::boxParams( ApplicationExpr *appExpr, std::list< Expression *>::iterator arg, FunctionType *function, const TyVarMap &exprTyVars ) {
+			for ( DeclarationWithType * param : function->parameters ) {
+				assertf( arg != appExpr->args.end(), "boxParams: missing argument for param %s to %s in %s", toString( param ).c_str(), toString( function ).c_str(), toString( appExpr ).c_str() );
+				boxParam( *arg, param->get_type(), exprTyVars );
+				++arg;
 			} // for
 		}
 
-		void Pass1::addInferredParams( ApplicationExpr *appExpr, FunctionType *functionType, std::list< Expression *>::iterator &arg, const TyVarMap &tyVars ) {
+		void Pass1::addInferredParams( ApplicationExpr *appExpr, std::list< Expression *>::iterator arg, FunctionType *functionType, const TyVarMap &tyVars ) {
 			std::list< Expression *>::iterator cur = arg;
-			for ( Type::ForallList::iterator tyVar = functionType->get_forall().begin(); tyVar != functionType->get_forall().end(); ++tyVar ) {
-				for ( std::list< DeclarationWithType *>::iterator assert = (*tyVar)->assertions.begin(); assert != (*tyVar)->assertions.end(); ++assert ) {
-					InferredParams::const_iterator inferParam = appExpr->inferParams.find( (*assert)->get_uniqueId() );
-					assertf( inferParam != appExpr->inferParams.end(), "addInferredParams missing inferred parameter: %s in: %s", toString( *assert ).c_str(), toString( appExpr ).c_str() );
+			for ( TypeDecl * const tyVar : functionType->forall ) {
+				for ( DeclarationWithType * const assert : tyVar->assertions ) {
+					InferredParams::const_iterator inferParam = appExpr->inferParams.find( assert->get_uniqueId() );
+					assertf( inferParam != appExpr->inferParams.end(), "addInferredParams missing inferred parameter: %s in: %s", toString( assert ).c_str(), toString( appExpr ).c_str() );
 					Expression *newExpr = inferParam->second.expr->clone();
-					addCast( newExpr, (*assert)->get_type(), tyVars );
-					boxParam( (*assert)->get_type(), newExpr, tyVars );
+					boxParam( newExpr, assert->get_type(), tyVars );
 					appExpr->get_args().insert( cur, newExpr );
 				} // for
@@ -824,5 +795,5 @@
 		}
 
-		FunctionType *makeAdapterType( FunctionType *adaptee, const TyVarMap &tyVars ) {
+		FunctionType *makeAdapterType( FunctionType const *adaptee, const TyVarMap &tyVars ) {
 			// actually make the adapter type
 			FunctionType *adapter = adaptee->clone();
@@ -834,19 +805,28 @@
 		}
 
-		Expression *makeAdapterArg( DeclarationWithType *param, DeclarationWithType *arg, DeclarationWithType *realParam, const TyVarMap &tyVars ) {
+		Expression *makeAdapterArg(
+				DeclarationWithType *param,
+				DeclarationWithType const *arg,
+				DeclarationWithType const *realParam,
+				const TyVarMap &tyVars ) {
 			assert( param );
 			assert( arg );
-			if ( isPolyType( realParam->get_type(), tyVars ) ) {
-				if ( ! isPolyType( arg->get_type() ) ) {
-					UntypedExpr *deref = new UntypedExpr( new NameExpr( "*?" ) );
-					deref->args.push_back( new CastExpr( new VariableExpr( param ), new PointerType( Type::Qualifiers(), arg->get_type()->clone() ) ) );
-					deref->result = arg->get_type()->clone();
-					return deref;
-				} // if
+			if ( isPolyType( realParam->get_type(), tyVars )
+					&& ! isPolyType( arg->get_type() ) ) {
+				UntypedExpr *deref = new UntypedExpr( new NameExpr( "*?" ) );
+				deref->args.push_back( new CastExpr( new VariableExpr( param ), new PointerType( Type::Qualifiers(), arg->get_type()->clone() ) ) );
+				deref->result = arg->get_type()->clone();
+				return deref;
 			} // if
 			return new VariableExpr( param );
 		}
 
-		void addAdapterParams( ApplicationExpr *adapteeApp, std::list< DeclarationWithType *>::iterator arg, std::list< DeclarationWithType *>::iterator param, std::list< DeclarationWithType *>::iterator paramEnd, std::list< DeclarationWithType *>::iterator realParam, const TyVarMap &tyVars ) {
+		void addAdapterParams(
+				ApplicationExpr *adapteeApp,
+				std::list< DeclarationWithType *>::const_iterator arg,
+				std::list< DeclarationWithType *>::const_iterator param,
+				std::list< DeclarationWithType *>::const_iterator paramEnd,
+				std::list< DeclarationWithType *>::const_iterator realParam,
+				const TyVarMap &tyVars ) {
 			UniqueName paramNamer( "_p" );
 			for ( ; param != paramEnd; ++param, ++arg, ++realParam ) {
@@ -859,5 +839,5 @@
 		}
 
-		FunctionDecl *Pass1::makeAdapter( FunctionType *adaptee, FunctionType *realType, const std::string &mangleName, const TyVarMap &tyVars ) {
+		FunctionDecl *Pass1::makeAdapter( FunctionType const *adaptee, FunctionType *realType, const std::string &mangleName, const TyVarMap &tyVars ) {
 			FunctionType *adapterType = makeAdapterType( adaptee, tyVars );
 			adapterType = ScrubTyVars::scrub( adapterType, tyVars );
@@ -876,21 +856,19 @@
 			Statement *bodyStmt;
 
-			Type::ForallList::iterator tyArg = realType->get_forall().begin();
-			Type::ForallList::iterator tyParam = adapterType->get_forall().begin();
-			Type::ForallList::iterator realTyParam = adaptee->get_forall().begin();
-			for ( ; tyParam != adapterType->get_forall().end(); ++tyArg, ++tyParam, ++realTyParam ) {
-				assert( tyArg != realType->get_forall().end() );
-				std::list< DeclarationWithType *>::iterator assertArg = (*tyArg)->get_assertions().begin();
-				std::list< DeclarationWithType *>::iterator assertParam = (*tyParam)->get_assertions().begin();
-				std::list< DeclarationWithType *>::iterator realAssertParam = (*realTyParam)->get_assertions().begin();
-				for ( ; assertParam != (*tyParam)->get_assertions().end(); ++assertArg, ++assertParam, ++realAssertParam ) {
-					assert( assertArg != (*tyArg)->get_assertions().end() );
-					adapteeApp->get_args().push_back( makeAdapterArg( *assertParam, *assertArg, *realAssertParam, tyVars ) );
+			for ( auto tys : group_iterate( realType->forall, adapterType->forall, adaptee->forall ) ) {
+				TypeDecl * tyArg = std::get<0>( tys );
+				TypeDecl * tyParam = std::get<1>( tys );
+				TypeDecl * realTyParam = std::get<2>( tys );
+				for ( auto asserts : group_iterate( tyArg->assertions, tyParam->assertions, realTyParam->assertions ) ) {
+					DeclarationWithType * assertArg = std::get<0>( asserts );
+					DeclarationWithType * assertParam = std::get<1>( asserts );
+					DeclarationWithType * realAssertParam = std::get<2>( asserts );
+					adapteeApp->args.push_back( makeAdapterArg( assertParam, assertArg, realAssertParam, tyVars ) );
 				} // for
 			} // for
 
-			std::list< DeclarationWithType *>::iterator arg = realType->get_parameters().begin();
-			std::list< DeclarationWithType *>::iterator param = adapterType->get_parameters().begin();
-			std::list< DeclarationWithType *>::iterator realParam = adaptee->get_parameters().begin();
+			std::list< DeclarationWithType *>::const_iterator arg = realType->parameters.begin();
+			std::list< DeclarationWithType *>::const_iterator param = adapterType->parameters.begin();
+			std::list< DeclarationWithType *>::const_iterator realParam = adaptee->parameters.begin();
 			param++;		// skip adaptee parameter in the adapter type
 			if ( realType->get_returnVals().empty() ) {
@@ -898,5 +876,5 @@
 				addAdapterParams( adapteeApp, arg, param, adapterType->get_parameters().end(), realParam, tyVars );
 				bodyStmt = new ExprStmt( adapteeApp );
-			} else if ( isDynType( adaptee->get_returnVals().front()->get_type(), tyVars ) ) {
+			} else if ( isDynType( adaptee->returnVals.front()->get_type(), tyVars ) ) {
 				// return type T
 				if ( (*param)->get_name() == "" ) {
@@ -923,13 +901,12 @@
 		void Pass1::passAdapters( ApplicationExpr * appExpr, FunctionType * functionType, const TyVarMap & exprTyVars ) {
 			// collect a list of function types passed as parameters or implicit parameters (assertions)
-			std::list< DeclarationWithType *> &paramList = functionType->get_parameters();
-			std::list< FunctionType *> functions;
-			for ( Type::ForallList::iterator tyVar = functionType->get_forall().begin(); tyVar != functionType->get_forall().end(); ++tyVar ) {
-				for ( std::list< DeclarationWithType *>::iterator assert = (*tyVar)->get_assertions().begin(); assert != (*tyVar)->get_assertions().end(); ++assert ) {
-					findFunction( (*assert)->get_type(), functions, exprTyVars, needsAdapter );
+			std::list<FunctionType const *> functions;
+			for ( TypeDecl * const tyVar : functionType->get_forall() ) {
+				for ( DeclarationWithType * const assert : tyVar->get_assertions() ) {
+					findFunction( assert->get_type(), functions, exprTyVars, needsAdapter );
 				} // for
 			} // for
-			for ( std::list< DeclarationWithType *>::iterator arg = paramList.begin(); arg != paramList.end(); ++arg ) {
-				findFunction( (*arg)->get_type(), functions, exprTyVars, needsAdapter );
+			for ( DeclarationWithType * const arg : functionType->get_parameters() ) {
+				findFunction( arg->get_type(), functions, exprTyVars, needsAdapter );
 			} // for
 
@@ -938,7 +915,7 @@
 			std::set< std::string > adaptersDone;
 
-			for ( std::list< FunctionType *>::iterator funType = functions.begin(); funType != functions.end(); ++funType ) {
-				FunctionType *originalFunction = (*funType)->clone();
-				FunctionType *realFunction = (*funType)->clone();
+			for ( FunctionType const * const funType : functions ) {
+				FunctionType *originalFunction = funType->clone();
+				FunctionType *realFunction = funType->clone();
 				std::string mangleName = SymTab::Mangler::mangle( realFunction );
 
@@ -958,5 +935,5 @@
 					if ( adapter == adapters.end() ) {
 						// adapter has not been created yet in the current scope, so define it
-						FunctionDecl *newAdapter = makeAdapter( *funType, realFunction, mangleName, exprTyVars );
+						FunctionDecl *newAdapter = makeAdapter( funType, realFunction, mangleName, exprTyVars );
 						std::pair< AdapterIter, bool > answer = adapters.insert( std::pair< std::string, DeclarationWithType *>( mangleName, newAdapter ) );
 						adapter = answer.first;
@@ -972,10 +949,5 @@
 
 		Expression *makeIncrDecrExpr( ApplicationExpr *appExpr, Type *polyType, bool isIncr ) {
-			NameExpr *opExpr;
-			if ( isIncr ) {
-				opExpr = new NameExpr( "?+=?" );
-			} else {
-				opExpr = new NameExpr( "?-=?" );
-			} // if
+			NameExpr *opExpr = new NameExpr( ( isIncr ) ? "?+=?" : "?-=?" );
 			UntypedExpr *addAssign = new UntypedExpr( opExpr );
 			if ( AddressExpr *address = dynamic_cast< AddressExpr *>( appExpr->get_args().front() ) ) {
@@ -1120,6 +1092,6 @@
 		Expression *Pass1::postmutate( ApplicationExpr *appExpr ) {
 			// std::cerr << "mutate appExpr: " << InitTweak::getFunctionName( appExpr ) << std::endl;
-			// for ( TyVarMap::iterator i = scopeTyVars.begin(); i != scopeTyVars.end(); ++i ) {
-			// 	std::cerr << i->first << " ";
+			// for ( auto tyVar : scopeTyVars ) {
+			// 	std::cerr << tyVar.first << " ";
 			// }
 			// std::cerr << "\n";
@@ -1134,6 +1106,4 @@
 
 			Expression *ret = appExpr;
-
-			std::list< Expression *>::iterator arg = appExpr->get_args().begin();
 			std::list< Expression *>::iterator paramBegin = appExpr->get_args().begin();
 
@@ -1156,5 +1126,5 @@
 				// std::cerr << "dynRetType: " << dynRetType << std::endl;
 				Type *concRetType = appExpr->get_result()->isVoid() ? nullptr : appExpr->get_result();
-				ret = addDynRetParam( appExpr, concRetType, arg ); // xxx - used to use dynRetType instead of concRetType
+				ret = addDynRetParam( appExpr, concRetType ); // xxx - used to use dynRetType instead of concRetType
 			} else if ( needsAdapter( function, scopeTyVars ) && ! needsAdapter( function, exprTyVars) ) { // xxx - exprTyVars is used above...?
 				// xxx - the ! needsAdapter check may be incorrect. It seems there is some situation where an adapter is applied where it shouldn't be, and this fixes it for some cases. More investigation is needed.
@@ -1164,15 +1134,15 @@
 				// std::cerr << *env << std::endl;
 				// change the application so it calls the adapter rather than the passed function
-				ret = applyAdapter( appExpr, function, arg, scopeTyVars );
+				ret = applyAdapter( appExpr, function );
 			} // if
-			arg = appExpr->get_args().begin();
-
-			Type *concRetType = replaceWithConcrete( appExpr, dynRetType );
-			passTypeVars( appExpr, concRetType, arg, exprTyVars ); // xxx - used to use dynRetType instead of concRetType; this changed so that the correct type paramaters are passed for return types (it should be the concrete type's parameters, not the formal type's)
-			addInferredParams( appExpr, function, arg, exprTyVars );
-
-			arg = paramBegin;
-
-			boxParams( appExpr, function, arg, exprTyVars );
+
+			Type *concRetType = replaceWithConcrete( dynRetType, env );
+			std::list< Expression *>::iterator arg =
+				passTypeVars( appExpr, concRetType, exprTyVars ); // xxx - used to use dynRetType instead of concRetType; this changed so that the correct type paramaters are passed for return types (it should be the concrete type's parameters, not the formal type's)
+			addInferredParams( appExpr, arg, function, exprTyVars );
+
+			// This needs to point at the original first argument.
+			boxParams( appExpr, paramBegin, function, exprTyVars );
+
 			passAdapters( appExpr, function, exprTyVars );
 
@@ -1180,15 +1150,22 @@
 		}
 
-		Expression * Pass1::postmutate( UntypedExpr *expr ) {
+		bool isPolyDeref( UntypedExpr const * expr, TyVarMap const & scopeTyVars, TypeSubstitution const * env ) {
 			if ( expr->result && isPolyType( expr->result, scopeTyVars, env ) ) {
-				if ( NameExpr *name = dynamic_cast< NameExpr *>( expr->function ) ) {
+				if ( auto name = dynamic_cast<NameExpr const *>( expr->function ) ) {
 					if ( name->name == "*?" ) {
-						Expression *ret = expr->args.front();
-						expr->args.clear();
-						delete expr;
-						return ret;
+						return true;
 					} // if
 				} // if
 			} // if
+			return false;
+		}
+
+		Expression * Pass1::postmutate( UntypedExpr *expr ) {
+			if ( isPolyDeref( expr, scopeTyVars, env ) ) {
+				Expression *ret = expr->args.front();
+				expr->args.clear();
+				delete expr;
+				return ret;
+			}
 			return expr;
 		}
@@ -1200,14 +1177,10 @@
 			bool needs = false;
 			if ( UntypedExpr *expr = dynamic_cast< UntypedExpr *>( addrExpr->arg ) ) {
-				if ( expr->result && isPolyType( expr->result, scopeTyVars, env ) ) {
-					if ( NameExpr *name = dynamic_cast< NameExpr *>( expr->function ) ) {
-						if ( name->name == "*?" ) {
-							if ( ApplicationExpr * appExpr = dynamic_cast< ApplicationExpr * >( expr->args.front() ) ) {
-								assert( appExpr->function->result );
-								FunctionType *function = getFunctionType( appExpr->function->result );
-								assert( function );
-								needs = needsAdapter( function, scopeTyVars );
-							} // if
-						} // if
+				if ( isPolyDeref( expr, scopeTyVars, env ) ) {
+					if ( ApplicationExpr * appExpr = dynamic_cast< ApplicationExpr * >( expr->args.front() ) ) {
+						assert( appExpr->function->result );
+						FunctionType *function = getFunctionType( appExpr->function->result );
+						assert( function );
+						needs = needsAdapter( function, scopeTyVars );
 					} // if
 				} // if
@@ -1260,17 +1233,17 @@
 		void Pass2::addAdapters( FunctionType *functionType ) {
 			std::list< DeclarationWithType *> &paramList = functionType->parameters;
-			std::list< FunctionType *> functions;
-			for ( std::list< DeclarationWithType *>::iterator arg = paramList.begin(); arg != paramList.end(); ++arg ) {
-				Type *orig = (*arg)->get_type();
+			std::list< FunctionType const *> functions;
+			for ( DeclarationWithType * const arg : functionType->parameters ) {
+				Type *orig = arg->get_type();
 				findAndReplaceFunction( orig, functions, scopeTyVars, needsAdapter );
-				(*arg)->set_type( orig );
+				arg->set_type( orig );
 			}
 			std::set< std::string > adaptersDone;
-			for ( std::list< FunctionType *>::iterator funType = functions.begin(); funType != functions.end(); ++funType ) {
-				std::string mangleName = mangleAdapterName( *funType, scopeTyVars );
+			for ( FunctionType const * const funType : functions ) {
+				std::string mangleName = mangleAdapterName( funType, scopeTyVars );
 				if ( adaptersDone.find( mangleName ) == adaptersDone.end() ) {
 					std::string adapterName = makeAdapterName( mangleName );
 					// adapter may not be used in body, pass along with unused attribute.
-					paramList.push_front( new ObjectDecl( adapterName, Type::StorageClasses(), LinkageSpec::C, 0, new PointerType( Type::Qualifiers(), makeAdapterType( *funType, scopeTyVars ) ), 0, { new Attribute( "unused" ) } ) );
+					paramList.push_front( new ObjectDecl( adapterName, Type::StorageClasses(), LinkageSpec::C, 0, new PointerType( Type::Qualifiers(), makeAdapterType( funType, scopeTyVars ) ), 0, { new Attribute( "unused" ) } ) );
 					adaptersDone.insert( adaptersDone.begin(), mangleName );
 				}
@@ -1349,9 +1322,9 @@
 			ObjectDecl newPtr( "", Type::StorageClasses(), LinkageSpec::C, 0,
 			                   new PointerType( Type::Qualifiers(), new BasicType( Type::Qualifiers(), BasicType::LongUnsignedInt ) ), 0 );
-			for ( Type::ForallList::const_iterator tyParm = funcType->get_forall().begin(); tyParm != funcType->get_forall().end(); ++tyParm ) {
+			for ( TypeDecl * const tyParam : funcType->get_forall() ) {
 				ObjectDecl *sizeParm, *alignParm;
 				// add all size and alignment parameters to parameter list
-				if ( (*tyParm)->isComplete() ) {
-					TypeInstType parmType( Type::Qualifiers(), (*tyParm)->get_name(), *tyParm );
+				if ( tyParam->isComplete() ) {
+					TypeInstType parmType( Type::Qualifiers(), tyParam->get_name(), tyParam );
 					std::string parmName = mangleType( &parmType );
 
@@ -1367,16 +1340,16 @@
 				}
 				// move all assertions into parameter list
-				for ( std::list< DeclarationWithType *>::iterator assert = (*tyParm)->get_assertions().begin(); assert != (*tyParm)->get_assertions().end(); ++assert ) {
+				for ( DeclarationWithType * const assert : tyParam->get_assertions() ) {
 					// assertion parameters may not be used in body, pass along with unused attribute.
-					(*assert)->get_attributes().push_back( new Attribute( "unused" ) );
-					inferredParams.push_back( *assert );
-				}
-				(*tyParm)->get_assertions().clear();
+					assert->get_attributes().push_back( new Attribute( "unused" ) );
+					inferredParams.push_back( assert );
+				}
+				tyParam->get_assertions().clear();
 			}
 
 			// add size/align for generic parameter types to parameter list
 			std::set< std::string > seenTypes; // sizeofName for generic types we've seen
-			for ( std::list< DeclarationWithType* >::const_iterator fnParm = last; fnParm != funcType->get_parameters().end(); ++fnParm ) {
-				Type *polyType = isPolyType( (*fnParm)->get_type(), scopeTyVars );
+			for ( DeclarationWithType * const fnParam : funcType->get_parameters() ) {
+				Type *polyType = isPolyType( fnParam->get_type(), scopeTyVars );
 				if ( polyType && ! dynamic_cast< TypeInstType* >( polyType ) ) {
 					std::string typeName = mangleType( polyType );
@@ -1482,12 +1455,7 @@
 
 			if(!expect_func_type) {
-				GuardAction( [this]() {
-					knownLayouts.endScope();
-					knownOffsets.endScope();
-				});
 				// If this is the first function type we see
 				// Then it's the type of the declaration and we care about it
-				knownLayouts.beginScope();
-				knownOffsets.beginScope();
+				GuardScope( *this );
 			}
 
@@ -1497,7 +1465,7 @@
 
 			// make sure that any type information passed into the function is accounted for
-			for ( std::list< DeclarationWithType* >::const_iterator fnParm = funcType->get_parameters().begin(); fnParm != funcType->get_parameters().end(); ++fnParm ) {
+			for ( DeclarationWithType * const fnParam : funcType->get_parameters() ) {
 				// condition here duplicates that in Pass2::mutate( FunctionType* )
-				Type *polyType = isPolyType( (*fnParm)->get_type(), scopeTyVars );
+				Type *polyType = isPolyType( fnParam->get_type(), scopeTyVars );
 				if ( polyType && ! dynamic_cast< TypeInstType* >( polyType ) ) {
 					knownLayouts.insert( mangleType( polyType ) );
@@ -1507,5 +1475,5 @@
 
 		/// converts polymorphic type T into a suitable monomorphic representation, currently: __attribute__((aligned(8)) char[size_T]
-		Type * polyToMonoType( Type * declType ) {
+		Type * polyToMonoType( Type const * declType ) {
 			Type * charType = new BasicType( Type::Qualifiers(), BasicType::Kind::Char);
 			Expression * size = new NameExpr( sizeofName( mangleType(declType) ) );
@@ -1572,12 +1540,13 @@
 		/// Finds the member in the base list that matches the given declaration; returns its index, or -1 if not present
 		long findMember( DeclarationWithType *memberDecl, std::list< Declaration* > &baseDecls ) {
-			long i = 0;
-			for(std::list< Declaration* >::const_iterator decl = baseDecls.begin(); decl != baseDecls.end(); ++decl, ++i ) {
-				if ( memberDecl->get_name() != (*decl)->get_name() )
+			for ( auto pair : enumerate( baseDecls ) ) {
+				Declaration * decl = pair.val;
+				size_t i = pair.idx;
+				if ( memberDecl->get_name() != decl->get_name() )
 					continue;
 
 				if ( memberDecl->get_name().empty() ) {
 					// plan-9 field: match on unique_id
-					if ( memberDecl->get_uniqueId() == (*decl)->get_uniqueId() )
+					if ( memberDecl->get_uniqueId() == decl->get_uniqueId() )
 						return i;
 					else
@@ -1585,5 +1554,5 @@
 				}
 
-				DeclarationWithType *declWithType = strict_dynamic_cast< DeclarationWithType* >( *decl );
+				DeclarationWithType *declWithType = strict_dynamic_cast< DeclarationWithType* >( decl );
 
 				if ( memberDecl->get_mangleName().empty() || declWithType->get_mangleName().empty() ) {
@@ -1603,5 +1572,5 @@
 
 		/// Returns an index expression into the offset array for a type
-		Expression *makeOffsetIndex( Type *objectType, long i ) {
+		Expression *makeOffsetIndex( Type const *objectType, long i ) {
 			ConstantExpr *fieldIndex = new ConstantExpr( Constant::from_ulong( i ) );
 			UntypedExpr *fieldOffset = new UntypedExpr( new NameExpr( "?[?]" ) );
@@ -1696,13 +1665,13 @@
 
 		void PolyGenericCalculator::addOtypeParamsToLayoutCall( UntypedExpr *layoutCall, const std::list< Type* > &otypeParams ) {
-			for ( std::list< Type* >::const_iterator param = otypeParams.begin(); param != otypeParams.end(); ++param ) {
-				if ( findGeneric( *param ) ) {
+			for ( Type * const param : otypeParams ) {
+				if ( findGeneric( param ) ) {
 					// push size/align vars for a generic parameter back
-					std::string paramName = mangleType( *param );
+					std::string paramName = mangleType( param );
 					layoutCall->get_args().push_back( new NameExpr( sizeofName( paramName ) ) );
 					layoutCall->get_args().push_back( new NameExpr( alignofName( paramName ) ) );
 				} else {
-					layoutCall->get_args().push_back( new SizeofExpr( (*param)->clone() ) );
-					layoutCall->get_args().push_back( new AlignofExpr( (*param)->clone() ) );
+					layoutCall->get_args().push_back( new SizeofExpr( param->clone() ) );
+					layoutCall->get_args().push_back( new AlignofExpr( param->clone() ) );
 				}
 			}
@@ -1710,13 +1679,13 @@
 
 		/// returns true if any of the otype parameters have a dynamic layout and puts all otype parameters in the output list
-		bool findGenericParams( std::list< TypeDecl* > &baseParams, std::list< Expression* > &typeParams, std::list< Type* > &out ) {
+		bool findGenericParams( std::list< TypeDecl* > const &baseParams, std::list< Expression* > const &typeParams, std::list< Type* > &out ) {
 			bool hasDynamicLayout = false;
 
-			std::list< TypeDecl* >::const_iterator baseParam = baseParams.begin();
-			std::list< Expression* >::const_iterator typeParam = typeParams.begin();
-			for ( ; baseParam != baseParams.end() && typeParam != typeParams.end(); ++baseParam, ++typeParam ) {
+			for ( auto paramPair : group_iterate( baseParams, typeParams ) ) {
+				TypeDecl * baseParam = std::get<0>( paramPair );
+				Expression * typeParam = std::get<1>( paramPair );
 				// skip non-otype parameters
-				if ( ! (*baseParam)->isComplete() ) continue;
-				TypeExpr *typeExpr = dynamic_cast< TypeExpr* >( *typeParam );
+				if ( ! baseParam->isComplete() ) continue;
+				TypeExpr *typeExpr = dynamic_cast< TypeExpr* >( typeParam );
 				assert( typeExpr && "all otype parameters should be type expressions" );
 
@@ -1725,13 +1694,12 @@
 				if ( isPolyType( type ) ) hasDynamicLayout = true;
 			}
-			assert( baseParam == baseParams.end() && typeParam == typeParams.end() );
 
 			return hasDynamicLayout;
 		}
 
-		bool PolyGenericCalculator::findGeneric( Type *ty ) {
+		bool PolyGenericCalculator::findGeneric( Type const *ty ) {
 			ty = replaceTypeInst( ty, env );
 
-			if ( TypeInstType *typeInst = dynamic_cast< TypeInstType* >( ty ) ) {
+			if ( auto typeInst = dynamic_cast< TypeInstType const * >( ty ) ) {
 				if ( scopeTyVars.find( typeInst->get_name() ) != scopeTyVars.end() ) {
 					// NOTE assumes here that getting put in the scopeTyVars included having the layout variables set
@@ -1739,5 +1707,5 @@
 				}
 				return false;
-			} else if ( StructInstType *structTy = dynamic_cast< StructInstType* >( ty ) ) {
+			} else if ( auto structTy = dynamic_cast< StructInstType const * >( ty ) ) {
 				// check if this type already has a layout generated for it
 				std::string typeName = mangleType( ty );
@@ -1746,5 +1714,5 @@
 				// check if any of the type parameters have dynamic layout; if none do, this type is (or will be) monomorphized
 				std::list< Type* > otypeParams;
-				if ( ! findGenericParams( *structTy->get_baseParameters(), structTy->get_parameters(), otypeParams ) ) return false;
+				if ( ! findGenericParams( *structTy->get_baseParameters(), structTy->parameters, otypeParams ) ) return false;
 
 				// insert local variables for layout and generate call to layout function
@@ -1776,5 +1744,5 @@
 
 				return true;
-			} else if ( UnionInstType *unionTy = dynamic_cast< UnionInstType* >( ty ) ) {
+			} else if ( auto unionTy = dynamic_cast< UnionInstType const * >( ty ) ) {
 				// check if this type already has a layout generated for it
 				std::string typeName = mangleType( ty );
@@ -1783,5 +1751,5 @@
 				// check if any of the type parameters have dynamic layout; if none do, this type is (or will be) monomorphized
 				std::list< Type* > otypeParams;
-				if ( ! findGenericParams( *unionTy->get_baseParameters(), unionTy->get_parameters(), otypeParams ) ) return false;
+				if ( ! findGenericParams( *unionTy->get_baseParameters(), unionTy->parameters, otypeParams ) ) return false;
 
 				// insert local variables for layout and generate call to layout function
@@ -1881,10 +1849,8 @@
 					// build initializer list for offset array
 					std::list< Initializer* > inits;
-					for ( std::list< Declaration* >::const_iterator member = baseMembers.begin(); member != baseMembers.end(); ++member ) {
-						if ( DeclarationWithType *memberDecl = dynamic_cast< DeclarationWithType* >( *member ) ) {
-							inits.push_back( new SingleInit( new OffsetofExpr( ty->clone(), memberDecl ) ) );
-						} else {
-							assertf( false, "Requesting offset of Non-DWT member: %s", toString( *member ).c_str() );
-						}
+					for ( Declaration * const member : baseMembers ) {
+						DeclarationWithType *memberDecl = dynamic_cast< DeclarationWithType* >( member );
+						assertf( memberDecl, "Requesting offset of Non-DWT member: %s", toString( member ).c_str() );
+						inits.push_back( new SingleInit( new OffsetofExpr( ty->clone(), memberDecl ) ) );
 					}
 
@@ -1965,2 +1931,3 @@
 // compile-command: "make install" //
 // End: //
+
Index: src/GenPoly/FindFunction.cc
===================================================================
--- src/GenPoly/FindFunction.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/GenPoly/FindFunction.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -29,5 +29,5 @@
 	class FindFunction : public WithGuards, public WithVisitorRef<FindFunction>, public WithShortCircuiting {
 	  public:
-		FindFunction( std::list< FunctionType* > &functions, const TyVarMap &tyVars, bool replaceMode, FindFunctionPredicate predicate );
+		FindFunction( std::list< FunctionType const* > &functions, const TyVarMap &tyVars, bool replaceMode, FindFunctionPredicate predicate );
 
 		void premutate( FunctionType * functionType );
@@ -37,5 +37,5 @@
 		void handleForall( const Type::ForallList &forall );
 
-		std::list< FunctionType* > &functions;
+		std::list< FunctionType const * > & functions;
 		TyVarMap tyVars;
 		bool replaceMode;
@@ -43,15 +43,15 @@
 	};
 
-	void findFunction( Type *type, std::list< FunctionType* > &functions, const TyVarMap &tyVars, FindFunctionPredicate predicate ) {
+	void findFunction( Type *type, std::list< FunctionType const * > &functions, const TyVarMap &tyVars, FindFunctionPredicate predicate ) {
 		PassVisitor<FindFunction> finder( functions, tyVars, false, predicate );
 		type->acceptMutator( finder );
 	}
 
-	void findAndReplaceFunction( Type *&type, std::list< FunctionType* > &functions, const TyVarMap &tyVars, FindFunctionPredicate predicate ) {
+	void findAndReplaceFunction( Type *&type, std::list< FunctionType const * > &functions, const TyVarMap &tyVars, FindFunctionPredicate predicate ) {
 		PassVisitor<FindFunction> finder( functions, tyVars, true, predicate );
 		type = type->acceptMutator( finder );
 	}
 
-	FindFunction::FindFunction( std::list< FunctionType* > &functions, const TyVarMap &tyVars, bool replaceMode, FindFunctionPredicate predicate )
+	FindFunction::FindFunction( std::list< FunctionType const * > &functions, const TyVarMap &tyVars, bool replaceMode, FindFunctionPredicate predicate )
 		: functions( functions ), tyVars( tyVars ), replaceMode( replaceMode ), predicate( predicate ) {
 	}
Index: src/GenPoly/FindFunction.h
===================================================================
--- src/GenPoly/FindFunction.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/GenPoly/FindFunction.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -27,7 +27,7 @@
 
 	/// recursively walk `type`, placing all functions that match `predicate` under `tyVars` into `functions`
-	void findFunction( Type *type, std::list< FunctionType* > &functions, const TyVarMap &tyVars, FindFunctionPredicate predicate );
+	void findFunction( Type *type, std::list< FunctionType const * > &functions, const TyVarMap &tyVars, FindFunctionPredicate predicate );
 	/// like `findFunction`, but also replaces the function type with void ()(void)
-	void findAndReplaceFunction( Type *&type, std::list< FunctionType* > &functions, const TyVarMap &tyVars, FindFunctionPredicate predicate );
+	void findAndReplaceFunction( Type *&type, std::list< FunctionType const * > &functions, const TyVarMap &tyVars, FindFunctionPredicate predicate );
 } // namespace GenPoly
 
Index: src/GenPoly/GenPoly.cc
===================================================================
--- src/GenPoly/GenPoly.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/GenPoly/GenPoly.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -10,6 +10,6 @@
 // Created On       : Mon May 18 07:44:20 2015
 // Last Modified By : Andrew Beach
-// Last Modified On : Fri Oct  7 15:25:00 2022
-// Update Count     : 16
+// Last Modified On : Mon Oct 24 15:19:00 2022
+// Update Count     : 17
 //
 
@@ -118,7 +118,16 @@
 	}
 
+	const Type* replaceTypeInst( const Type* type, const TypeSubstitution* env ) { // const overload: replace a TypeInstType by its binding in env, if any
+		if ( ! env ) return type; // no environment: nothing to substitute
+		if ( auto typeInst = dynamic_cast< const TypeInstType* >( type ) ) {
+			Type *newType = env->lookup( typeInst->get_name() ); // look the type variable up by name
+			if ( newType ) return newType; // bound in env: return the bound type
+		}
+		return type; // not a TypeInstType, or no binding found: return the input unchanged
+	}
+
 	const ast::Type * replaceTypeInst(const ast::Type * type, const ast::TypeSubstitution * env) {
 		if (!env) return type;
-		if (auto typeInst = dynamic_cast<const ast::TypeInstType*> (type)) {
+		if ( auto typeInst = dynamic_cast<const ast::TypeInstType*>(type) ) {
 			auto newType = env->lookup(typeInst);
 			if (newType) return newType;
@@ -194,5 +203,5 @@
 
 	if ( auto inst = dynamic_cast< const ast::TypeInstType * >( type ) ) {
-		if ( typeVars.find( inst->typeString() ) != typeVars.end() ) return type;
+		if ( typeVars.find( *inst ) != typeVars.end() ) return type;
 	} else if ( auto array = dynamic_cast< const ast::ArrayType * >( type ) ) {
 		return isPolyType( array->base, subst );
@@ -227,7 +236,7 @@
 
 	if ( auto inst = dynamic_cast<ast::TypeInstType const *>( type ) ) {
-		auto var = typeVars.find( inst->name );
+		auto var = typeVars.find( *inst );
 		if ( var != typeVars.end() && var->second.isComplete ) {
-
+			return inst;
 		}
 	} else if ( auto inst = dynamic_cast<ast::StructInstType const *>( type ) ) {
@@ -784,5 +793,5 @@
 
 void addToTypeVarMap( const ast::TypeInstType * type, TypeVarMap & typeVars ) {
-	typeVars.insert( type->typeString(), ast::TypeDecl::Data( type->base ) );
+	typeVars.insert( *type, ast::TypeDecl::Data( type->base ) );
 }
 
@@ -816,11 +825,4 @@
 	}
 
-void printTypeVarMap( std::ostream &os, const TypeVarMap & typeVars ) {
-	for ( auto const & pair : typeVars ) {
-		os << pair.first << " (" << pair.second << ") ";
-	} // for
-	os << std::endl;
-}
-
 } // namespace GenPoly
 
Index: src/GenPoly/GenPoly.h
===================================================================
--- src/GenPoly/GenPoly.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/GenPoly/GenPoly.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -10,6 +10,6 @@
 // Created On       : Mon May 18 07:44:20 2015
 // Last Modified By : Andrew Beach
-// Last Modified On : Fri Oct  7 15:06:00 2022
-// Update Count     : 9
+// Last Modified On : Mon Oct 24 15:18:00 2022
+// Update Count     : 11
 //
 
@@ -22,4 +22,5 @@
 #include "AST/Decl.hpp"           // for TypeDecl::Data
 #include "AST/Fwd.hpp"            // for ApplicationExpr, BaseInstType, Func...
+#include "AST/Type.hpp"           // for TypeInstType::TypeEnvKey
 #include "SymTab/Mangler.h"       // for Mangler
 #include "SynTree/Declaration.h"  // for TypeDecl::Data, AggregateDecl, Type...
@@ -28,10 +29,11 @@
 namespace GenPoly {
 
-	// TODO Via some tricks this works for ast::TypeDecl::Data as well.
 	typedef ErasableScopedMap< std::string, TypeDecl::Data > TyVarMap;
-	using TypeVarMap = ErasableScopedMap< std::string, ast::TypeDecl::Data >;
+	using TypeVarMap = ErasableScopedMap< ast::TypeInstType::TypeEnvKey, ast::TypeDecl::Data >;
 
 	/// Replaces a TypeInstType by its referrent in the environment, if applicable
 	Type* replaceTypeInst( Type* type, const TypeSubstitution* env );
+	const Type* replaceTypeInst( const Type* type, const TypeSubstitution* env );
+	const ast::Type * replaceTypeInst( const ast::Type *, const ast::TypeSubstitution * );
 
 	/// returns polymorphic type if is polymorphic type, NULL otherwise; will look up substitution in env if provided
@@ -53,4 +55,5 @@
 	/// true iff function has dynamic-layout return type under the type variable map generated from its forall-parameters
 	ReferenceToType *isDynRet( FunctionType *function );
+	const ast::BaseInstType *isDynRet( const ast::FunctionType * func );
 
 	/// A function needs an adapter if it returns a dynamic-layout value or if any of its parameters have dynamic-layout type
@@ -112,8 +115,7 @@
 	/// Prints type variable map
 	void printTyVarMap( std::ostream &os, const TyVarMap &tyVarMap );
-	void printTypeVarMap( std::ostream &os, const TypeVarMap & typeVars );
 
 	/// Gets the mangled name of this type; alias for SymTab::Mangler::mangleType().
-	inline std::string mangleType( Type *ty ) { return SymTab::Mangler::mangleType( ty ); }
+	inline std::string mangleType( const Type *ty ) { return SymTab::Mangler::mangleType( ty ); }
 
 	/// Gets the name of the sizeof parameter for the type, given its mangled name
@@ -128,4 +130,7 @@
 	/// Gets the name of the layout function for a given aggregate type, given its declaration
 	inline std::string layoutofName( AggregateDecl *decl ) { return std::string( "_layoutof_" ) + decl->get_name(); }
+	inline std::string layoutofName( ast::AggregateDecl const * decl ) { // ast:: overload: same "_layoutof_" + name scheme
+		return std::string( "_layoutof_" ) + decl->name;
+	}
 
 } // namespace GenPoly
Index: src/GenPoly/InstantiateGenericNew.cpp
===================================================================
--- src/GenPoly/InstantiateGenericNew.cpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/GenPoly/InstantiateGenericNew.cpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -10,6 +10,6 @@
 // Created On       : Tue Aug 16 10:51:00 2022
 // Last Modified By : Andrew Beach
-// Last Modified On : Tue Sep 13 16:03:00 2022
-// Update Count     : 0
+// Last Modified On : Mon Oct 31 16:48:00 2022
+// Update Count     : 1
 //
 
@@ -378,23 +378,35 @@
 		//   Ptr(int) p;
 		//   int i;
+		// The original expression:
 		//   p.x = &i;
-		// becomes
-		//   int *& _dtype_static_member_0 = (int **)&p.x;
-		//   _dtype_static_member_0 = &i;
+		// Becomes the expression/declaration:
+		//   int *& _dtype_static_member_0;
+		//   (_dtype_static_member_0 = (int**)&p.x,
+		//    _dtype_static_member_0) = &i;
+
+		// The declaration is simple:
 		static UniqueName tmpNamer( "_dtype_static_member_" );
-		ast::Expr * init = new ast::CastExpr( location,
-			new ast::AddressExpr( location, memberExpr ),
-			new ast::PointerType( ast::deepCopy( concType ) ),
-			ast::ExplicitCast
-		);
 		ast::ObjectDecl * tmp = new ast::ObjectDecl( location,
 			tmpNamer.newName(),
 			new ast::ReferenceType( concType ),
-			new ast::SingleInit( location, init ),
+			nullptr,
 			ast::Storage::Classes(),
 			ast::Linkage::C
 		);
 		stmtsToAddBefore.push_back( new ast::DeclStmt( location, tmp ) );
-		return new ast::VariableExpr( location, tmp );
+
+		// The expression is more complex: it relies on references and on
+		// reference/pointer parity, but breaking it up risks reordering.
+		return new ast::CommaExpr( location,
+			ast::UntypedExpr::createAssign( location,
+				new ast::VariableExpr( location, tmp ),
+				new ast::CastExpr( location,
+					new ast::AddressExpr( location, memberExpr ),
+					new ast::PointerType( ast::deepCopy( concType ) ),
+					ast::ExplicitCast
+				)
+			),
+			new ast::VariableExpr( location, tmp )
+		);
 	} else {
 		// Here, it can simply add a cast to actual types.
@@ -476,5 +488,4 @@
 };
 
-// I think this and the UnionInstType can be made into a template function.
 ast::Type const * GenericInstantiator::postvisit(
 		ast::StructInstType const * inst ) {
Index: src/GenPoly/ScrubTyVars.cc
===================================================================
--- src/GenPoly/ScrubTyVars.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/GenPoly/ScrubTyVars.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -20,5 +20,5 @@
 #include "GenPoly/ErasableScopedMap.h"  // for ErasableScopedMap<>::const_it...
 #include "ScrubTyVars.h"
-#include "SymTab/Mangler.h"             // for mangle, typeMode
+#include "SymTab/Mangler.h"             // for mangleType
 #include "SynTree/Declaration.h"        // for TypeDecl, TypeDecl::Data, Typ...
 #include "SynTree/Expression.h"         // for Expression (ptr only), NameExpr
@@ -195,5 +195,5 @@
 	}
 
-	auto typeVar = typeVars->find( type->name );
+	auto typeVar = typeVars->find( *type );
 	if ( typeVar == typeVars->end() ) {
 		return type;
@@ -227,5 +227,5 @@
 	if ( dynType ) {
 		return new ast::NameExpr( expr->location,
-			sizeofName( Mangle::mangle( dynType, Mangle::typeMode() ) ) );
+			sizeofName( Mangle::mangleType( dynType ) ) );
 	} else {
 		return expr;
@@ -237,5 +237,5 @@
 	if ( dynType ) {
 		return new ast::NameExpr( expr->location,
-			alignofName( Mangle::mangle( dynType, Mangle::typeMode() ) ) );
+			alignofName( Mangle::mangleType( dynType ) ) );
 	} else {
 		return expr;
Index: src/Parser/DeclarationNode.cc
===================================================================
--- src/Parser/DeclarationNode.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Parser/DeclarationNode.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -27,5 +27,5 @@
 #include "SynTree/LinkageSpec.h"   // for Spec, linkageName, Cforall
 #include "SynTree/Attribute.h"     // for Attribute
-#include "SynTree/Declaration.h"   // for TypeDecl, ObjectDecl, Declaration
+#include "SynTree/Declaration.h"   // for TypeDecl, ObjectDecl, InlineMemberDecl, Declaration
 #include "SynTree/Expression.h"    // for Expression, ConstantExpr
 #include "SynTree/Statement.h"     // for AsmStmt
@@ -1165,4 +1165,7 @@
 		SemanticError( this, "invalid function specifier for " );
 	} // if
+	if ( enumInLine ) {
+		return new InlineMemberDecl( *name, storageClasses, linkage, nullptr );
+	} // if
 	assertf( name, "ObjectDecl must a have name\n" );
 	return (new ObjectDecl( *name, storageClasses, linkage, maybeBuild< Expression >( bitfieldWidth ), nullptr, maybeBuild< Initializer >( initializer ) ))->set_asmName( asmName )->set_extension( extension );
Index: src/Parser/ExpressionNode.cc
===================================================================
--- src/Parser/ExpressionNode.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Parser/ExpressionNode.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -519,10 +519,5 @@
 		}
 	}
-	auto ret =  new QualifiedNameExpr( newDecl, name->name );
-	if ( auto e = dynamic_cast<EnumDecl*>(newDecl) ) {
-		auto enumInst = new EnumInstType( Type::Qualifiers(), e );
-		auto obj = new ObjectDecl( name->name, Type::StorageClasses(), LinkageSpec::Cforall, nullptr, enumInst, nullptr );
-	}
-	return ret;
+	return new QualifiedNameExpr( newDecl, name->name );
 }
 
Index: src/Parser/ParseNode.h
===================================================================
--- src/Parser/ParseNode.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Parser/ParseNode.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -10,6 +10,6 @@
 // Created On       : Sat May 16 13:28:16 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Oct 18 16:22:15 2022
-// Update Count     : 937
+// Last Modified On : Wed Nov  2 21:27:07 2022
+// Update Count     : 939
 //
 
@@ -168,4 +168,6 @@
 	Ctor, Dtor,
 }; // OperKinds
+
+enum class EnumHiding { Visible, Hide }; // parser value of hide_opt / visible_hide_opt: '!' => Hide; empty or '^' => Visible
 
 struct LabelNode {
Index: src/Parser/TypeData.cc
===================================================================
--- src/Parser/TypeData.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Parser/TypeData.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -925,9 +925,6 @@
 	for ( const DeclarationNode * cur = td->enumeration.constants; cur != nullptr; cur = dynamic_cast< DeclarationNode * >( cur->get_next() ), ++members ) {
 		if ( cur->enumInLine ) {
-			// Tell the compiler this is a inline value placeholder
-			ObjectDecl * member = dynamic_cast< ObjectDecl* >(* members);
-			member->enumInLine = true;
-		}
-		if ( ret->isTyped && !ret->base && cur->has_enumeratorValue() ) {
+			// Inline entry: skip the enumerator-value handling below.
+		} else if ( ret->isTyped && !ret->base && cur->has_enumeratorValue() ) {
 			SemanticError( td->location, "Enumerator of enum(void) cannot have an explicit initializer value." );
 		} else if ( cur->has_enumeratorValue() ) {
Index: src/Parser/parser.yy
===================================================================
--- src/Parser/parser.yy	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Parser/parser.yy	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -10,6 +10,6 @@
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Oct 14 14:04:43 2022
-// Update Count     : 5751
+// Last Modified On : Wed Nov  2 21:31:21 2022
+// Update Count     : 5810
 //
 
@@ -278,4 +278,5 @@
 
 // Types declaration for productions
+
 %union {
 	Token tok;
@@ -290,5 +291,5 @@
 	CondCtl * ifctl;
 	ForCtrl * fctl;
-	enum OperKinds compop;
+	OperKinds compop;
 	LabelNode * label;
 	InitializerNode * in;
@@ -296,4 +297,5 @@
 	std::string * str;
 	bool flag;
+	EnumHiding hide;
 	CatchStmt::Kind catch_kind;
 	GenericExpr * genexpr;
@@ -364,4 +366,6 @@
 %type<constant> string_literal
 %type<str> string_literal_list
+
+%type<hide> hide_opt					visible_hide_opt
 
 // expressions
@@ -2553,9 +2557,9 @@
 	| ENUM attribute_list_opt identifier
 		{ typedefTable.makeTypedef( *$3 ); }
-	  '{' enumerator_list comma_opt '}'
-		{ $$ = DeclarationNode::newEnum( $3, $6, true, false )->addQualifiers( $2 ); }
+	  hide_opt '{' enumerator_list comma_opt '}'
+	  { $$ = DeclarationNode::newEnum( $3, $7, true, false )->addQualifiers( $2 ); }
 	| ENUM attribute_list_opt typedef_name				// unqualified type name
-	  '{' enumerator_list comma_opt '}'
-		{ $$ = DeclarationNode::newEnum( $3->name, $5, true, false )->addQualifiers( $2 ); }
+	  hide_opt '{' enumerator_list comma_opt '}'
+		{ $$ = DeclarationNode::newEnum( $3->name, $6, true, false )->addQualifiers( $2 ); }
 	| ENUM '(' cfa_abstract_parameter_declaration ')' attribute_list_opt '{' enumerator_list comma_opt '}'
 	 	{
@@ -2574,22 +2578,31 @@
 			typedefTable.makeTypedef( *$6 );
 		}
-	  '{' enumerator_list comma_opt '}'
-		{
-			$$ = DeclarationNode::newEnum( $6, $10, true, true, $3 )->addQualifiers( $5 )->addQualifiers( $7 );
+	  hide_opt '{' enumerator_list comma_opt '}'
+		{
+			$$ = DeclarationNode::newEnum( $6, $11, true, true, $3 )->addQualifiers( $5 )->addQualifiers( $7 );
 		}
 	| ENUM '(' ')' attribute_list_opt identifier attribute_list_opt
-	  '{' enumerator_list comma_opt '}'
-		{
-			$$ = DeclarationNode::newEnum( $5, $8, true, true, nullptr )->addQualifiers( $4 )->addQualifiers( $6 );
-		}
-	| ENUM '(' cfa_abstract_parameter_declaration ')' attribute_list_opt typedef_name attribute_list_opt '{' enumerator_list comma_opt '}'
-		{
-			$$ = DeclarationNode::newEnum( $6->name, $9, true, true, $3 )->addQualifiers( $5 )->addQualifiers( $7 );
-		}
-	| ENUM '(' ')' attribute_list_opt typedef_name attribute_list_opt '{' enumerator_list comma_opt '}'
-		{
-			$$ = DeclarationNode::newEnum( $5->name, $8, true, true, nullptr )->addQualifiers( $4 )->addQualifiers( $6 );
+	  hide_opt '{' enumerator_list comma_opt '}'
+		{
+			$$ = DeclarationNode::newEnum( $5, $9, true, true, nullptr )->addQualifiers( $4 )->addQualifiers( $6 );
+		}
+	| ENUM '(' cfa_abstract_parameter_declaration ')' attribute_list_opt typedef_name attribute_list_opt
+	  hide_opt '{' enumerator_list comma_opt '}'
+		{
+			$$ = DeclarationNode::newEnum( $6->name, $10, true, true, $3 )->addQualifiers( $5 )->addQualifiers( $7 );
+		}
+	| ENUM '(' ')' attribute_list_opt typedef_name attribute_list_opt
+	  hide_opt '{' enumerator_list comma_opt '}'
+		{
+			$$ = DeclarationNode::newEnum( $5->name, $9, true, true, nullptr )->addQualifiers( $4 )->addQualifiers( $6 );
 		}
 	| enum_type_nobody
+	;
+
+hide_opt:								// optional '!' marker; yields an EnumHiding value
+	// empty
+		{ $$ = EnumHiding::Visible; }	// no marker: Visible
+	| '!'
+		{ $$ = EnumHiding::Hide; }
+	;	// NOTE(review): the enum_type actions do not appear to read this value yet -- confirm where it is consumed
 
@@ -2602,12 +2615,18 @@
 
 enumerator_list:
-	identifier_or_type_name enumerator_value_opt
-		{ $$ = DeclarationNode::newEnumValueGeneric( $1, $2 ); }
+	visible_hide_opt identifier_or_type_name enumerator_value_opt
+		{ $$ = DeclarationNode::newEnumValueGeneric( $2, $3 ); }
 	| INLINE type_name
 		{ $$ = DeclarationNode::newEnumInLine( *$2->type->symbolic.name ); }
-	| enumerator_list ',' identifier_or_type_name enumerator_value_opt
-		{ $$ = $1->appendList( DeclarationNode::newEnumValueGeneric( $3, $4 ) ); }
+	| enumerator_list ',' visible_hide_opt identifier_or_type_name enumerator_value_opt
+		{ $$ = $1->appendList( DeclarationNode::newEnumValueGeneric( $4, $5 ) ); }
 	| enumerator_list ',' INLINE type_name enumerator_value_opt
 		{ $$ = $1->appendList( DeclarationNode::newEnumValueGeneric( new string("inline"), nullptr ) ); }
+	;
+
+visible_hide_opt:						// hide_opt extended with an explicit '^' marker
+	hide_opt							// no action given: Bison's default action, $$ = $1
+	| '^'
+		{ $$ = EnumHiding::Visible; }	// explicit marker yielding Visible
+	;
 
Index: src/ResolvExpr/CommonType.cc
===================================================================
--- src/ResolvExpr/CommonType.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/ResolvExpr/CommonType.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -991,5 +991,4 @@
 				add_qualifiers( result, type2->qualifiers );
 			} else {
-				// xxx - does unifying a ref with typed enumInst makes sense?
 				if (!dynamic_cast<const ast::EnumInstType *>(type2))
 					result = commonType( type2, ref, tenv, need, have, open, widen, symtab );
@@ -1010,6 +1009,4 @@
 
 		void postvisit( const ast::EnumInstType * enumInst ) {
-			// reuse BasicType/EnumInstType common type by swapping
-			// xxx - is this already handled by unify?
 			if (!dynamic_cast<const ast::EnumInstType *>(type2))
 				result = commonType( type2, enumInst, tenv, need, have, open, widen, symtab);
Index: src/ResolvExpr/ConversionCost.cc
===================================================================
--- src/ResolvExpr/ConversionCost.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/ResolvExpr/ConversionCost.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -720,5 +720,4 @@
 		costCalc( baseType, dst, srcIsLvalue, symtab, env );
 	} else {
-		(void)enumInstType;
 		static ast::ptr<ast::BasicType> integer = { new ast::BasicType( ast::BasicType::SignedInt ) };
 		cost = costCalc( integer, dst, srcIsLvalue, symtab, env );
Index: src/ResolvExpr/SatisfyAssertions.cpp
===================================================================
--- src/ResolvExpr/SatisfyAssertions.cpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/ResolvExpr/SatisfyAssertions.cpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -268,5 +268,5 @@
 		ast::ptr< ast::Type > resType = cand.expr->result;
 		cand.env.apply( resType );
-		return Mangle::mangle( resType, Mangle::typeMode() );
+		return Mangle::mangleType( resType );
 	}
 
Index: src/SymTab/Mangler.cc
===================================================================
--- src/SymTab/Mangler.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/SymTab/Mangler.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -9,7 +9,7 @@
 // Author           : Richard C. Bilson
 // Created On       : Sun May 17 21:40:29 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Jan 11 21:56:06 2021
-// Update Count     : 74
+// Last Modified By : Andrew Beach
+// Last Modified On : Fri Oct 21 16:18:00 2022
+// Update Count     : 75
 //
 #include "Mangler.h"
@@ -418,5 +418,6 @@
 			void postvisit( const ast::QualifiedType * qualType );
 
-			std::string get_mangleName() { return mangleName; }
+			/// Returns a copy of the mangled name constructed so far.
+			std::string result() const { return mangleName; }
 		  private:
 			std::string mangleName;         ///< Mangled name being constructed
@@ -444,9 +445,6 @@
 	} // namespace
 
-
 	std::string mangle( const ast::Node * decl, Mangle::Mode mode ) {
-		ast::Pass<Mangler_new> mangler( mode );
-		maybeAccept( decl, mangler );
-		return mangler.core.get_mangleName();
+		return ast::Pass<Mangler_new>::read( decl, mode );
 	}
 
@@ -689,8 +687,7 @@
 					} // for
 					for ( auto & assert : ptype->assertions ) {
-						ast::Pass<Mangler_new> sub_mangler(
-							mangleOverridable, typeMode, mangleGenericParams, nextVarNum, varNums );
-						assert->var->accept( sub_mangler );
-						assertionNames.push_back( sub_mangler.core.get_mangleName() );
+						assertionNames.push_back( ast::Pass<Mangler_new>::read(
+							assert->var.get(),
+							mangleOverridable, typeMode, mangleGenericParams, nextVarNum, varNums ) );
 						acount++;
 					} // for
Index: src/SymTab/Mangler.h
===================================================================
--- src/SymTab/Mangler.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/SymTab/Mangler.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -9,7 +9,7 @@
 // Author           : Richard C. Bilson
 // Created On       : Sun May 17 21:44:03 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Jul 22 09:45:30 2017
-// Update Count     : 15
+// Last Modified By : Andrew Beach
+// Last Modified On : Thu Oct 27 11:58:00 2022
+// Update Count     : 16
 //
 
@@ -22,5 +22,4 @@
 
 #include "AST/Bitfield.hpp"
-#include "AST/Fwd.hpp"
 #include "SynTree/SynTree.h"  // for Types
 #include "SynTree/Visitor.h"  // for Visitor, maybeAccept
@@ -33,4 +32,7 @@
 // * Currently name compression is not implemented.
 
+namespace ast {
+	class Node;
+}
 namespace ResolvExpr {
 	class TypeEnvironment;
@@ -101,8 +103,11 @@
 	using Mode = bitfield<mangle_flags>;
 
-	static inline Mode typeMode() { return NoOverrideable | Type; }
+	/// Mangle declaration name.
+	std::string mangle( const ast::Node * decl, Mode mode = {} );
 
-	/// Mangle declaration name
-	std::string mangle( const ast::Node * decl, Mode mode = {} );
+	/// Mangles a type node with the most common configuration for types: NoOverrideable | Type.
+	static inline std::string mangleType( const ast::Node * type ) {
+		return mangle( type, { NoOverrideable | Type } );
+	}
 
 	namespace Encoding {
Index: src/SymTab/Validate.cc
===================================================================
--- src/SymTab/Validate.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/SymTab/Validate.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -47,11 +47,4 @@
 #include <utility>                     // for pair
 
-#include "AST/Chain.hpp"
-#include "AST/Decl.hpp"
-#include "AST/Node.hpp"
-#include "AST/Pass.hpp"
-#include "AST/SymbolTable.hpp"
-#include "AST/Type.hpp"
-#include "AST/TypeSubstitution.hpp"
 #include "CodeGen/CodeGenerator.h"     // for genName
 #include "CodeGen/OperatorTable.h"     // for isCtorDtor, isCtorDtorAssign
@@ -1326,471 +1319,4 @@
 	}
 
-namespace {
-	/// Replaces enum types by int, and function/array types in function parameter and return
-	/// lists by appropriate pointers
-	/*
-	struct EnumAndPointerDecay_new {
-		const ast::EnumDecl * previsit( const ast::EnumDecl * enumDecl ) {
-			// set the type of each member of the enumeration to be EnumConstant
-			for ( unsigned i = 0; i < enumDecl->members.size(); ++i ) {
-				// build new version of object with EnumConstant
-				ast::ptr< ast::ObjectDecl > obj =
-					enumDecl->members[i].strict_as< ast::ObjectDecl >();
-				obj.get_and_mutate()->type =
-					new ast::EnumInstType{ enumDecl->name, ast::CV::Const };
-
-				// set into decl
-				ast::EnumDecl * mut = mutate( enumDecl );
-				mut->members[i] = obj.get();
-				enumDecl = mut;
-			}
-			return enumDecl;
-		}
-
-		static const ast::FunctionType * fixFunctionList(
-			const ast::FunctionType * func,
-			std::vector< ast::ptr< ast::DeclWithType > > ast::FunctionType::* field,
-			ast::ArgumentFlag isVarArgs = ast::FixedArgs
-		) {
-			const auto & dwts = func->* field;
-			unsigned nvals = dwts.size();
-			bool hasVoid = false;
-			for ( unsigned i = 0; i < nvals; ++i ) {
-				func = ast::mutate_field_index( func, field, i, fixFunction( dwts[i], hasVoid ) );
-			}
-
-			// the only case in which "void" is valid is where it is the only one in the list
-			if ( hasVoid && ( nvals > 1 || isVarArgs ) ) {
-				SemanticError(
-					dwts.front()->location, func, "invalid type void in function type" );
-			}
-
-			// one void is the only thing in the list, remove it
-			if ( hasVoid ) {
-				func = ast::mutate_field(
-					func, field, std::vector< ast::ptr< ast::DeclWithType > >{} );
-			}
-
-			return func;
-		}
-
-		const ast::FunctionType * previsit( const ast::FunctionType * func ) {
-			func = fixFunctionList( func, &ast::FunctionType::params, func->isVarArgs );
-			return fixFunctionList( func, &ast::FunctionType::returns );
-		}
-	};
-
-	/// expand assertions from a trait instance, performing appropriate type variable substitutions
-	void expandAssertions(
-		const ast::TraitInstType * inst, std::vector< ast::ptr< ast::DeclWithType > > & out
-	) {
-		assertf( inst->base, "Trait instance not linked to base trait: %s", toCString( inst ) );
-
-		// build list of trait members, substituting trait decl parameters for instance parameters
-		ast::TypeSubstitution sub{
-			inst->base->params.begin(), inst->base->params.end(), inst->params.begin() };
-		// deliberately take ast::ptr by-value to ensure this does not mutate inst->base
-		for ( ast::ptr< ast::Decl > decl : inst->base->members ) {
-			auto member = decl.strict_as< ast::DeclWithType >();
-			sub.apply( member );
-			out.emplace_back( member );
-		}
-	}
-
-	/// Associates forward declarations of aggregates with their definitions
-	class LinkReferenceToTypes_new final
-	: public ast::WithSymbolTable, public ast::WithGuards, public
-	  ast::WithVisitorRef<LinkReferenceToTypes_new>, public ast::WithShortCircuiting {
-
-		// these maps of uses of forward declarations of types need to have the actual type
-		// declaration switched in * after * they have been traversed. To enable this in the
-		// ast::Pass framework, any node that needs to be so mutated has mutate() called on it
-		// before it is placed in the map, properly updating its parents in the usual traversal,
-		// then can have the actual mutation applied later
-		using ForwardEnumsType = std::unordered_multimap< std::string, ast::EnumInstType * >;
-		using ForwardStructsType = std::unordered_multimap< std::string, ast::StructInstType * >;
-		using ForwardUnionsType = std::unordered_multimap< std::string, ast::UnionInstType * >;
-
-		const CodeLocation & location;
-		const ast::SymbolTable * localSymtab;
-
-		ForwardEnumsType forwardEnums;
-		ForwardStructsType forwardStructs;
-		ForwardUnionsType forwardUnions;
-
-		/// true if currently in a generic type body, so that type parameter instances can be
-		/// renamed appropriately
-		bool inGeneric = false;
-
-	public:
-		/// contstruct using running symbol table
-		LinkReferenceToTypes_new( const CodeLocation & loc )
-		: location( loc ), localSymtab( &symtab ) {}
-
-		/// construct using provided symbol table
-		LinkReferenceToTypes_new( const CodeLocation & loc, const ast::SymbolTable & syms )
-		: location( loc ), localSymtab( &syms ) {}
-
-		const ast::Type * postvisit( const ast::TypeInstType * typeInst ) {
-			// ensure generic parameter instances are renamed like the base type
-			if ( inGeneric && typeInst->base ) {
-				typeInst = ast::mutate_field(
-					typeInst, &ast::TypeInstType::name, typeInst->base->name );
-			}
-
-			if (
-				auto typeDecl = dynamic_cast< const ast::TypeDecl * >(
-					localSymtab->lookupType( typeInst->name ) )
-			) {
-				typeInst = ast::mutate_field( typeInst, &ast::TypeInstType::kind, typeDecl->kind );
-			}
-
-			return typeInst;
-		}
-
-		const ast::Type * postvisit( const ast::EnumInstType * inst ) {
-			const ast::EnumDecl * decl = localSymtab->lookupEnum( inst->name );
-			// not a semantic error if the enum is not found, just an implicit forward declaration
-			if ( decl ) {
-				inst = ast::mutate_field( inst, &ast::EnumInstType::base, decl );
-			}
-			if ( ! decl || ! decl->body ) {
-				// forward declaration
-				auto mut = mutate( inst );
-				forwardEnums.emplace( inst->name, mut );
-				inst = mut;
-			}
-			return inst;
-		}
-
-		void checkGenericParameters( const ast::BaseInstType * inst ) {
-			for ( const ast::Expr * param : inst->params ) {
-				if ( ! dynamic_cast< const ast::TypeExpr * >( param ) ) {
-					SemanticError(
-						location, inst, "Expression parameters for generic types are currently "
-						"unsupported: " );
-				}
-			}
-		}
-
-		const ast::StructInstType * postvisit( const ast::StructInstType * inst ) {
-			const ast::StructDecl * decl = localSymtab->lookupStruct( inst->name );
-			// not a semantic error if the struct is not found, just an implicit forward declaration
-			if ( decl ) {
-				inst = ast::mutate_field( inst, &ast::StructInstType::base, decl );
-			}
-			if ( ! decl || ! decl->body ) {
-				// forward declaration
-				auto mut = mutate( inst );
-				forwardStructs.emplace( inst->name, mut );
-				inst = mut;
-			}
-			checkGenericParameters( inst );
-			return inst;
-		}
-
-		const ast::UnionInstType * postvisit( const ast::UnionInstType * inst ) {
-			const ast::UnionDecl * decl = localSymtab->lookupUnion( inst->name );
-			// not a semantic error if the struct is not found, just an implicit forward declaration
-			if ( decl ) {
-				inst = ast::mutate_field( inst, &ast::UnionInstType::base, decl );
-			}
-			if ( ! decl || ! decl->body ) {
-				// forward declaration
-				auto mut = mutate( inst );
-				forwardUnions.emplace( inst->name, mut );
-				inst = mut;
-			}
-			checkGenericParameters( inst );
-			return inst;
-		}
-
-		const ast::Type * postvisit( const ast::TraitInstType * traitInst ) {
-			// handle other traits
-			const ast::TraitDecl * traitDecl = localSymtab->lookupTrait( traitInst->name );
-			if ( ! traitDecl )	 {
-				SemanticError( location, "use of undeclared trait " + traitInst->name );
-			}
-			if ( traitDecl->params.size() != traitInst->params.size() ) {
-				SemanticError( location, traitInst, "incorrect number of trait parameters: " );
-			}
-			traitInst = ast::mutate_field( traitInst, &ast::TraitInstType::base, traitDecl );
-
-			// need to carry over the "sized" status of each decl in the instance
-			for ( unsigned i = 0; i < traitDecl->params.size(); ++i ) {
-				auto expr = traitInst->params[i].as< ast::TypeExpr >();
-				if ( ! expr ) {
-					SemanticError(
-						traitInst->params[i].get(), "Expression parameters for trait instances "
-						"are currently unsupported: " );
-				}
-
-				if ( auto inst = expr->type.as< ast::TypeInstType >() ) {
-					if ( traitDecl->params[i]->sized && ! inst->base->sized ) {
-						// traitInst = ast::mutate_field_index(
-						// 	traitInst, &ast::TraitInstType::params, i,
-						// 	...
-						// );
-						ast::TraitInstType * mut = ast::mutate( traitInst );
-						ast::chain_mutate( mut->params[i] )
-							( &ast::TypeExpr::type )
-								( &ast::TypeInstType::base )->sized = true;
-						traitInst = mut;
-					}
-				}
-			}
-
-			return traitInst;
-		}
-
-		void previsit( const ast::QualifiedType * ) { visit_children = false; }
-
-		const ast::Type * postvisit( const ast::QualifiedType * qualType ) {
-			// linking only makes sense for the "oldest ancestor" of the qualified type
-			return ast::mutate_field(
-				qualType, &ast::QualifiedType::parent, qualType->parent->accept( * visitor ) );
-		}
-
-		const ast::Decl * postvisit( const ast::EnumDecl * enumDecl ) {
-			// visit enum members first so that the types of self-referencing members are updated
-			// properly
-			if ( ! enumDecl->body ) return enumDecl;
-
-			// update forward declarations to point here
-			auto fwds = forwardEnums.equal_range( enumDecl->name );
-			if ( fwds.first != fwds.second ) {
-				auto inst = fwds.first;
-				do {
-					// forward decl is stored * mutably * in map, can thus be updated
-					inst->second->base = enumDecl;
-				} while ( ++inst != fwds.second );
-				forwardEnums.erase( fwds.first, fwds.second );
-			}
-
-			// ensure that enumerator initializers are properly set
-			for ( unsigned i = 0; i < enumDecl->members.size(); ++i ) {
-				auto field = enumDecl->members[i].strict_as< ast::ObjectDecl >();
-				if ( field->init ) {
-					// need to resolve enumerator initializers early so that other passes that
-					// determine if an expression is constexpr have appropriate information
-					auto init = field->init.strict_as< ast::SingleInit >();
-
-					enumDecl = ast::mutate_field_index(
-						enumDecl, &ast::EnumDecl::members, i,
-						ast::mutate_field( field, &ast::ObjectDecl::init,
-							ast::mutate_field( init, &ast::SingleInit::value,
-								ResolvExpr::findSingleExpression(
-									init->value, new ast::BasicType{ ast::BasicType::SignedInt },
-									symtab ) ) ) );
-				}
-			}
-
-			return enumDecl;
-		}
-
-		/// rename generic type parameters uniquely so that they do not conflict with user defined
-		/// function forall parameters, e.g. the T in Box and the T in f, below
-		///   forall(otype T)
-		///   struct Box {
-		///     T x;
-		///   };
-		///   forall(otype T)
-		///   void f(Box(T) b) {
-		///     ...
-		///   }
-		template< typename AggrDecl >
-		const AggrDecl * renameGenericParams( const AggrDecl * aggr ) {
-			GuardValue( inGeneric );
-			inGeneric = ! aggr->params.empty();
-
-			for ( unsigned i = 0; i < aggr->params.size(); ++i ) {
-				const ast::TypeDecl * td = aggr->params[i];
-
-				aggr = ast::mutate_field_index(
-					aggr, &AggrDecl::params, i,
-					ast::mutate_field( td, &ast::TypeDecl::name, "__" + td->name + "_generic_" ) );
-			}
-			return aggr;
-		}
-
-		const ast::StructDecl * previsit( const ast::StructDecl * structDecl ) {
-			return renameGenericParams( structDecl );
-		}
-
-		void postvisit( const ast::StructDecl * structDecl ) {
-			// visit struct members first so that the types of self-referencing members are
-			// updated properly
-			if ( ! structDecl->body ) return;
-
-			// update forward declarations to point here
-			auto fwds = forwardStructs.equal_range( structDecl->name );
-			if ( fwds.first != fwds.second ) {
-				auto inst = fwds.first;
-				do {
-					// forward decl is stored * mutably * in map, can thus be updated
-					inst->second->base = structDecl;
-				} while ( ++inst != fwds.second );
-				forwardStructs.erase( fwds.first, fwds.second );
-			}
-		}
-
-		const ast::UnionDecl * previsit( const ast::UnionDecl * unionDecl ) {
-			return renameGenericParams( unionDecl );
-		}
-
-		void postvisit( const ast::UnionDecl * unionDecl ) {
-			// visit union members first so that the types of self-referencing members are updated
-			// properly
-			if ( ! unionDecl->body ) return;
-
-			// update forward declarations to point here
-			auto fwds = forwardUnions.equal_range( unionDecl->name );
-			if ( fwds.first != fwds.second ) {
-				auto inst = fwds.first;
-				do {
-					// forward decl is stored * mutably * in map, can thus be updated
-					inst->second->base = unionDecl;
-				} while ( ++inst != fwds.second );
-				forwardUnions.erase( fwds.first, fwds.second );
-			}
-		}
-
-		const ast::Decl * postvisit( const ast::TraitDecl * traitDecl ) {
-			// set the "sized" status for the special "sized" trait
-			if ( traitDecl->name == "sized" ) {
-				assertf( traitDecl->params.size() == 1, "Built-in trait 'sized' has incorrect "
-					"number of parameters: %zd", traitDecl->params.size() );
-
-				traitDecl = ast::mutate_field_index(
-					traitDecl, &ast::TraitDecl::params, 0,
-					ast::mutate_field(
-						traitDecl->params.front().get(), &ast::TypeDecl::sized, true ) );
-			}
-
-			// move assertions from type parameters into the body of the trait
-			std::vector< ast::ptr< ast::DeclWithType > > added;
-			for ( const ast::TypeDecl * td : traitDecl->params ) {
-				for ( const ast::DeclWithType * assn : td->assertions ) {
-					auto inst = dynamic_cast< const ast::TraitInstType * >( assn->get_type() );
-					if ( inst ) {
-						expandAssertions( inst, added );
-					} else {
-						added.emplace_back( assn );
-					}
-				}
-			}
-			if ( ! added.empty() ) {
-				auto mut = mutate( traitDecl );
-				for ( const ast::DeclWithType * decl : added ) {
-					mut->members.emplace_back( decl );
-				}
-				traitDecl = mut;
-			}
-
-			return traitDecl;
-		}
-	};
-
-	/// Replaces array and function types in forall lists by appropriate pointer type and assigns
-	/// each object and function declaration a unique ID
-	class ForallPointerDecay_new {
-		const CodeLocation & location;
-	public:
-		ForallPointerDecay_new( const CodeLocation & loc ) : location( loc ) {}
-
-		const ast::ObjectDecl * previsit( const ast::ObjectDecl * obj ) {
-			// ensure that operator names only apply to functions or function pointers
-			if (
-				CodeGen::isOperator( obj->name )
-				&& ! dynamic_cast< const ast::FunctionType * >( obj->type->stripDeclarator() )
-			) {
-				SemanticError( obj->location, toCString( "operator ", obj->name.c_str(), " is not "
-					"a function or function pointer." )  );
-			}
-
-			// ensure object has unique ID
-			if ( obj->uniqueId ) return obj;
-			auto mut = mutate( obj );
-			mut->fixUniqueId();
-			return mut;
-		}
-
-		const ast::FunctionDecl * previsit( const ast::FunctionDecl * func ) {
-			// ensure function has unique ID
-			if ( func->uniqueId ) return func;
-			auto mut = mutate( func );
-			mut->fixUniqueId();
-			return mut;
-		}
-
-		/// Fix up assertions -- flattens assertion lists, removing all trait instances
-		template< typename node_t, typename parent_t >
-		static const node_t * forallFixer(
-			const CodeLocation & loc, const node_t * node,
-			ast::FunctionType::ForallList parent_t::* forallField
-		) {
-			for ( unsigned i = 0; i < (node->* forallField).size(); ++i ) {
-				const ast::TypeDecl * type = (node->* forallField)[i];
-				if ( type->assertions.empty() ) continue;
-
-				std::vector< ast::ptr< ast::DeclWithType > > asserts;
-				asserts.reserve( type->assertions.size() );
-
-				// expand trait instances into their members
-				for ( const ast::DeclWithType * assn : type->assertions ) {
-					auto traitInst =
-						dynamic_cast< const ast::TraitInstType * >( assn->get_type() );
-					if ( traitInst ) {
-						// expand trait instance to all its members
-						expandAssertions( traitInst, asserts );
-					} else {
-						// pass other assertions through
-						asserts.emplace_back( assn );
-					}
-				}
-
-				// apply FixFunction to every assertion to check for invalid void type
-				for ( ast::ptr< ast::DeclWithType > & assn : asserts ) {
-					bool isVoid = false;
-					assn = fixFunction( assn, isVoid );
-					if ( isVoid ) {
-						SemanticError( loc, node, "invalid type void in assertion of function " );
-					}
-				}
-
-				// place mutated assertion list in node
-				auto mut = mutate( type );
-				mut->assertions = move( asserts );
-				node = ast::mutate_field_index( node, forallField, i, mut );
-			}
-			return node;
-		}
-
-		const ast::FunctionType * previsit( const ast::FunctionType * ftype ) {
-			return forallFixer( location, ftype, &ast::FunctionType::forall );
-		}
-
-		const ast::StructDecl * previsit( const ast::StructDecl * aggrDecl ) {
-			return forallFixer( aggrDecl->location, aggrDecl, &ast::StructDecl::params );
-		}
-
-		const ast::UnionDecl * previsit( const ast::UnionDecl * aggrDecl ) {
-			return forallFixer( aggrDecl->location, aggrDecl, &ast::UnionDecl::params );
-		}
-	};
-	*/
-} // anonymous namespace
-
-/*
-const ast::Type * validateType(
-		const CodeLocation & loc, const ast::Type * type, const ast::SymbolTable & symtab ) {
-	// ast::Pass< EnumAndPointerDecay_new > epc;
-	ast::Pass< LinkReferenceToTypes_new > lrt{ loc, symtab };
-	ast::Pass< ForallPointerDecay_new > fpd{ loc };
-
-	return type->accept( lrt )->accept( fpd );
-}
-*/
-
 } // namespace SymTab
 
Index: src/SynTree/Declaration.h
===================================================================
--- src/SynTree/Declaration.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/SynTree/Declaration.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -449,4 +449,29 @@
 };
 
+/// Declaration node holding a name and a type. NOTE(review): presumably the SynTree mirror of ast::InlineMemberDecl -- confirm.
+class InlineMemberDecl : public DeclarationWithType {
+	typedef DeclarationWithType Parent;
+  public:
+	Type * type;  ///< Owned by this node: the destructor deletes it, the copy constructor copies it with maybeClone.
+	/// Stores the given type pointer as-is; the remaining arguments are forwarded to DeclarationWithType.
+	InlineMemberDecl( const std::string & name, Type::StorageClasses scs, LinkageSpec::Spec linkage, Type * type,
+				const std::list< Attribute * > attributes = std::list< Attribute * >(), Type::FuncSpecifiers fs = Type::FuncSpecifiers() );
+	InlineMemberDecl( const InlineMemberDecl & other );
+	virtual ~InlineMemberDecl();
+	/// Type accessors. set_type only overwrites the pointer; the previous type is not deleted here.
+	virtual Type * get_type() const override { return type; }
+	virtual void set_type(Type * newType) override { type = newType; }
+	/// Factory using default-constructed storage classes and LinkageSpec::C.
+	static InlineMemberDecl * newInlineMemberDecl( const std::string & name, Type * type );
+	/// Node interface: copy, visitor/mutator dispatch, and printing.
+	virtual InlineMemberDecl * clone() const override { return new InlineMemberDecl( *this ); }
+	virtual void accept( Visitor & v ) override { v.visit( this ); }
+	virtual void accept( Visitor & v ) const override { v.visit( this ); }
+	virtual DeclarationWithType * acceptMutator( Mutator & m )  override { return m.mutate( this ); }
+	virtual void print( std::ostream & os, Indenter indent = {} ) const override;
+	virtual void printShort( std::ostream & os, Indenter indent = {} ) const override;
+
+};
+
 std::ostream & operator<<( std::ostream & os, const TypeDecl::Data & data );
 
Index: src/SynTree/InlineMemberDecl.cc
===================================================================
--- src/SynTree/InlineMemberDecl.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ src/SynTree/InlineMemberDecl.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,57 @@
+#include <list>                  // for list
+#include <ostream>               // for operator<<, ostream, basic_ostream
+#include <string>                // for operator<<, string, char_traits, ope...
+
+#include "Attribute.h"           // for Attribute
+#include "Declaration.h"
+#include "Common/utility.h"      // for maybeClone, printAll
+#include "LinkageSpec.h"         // for Spec, linkageName, Cforall
+#include "Type.h"                // for Type, Type::StorageClasses, Type::Fu...
+
+InlineMemberDecl::InlineMemberDecl( const std::string &name, Type::StorageClasses scs, LinkageSpec::Spec linkage,
+Type * type, const std::list< Attribute * >attributes, Type::FuncSpecifiers fs) 
+    : Parent( name, scs, linkage, attributes, fs ), type( type ) {}  // the type pointer is stored as-is; the destructor deletes it
+
+InlineMemberDecl::InlineMemberDecl( const InlineMemberDecl &other) 
+    : Parent( other), type( maybeClone( other.type ) ) {}  // the type is copied through maybeClone rather than sharing other's pointer
+
+InlineMemberDecl::~InlineMemberDecl() { delete type; }  // this node owns its type
+
+InlineMemberDecl * InlineMemberDecl::newInlineMemberDecl( const std::string &name, Type * type ) {  // heap-allocates a new node; the caller receives the raw pointer
+    return new InlineMemberDecl( name, Type::StorageClasses(), LinkageSpec::C, type );  // default storage classes, C linkage, default attributes and function specifiers
+}
+
+void InlineMemberDecl::print( std::ostream &os, Indenter indent ) const {  // writes name, linkage, storage classes, type and attributes to os
+    if ( name != "" ) os << name << ": ";
+	// Linkage is printed only when it differs from LinkageSpec::Cforall.
+	if ( linkage != LinkageSpec::Cforall ) {
+		os << LinkageSpec::name( linkage ) << " ";
+	} // if
+
+	get_storageClasses().print( os );
+	// A placeholder is printed when no type is attached.
+	if ( type ) {
+		type->print( os, indent );
+	} else {
+		os << " untyped entity ";
+	} // if
+	// Attributes start on a new line and are printed one indent level deeper.
+	if ( ! attributes.empty() ) {
+		os << std::endl << indent << "... with attributes:" << std::endl;
+		printAll( attributes, os, indent+1 );
+	} // if
+
+}
+
+void InlineMemberDecl::printShort( std::ostream &os, Indenter indent ) const {  // short form: name, storage classes and type only (no linkage, no attributes)
+    if ( name != "" ) os << name << ": ";
+
+	get_storageClasses().print( os );
+	// NOTE(review): this placeholder has no leading space, unlike the " untyped entity " that print() emits -- confirm whether intentional.
+	if ( type ) {
+		type->print( os, indent );
+	} else {
+		os << "untyped entity ";
+	} // if
+    
+}
Index: src/SynTree/Mutator.h
===================================================================
--- src/SynTree/Mutator.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/SynTree/Mutator.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -36,4 +36,5 @@
 	virtual DirectiveDecl * mutate( DirectiveDecl * directiveDecl ) = 0;
 	virtual StaticAssertDecl * mutate( StaticAssertDecl * assertDecl ) = 0;
+	virtual DeclarationWithType * mutate( InlineMemberDecl * inlineMemberDecl ) = 0;  // the result is typed as the DeclarationWithType base
 
 	virtual CompoundStmt * mutate( CompoundStmt * compoundStmt ) = 0;
Index: src/SynTree/SynTree.h
===================================================================
--- src/SynTree/SynTree.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/SynTree/SynTree.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -38,4 +38,5 @@
 class DirectiveDecl;
 class StaticAssertDecl;
+class InlineMemberDecl;
 
 class Statement;
Index: src/SynTree/Visitor.h
===================================================================
--- src/SynTree/Visitor.h	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/SynTree/Visitor.h	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -49,4 +49,6 @@
 	virtual void visit( StaticAssertDecl * node ) { visit( const_cast<const StaticAssertDecl *>(node) ); }
 	virtual void visit( const StaticAssertDecl * assertDecl ) = 0;
+	virtual void visit( InlineMemberDecl * node ) { visit( const_cast<const InlineMemberDecl *>(node) ); }  // non-const overload forwards to the const one
+	virtual void visit( const InlineMemberDecl * inlineMemberDecl ) = 0;
 
 	virtual void visit( CompoundStmt * node ) { visit( const_cast<const CompoundStmt *>(node) ); }
Index: src/SynTree/module.mk
===================================================================
--- src/SynTree/module.mk	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/SynTree/module.mk	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -42,4 +42,5 @@
       SynTree/Initializer.cc \
       SynTree/Initializer.h \
+      SynTree/InlineMemberDecl.cc \
       SynTree/Label.h \
       SynTree/LinkageSpec.cc \
Index: src/Validate/EnumAndPointerDecay.cpp
===================================================================
--- src/Validate/EnumAndPointerDecay.cpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Validate/EnumAndPointerDecay.cpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -21,4 +21,5 @@
 #include "AST/Type.hpp"
 #include "SymTab/FixFunction.h"
+#include "Validate/NoIdSymbolTable.hpp"
 
 namespace Validate {
@@ -26,5 +27,5 @@
 namespace {
 
-struct EnumAndPointerDecayCore final : public ast::WithCodeLocation {
+struct EnumAndPointerDecayCore final : public WithNoIdSymbolTable, public ast::WithCodeLocation {
 	ast::EnumDecl const * previsit( ast::EnumDecl const * decl );
 	ast::FunctionDecl const * previsit( ast::FunctionDecl const * decl );
@@ -39,9 +40,32 @@
 	// Set the type of each member of the enumeration to be EnumContant.
 	auto mut = ast::mutate( decl );
-	for ( ast::ptr<ast::Decl> & member : mut->members ) {
-		ast::ObjectDecl const * object = member.strict_as<ast::ObjectDecl>();
-		member = ast::mutate_field( object, &ast::ObjectDecl::type,
-			new ast::EnumInstType( decl, ast::CV::Const ) );
+	std::vector<ast::ptr<ast::Decl>> buffer;
+	for ( auto member : decl->members ) {
+		if ( ast::ObjectDecl const * object = member.as<ast::ObjectDecl>() ) {
+			buffer.push_back( ast::mutate_field( object,
+				&ast::ObjectDecl::type,
+				new ast::EnumInstType( decl, ast::CV::Const ) ) );
+		} else if ( auto value = member.as<ast::InlineMemberDecl>() ) {
+			if ( auto targetEnum = symtab.lookupEnum( value->name ) ) {
+				for ( auto enumMember : targetEnum->members ) {
+					auto enumObject = enumMember.strict_as<ast::ObjectDecl>();
+					buffer.push_back( new ast::ObjectDecl(
+						// Get the location from the "inline" declaration.
+						value->location,
+						enumObject->name,
+						// Construct a new EnumInstType as the type.
+						new ast::EnumInstType( decl, ast::CV::Const ),
+						enumObject->init,
+						enumObject->storage,
+						enumObject->linkage,
+						enumObject->bitfieldWidth,
+						{},
+						enumObject->funcSpec
+					) );
+				}
+			}
+		}
 	}
+	mut->members = buffer;
 	return mut;
 }
Index: src/Validate/LinkReferenceToTypes.cpp
===================================================================
--- src/Validate/LinkReferenceToTypes.cpp	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Validate/LinkReferenceToTypes.cpp	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -185,4 +185,5 @@
 				decl = mut;
 			}
+			// visit the base
 		} else if ( auto ptr = decl->base.as<ast::PointerType>() ) {
 			if ( auto base = ptr->base.as<ast::TypeInstType>() ) {
@@ -203,43 +204,4 @@
 
 	// The following section 
-	auto mut = ast::mutate( decl );
-	std::vector<ast::ptr<ast::Decl>> buffer;
-	for ( auto it = decl->members.begin(); it != decl->members.end(); ++it) {
-		auto member = (*it).as<ast::ObjectDecl>();
-		if ( member->enumInLine ) {
-			auto targetEnum = symtab.lookupEnum( member->name );
-			if ( targetEnum ) {			
-				for ( auto singleMamber : targetEnum->members ) {
-					auto tm = singleMamber.as<ast::ObjectDecl>();
-					auto t = new ast::ObjectDecl(
-						member->location, // use the "inline" location
-						tm->name,
-						new ast::EnumInstType( decl, ast::CV::Const ),
-						// Construct a new EnumInstType as the type
-						tm->init,
-						tm->storage,
-						tm->linkage,
-						tm->bitfieldWidth,
-						{}, // enum member doesn't have attribute
-						tm->funcSpec
-					);
-					t->importValue = true;
-					buffer.push_back(t);
-				}
-			}
-		} else {
-			auto search_it = std::find_if( buffer.begin(), buffer.end(), [member](ast::ptr<ast::Decl> cur) {
-				auto curAsObjDecl = cur.as<ast::ObjectDecl>();
-				return (curAsObjDecl->importValue) && (curAsObjDecl->name == member->name);
-			});
-			if ( search_it != buffer.end() ) {
-				buffer.erase( search_it ); // Found an import enum value that has the same name
-				// override the imported value
-			}
-			buffer.push_back( *it );
-		}
-	}
-	mut->members = buffer;
-	decl = mut;
 
 	ForwardEnumsType::iterator fwds = forwardEnums.find( decl->name );
Index: src/Virtual/ExpandCasts.cc
===================================================================
--- src/Virtual/ExpandCasts.cc	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ src/Virtual/ExpandCasts.cc	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -295,6 +295,5 @@
 	// returns the previous declaration for error messages.
 	ast::ObjectDecl const * insert( ast::ObjectDecl const * typeIdDecl ) {
-		std::string const & mangledName =
-				Mangle::mangle( typeIdDecl->type, Mangle::typeMode() );
+		std::string mangledName = Mangle::mangleType( typeIdDecl->type );
 		ast::ObjectDecl const *& value = instances[ mangledName ];
 		if ( value ) {
@@ -310,6 +309,5 @@
 
 	ast::ObjectDecl const * lookup( ast::Type const * typeIdType ) {
-		std::string const & mangledName =
-				Mangle::mangle( typeIdType, Mangle::typeMode() );
+		std::string mangledName = Mangle::mangleType( typeIdType );
 		auto const it = instances.find( mangledName );
 		return ( instances.end() == it ) ? nullptr : it->second;
Index: tests/.expect/loop-inc.txt
===================================================================
--- tests/.expect/loop-inc.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/.expect/loop-inc.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,3 @@
+loop
+loop
+done
Index: tests/array-container/.expect/array-sbscr-types.txt
===================================================================
--- tests/array-container/.expect/array-sbscr-types.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/array-container/.expect/array-sbscr-types.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,162 @@
+Simple array
+
+100.3
+100.3
+
+100.0
+100.1
+100.2
+100.3
+100.4
+
+100.0
+100.1
+100.2
+100.3
+
+Via trait
+
+100.3
+100.3
+
+100.0
+100.1
+100.2
+100.3
+100.4
+
+100.0
+100.1
+100.2
+100.3
+
+Simple array, multidim
+
+3.3
+3.3
+3.3
+3.3
+
+0.3
+1.3
+2.3
+3.3
+4.3
+
+0.3
+1.3
+2.3
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+Via trait, multidim
+
+3.3
+3.3
+3.3
+3.3
+
+0.3
+1.3
+2.3
+3.3
+4.3
+
+0.3
+1.3
+2.3
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+Transposed, Via trait, multidim
+
+3.3
+3.3
+3.3
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+0.3
+1.3
+2.3
+3.3
+4.3
+
+0.3
+1.3
+2.3
+3.3
+
+Slice giving Simple array
+
+2.3
+2.3
+
+2.0
+2.1
+2.2
+2.3
+
+2.0
+2.1
+2.2
+2.3
+
+Same slice Via trait
+
+2.3
+2.3
+
+2.0
+2.1
+2.2
+2.3
+
+2.0
+2.1
+2.2
+2.3
+
+Strided slice Via trait
+
+3.2
+3.2
+
+0.2
+1.2
+2.2
+3.2
+4.2
+
+0.2
+1.2
+2.2
+3.2
+
Index: tests/array-container/array-basic.cfa
===================================================================
--- tests/array-container/array-basic.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ tests/array-container/array-basic.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -78,8 +78,8 @@
 }
 
-forall( A & | ar(A, float) )
+forall( [N], A & | ar(A, float, N) )
 float total1d_hi( A & a ) {
     float total = 0.0f;
-    for (i; a`len)
+    for (i; N)
         total += a[i];
     return total;
Index: tests/array-container/array-sbscr-types.cfa
===================================================================
--- tests/array-container/array-sbscr-types.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/array-container/array-sbscr-types.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,135 @@
+#include <containers/array.hfa>
+
+// Shows support for many required ways a user can index into a new array.
+//
+// A successful run of this test on 32 bit is necessary before concluding
+// that a relevant change has good quality, even though the test has no
+// differentiated 64/32-bit versions.
+//
+// Repetition, within this test, between indexing directly into an `array(...)`
+// and indexing into a `A`, as in `forall(A...|ar(A...))`, represents indexing
+// into a (statically programmer-known) contiguous view, and a (potentially)
+// noncontiguous view, respectively.  Users obtain noncontiguous views by
+// slicing or transposing higher-dimensional arrays.  The limited uses of
+// `a[..., all, ...]` within this test create such situations.  Working via
+// the `ar` trait is the first of two ways that users depend on the array
+// implementation tunneling subscript operators through the CFA assertion
+// system.
+//
+// This test uses the `a[i,j]` form for subscripting higher-dimensional arrays,
+// which is the "new" form, compared with the C-style `a[i][j]` form.  The
+// "new" subscripting form is the second of two ways that users depend on the
+// array implementation tunneling subscript operators through the CFA
+// assertion system.
+//
+// This test covers types and syntactic forms that can convey a numeric value
+// to `a[-]` or `a[-,-,-]`.  The array-md-sbscr-cases test covers combinations
+// of `a[i][j,k]` vs `a[i,j,k]` and `a[all,3][42]` vs `a[42,3]`, though
+// generally using ptrdiff_t-typed variables to convey numeric values.
+
+
+#define show( expr ) printf( "%.1f\n", expr )
+
+#define singleDimTestBody(testName) {                      \
+                                                           \
+    printf(testName "\n\n");                               \
+                                                           \
+    assert( 3 < N );                                       \
+                                                           \
+    show( a[i1] );                                         \
+    show( a[i2] );                                         \
+    printf("\n");                                          \
+                                                           \
+    for( i_dynbounded; N ) show( a[i_dynbounded] );        \
+    printf("\n");                                          \
+                                                           \
+    for( i_stabounded; 4 ) show( a[i_stabounded] );        \
+    printf("\n");                                          \
+}
+
+forall( [N] )
+void test_common_arg_types(array(float, N) & a, ptrdiff_t i1, size_t i2)
+    singleDimTestBody("Simple array")
+
+forall( [N], A& | ar(A, float, N) )
+void test_common_arg_types__via_trait(A & a, ptrdiff_t i1, size_t i2)
+    singleDimTestBody("Via trait")
+
+void do1dimTest() {
+    array(float, 5) a;
+    a[0] = 100.0;
+    a[1] = 100.1;
+    a[2] = 100.2;
+    a[3] = 100.3;
+    a[4] = 100.4;
+
+    test_common_arg_types(a, 3, 3);
+    test_common_arg_types__via_trait(a, 3, 3);
+}
+
+#define multiDimTestBody(testName) {                         \
+                                                             \
+    printf(testName "\n\n");                                 \
+                                                             \
+    assert( 3 < M );                                         \
+    assert( 3 < N );                                         \
+                                                             \
+    show(( a[x1,x1] ));                                      \
+    show(( a[x1,x2] ));                                      \
+    show(( a[x2,x1] ));                                      \
+    show(( a[x2,x2] ));                                      \
+    printf("\n");                                            \
+                                                             \
+    for( i_dynbounded; M ) show(( a[i_dynbounded, 3] ));     \
+    printf("\n");                                            \
+                                                             \
+    for( i_stabounded; 4 ) show(( a[i_stabounded, 3] ));     \
+    printf("\n");                                            \
+                                                             \
+    for( j_dynbounded; N ) show(( a[3, j_dynbounded] ));     \
+    printf("\n");                                            \
+                                                             \
+    for( j_stabounded; 4 ) show(( a[3, j_stabounded] ));     \
+    printf("\n");                                            \
+}
+
+forall( [M], [N] )
+void test_common_arg_types__md(array(float, M, N) & a, ptrdiff_t x1, size_t x2)
+    multiDimTestBody("Simple array, multidim")
+
+
+forall( [M], [N], A_outer &, A_inner & | ar(A_outer, A_inner, M) | ar(A_inner, float, N) )
+void test_common_arg_types__md__via_trait(A_outer & a, ptrdiff_t x1, size_t x2)
+    multiDimTestBody("Via trait, multidim")
+
+
+void doMdimTest() {
+
+    array(float, 5, 4) b;
+    b[ix0,ix0] = 0.0; b[ix0,1] = 0.1; b[ix0,2] = 0.2; b[ix0,3] = 0.3;
+    b[  1,ix0] = 1.0; b[  1,1] = 1.1; b[  1,2] = 1.2; b[  1,3] = 1.3;
+    b[  2,ix0] = 2.0; b[  2,1] = 2.1; b[  2,2] = 2.2; b[  2,3] = 2.3;
+    b[  3,ix0] = 3.0; b[  3,1] = 3.1; b[  3,2] = 3.2; b[  3,3] = 3.3;
+    b[  4,ix0] = 4.0; b[  4,1] = 4.1; b[  4,2] = 4.2; b[  4,3] = 4.3;
+
+    test_common_arg_types__md(b, 3, 3);
+    test_common_arg_types__md__via_trait(b, 3, 3);
+    printf("Transposed, ");
+    test_common_arg_types__md__via_trait(b[all], 3, 3);
+
+    printf("Slice giving ");
+    test_common_arg_types(b[2], 3, 3);
+
+    printf("Same slice ");
+    test_common_arg_types__via_trait(b[2], 3, 3);
+
+    printf("Strided slice ");
+    test_common_arg_types__via_trait(b[all,2], 3, 3);
+}
+
+int main() {
+
+    // can't be inlined in same func due to Trac #175.
+    do1dimTest();
+    doMdimTest();
+}
Index: tests/collections/atomic_mpsc.cfa
===================================================================
--- tests/collections/atomic_mpsc.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ tests/collections/atomic_mpsc.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -1,4 +1,4 @@
 #include <fstream.hfa>
-#include <queueLockFree.hfa>
+#include <containers/lockfree.hfa>
 #include <thread.hfa>
 
Index: tests/concurrent/.expect/migrate.txt
===================================================================
--- tests/concurrent/.expect/migrate.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/.expect/migrate.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,1 @@
+done
Index: tests/concurrent/.expect/once.txt
===================================================================
--- tests/concurrent/.expect/once.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/.expect/once.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,2 @@
+starting
+done
Index: tests/concurrent/migrate.cfa
===================================================================
--- tests/concurrent/migrate.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/migrate.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,85 @@
+#include <fstream.hfa>
+#include <kernel.hfa>
+#include <thread.hfa>
+
+#include <stdatomic.h>
+#include <assert.h>
+
+struct cluster_wrapper {
+	cluster self;
+	const uint64_t canary;
+	struct {
+		volatile uint64_t want;
+		volatile uint64_t have;
+	} checksum;
+};
+
+void ?{}( cluster_wrapper & this ) {
+	(this.self){};
+	(*(uint64_t *)&this.canary) = 0xDEAD2BADDEAD2BAD;
+	this.checksum.want = 0;
+	this.checksum.have = 0;
+}
+
+void ^?{}( cluster_wrapper & this ) {
+	assert(this.canary == 0xDEAD2BADDEAD2BAD);
+}
+
+static cluster_wrapper * the_clusters;
+static unsigned cluster_cnt;
+
+thread MyThread {
+
+};
+
+void ?{}( MyThread & this ) {}
+
+void checkcl( MyThread & this, cluster * cl) {
+	if(((thread&)this).curr_cluster != cl) {
+		abort | "Thread has unexpected cluster";
+	}
+}
+
+void main( MyThread & this ) {
+	waitfor( migrate : this ) {
+		assert( ((thread&)this).curr_cluster == active_cluster() );
+		assert( ((thread&)this).curr_cluster == active_processor()->cltr );
+	}
+
+	struct cluster_wrapper * curr = (struct cluster_wrapper *)&the_clusters[0];
+
+	for(100) {
+		unsigned idx = prng( this, cluster_cnt );
+
+		struct cluster_wrapper * next = &the_clusters[ idx ];
+		assert(next->canary == 0xDEAD2BADDEAD2BAD);
+
+		// next->
+
+		migrate( this, next->self );
+
+		assert( active_cluster() == &next->self );
+		assert( ((thread&)this).curr_cluster == active_cluster() );
+		assert( ((thread&)this).curr_cluster == active_processor()->cltr );
+	}
+}
+
+int main() {
+	cluster_cnt = 3;
+	cluster_wrapper cl[cluster_cnt];
+	the_clusters = cl;
+
+	{
+		set_concurrency( cl[0].self, 2 );
+		set_concurrency( cl[1].self, 2 );
+		set_concurrency( cl[2].self, 1 );
+
+		MyThread threads[17];
+		for(i;17) {
+			migrate( threads[i], cl[0].self );
+		}
+
+	}
+	// print something so that the .expect file is non-empty
+	printf( "done\n" );
+}
Index: tests/concurrent/once.cfa
===================================================================
--- tests/concurrent/once.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/once.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,44 @@
+#include <barrier.hfa>
+#include <fstream.hfa>
+#include <kernel.hfa>
+#include <once.hfa>
+#include <thread.hfa>
+
+once_flag global;
+
+volatile int check;
+
+void reset() {
+	(global){};
+	check = 0;
+}
+
+void must_once(void) {
+	int prev = __atomic_fetch_add( &check, 1, __ATOMIC_SEQ_CST );
+	if(prev != 0) {
+		abort | "'must_once' appears to have been called more than once, check was" | prev;
+	}
+}
+
+barrier barr = { 11 };
+
+thread Tester {};
+
+void main( Tester & this ) {
+	for(500) {
+		block( barr, reset );
+
+		// sometimes yields
+		yield(prng(this, 3));
+	}
+}
+
+int main() {
+	processor p[2];
+
+	sout | "starting";
+	{
+		Tester testers[11];
+	}
+	sout | "done";
+}
Index: tests/concurrent/pthread/.expect/bounded_buffer.txt
===================================================================
--- tests/concurrent/pthread/.expect/bounded_buffer.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/.expect/bounded_buffer.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,2 @@
+producer total value is 23426
+consumer total value is 23426
Index: tests/concurrent/pthread/.expect/pthread_attr_test.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_attr_test.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/.expect/pthread_attr_test.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,1 @@
+stack size is 123456789
Index: tests/concurrent/pthread/.expect/pthread_cond_test.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_cond_test.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/.expect/pthread_cond_test.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,1001 @@
+S1 done 0
+S1 done 1
+S1 done 2
+S1 done 3
+S1 done 4
+S1 done 5
+S1 done 6
+S1 done 7
+S1 done 8
+S1 done 9
+S1 done 10
+S1 done 11
+S1 done 12
+S1 done 13
+S1 done 14
+S1 done 15
+S1 done 16
+S1 done 17
+S1 done 18
+S1 done 19
+S1 done 20
+S1 done 21
+S1 done 22
+S1 done 23
+S1 done 24
+S1 done 25
+S1 done 26
+S1 done 27
+S1 done 28
+S1 done 29
+S1 done 30
+S1 done 31
+S1 done 32
+S1 done 33
+S1 done 34
+S1 done 35
+S1 done 36
+S1 done 37
+S1 done 38
+S1 done 39
+S1 done 40
+S1 done 41
+S1 done 42
+S1 done 43
+S1 done 44
+S1 done 45
+S1 done 46
+S1 done 47
+S1 done 48
+S1 done 49
+S1 done 50
+S1 done 51
+S1 done 52
+S1 done 53
+S1 done 54
+S1 done 55
+S1 done 56
+S1 done 57
+S1 done 58
+S1 done 59
+S1 done 60
+S1 done 61
+S1 done 62
+S1 done 63
+S1 done 64
+S1 done 65
+S1 done 66
+S1 done 67
+S1 done 68
+S1 done 69
+S1 done 70
+S1 done 71
+S1 done 72
+S1 done 73
+S1 done 74
+S1 done 75
+S1 done 76
+S1 done 77
+S1 done 78
+S1 done 79
+S1 done 80
+S1 done 81
+S1 done 82
+S1 done 83
+S1 done 84
+S1 done 85
+S1 done 86
+S1 done 87
+S1 done 88
+S1 done 89
+S1 done 90
+S1 done 91
+S1 done 92
+S1 done 93
+S1 done 94
+S1 done 95
+S1 done 96
+S1 done 97
+S1 done 98
+S1 done 99
+S1 done 100
+S1 done 101
+S1 done 102
+S1 done 103
+S1 done 104
+S1 done 105
+S1 done 106
+S1 done 107
+S1 done 108
+S1 done 109
+S1 done 110
+S1 done 111
+S1 done 112
+S1 done 113
+S1 done 114
+S1 done 115
+S1 done 116
+S1 done 117
+S1 done 118
+S1 done 119
+S1 done 120
+S1 done 121
+S1 done 122
+S1 done 123
+S1 done 124
+S1 done 125
+S1 done 126
+S1 done 127
+S1 done 128
+S1 done 129
+S1 done 130
+S1 done 131
+S1 done 132
+S1 done 133
+S1 done 134
+S1 done 135
+S1 done 136
+S1 done 137
+S1 done 138
+S1 done 139
+S1 done 140
+S1 done 141
+S1 done 142
+S1 done 143
+S1 done 144
+S1 done 145
+S1 done 146
+S1 done 147
+S1 done 148
+S1 done 149
+S1 done 150
+S1 done 151
+S1 done 152
+S1 done 153
+S1 done 154
+S1 done 155
+S1 done 156
+S1 done 157
+S1 done 158
+S1 done 159
+S1 done 160
+S1 done 161
+S1 done 162
+S1 done 163
+S1 done 164
+S1 done 165
+S1 done 166
+S1 done 167
+S1 done 168
+S1 done 169
+S1 done 170
+S1 done 171
+S1 done 172
+S1 done 173
+S1 done 174
+S1 done 175
+S1 done 176
+S1 done 177
+S1 done 178
+S1 done 179
+S1 done 180
+S1 done 181
+S1 done 182
+S1 done 183
+S1 done 184
+S1 done 185
+S1 done 186
+S1 done 187
+S1 done 188
+S1 done 189
+S1 done 190
+S1 done 191
+S1 done 192
+S1 done 193
+S1 done 194
+S1 done 195
+S1 done 196
+S1 done 197
+S1 done 198
+S1 done 199
+S1 done 200
+S1 done 201
+S1 done 202
+S1 done 203
+S1 done 204
+S1 done 205
+S1 done 206
+S1 done 207
+S1 done 208
+S1 done 209
+S1 done 210
+S1 done 211
+S1 done 212
+S1 done 213
+S1 done 214
+S1 done 215
+S1 done 216
+S1 done 217
+S1 done 218
+S1 done 219
+S1 done 220
+S1 done 221
+S1 done 222
+S1 done 223
+S1 done 224
+S1 done 225
+S1 done 226
+S1 done 227
+S1 done 228
+S1 done 229
+S1 done 230
+S1 done 231
+S1 done 232
+S1 done 233
+S1 done 234
+S1 done 235
+S1 done 236
+S1 done 237
+S1 done 238
+S1 done 239
+S1 done 240
+S1 done 241
+S1 done 242
+S1 done 243
+S1 done 244
+S1 done 245
+S1 done 246
+S1 done 247
+S1 done 248
+S1 done 249
+S1 done 250
+S1 done 251
+S1 done 252
+S1 done 253
+S1 done 254
+S1 done 255
+S1 done 256
+S1 done 257
+S1 done 258
+S1 done 259
+S1 done 260
+S1 done 261
+S1 done 262
+S1 done 263
+S1 done 264
+S1 done 265
+S1 done 266
+S1 done 267
+S1 done 268
+S1 done 269
+S1 done 270
+S1 done 271
+S1 done 272
+S1 done 273
+S1 done 274
+S1 done 275
+S1 done 276
+S1 done 277
+S1 done 278
+S1 done 279
+S1 done 280
+S1 done 281
+S1 done 282
+S1 done 283
+S1 done 284
+S1 done 285
+S1 done 286
+S1 done 287
+S1 done 288
+S1 done 289
+S1 done 290
+S1 done 291
+S1 done 292
+S1 done 293
+S1 done 294
+S1 done 295
+S1 done 296
+S1 done 297
+S1 done 298
+S1 done 299
+S1 done 300
+S1 done 301
+S1 done 302
+S1 done 303
+S1 done 304
+S1 done 305
+S1 done 306
+S1 done 307
+S1 done 308
+S1 done 309
+S1 done 310
+S1 done 311
+S1 done 312
+S1 done 313
+S1 done 314
+S1 done 315
+S1 done 316
+S1 done 317
+S1 done 318
+S1 done 319
+S1 done 320
+S1 done 321
+S1 done 322
+S1 done 323
+S1 done 324
+S1 done 325
+S1 done 326
+S1 done 327
+S1 done 328
+S1 done 329
+S1 done 330
+S1 done 331
+S1 done 332
+S1 done 333
+S1 done 334
+S1 done 335
+S1 done 336
+S1 done 337
+S1 done 338
+S1 done 339
+S1 done 340
+S1 done 341
+S1 done 342
+S1 done 343
+S1 done 344
+S1 done 345
+S1 done 346
+S1 done 347
+S1 done 348
+S1 done 349
+S1 done 350
+S1 done 351
+S1 done 352
+S1 done 353
+S1 done 354
+S1 done 355
+S1 done 356
+S1 done 357
+S1 done 358
+S1 done 359
+S1 done 360
+S1 done 361
+S1 done 362
+S1 done 363
+S1 done 364
+S1 done 365
+S1 done 366
+S1 done 367
+S1 done 368
+S1 done 369
+S1 done 370
+S1 done 371
+S1 done 372
+S1 done 373
+S1 done 374
+S1 done 375
+S1 done 376
+S1 done 377
+S1 done 378
+S1 done 379
+S1 done 380
+S1 done 381
+S1 done 382
+S1 done 383
+S1 done 384
+S1 done 385
+S1 done 386
+S1 done 387
+S1 done 388
+S1 done 389
+S1 done 390
+S1 done 391
+S1 done 392
+S1 done 393
+S1 done 394
+S1 done 395
+S1 done 396
+S1 done 397
+S1 done 398
+S1 done 399
+S1 done 400
+S1 done 401
+S1 done 402
+S1 done 403
+S1 done 404
+S1 done 405
+S1 done 406
+S1 done 407
+S1 done 408
+S1 done 409
+S1 done 410
+S1 done 411
+S1 done 412
+S1 done 413
+S1 done 414
+S1 done 415
+S1 done 416
+S1 done 417
+S1 done 418
+S1 done 419
+S1 done 420
+S1 done 421
+S1 done 422
+S1 done 423
+S1 done 424
+S1 done 425
+S1 done 426
+S1 done 427
+S1 done 428
+S1 done 429
+S1 done 430
+S1 done 431
+S1 done 432
+S1 done 433
+S1 done 434
+S1 done 435
+S1 done 436
+S1 done 437
+S1 done 438
+S1 done 439
+S1 done 440
+S1 done 441
+S1 done 442
+S1 done 443
+S1 done 444
+S1 done 445
+S1 done 446
+S1 done 447
+S1 done 448
+S1 done 449
+S1 done 450
+S1 done 451
+S1 done 452
+S1 done 453
+S1 done 454
+S1 done 455
+S1 done 456
+S1 done 457
+S1 done 458
+S1 done 459
+S1 done 460
+S1 done 461
+S1 done 462
+S1 done 463
+S1 done 464
+S1 done 465
+S1 done 466
+S1 done 467
+S1 done 468
+S1 done 469
+S1 done 470
+S1 done 471
+S1 done 472
+S1 done 473
+S1 done 474
+S1 done 475
+S1 done 476
+S1 done 477
+S1 done 478
+S1 done 479
+S1 done 480
+S1 done 481
+S1 done 482
+S1 done 483
+S1 done 484
+S1 done 485
+S1 done 486
+S1 done 487
+S1 done 488
+S1 done 489
+S1 done 490
+S1 done 491
+S1 done 492
+S1 done 493
+S1 done 494
+S1 done 495
+S1 done 496
+S1 done 497
+S1 done 498
+S1 done 499
+S1 done 500
+S1 done 501
+S1 done 502
+S1 done 503
+S1 done 504
+S1 done 505
+S1 done 506
+S1 done 507
+S1 done 508
+S1 done 509
+S1 done 510
+S1 done 511
+S1 done 512
+S1 done 513
+S1 done 514
+S1 done 515
+S1 done 516
+S1 done 517
+S1 done 518
+S1 done 519
+S1 done 520
+S1 done 521
+S1 done 522
+S1 done 523
+S1 done 524
+S1 done 525
+S1 done 526
+S1 done 527
+S1 done 528
+S1 done 529
+S1 done 530
+S1 done 531
+S1 done 532
+S1 done 533
+S1 done 534
+S1 done 535
+S1 done 536
+S1 done 537
+S1 done 538
+S1 done 539
+S1 done 540
+S1 done 541
+S1 done 542
+S1 done 543
+S1 done 544
+S1 done 545
+S1 done 546
+S1 done 547
+S1 done 548
+S1 done 549
+S1 done 550
+S1 done 551
+S1 done 552
+S1 done 553
+S1 done 554
+S1 done 555
+S1 done 556
+S1 done 557
+S1 done 558
+S1 done 559
+S1 done 560
+S1 done 561
+S1 done 562
+S1 done 563
+S1 done 564
+S1 done 565
+S1 done 566
+S1 done 567
+S1 done 568
+S1 done 569
+S1 done 570
+S1 done 571
+S1 done 572
+S1 done 573
+S1 done 574
+S1 done 575
+S1 done 576
+S1 done 577
+S1 done 578
+S1 done 579
+S1 done 580
+S1 done 581
+S1 done 582
+S1 done 583
+S1 done 584
+S1 done 585
+S1 done 586
+S1 done 587
+S1 done 588
+S1 done 589
+S1 done 590
+S1 done 591
+S1 done 592
+S1 done 593
+S1 done 594
+S1 done 595
+S1 done 596
+S1 done 597
+S1 done 598
+S1 done 599
+S1 done 600
+S1 done 601
+S1 done 602
+S1 done 603
+S1 done 604
+S1 done 605
+S1 done 606
+S1 done 607
+S1 done 608
+S1 done 609
+S1 done 610
+S1 done 611
+S1 done 612
+S1 done 613
+S1 done 614
+S1 done 615
+S1 done 616
+S1 done 617
+S1 done 618
+S1 done 619
+S1 done 620
+S1 done 621
+S1 done 622
+S1 done 623
+S1 done 624
+S1 done 625
+S1 done 626
+S1 done 627
+S1 done 628
+S1 done 629
+S1 done 630
+S1 done 631
+S1 done 632
+S1 done 633
+S1 done 634
+S1 done 635
+S1 done 636
+S1 done 637
+S1 done 638
+S1 done 639
+S1 done 640
+S1 done 641
+S1 done 642
+S1 done 643
+S1 done 644
+S1 done 645
+S1 done 646
+S1 done 647
+S1 done 648
+S1 done 649
+S1 done 650
+S1 done 651
+S1 done 652
+S1 done 653
+S1 done 654
+S1 done 655
+S1 done 656
+S1 done 657
+S1 done 658
+S1 done 659
+S1 done 660
+S1 done 661
+S1 done 662
+S1 done 663
+S1 done 664
+S1 done 665
+S1 done 666
+S1 done 667
+S1 done 668
+S1 done 669
+S1 done 670
+S1 done 671
+S1 done 672
+S1 done 673
+S1 done 674
+S1 done 675
+S1 done 676
+S1 done 677
+S1 done 678
+S1 done 679
+S1 done 680
+S1 done 681
+S1 done 682
+S1 done 683
+S1 done 684
+S1 done 685
+S1 done 686
+S1 done 687
+S1 done 688
+S1 done 689
+S1 done 690
+S1 done 691
+S1 done 692
+S1 done 693
+S1 done 694
+S1 done 695
+S1 done 696
+S1 done 697
+S1 done 698
+S1 done 699
+S1 done 700
+S1 done 701
+S1 done 702
+S1 done 703
+S1 done 704
+S1 done 705
+S1 done 706
+S1 done 707
+S1 done 708
+S1 done 709
+S1 done 710
+S1 done 711
+S1 done 712
+S1 done 713
+S1 done 714
+S1 done 715
+S1 done 716
+S1 done 717
+S1 done 718
+S1 done 719
+S1 done 720
+S1 done 721
+S1 done 722
+S1 done 723
+S1 done 724
+S1 done 725
+S1 done 726
+S1 done 727
+S1 done 728
+S1 done 729
+S1 done 730
+S1 done 731
+S1 done 732
+S1 done 733
+S1 done 734
+S1 done 735
+S1 done 736
+S1 done 737
+S1 done 738
+S1 done 739
+S1 done 740
+S1 done 741
+S1 done 742
+S1 done 743
+S1 done 744
+S1 done 745
+S1 done 746
+S1 done 747
+S1 done 748
+S1 done 749
+S1 done 750
+S1 done 751
+S1 done 752
+S1 done 753
+S1 done 754
+S1 done 755
+S1 done 756
+S1 done 757
+S1 done 758
+S1 done 759
+S1 done 760
+S1 done 761
+S1 done 762
+S1 done 763
+S1 done 764
+S1 done 765
+S1 done 766
+S1 done 767
+S1 done 768
+S1 done 769
+S1 done 770
+S1 done 771
+S1 done 772
+S1 done 773
+S1 done 774
+S1 done 775
+S1 done 776
+S1 done 777
+S1 done 778
+S1 done 779
+S1 done 780
+S1 done 781
+S1 done 782
+S1 done 783
+S1 done 784
+S1 done 785
+S1 done 786
+S1 done 787
+S1 done 788
+S1 done 789
+S1 done 790
+S1 done 791
+S1 done 792
+S1 done 793
+S1 done 794
+S1 done 795
+S1 done 796
+S1 done 797
+S1 done 798
+S1 done 799
+S1 done 800
+S1 done 801
+S1 done 802
+S1 done 803
+S1 done 804
+S1 done 805
+S1 done 806
+S1 done 807
+S1 done 808
+S1 done 809
+S1 done 810
+S1 done 811
+S1 done 812
+S1 done 813
+S1 done 814
+S1 done 815
+S1 done 816
+S1 done 817
+S1 done 818
+S1 done 819
+S1 done 820
+S1 done 821
+S1 done 822
+S1 done 823
+S1 done 824
+S1 done 825
+S1 done 826
+S1 done 827
+S1 done 828
+S1 done 829
+S1 done 830
+S1 done 831
+S1 done 832
+S1 done 833
+S1 done 834
+S1 done 835
+S1 done 836
+S1 done 837
+S1 done 838
+S1 done 839
+S1 done 840
+S1 done 841
+S1 done 842
+S1 done 843
+S1 done 844
+S1 done 845
+S1 done 846
+S1 done 847
+S1 done 848
+S1 done 849
+S1 done 850
+S1 done 851
+S1 done 852
+S1 done 853
+S1 done 854
+S1 done 855
+S1 done 856
+S1 done 857
+S1 done 858
+S1 done 859
+S1 done 860
+S1 done 861
+S1 done 862
+S1 done 863
+S1 done 864
+S1 done 865
+S1 done 866
+S1 done 867
+S1 done 868
+S1 done 869
+S1 done 870
+S1 done 871
+S1 done 872
+S1 done 873
+S1 done 874
+S1 done 875
+S1 done 876
+S1 done 877
+S1 done 878
+S1 done 879
+S1 done 880
+S1 done 881
+S1 done 882
+S1 done 883
+S1 done 884
+S1 done 885
+S1 done 886
+S1 done 887
+S1 done 888
+S1 done 889
+S1 done 890
+S1 done 891
+S1 done 892
+S1 done 893
+S1 done 894
+S1 done 895
+S1 done 896
+S1 done 897
+S1 done 898
+S1 done 899
+S1 done 900
+S1 done 901
+S1 done 902
+S1 done 903
+S1 done 904
+S1 done 905
+S1 done 906
+S1 done 907
+S1 done 908
+S1 done 909
+S1 done 910
+S1 done 911
+S1 done 912
+S1 done 913
+S1 done 914
+S1 done 915
+S1 done 916
+S1 done 917
+S1 done 918
+S1 done 919
+S1 done 920
+S1 done 921
+S1 done 922
+S1 done 923
+S1 done 924
+S1 done 925
+S1 done 926
+S1 done 927
+S1 done 928
+S1 done 929
+S1 done 930
+S1 done 931
+S1 done 932
+S1 done 933
+S1 done 934
+S1 done 935
+S1 done 936
+S1 done 937
+S1 done 938
+S1 done 939
+S1 done 940
+S1 done 941
+S1 done 942
+S1 done 943
+S1 done 944
+S1 done 945
+S1 done 946
+S1 done 947
+S1 done 948
+S1 done 949
+S1 done 950
+S1 done 951
+S1 done 952
+S1 done 953
+S1 done 954
+S1 done 955
+S1 done 956
+S1 done 957
+S1 done 958
+S1 done 959
+S1 done 960
+S1 done 961
+S1 done 962
+S1 done 963
+S1 done 964
+S1 done 965
+S1 done 966
+S1 done 967
+S1 done 968
+S1 done 969
+S1 done 970
+S1 done 971
+S1 done 972
+S1 done 973
+S1 done 974
+S1 done 975
+S1 done 976
+S1 done 977
+S1 done 978
+S1 done 979
+S1 done 980
+S1 done 981
+S1 done 982
+S1 done 983
+S1 done 984
+S1 done 985
+S1 done 986
+S1 done 987
+S1 done 988
+S1 done 989
+S1 done 990
+S1 done 991
+S1 done 992
+S1 done 993
+S1 done 994
+S1 done 995
+S1 done 996
+S1 done 997
+S1 done 998
+S1 done 999
+S2 statement done!
Index: tests/concurrent/pthread/.expect/pthread_demo_create_join.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_demo_create_join.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/.expect/pthread_demo_create_join.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,1 @@
+final res is 190
Index: tests/concurrent/pthread/.expect/pthread_demo_lock.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_demo_lock.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/.expect/pthread_demo_lock.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,6 @@
+lock res is 2000000000
+in trylocktest1 res1 is 0
+in trylocktest1 res2 is 0
+in trylocktest2 res1 is 16
+in trylocktest2 res2 is 16
+cnt_trylock is 200000000
Index: tests/concurrent/pthread/.expect/pthread_key_test.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_key_test.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/.expect/pthread_key_test.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,6 @@
+Destructor function invoked
+Destructor function invoked
+Destructor function invoked
+Destructor function invoked
+Destructor function invoked
+total value is 207, total value by pthread_getspecific is 207
Index: tests/concurrent/pthread/.expect/pthread_once_test.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_once_test.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/.expect/pthread_once_test.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,1 @@
+in once_fn
Index: tests/concurrent/pthread/bounded_buffer.cfa
===================================================================
--- tests/concurrent/pthread/bounded_buffer.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/bounded_buffer.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,156 @@
+#include <stdlib.h>										// prototype: rand
+#include <fstream.hfa>
+#include <thread.hfa>
+#include <pthread.h>
+#include <errno.h>
+// tested pthread mutex related routines, pthread cond related routines
+// tested pthread_create/join
+
+enum { BufferSize = 20 };								// ring capacity; equals the limit insert/remove have always enforced
+
+volatile int producer_val_total;						// sum of all produced items, updated under producer_cnt_lock
+volatile int consumer_val_total;						// sum of all consumed items, updated under consumer_cnt_lock
+
+pthread_mutex_t producer_cnt_lock, consumer_cnt_lock;
+
+// Bounded ring buffer of T guarded by one pthread mutex and two condition variables.
+forall( T ){
+    struct Buffer
+    {
+        int front, back, count;							// remove index, insert index, number of stored elements
+		T elements[BufferSize];
+        pthread_mutex_t _mutex;
+	    pthread_cond_t Full, Empty;							// waiting consumers & producers
+    };
+    // Constructor: empty buffer with initialized mutex and condition variables.
+    void ?{}( Buffer(T) & buffer ) with( buffer ) {
+         [front, back, count] = 0;
+         pthread_mutex_init(&_mutex, NULL);
+         pthread_cond_init(&Full, NULL);
+         pthread_cond_init(&Empty, NULL);
+    }
+    // Destructor: releases the mutex and both condition variables.
+    void ^?{}( Buffer(T) & buffer ) with( buffer ){
+        pthread_mutex_destroy(&_mutex);
+        pthread_cond_destroy(&Full);
+        pthread_cond_destroy(&Empty);
+    }
+    // Returns the current element count without taking the lock.
+    int query( Buffer(T) & buffer ) { return buffer.count; } // read-only, no mutual exclusion
+    // Appends elem at the back, blocking while the buffer holds BufferSize elements.
+    void insert( Buffer(T) & buffer, T elem ) with(buffer) {
+		pthread_mutex_lock(&_mutex);
+		while ( count == BufferSize ) pthread_cond_wait( &Empty, &_mutex ); // block producer
+		elements[back] = elem;
+		back = ( back + 1 ) % BufferSize;				// wrap around the ring
+		count += 1;
+		pthread_cond_signal( &Full );					// unblock consumer
+        pthread_mutex_unlock(&_mutex);
+	}
+    // Removes and returns the front element, blocking while the buffer is empty.
+    T remove(Buffer(T) & buffer) with(buffer) {
+		pthread_mutex_lock(&_mutex);
+		while ( count == 0 ) pthread_cond_wait( &Full, &_mutex ); // block consumer
+		T elem = elements[front];
+		front = ( front + 1 ) % BufferSize;				// wrap around the ring
+		count -= 1;
+		pthread_cond_signal( &Empty );					// unblock producer
+        pthread_mutex_unlock(&_mutex);
+		return elem;
+	}
+
+}
+
+void *producer( void *arg ) {							// pthread start routine; arg is a Buffer(int) *
+	Buffer(int) &buf = *(Buffer(int)*)arg;
+	const int NoOfItems = rand() % 40;					// this producer inserts 0..39 items
+	int item;
+	for ( int i = 1; i <= NoOfItems; i += 1 ) {			// produce a bunch of items
+		item = rand() % 100 + 1;						// value in 1..100, so never the -1 stop value
+		//sout | "Producer:" | pthread_self() | " value:" | item;
+		insert( buf,item );								// insert element into queue
+        pthread_mutex_lock(&producer_cnt_lock);			// serialize updates to the shared total
+        producer_val_total += item;
+        pthread_mutex_unlock(&producer_cnt_lock);
+	} // for
+	//sout | "Producer:" | pthread_self() | " is finished";
+	return NULL;
+} // producer
+
+void *consumer( void *arg ) {							// pthread start routine; arg is a Buffer(int) *
+	Buffer(int) &buf = *(Buffer(int) *)arg;
+	int item;
+	for ( ;; ) {										// consume until the value -1 is removed
+		item = remove(buf);							// remove from front of queue
+		//sout | "Consumer:" | pthread_self() | " value:" | item;
+	  if ( item == -1 ) break;							// -1 is the stop value; it is not added to the total
+        pthread_mutex_lock(&consumer_cnt_lock);			// serialize updates to the shared total
+        consumer_val_total += item;
+        pthread_mutex_unlock(&consumer_cnt_lock);
+	} // for
+	//sout | "Consumer:" | pthread_self() | " is finished";
+	return NULL;
+} // consumer
+
+int main() {
+	const int NoOfCons = 20, NoOfProds = 30;
+	Buffer(int) buf;								// shared bounded buffer handed to every thread
+	pthread_t cons[NoOfCons];							// consumer thread ids
+	pthread_t prods[NoOfProds];							// producer thread ids
+    pthread_mutex_init(&producer_cnt_lock, NULL);
+    pthread_mutex_init(&consumer_cnt_lock, NULL);
+	// NOTE(review): this seeds with srandom() while the producers draw from rand() - confirm they share a generator
+    srandom( 1003 );
+
+	processor p[5];									// additional CFA processors; presumably for parallelism
+    {
+        // create/join and mutex/condition test
+        //sout | "create/join and mutex/condition test";
+        for ( int i = 0; i < NoOfCons; i += 1 ) {			// create consumers
+            if ( pthread_create( &cons[i], NULL, consumer, (void*)&buf ) != 0 ) {
+                sout | "create thread failure, errno:" | errno;	// NOTE(review): pthread_create returns its error code; errno may be stale
+                exit( EXIT_FAILURE );
+            } // if
+        } // for
+        for ( int i = 0; i < NoOfProds; i += 1 ) {			// 	create producers
+            if ( pthread_create( &prods[i], NULL, producer, (void*)&buf ) != 0 ) {
+                sout | "create thread failure";
+                exit( EXIT_FAILURE );
+            } // if
+        } // for
+
+        void *result;
+        for ( int i = 0; i < NoOfProds; i += 1 ) {			// wait for producers to end
+            if ( pthread_join( prods[i], &result ) != 0 ) {
+                sout | " producers join thread failure";
+                exit( EXIT_FAILURE );
+            } // if
+            if ( (uint64_t)result != 0 ) {				// producer returns NULL on success
+                sout | "producers" | prods[i] |" bad return value " | result;
+                exit( EXIT_FAILURE );
+            } // if
+            //sout | "join prods[" | i | "]:" | prods[i] | " result:" | result;
+        } // for
+
+        for ( int i = 0; i < NoOfCons; i += 1 ) {			// one -1 stop value per consumer
+            insert(buf, -1 );
+        } // for
+
+        for ( int i = 0; i < NoOfCons; i += 1 ) {			// wait for consumer to end
+            if ( pthread_join( cons[i], &result ) != 0 ) {
+                sout| "consumers join thread failure" ;
+                exit( EXIT_FAILURE );
+            } // if
+            if ( (uint64_t)result != 0 ) {				// consumer returns NULL on success
+                sout| "consumers bad return value" | result;
+                exit( EXIT_FAILURE );
+            } // if
+        } // for
+        sout | "producer total value is " | producer_val_total;
+        sout | "consumer total value is " | consumer_val_total;
+    }
+
+	
+
+	
+}
Index: tests/concurrent/pthread/pthread_attr_test.cfa
===================================================================
--- tests/concurrent/pthread/pthread_attr_test.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/pthread_attr_test.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,29 @@
+/* test attr init; set stack; get stack */
+
+#include <fstream.hfa>
+#include <thread.hfa>
+
+void* foo(void* _attr){                 // thread body: _attr points at a pthread_attr_t
+    size_t size;
+    pthread_attr_t* attr = (pthread_attr_t*)_attr;
+    int status = pthread_attr_getstacksize(attr, &size);   // read the stack size recorded in the attributes
+    if (status != 0){
+        sout | "error return code";
+        exit(1);
+    }
+    sout | "stack size is " | size;
+    return NULL;
+}
+
+int main(int argc, char const *argv[])  // argc/argv are not used
+{
+    pthread_attr_t attr;
+    pthread_attr_init(&attr);
+    pthread_attr_setstacksize(&attr, 123456789);    // NOTE(review): return value is not checked
+    pthread_t thr;
+    void* res;
+    pthread_create(&thr, &attr, foo, (void*)&attr); // the thread receives the same attr object it was created with
+    pthread_join(thr, &res);                        // attr is destroyed only after the thread has finished reading it
+    pthread_attr_destroy(&attr);
+    return 0;
+}
Index: tests/concurrent/pthread/pthread_cond_test.cfa
===================================================================
--- tests/concurrent/pthread/pthread_cond_test.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/pthread_cond_test.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,46 @@
+/* small test of pthread cond */
+
+#include <fstream.hfa>
+#include <thread.hfa>
+#include <pthread.h>
+
+int done_flag = 0;
+pthread_mutex_t _mutex;
+pthread_cond_t cond;
+
+extern "C"{
+    void* S1(void* arg){                    // signaller: prints 1000 lines, then sets done_flag
+        pthread_mutex_lock(&_mutex);
+        for (int i = 0; i < 1000; i++) sout | "S1 done " | i;
+        done_flag = 1;
+        pthread_mutex_unlock(&_mutex);
+        pthread_cond_signal(&cond);
+        return NULL;
+    }
+    void* S2(void* arg){                    // waiter: proceeds only once done_flag is set
+        pthread_mutex_lock(&_mutex);
+        // loop, not `if`: pthread_cond_wait may return spuriously, so re-test the predicate
+        while (!done_flag) pthread_cond_wait(&cond, &_mutex);
+        sout | "S2 statement done!";
+        pthread_mutex_unlock(&_mutex);
+        return NULL;
+    }
+}
+
+
+
+int main(int argc, char const *argv[])
+{
+    /* initialize the shared mutex/condition, run S1 and S2 to completion, then clean up */
+    pthread_mutex_init(&_mutex, NULL);
+    pthread_cond_init(&cond, NULL);
+    pthread_t s1,s2;
+    pthread_create(&s1, NULL, S1, NULL);
+    pthread_create(&s2, NULL, S2, NULL);
+    void* res = NULL;
+    pthread_join(s1, &res);
+    pthread_join(s2, &res);
+    pthread_mutex_destroy(&_mutex);     // both threads have been joined, so nothing uses these any more
+    pthread_cond_destroy(&cond);
+    return 0;
+}
Index: tests/concurrent/pthread/pthread_demo_create_join.cfa
===================================================================
--- tests/concurrent/pthread/pthread_demo_create_join.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/pthread_demo_create_join.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,41 @@
+#include <fstream.hfa>
+#include <thread.hfa>
+/* test pthread create/join/exit */
+
+int arr[20];
+
+void* fetch(void* idx){                 // thread body: idx carries an array index, not an address
+    int res = arr[(uint64_t)idx];
+    pthread_exit((void*)res);           // hand arr[idx] back to the joiner as the exit value
+    sout | "it should not be here";     // reached only if pthread_exit returned
+    exit(1);
+    //return (void*)res;
+}
+
+void arr_init(){                        // fill arr so that arr[i] == i
+    for (int i = 0; i < 20; i++){
+        arr[i] = i;
+    }
+}
+
+int main(int argc, char const *argv[])
+{
+    pthread_t threads[20];
+    arr_init();
+    int status;
+    for (int i = 0; i < 20; i++){       // one thread per index; the index travels in the pointer argument
+        status = pthread_create(&threads[i], NULL, fetch, (void*)i);
+        if (status != 0) exit(1);
+    }
+    int res = 0;
+    for (int i = 0; i < 20; i++){
+        void* _res = NULL;
+        status = pthread_join(threads[i], &_res);
+        if (status != 0) exit(2);
+        if (((uint64_t)_res) != i) exit(3);     // thread i must have exited with arr[i] == i
+        res += (uint64_t)_res;                  // 0 + 1 + ... + 19
+    }
+    sout | "final res is" | res;
+
+    return 0;
+}
Index: tests/concurrent/pthread/pthread_demo_lock.cfa
===================================================================
--- tests/concurrent/pthread/pthread_demo_lock.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/pthread_demo_lock.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,124 @@
+#include <fstream.hfa>
+#include <thread.hfa>
+/* 
+    test pthread_mutex to provide mutual exclusion
+    test pthread_mutex_trylock not block when lock is acquired by others; test pthread_mutex_trylock can acquire the lock
+*/
+volatile int cnt_nolock = 0;
+volatile int cnt_lock = 0;
+volatile int cnt_trylock = 0;
+extern "C"{
+    static pthread_mutex_t _mutex;
+}
+
+/* mutex pthread routine  */
+// unlocked increment: cnt carries the iteration count, not an address
+void* inc_unlock(void* cnt){
+    for (int i = 0; i < (uint64_t)cnt; i++){
+        cnt_nolock++;                   // no lock held while updating the shared counter
+    }   // for
+    return NULL;
+}   
+// locked increment: cnt carries the iteration count, not an address
+void* inc_lock(void* cnt){
+    pthread_mutex_lock(&_mutex);        // the lock is held across the whole loop, not per increment
+    for (int i = 0; i < (uint64_t)cnt; i++){
+        cnt_lock++;
+    }   // for
+    pthread_mutex_unlock(&_mutex);
+    return NULL;
+}
+
+/* unlocked variant: 20 threads increment cnt_nolock without a mutex (main does not call this) */
+void test_unlock(){
+    pthread_t threads[20];
+    for (int i = 0; i < 20; i++){
+        pthread_create(&threads[i], NULL, inc_unlock, (void*)100000000);
+    }
+    for (int i = 0; i < 20; i++){
+        void * res = NULL;
+        pthread_join(threads[i], &res);
+    }
+    sout | "unlock res is" | cnt_nolock;
+    cnt_nolock = 0;                     // reset the counter after reporting
+}
+extern "C"{
+    void test_lock(){                   // 20 threads each add 100000000 to cnt_lock under _mutex
+        pthread_mutex_init(&_mutex, NULL);
+        pthread_t threads[20];
+        for (int i = 0; i < 20; i++){
+            
+            pthread_create(&threads[i], NULL, inc_lock, (void*)100000000);
+        }
+        for (int i = 0; i < 20; i++){
+            void * res = NULL;
+            pthread_join(threads[i], &res);
+        }
+        sout | "lock res is" | cnt_lock;
+        pthread_mutex_destroy(&_mutex);
+        if (cnt_lock != 100000000 * 20) {   // any lost update means mutual exclusion failed
+            sout | "pthread mutex not working";
+            exit(1);
+        }
+        cnt_lock = 0;
+    }
+}
+
+
+/* mutex trylock pthread routine: started by trylock_test1, which has already called trylock on _mutex */
+void* trylock_test2(void* arg){
+    int res = pthread_mutex_trylock(&_mutex);   // the expected output records 16 for both attempts (presumably EBUSY)
+    sout | "in trylocktest2 res1 is" | res;
+    res = pthread_mutex_trylock(&_mutex);
+    sout | "in trylocktest2 res2 is" | res;
+    pthread_mutex_lock(&_mutex);                // blocking acquire before touching the counter
+    for (int i = 0; i < (uint64_t)arg; i++) cnt_trylock++;
+    pthread_mutex_unlock(&_mutex);
+    return NULL;
+}
+
+void* trylock_test1(void* arg){
+    int res = pthread_mutex_trylock(&_mutex);   // the expected output records 0 for both attempts
+    sout | "in trylocktest1 res1 is" | res;
+    res = pthread_mutex_trylock(&_mutex);       // second trylock by the same thread
+    sout | "in trylocktest1 res2 is" | res;
+    pthread_t task2;
+    pthread_create(&task2, NULL, trylock_test2, (void*)100000000);
+
+    // inc cnt then release the lock
+    for (int i = 0; i < (uint64_t)arg; i++) cnt_trylock++;
+    pthread_mutex_unlock(&_mutex);              // two unlocks, one per trylock call above
+    pthread_mutex_unlock(&_mutex);
+    void * dummy = NULL;
+    pthread_join(task2, &dummy);
+    sout | "cnt_trylock is " | cnt_trylock;
+    return NULL;
+}
+
+// trylock test: runs trylock_test1 (which starts trylock_test2) and checks the combined count
+void test_trylock(){
+    pthread_mutex_init(&_mutex, NULL);
+    pthread_t task1;
+    pthread_create(&task1, NULL, trylock_test1, (void*)100000000);
+    void * dummy = NULL;
+    pthread_join(task1,&dummy);
+    pthread_mutex_destroy(&_mutex);
+    if (cnt_trylock != 100000000 * 2) {     // both tasks add 100000000
+        sout | "pthread try mutex not working";
+        exit(1);
+    }
+    cnt_trylock = 0;
+}
+
+
+
+int main(int argc, char const *argv[])
+{
+    
+    // locked increment test (test_unlock is not run)
+    test_lock();
+    // test trylock
+    test_trylock();
+    
+    return 0;
+}
Index: tests/concurrent/pthread/pthread_key_test.cfa
===================================================================
--- tests/concurrent/pthread/pthread_key_test.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/pthread_key_test.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,126 @@
+
+/* test pthread_key_create/set_specific/get_specific
+    get specific == set specific
+    dtor is invoked (no mem leak)
+*/
+
+
+extern "C"{
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <errno.h>
+    #include <pthread.h>
+
+    #define THREADS 5
+    #define BUFFSZ  48
+    pthread_key_t   key;
+    volatile int total_value,total_value_getspec;
+    pthread_mutex_t value_mutex;
+
+    // Thread body: stores values under `key` and checks that pthread_getspecific reads them back.
+    // The last value stored is malloc'ed so that the key destructor has storage to free.
+    void            *threadfunc(void *parm)
+    {
+        int        status;
+        void      *value;
+        int        threadnum;
+        void      *getvalue;
+
+        threadnum = *(int*)parm;
+
+        //printf("Thread %d executing\n", threadnum);
+        value = (void *)(rand()%100);
+        status = pthread_setspecific(key, (void *) value);
+        if ( status !=  0) {
+            // pthread routines return the error number instead of setting errno
+            printf("pthread_setspecific failed, thread %d, errno %d",
+                                                        threadnum, status);
+            return (void*)12;
+        }
+        pthread_mutex_lock(&value_mutex);
+        total_value += (int)value;                              // what was stored
+        total_value_getspec += (int)pthread_getspecific(key);   // what is read back
+        pthread_mutex_unlock(&value_mutex);
+
+        // replace the small integer with BUFFSZ bytes of heap storage
+        if (!(value = malloc(BUFFSZ)))
+            printf("Thread %d could not allocate storage, errno = %d\n",
+                                                        threadnum, errno);
+        status = pthread_setspecific(key, (void *) value);
+        if ( status !=  0) {
+            printf("pthread_setspecific failed, thread %d, errno %d",
+                                                        threadnum, status);
+            free(value);                                        // never registered with the key, so free it here
+            return (void*)12;
+        }
+
+        getvalue = pthread_getspecific(key);
+
+        if (getvalue != value) {
+            printf("getvalue not valid, getvalue=%p", getvalue);
+            return (void*)68;
+        }
+
+        // at thread exit the key destructor is run on the stored non-NULL pointer
+        pthread_exit((void *)0);
+    }
+
+    void  destr_fn(void *parm)          // destructor registered with pthread_key_create in main
+    {
+
+        printf("Destructor function invoked\n");
+        free(parm);                     // parm is the value stored under the key
+    }
+
+
+    // Creates the key, runs THREADS threads through threadfunc, joins them, and prints both totals.
+    int main() {
+        int          status;
+        int          i;
+        int          threadparm[THREADS];
+        pthread_t    threadid[THREADS];
+        void*          thread_stat[THREADS];
+
+        srand(1003);                                // fixed seed for testing
+        pthread_mutex_init(&value_mutex, NULL);
+
+        // testing getspec and setspec
+        total_value = 0;
+        total_value_getspec = 0;
+
+        // pthread routines report failure as a nonzero return value, never a negative one
+        if ((status = pthread_key_create(&key, destr_fn )) != 0) {
+            printf("pthread_key_create failed, errno=%d", status);
+            exit(1);
+        }
+
+        // create THREADS threads, pass each its number
+        for (i=0; i<THREADS; i++) {
+            threadparm[i] = i+1;
+            status = pthread_create( &threadid[i],
+                                    NULL,
+                                    threadfunc,
+                                    (void *)&threadparm[i]);
+            if ( status !=  0) {
+                printf("pthread_create failed, errno=%d", status);
+                exit(2);
+            }
+        }
+
+        for ( i=0; i<THREADS; i++) {
+            status = pthread_join( threadid[i], (void **)&thread_stat[i]);
+            if ( status !=  0) {
+                printf("pthread_join failed, thread %d, errno=%d\n", i+1, status);
+                continue;                           // thread_stat[i] was not written
+            }
+            if (thread_stat[i] != 0)   {
+                printf("bad thread status, thread %d, status=%d\n", i+1,
+                                                        (int)(u_int64_t)thread_stat[i]);
+            }
+        }
+        printf("total value is %d, total value by pthread_getspecific is %d\n", total_value, total_value_getspec);
+        exit(0);
+    }   // main
+}
+
Index: tests/concurrent/pthread/pthread_once_test.cfa
===================================================================
--- tests/concurrent/pthread/pthread_once_test.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/concurrent/pthread/pthread_once_test.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,96 @@
+// tested pthread once,create,join
+
+                                                          
+                                                                                
+                                                            
+#include <fstream.hfa>
+#include <thread.hfa>                                                                    
+#define THREADS 20                                                                                                                                                    
+
+extern "C"{
+    #include <pthread.h>
+    #include <stdio.h>
+    #include <errno.h> 
+    int             once_counter=0;                                                 
+    pthread_once_t  once_control = PTHREAD_ONCE_INIT;                               
+                                                                                    
+    void  once_fn(void)                     // init routine handed to pthread_once
+    {
+    puts("in once_fn");
+    once_counter++;                         // counts how many times this routine was entered
+    }
+                                                                                    
+    // Thread body: every thread calls pthread_once with the same control object and once_fn.
+    void            *threadfunc(void *parm)
+    {
+        int        status;
+        int        threadnum;
+
+        threadnum = *(int *)parm;
+
+        //printf("Thread %d executing\n", threadnum);
+
+        status = pthread_once(&once_control, once_fn);
+        // pthread_once reports failure as a nonzero return value and does not set errno
+        if ( status !=  0)
+            printf("pthread_once failed, thread %d, errno=%d\n", threadnum,
+                                                                status);
+
+        //pthread_exit((void *)0);
+        return NULL;
+    }
+
+
+    void once_rtn(){                        // NOTE(review): nothing in this file calls this routine
+        printf("in once init\n");
+    }
+    // Starts THREADS threads that all call pthread_once, joins them, and checks once_fn ran exactly once.
+    void test(){
+        processor p[10];        // additional CFA processors; presumably lets the threads run in parallel
+
+        int          status;
+        int          i;
+        int          threadparm[THREADS];
+        pthread_t    threadid[THREADS];
+        void*          thread_stat[THREADS];
+
+        for (i=0; i<THREADS; i++) {
+            threadparm[i] = i+1;
+            status = pthread_create( &threadid[i],
+                                    NULL,
+                                    threadfunc,
+                                    (void *)&threadparm[i]);
+            // pthread routines return a nonzero error number on failure, never a negative value
+            if ( status !=  0) {
+                printf("pthread_create failed, errno=%d", status);
+                exit(2);
+            }
+        }
+
+        for ( i=0; i<THREADS; i++) {
+            status = pthread_join( threadid[i], (void **)&thread_stat[i]);
+            if ( status !=  0) {
+                printf("pthread_join failed, thread %d, errno=%d\n", i+1, status);
+                continue;                           // thread_stat[i] was not written
+            }
+            if (thread_stat[i] != 0)
+                printf("bad thread status, thread %d, status=%d\n", i+1,
+                                                        (int)thread_stat[i]);
+        }
+
+        if (once_counter != 1) {
+            printf("once_fn did not get control once, counter=%d",once_counter);
+            exit(1);
+        }
+
+        exit(0);
+    }
+}
+
+
+
+int main(int argc, char const *argv[])
+{
+    test();         // test() ends by calling exit(), so the return below is not reached
+    return 0;
+}
Index: tests/configs/parsebools.cfa
===================================================================
--- tests/configs/parsebools.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ tests/configs/parsebools.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -16,7 +16,9 @@
 
 #include <fstream.hfa>
-#include <parseargs.hfa>
 
 #include "../meta/fork+exec.hfa"
+
+// included last as a workaround for a parser bug
+#include <parseargs.hfa>
 
 int main(int argc, char * argv[]) {
Index: tests/configs/parsenums.cfa
===================================================================
--- tests/configs/parsenums.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ tests/configs/parsenums.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -16,7 +16,9 @@
 
 #include <fstream.hfa>
+
+#include "../meta/fork+exec.hfa"
+
+// included last as a workaround for a parser bug
 #include <parseargs.hfa>
-
-#include "../meta/fork+exec.hfa"
 
 #if __SIZEOF_LONG__ == 4
Index: tests/configs/usage.cfa
===================================================================
--- tests/configs/usage.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ tests/configs/usage.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -16,7 +16,7 @@
 
 #include <fstream.hfa>
+#include "../meta/fork+exec.hfa"
 #include <parseargs.hfa>
 
-#include "../meta/fork+exec.hfa"
 
 int main() {
Index: tests/enum_tests/.expect/enumInlineValue.txt
===================================================================
--- tests/enum_tests/.expect/enumInlineValue.txt	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ tests/enum_tests/.expect/enumInlineValue.txt	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -1,4 +1,4 @@
 enumB.A is 5
-enumB.B is 10
+enumB.B is 6
 enumB.D is 11
 enumB.E is 12
Index: tests/enum_tests/.expect/qualifiedEnum.cfa
===================================================================
--- tests/enum_tests/.expect/qualifiedEnum.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ tests/enum_tests/.expect/qualifiedEnum.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -1,1 +1,1 @@
-l :0
+l :1
Index: tests/enum_tests/enumInlineValue.cfa
===================================================================
--- tests/enum_tests/enumInlineValue.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ tests/enum_tests/enumInlineValue.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -6,5 +6,5 @@
 enum enumB {
     inline enumA,
-    E, B=10
+    E
 };
 
Index: tests/enum_tests/qualifiedEnum.cfa
===================================================================
--- tests/enum_tests/qualifiedEnum.cfa	(revision b77f0e1fb94f6a4a2617000cc28b1371637e1fb8)
+++ tests/enum_tests/qualifiedEnum.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -8,5 +8,5 @@
 
 int main() {
-    enum Level l = Level.LOW;
+    enum Level l = Level.MEDIUM;
     sout | "l :" | l;
     return 0;
Index: tests/loop-inc.cfa
===================================================================
--- tests/loop-inc.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
+++ tests/loop-inc.cfa	(revision 63be33872128e89a0dc1fe9bdb8448d00934a453)
@@ -0,0 +1,20 @@
+forall(T &)			// generic link holding a pointer to the next T
+struct A {
+    T * next;
+};
+
+struct B {			// list node whose link is A instantiated with B itself
+    A(B) link;
+};
+
+int main(void) {
+	B end = { { 0p } };			// last node: null next pointer
+	B two = { { &end } };
+	B one = { { &two } };		// list is one -> two -> end
+	B * head = &one;
+	// walk via a pointer-to-pointer; stops at the node whose next pointer is null
+	for (B ** it = &head ; (*it)->link.next ; it = &(*it)->link.next) {
+		printf("loop\n");
+	}
+	printf("done\n");
+}
