Index: benchmark/io/http/worker.hfa
===================================================================
--- benchmark/io/http/worker.hfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ benchmark/io/http/worker.hfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -2,5 +2,5 @@
 
 #include <iofwd.hfa>
-#include <queueLockFree.hfa>
+#include <containers/lockfree.hfa>
 #include <thread.hfa>
 
Index: doc/theses/mike_brooks_MMath/programs/hello-md.cfa
===================================================================
--- doc/theses/mike_brooks_MMath/programs/hello-md.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ doc/theses/mike_brooks_MMath/programs/hello-md.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -1,12 +1,3 @@
 #include "array.hfa"
-
-
-trait ix( C &, E &, ztype(N) ) {
-    E & ?[?]( C &, ptrdiff_t );
-    void __taglen( tag(C), tag(N) );
-};
-
-forall( ztype(Zn), ztype(S), Timmed &, Tbase & )
-void __taglen( tag(arpk(Zn, S, Timmed, Tbase)), tag(Zn) ) {}
 
 
@@ -38,8 +29,17 @@
 
 
-forall( ztype( N ) )
+
+
+
+
+
+
+
+
+
+forall( [N] )
 void print1d_cstyle( array(float, N) & c );
 
-forall( C &, ztype( N ) | ix( C, float, N ) )
+forall( [N], C & | ar( C, float, N ) )
 void print1d( C & c );
 
@@ -58,7 +58,7 @@
 
 
-forall( ztype( N ) )
+forall( [N] )
 void print1d_cstyle( array(float, N) & c ) {
-    for( i; z(N) ) {
+    for( i; N ) {
         printf("%.1f  ", c[i]);
     }
@@ -78,7 +78,7 @@
 
 
-forall( C &, ztype( N ) | ix( C, float, N ) )
+forall( [N], C & | ar( C, float, N ) )
 void print1d( C & c ) {
-    for( i; z(N) ) {
+    for( i; N ) {
         printf("%.1f  ", c[i]);
     }
@@ -99,9 +99,9 @@
 
 
-void fill( array(float, Z(5), Z(7)) & a ) {
+void fill( array(float, 5, 7) & a ) {
     for ( i; (ptrdiff_t) 5 ) {
         for ( j; 7 ) {
-            a[[i,j]] = 1.0 * i + 0.1 * j;
-            printf("%.1f  ", a[[i,j]]);
+            a[i,j] = 1.0 * i + 0.1 * j;
+            printf("%.1f  ", a[i,j]);
         }
         printf("\n");
@@ -118,5 +118,5 @@
 
 
-array( float, Z(5), Z(7) ) a;
+array( float, 5, 7 ) a;
 fill(a);
 /*
@@ -148,10 +148,10 @@
 
 
-print1d( a[[ 2, all ]] );  // 2.0  2.1  2.2  2.3  2.4  2.5  2.6
-print1d( a[[ all, 3 ]] );  // 0.3  1.3  2.3  3.3  4.3
+print1d( a[ 2, all ] );  // 2.0  2.1  2.2  2.3  2.4  2.5  2.6
+print1d( a[ all, 3 ] );  // 0.3  1.3  2.3  3.3  4.3
 
 
 
-print1d_cstyle( a[[ 2, all ]] );
+print1d_cstyle( a[ 2, all ] );
 
 
@@ -161,7 +161,7 @@
 
 
-#ifdef SHOWERR1
+#ifdef SHOW_ERROR_1
 
-print1d_cstyle( a[[ all, 2 ]] );  // bad
+print1d_cstyle( a[ all, 2 ] );  // bad
 
 #endif
Index: doc/theses/thierry_delisle_PhD/thesis/text/front.tex
===================================================================
--- doc/theses/thierry_delisle_PhD/thesis/text/front.tex	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ doc/theses/thierry_delisle_PhD/thesis/text/front.tex	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -161,5 +161,5 @@
 Thanks to Andrew Beach, Michael Brooks, Colby Parsons, Mubeen Zulfiqar, Fangren Yu and Jiada Liang for their work on the \CFA project as well as all the discussions which have helped me concretize the ideas in this thesis.
 
-Finally, I acknowledge that this has been possible thanks to the financial help offered by the David R. Cheriton School of Computer Science and the corporate partnership with Huawei Ltd.
+Finally, I acknowledge that this has been possible thanks to the financial help offered by the David R. Cheriton School of Computer Science, the corporate partnership with Huawei Ltd., and the Natural Sciences and Engineering Research Council.
 \cleardoublepage
 
Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/Makefile.am	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -62,6 +62,5 @@
 	containers/array.hfa \
 	containers/list.hfa \
-	containers/queueLockFree.hfa \
-	containers/stackLockFree.hfa \
+	containers/lockfree.hfa \
 	containers/string_sharectx.hfa \
 	containers/vector2.hfa \
@@ -127,4 +126,5 @@
 
 thread_libsrc = ${inst_thread_headers_src} ${inst_thread_headers_src:.hfa=.cfa} \
+	interpose_thread.cfa \
 	bits/signal.hfa \
 	concurrency/clib/cfathread.cfa \
@@ -145,5 +145,6 @@
 	concurrency/stats.cfa \
 	concurrency/stats.hfa \
-	concurrency/stats.hfa
+	concurrency/stats.hfa \
+	concurrency/pthread.cfa
 
 else
Index: libcfa/src/concurrency/clib/cfathread.cfa
===================================================================
--- libcfa/src/concurrency/clib/cfathread.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/clib/cfathread.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -172,9 +172,9 @@
 
 		pthread_attr_t attr;
-		if (int ret = pthread_attr_init(&attr); 0 != ret) {
+		if (int ret = __cfaabi_pthread_attr_init(&attr); 0 != ret) {
 			abort | "failed to create master epoll thread attr: " | ret | strerror(ret);
 		}
 
-		if (int ret = pthread_create(&master_poller, &attr, master_epoll, 0p); 0 != ret) {
+		if (int ret = __cfaabi_pthread_create(&master_poller, &attr, master_epoll, 0p); 0 != ret) {
 			abort | "failed to create master epoll thread: " | ret | strerror(ret);
 		}
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/invoke.h	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -214,4 +214,5 @@
 
 		#if defined( __CFA_WITH_VERIFY__ )
+			struct processor * volatile executing;
 			void * canary;
 		#endif
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/io.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -610,5 +610,5 @@
 		if( we ) {
 			sigval_t value = { PREEMPT_IO };
-			pthread_sigqueue(ctx->proc->kernel_thread, SIGUSR1, value);
+			__cfaabi_pthread_sigqueue(ctx->proc->kernel_thread, SIGUSR1, value);
 		}
 
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -344,5 +344,5 @@
 	// 	iopoll.run = false;
 	// 	sigval val = { 1 };
-	// 	pthread_sigqueue( iopoll.thrd, SIGUSR1, val );
+	// 	__cfaabi_pthread_sigqueue( iopoll.thrd, SIGUSR1, val );
 
 	// 	// Make sure all this is done
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/kernel.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -321,4 +321,5 @@
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->curr_cor == proc_cor || thrd_dst->corctx_flag, "ERROR : Destination thread$ %p has been corrupted.\n StackPointer too small.\n", thrd_dst ); // add escape condition if we are setting up the processor
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->curr_cor == proc_cor || thrd_dst->corctx_flag, "ERROR : Destination thread$ %p has been corrupted.\n StackPointer too large.\n", thrd_dst ); // add escape condition if we are setting up the processor
+		/* paranoid */ verify( __atomic_exchange_n( &thrd_dst->executing, this, __ATOMIC_SEQ_CST) == 0p );
 		/* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_dst->canary );
 
@@ -332,8 +333,9 @@
 
 		/* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_dst->canary );
+		/* paranoid */ verify( __atomic_exchange_n( &thrd_dst->executing, 0p, __ATOMIC_SEQ_CST) == this );
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->corctx_flag, "ERROR : Destination thread$ %p has been corrupted.\n StackPointer too large.\n", thrd_dst );
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->corctx_flag, "ERROR : Destination thread$ %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
+		/* paranoid */ verify( thrd_dst->state != Halted );
 		/* paranoid */ verify( thrd_dst->context.SP );
-		/* paranoid */ verify( thrd_dst->curr_cluster == this->cltr );
 		/* paranoid */ verify( kernelTLS().this_thread == thrd_dst );
 		/* paranoid */ verify( ! __preemption_enabled() );
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/kernel.hfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -160,7 +160,7 @@
 // P9_EMBEDDED( processor, dlink(processor) )
 static inline tytagref( dlink(processor), dlink(processor) ) ?`inner( processor & this ) {
-    dlink(processor) & b = this.link;
-    tytagref( dlink(processor), dlink(processor) ) result = { b };
-    return result;
+	dlink(processor) & b = this.link;
+	tytagref( dlink(processor), dlink(processor) ) result = { b };
+	return result;
 }
 
@@ -269,4 +269,9 @@
 		io_context_params params;
 	} io;
+
+	struct {
+		struct processor ** procs;
+		unsigned cnt;
+	} managed;
 
 	#if !defined(__CFA_NO_STATISTICS__)
@@ -298,4 +303,8 @@
 static inline struct cluster   * active_cluster  () { return publicTLS_get( this_processor )->cltr; }
 
+// set the number of internal processors
+// these processors are in addition to any explicitly declared processors
+unsigned set_concurrency( cluster & this, unsigned new_count );
+
 #if !defined(__CFA_NO_STATISTICS__)
 	void print_stats_now( cluster & this, int flags );
Index: libcfa/src/concurrency/kernel/private.hfa
===================================================================
--- libcfa/src/concurrency/kernel/private.hfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/kernel/private.hfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -20,4 +20,6 @@
 #endif
 
+#include <signal.h>
+
 #include "kernel.hfa"
 #include "thread.hfa"
@@ -48,5 +50,4 @@
 	#endif
 #endif
-
 // #define READYQ_USE_LINEAR_AVG
 #define READYQ_USE_LOGDBL_AVG
@@ -62,4 +63,16 @@
 #error must pick a scheme for averaging
 #endif
+
+extern "C" {	// runtime-internal __cfaabi_ counterparts of the pthread calls used by the kernel; NOTE(review): presumably these reach the system pthread routines rather than the user-level pthread_* in concurrency/pthread.cfa -- definitions are not visible here
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_create(pthread_t *_thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_join(pthread_t _thread, void **retval);
+	__attribute__((visibility("protected"))) pthread_t __cfaabi_pthread_self(void);
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_init(pthread_attr_t *attr);
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_destroy(pthread_attr_t *attr);
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_setstack( pthread_attr_t *attr, void *stackaddr, size_t stacksize );
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_attr_getstacksize( const pthread_attr_t *attr, size_t *stacksize );
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_sigqueue(pthread_t _thread, int sig, const union sigval value);
+	__attribute__((visibility("protected"))) int __cfaabi_pthread_sigmask( int how, const sigset_t *set, sigset_t *oset);
+}
 
 //-----------------------------------------------------------------------------
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -16,4 +16,6 @@
 #define __cforall_thread__
 #define _GNU_SOURCE
+
+// #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
 
 // C Includes
@@ -222,5 +224,5 @@
 		( this.runner ){};
 		init( this, "Main Processor", *mainCluster, 0p );
-		kernel_thread = pthread_self();
+		kernel_thread = __cfaabi_pthread_self();
 
 		runner{ &this };
@@ -283,6 +285,15 @@
 }
 
+extern "C"{
+	void pthread_delete_kernel_threads_();
+}
+
+
 static void __kernel_shutdown(void) {
 	if(!cfa_main_returned) return;
+
+	//delete kernel threads for pthread_concurrency
+	pthread_delete_kernel_threads_();
+
 	/* paranoid */ verify( __preemption_enabled() );
 	disable_interrupts();
@@ -327,5 +338,5 @@
 
 		/* paranoid */ verify( this.do_terminate == true );
-		__cfaabi_dbg_print_safe("Kernel : destroyed main processor context %p\n", &runner);
+		__cfadbg_print_safe(runtime_core, "Kernel : destroyed main processor context %p\n", &runner);
 	}
 
@@ -388,5 +399,5 @@
 	(proc->runner){ proc, &info };
 
-	__cfaabi_dbg_print_safe("Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.storage);
+	__cfadbg_print_safe(runtime_core, "Coroutine : created stack %p\n", get_coroutine(proc->runner)->stack.storage);
 
 	//Set global state
@@ -520,4 +531,5 @@
 	random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl();
 	#if defined( __CFA_WITH_VERIFY__ )
+		executing = 0p;
 		canary = 0x0D15EA5E0D15EA5Ep;
 	#endif
@@ -652,4 +664,7 @@
 	io.params = io_params;
 
+	managed.procs = 0p;
+	managed.cnt = 0;
+
 	doregister(this);
 
@@ -667,4 +682,6 @@
 
 void ^?{}(cluster & this) libcfa_public {
+	set_concurrency( this, 0 );
+
 	destroy(this.io.arbiter);
 
@@ -777,5 +794,5 @@
 	pthread_attr_t attr;
 
-	check( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
+	check( __cfaabi_pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
 
 	size_t stacksize = max( PTHREAD_STACK_MIN, DEFAULT_STACK_SIZE );
@@ -804,11 +821,11 @@
 	#endif
 
-	check( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
-	check( pthread_create( pthread, &attr, start, arg ), "pthread_create" );
+	check( __cfaabi_pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
+	check( __cfaabi_pthread_create( pthread, &attr, start, arg ), "pthread_create" );
 	return stack;
 }
 
 void __destroy_pthread( pthread_t pthread, void * stack, void ** retval ) {
-	int err = pthread_join( pthread, retval );
+	int err = __cfaabi_pthread_join( pthread, retval );
 	if( err != 0 ) abort("KERNEL ERROR: joining pthread %p caused error %s\n", (void*)pthread, strerror(err));
 
@@ -816,9 +833,9 @@
 		pthread_attr_t attr;
 
-		check( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
+		check( __cfaabi_pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
 
 		size_t stacksize;
 		// default stack size, normally defined by shell limit
-		check( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
+		check( __cfaabi_pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
 		assert( stacksize >= PTHREAD_STACK_MIN );
 		stacksize += __page_size;
@@ -838,4 +855,29 @@
 }
 
+unsigned set_concurrency( cluster & this, unsigned new ) libcfa_public {	// resize this cluster's managed-processor array to `new` entries; returns the previous count
+	unsigned old = this.managed.cnt;
+
+	__cfadbg_print_safe(runtime_core, "Kernel : resizing cluster from %u to %u\n", old, (unsigned)new);
+
+	// Delete all the old unneeded procs
+	if(old > new) for(i; (unsigned)new ~ old) {	// shrinking: destroy entries [new, old) while the old array is still intact
+		__cfadbg_print_safe(runtime_core, "Kernel : destroying %u\n", i);
+		delete( this.managed.procs[i] );
+	}
+
+	// Allocate new array (uses realloc and memcpies the data)
+	this.managed.procs = alloc( new, this.managed.procs`realloc );	// NOTE(review): no lock is taken in this function -- confirm callers serialize resizes
+	this.managed.cnt = new;
+
+	// Create the desired new procs
+	if(old < new) for(i; old ~ new) {	// growing: allocate and construct entries [old, new) on this cluster
+		__cfadbg_print_safe(runtime_core, "Kernel : constructing %u\n", i);
+		(*(this.managed.procs[i] = alloc())){ this };
+	}
+
+	// return the old count
+	return old;
+}
+
 #if defined(__CFA_WITH_VERIFY__)
 static bool verify_fwd_bck_rng(void) {
Index: libcfa/src/concurrency/locks.hfa
===================================================================
--- libcfa/src/concurrency/locks.hfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/locks.hfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -21,5 +21,5 @@
 
 #include "bits/weakso_locks.hfa"
-#include "containers/queueLockFree.hfa"
+#include "containers/lockfree.hfa"
 #include "containers/list.hfa"
 
@@ -423,5 +423,5 @@
 }
 
-static inline size_t on_wait(simple_owner_lock & this) with(this) { 
+static inline size_t on_wait(simple_owner_lock & this) with(this) {
 	lock( lock __cfaabi_dbg_ctx2 );
 	/* paranoid */ verifyf( owner != 0p, "Attempt to release lock %p that isn't held", &this );
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/preemption.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -352,5 +352,5 @@
 	sigset_t oldset;
 	int ret;
-	ret = pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
+	ret = __cfaabi_pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
 	if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
 
@@ -385,5 +385,5 @@
 	sigaddset( &mask, sig );
 
-	if ( pthread_sigmask( SIG_UNBLOCK, &mask, 0p ) == -1 ) {
+	if ( __cfaabi_pthread_sigmask( SIG_UNBLOCK, &mask, 0p ) != 0 ) {	// pthread_sigmask reports failure with a non-zero error number, not -1
 	    abort( "internal error, pthread_sigmask" );
 	}
@@ -396,5 +396,5 @@
 	sigaddset( &mask, sig );
 
-	if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
+	if ( __cfaabi_pthread_sigmask( SIG_BLOCK, &mask, 0p ) != 0 ) {	// pthread_sigmask reports failure with a non-zero error number, not -1
 		abort( "internal error, pthread_sigmask" );
 	}
@@ -404,5 +404,5 @@
 static void preempt( processor * this ) {
 	sigval_t value = { PREEMPT_NORMAL };
-	pthread_sigqueue( this->kernel_thread, SIGUSR1, value );
+	__cfaabi_pthread_sigqueue( this->kernel_thread, SIGUSR1, value );
 }
 
@@ -415,5 +415,5 @@
 	sigset_t oldset;
 	int ret;
-	ret = pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
+	ret = __cfaabi_pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
 	if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
 
@@ -434,5 +434,5 @@
 	sigset_t oldset;
 	int ret;
-	ret = pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
+	ret = __cfaabi_pthread_sigmask(0, ( const sigset_t * ) 0p, &oldset);  // workaround trac#208: cast should be unnecessary
 	if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
 
@@ -505,5 +505,5 @@
 	sigval val;
 	val.sival_int = 0;
-	pthread_sigqueue( alarm_thread, SIGALRM, val );
+	__cfaabi_pthread_sigqueue( alarm_thread, SIGALRM, val );
 
 	// Wait for the preemption thread to finish
@@ -579,5 +579,5 @@
 	static_assert( sizeof( sigset_t ) == sizeof( cxt->uc_sigmask ), "Expected cxt->uc_sigmask to be of sigset_t" );
 	#endif
-	if ( pthread_sigmask( SIG_SETMASK, (sigset_t *)&(cxt->uc_sigmask), 0p ) == -1 ) {
+	if ( __cfaabi_pthread_sigmask( SIG_SETMASK, (sigset_t *)&(cxt->uc_sigmask), 0p ) != 0 ) {	// pthread_sigmask reports failure with a non-zero error number, not -1
 		abort( "internal error, sigprocmask" );
 	}
@@ -607,5 +607,5 @@
 	sigset_t mask;
 	sigfillset(&mask);
-	if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
+	if ( __cfaabi_pthread_sigmask( SIG_BLOCK, &mask, 0p ) != 0 ) {	// pthread_sigmask reports failure with a non-zero error number, not -1
 	    abort( "internal error, pthread_sigmask" );
 	}
Index: libcfa/src/concurrency/pthread.cfa
===================================================================
--- libcfa/src/concurrency/pthread.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ libcfa/src/concurrency/pthread.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,920 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2019 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// pthread.cfa --
+//
+// Author           : Zhenyan Zhu
+// Created On       : Sat Aug 6 16:29:18 2022
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#define __cforall_thread__
+#define _GNU_SOURCE
+
+#include <signal.h>
+#include <pthread.h>
+#include <errno.h>
+#include "locks.hfa"
+#include "bits/stack.hfa"
+
+
+#define check_nonnull(x) do { asm("": "+rm"(x)); if( (x) == 0p ) return EINVAL; } while(0)	// return EINVAL from the enclosing function when x is null; wrapped so it acts as one statement; the empty asm presumably keeps the compiler from assuming x is non-null
+
+/* locks shared by the pthread emulation; in the code visible here key_lock is held while the key/value tables are modified and magic_mutex_check while a mutex is lazily initialized */
+static simple_owner_lock once_lock,key_lock,magic_mutex_check, concurrency_lock;
+
+//######################### Local Storage Helpers #########################
+
+enum { PTHREAD_KEYS_MAX = 1024 };	// length of the global key table and bound of the per-thread value scan in pthread_deletespecific_
+
+struct pthread_values{	// one thread's slot for one key; linkable into that key's `threads` sequence
+	inline Seqable;
+	void* value;	// the thread-specific value; passed to the key's destructor at thread teardown when non-NULL
+	bool in_use;	// cleared by pthread_deletespecific_ when the slot is unlinked from its key
+};
+
+static inline {	// typed link accessors for pthread_values: forward to the Seqable/Colable versions and cast the result
+	pthread_values *& Back( pthread_values * n ) {
+		return (pthread_values *)Back( (Seqable *)n );
+	}
+
+	pthread_values *& Next( pthread_values * n ) {
+		return (pthread_values *)Next( (Colable *)n );
+	}
+}
+
+struct pthread_keys {	// one entry of the global key table
+	bool in_use;	// NOTE(review): presumably set while the key is allocated -- writers are not visible here
+	void (*destructor)( void * );	// called on a thread's non-NULL value for this key at thread teardown; may be NULL
+	Sequence(pthread_values) threads;	// the per-thread slots currently linked to this key
+};
+
+static void ?{}(pthread_keys& k){	// constructor: only the `threads` sequence is constructed; in_use and destructor are not assigned here
+	k.threads{};
+}
+
+// Create storage separately to ensure no constructors are called.
+static pthread_keys cfa_pthread_keys_storage[PTHREAD_KEYS_MAX] __attribute__((aligned (16)));
+
+static void init_pthread_storage(){	// explicitly run the pthread_keys constructor on every entry of the key table
+	for (int i = 0; i < PTHREAD_KEYS_MAX; i++){
+		cfa_pthread_keys_storage[i]{};
+	}
+}
+
+#define cfa_pthread_keys ((pthread_keys *)cfa_pthread_keys_storage)
+
+/* Controlling the iterations of destructors for thread-specific data.  */
+#define _POSIX_THREAD_DESTRUCTOR_ITERATIONS	4
+/* Number of iterations this implementation does.  */
+#define PTHREAD_DESTRUCTOR_ITERATIONS	_POSIX_THREAD_DESTRUCTOR_ITERATIONS
+
+//######################### Parallelism Helpers #########################
+
+struct Pthread_kernel_threads{	// stack node (see cfa_pthreads_kernel_threads) that owns one processor
+	inline Colable;
+	processor p;
+};
+
+static inline Pthread_kernel_threads *& Next( Pthread_kernel_threads * n ) {	// typed link accessor for the stack of kernel threads; file-local like the pthread_values accessors
+	return (Pthread_kernel_threads *)Next( (Colable *)n );
+}
+
+static Stack(Pthread_kernel_threads) cfa_pthreads_kernel_threads;
+static bool cfa_pthreads_kernel_threads_zero = false;	// set to zero ?
+static int cfa_pthreads_no_kernel_threads = 1;	// number of kernel threads
+
+
+//######################### Cond Helpers #########################
+
+typedef pthread_cond_var(simple_owner_lock) cfa2pthr_cond_var_t;
+
+/* condvar helper routines */
+static void init(pthread_cond_t* pcond){	// construct a cfa2pthr_cond_var_t in place inside the caller's pthread_cond_t storage
+	static_assert(sizeof(pthread_cond_t) >= sizeof(cfa2pthr_cond_var_t),"sizeof(pthread_cond_t) < sizeof(cfa2pthr_cond_var_t)");
+	cfa2pthr_cond_var_t* _cond = (cfa2pthr_cond_var_t*)pcond;
+	?{}(*_cond);
+}
+
+static cfa2pthr_cond_var_t* get(pthread_cond_t* pcond){	// view the caller's pthread_cond_t storage as the cfa2pthr_cond_var_t kept inside it
+	static_assert(sizeof(pthread_cond_t) >= sizeof(cfa2pthr_cond_var_t),"sizeof(pthread_cond_t) < sizeof(cfa2pthr_cond_var_t)");
+	return (cfa2pthr_cond_var_t*)pcond;
+}
+
+static void destroy(pthread_cond_t* cond){	// run the cfa2pthr_cond_var_t destructor on the object stored inside *cond
+	static_assert(sizeof(pthread_cond_t) >= sizeof(cfa2pthr_cond_var_t),"sizeof(pthread_cond_t) < sizeof(cfa2pthr_cond_var_t)");
+	^?{}(*get(cond));
+}
+
+
+//######################### Mutex Helper #########################
+
+/* mutex helper routines */
+static void mutex_check(pthread_mutex_t* t){	// call pthread_mutex_init on *t if its __lock word still reads 0
+	// Use double check to improve performance.
+	// Check is safe on x86; volatile prevents compiler reordering
+	volatile pthread_mutex_t *const mutex_ = t;
+
+	// SKULLDUGGERY: not a portable way to access the __lock field, /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h
+	int _lock_val = ((pthread_mutex_t *)mutex_)->__data.__lock;	// NOTE(review): this cast drops the volatile qualifier declared above -- confirm that is intended
+
+	// if pthread_mutex_t is initialized by PTHREAD_MUTEX_INITIALIZER, _lock_val should be 0
+	if ( _lock_val == 0 ) {
+		lock(magic_mutex_check);	// serialize initializers, then re-read the word under the lock
+		_lock_val = ((pthread_mutex_t *)mutex_)->__data.__lock;
+		if ( _lock_val == 0 ) {
+			pthread_mutex_init( t, NULL );	// NOTE(review): relies on initialization leaving __lock non-zero -- confirm
+		}
+		unlock(magic_mutex_check);
+	}
+} // mutex_check
+
+
+static void init(pthread_mutex_t* plock){	// construct a simple_owner_lock in place inside the caller's pthread_mutex_t storage
+	static_assert(sizeof(pthread_mutex_t) >= sizeof(simple_owner_lock),"sizeof(pthread_mutex_t) < sizeof(simple_owner_lock)");
+	simple_owner_lock* _lock = (simple_owner_lock*)plock;
+	?{}(*_lock);
+}
+
+static simple_owner_lock* get(pthread_mutex_t* plock){	// view the caller's pthread_mutex_t storage as the simple_owner_lock kept inside it
+	static_assert(sizeof(pthread_mutex_t) >= sizeof(simple_owner_lock),"sizeof(pthread_mutex_t) < sizeof(simple_owner_lock)");
+	return (simple_owner_lock*)plock;
+}
+
+static void destroy(pthread_mutex_t* plock){	// run the simple_owner_lock destructor on the object stored inside *plock
+	static_assert(sizeof(pthread_mutex_t) >= sizeof(simple_owner_lock),"sizeof(pthread_mutex_t) < sizeof(simple_owner_lock)");
+	^?{}(*get(plock));
+}
+
+//######################### Attr helpers #########################
+struct cfaPthread_attr_t {								// thread attributes; overlaid on the caller's pthread_attr_t storage by get()
+		int contentionscope;	// pthread_attr_{set,get}scope
+		int detachstate;	// pthread_attr_{set,get}detachstate
+		size_t stacksize;	// pthread_attr_{set,get}stacksize / pthread_attr_{set,get}stack; used to size the user-thread stack
+		void *stackaddr;	// pthread_attr_{set,get}stackaddr / pthread_attr_{set,get}stack
+		int policy;
+		int inheritsched;
+		struct sched_param param;
+} typedef cfaPthread_attr_t;
+
+static const cfaPthread_attr_t default_attrs{	// values installed by pthread_attr_init and used when pthread_create gets a NULL attr
+	0,	// contentionscope
+	0,	// detachstate
+	(size_t)65000,	// stacksize
+	(void *)NULL,	// stackaddr
+	0,	// policy
+	0,	// inheritsched
+	{0}	// param
+};
+
+static cfaPthread_attr_t* get(const pthread_attr_t* attr){	// view pthread_attr_t storage as a cfaPthread_attr_t; the cast also drops const, and a NULL attr yields NULL
+	static_assert(sizeof(pthread_attr_t) >= sizeof(cfaPthread_attr_t),"sizeof(pthread_attr_t) < sizeof(cfaPthread_attr_t)");
+	return (cfaPthread_attr_t*)attr;
+}
+
+
+//######################### Threads Helper #########################
+
+// exception for cancel_stack in pthread_exit
+exception pthread_exit_exp {};
+static vtable(pthread_exit_exp) exp_vt;
+
+thread cfaPthread{	// user-level thread object backing one pthread_t handle
+	cfaPthread_attr_t attr;	// copy of the creation attributes (or default_attrs), set by the constructor
+	pthread_t pthreadId;	// this object's own address cast to pthread_t, set by the constructor
+
+	// pthreads return value
+	void *joinval;
+
+	// pthread attributes
+	pthread_attr_t pthread_attr;	// NOTE(review): not assigned in the code visible here -- confirm it is used
+
+	void *(*start_routine)(void *);	// user entry point, invoked by main()
+	void *start_arg;	// argument handed to start_routine
+
+	// thread local data
+	pthread_values* pthreadData;	// NULL until allocated; released by pthread_deletespecific_ in the destructor
+
+	// flag used for tryjoin
+	bool isTerminated;	// set to true by main() after start_routine returns
+};
+
+/* thread part routines */
+//  cfaPthread entry point
+void main(cfaPthread& _thread) with(_thread){	// thread body: run the user routine, store its result for join, then flag termination
+	joinval =  start_routine(start_arg);
+	isTerminated = true;	// only reached when start_routine returns normally
+}
+
+static cfaPthread *lookup( pthread_t p ){	// a pthread_t handle is the cfaPthread's address (see the constructor); convert it back with no validation
+	static_assert(sizeof(pthread_t) >= sizeof(cfaPthread*),"sizeof(pthread_t) < sizeof(cfaPthread*)");
+	return (cfaPthread*)p;
+}
+
+static void pthread_deletespecific_( pthread_values* values )  { // see uMachContext::invokeTask
+	// Tear down a thread's key values: unlink each in-use slot, run key destructors (bounded number of passes), then free the array.
+	bool destcalled = true;	// true while the previous pass ran at least one destructor
+	if (values != NULL){
+		for ( int attempts = 0; attempts < PTHREAD_DESTRUCTOR_ITERATIONS && destcalled ; attempts += 1 ) {
+			destcalled = false;
+			lock(key_lock);
+			for (int i = 0; i < PTHREAD_KEYS_MAX; i++){
+				// for each valid key
+				if ( values[i].in_use){
+					pthread_values* value = &values[i];
+					pthread_keys* key = &cfa_pthread_keys[i];
+					value->in_use = false;
+					remove(key->threads, *value);
+					// POSIX order: the slot is set to NULL first, and only then is the destructor called with the
+					// previously stored value, so nothing the destructor stores into the slot is wiped afterwards.
+					void * data = value->value;
+					value->value = NULL;
+					if (data != NULL && key->destructor != NULL){
+						unlock(key_lock);
+						key->destructor(data); // run destructor without holding key_lock
+						lock(key_lock);
+						destcalled = true;
+					}   // if
+				}   // if
+			}   // for
+			unlock(key_lock);
+		}   // for
+		free(values);
+	}   // if
+}
+
+static void ^?{}(cfaPthread & mutex t){	// destructor: release the thread's key values, running key destructors as needed
+	// delete pthread local storage
+	pthread_values * values = t.pthreadData;
+	pthread_deletespecific_(values);	// accepts NULL when the thread never allocated key storage
+}
+
+static void ?{}(cfaPthread &t, pthread_t* _thread, const pthread_attr_t * _attr,void *(*start_routine)(void *), void * arg) {
+	// Build the thread object for pthread_create: size the stack from _attr, publish the handle through *_thread, record the start routine.
+	static_assert(sizeof(pthread_t) >= sizeof(cfaPthread*), "pthread_t too small to hold a pointer: sizeof(pthread_t) < sizeof(cfaPthread*)");
+	// set up user thread stackSize
+	cfaPthread_attr_t * attr = get(_attr);
+	((thread&)t){ attr ? attr->stacksize: DEFAULT_STACK_SIZE };
+
+	// initialize _thread & cfaPthread id
+	*_thread = t.pthreadId = (pthread_t)(&t);
+
+	// if attr null, self attr will be set as default_attrs; else set to attr
+	t.attr = (attr != NULL ? *attr : default_attrs);
+
+	// init start routine and arguments
+	t.start_routine = start_routine;
+	t.start_arg = arg;
+	t.pthreadData = NULL;
+	t.isTerminated = false;	// must start false: pthread_tryjoin_np reads it, and pthread_create obtains the storage from alloc()
+}
+
+
+extern "C"{
+	//######################### Pthread Attrs #########################
+
+	int pthread_attr_init(pthread_attr_t *attr) libcfa_public __THROW {	// copy-construct default_attrs into the caller's attr storage; always returns 0
+		cfaPthread_attr_t* _attr = get(attr);
+		?{}(*_attr, default_attrs);
+		return 0;
+	}
+	int pthread_attr_destroy(pthread_attr_t *attr) libcfa_public __THROW {	// run the cfaPthread_attr_t destructor on the object stored in *attr; always returns 0
+		^?{}(*get(attr));
+		return 0;
+	}
+
+	int pthread_attr_setscope( pthread_attr_t *attr, int contentionscope ) libcfa_public __THROW {	// store contentionscope without validating it; always returns 0
+		get( attr )->contentionscope = contentionscope;
+		return 0;
+	} // pthread_attr_setscope
+
+	int pthread_attr_getscope( const pthread_attr_t *attr, int *contentionscope ) libcfa_public __THROW {	// copy the stored contention scope to *contentionscope; always returns 0
+		*contentionscope = get( attr )->contentionscope;
+		return 0;
+	} // pthread_attr_getscope
+
+	int pthread_attr_setdetachstate( pthread_attr_t *attr, int detachstate ) libcfa_public __THROW {	// store detachstate without validating it; always returns 0
+		get( attr )->detachstate = detachstate;
+		return 0;
+	} // pthread_attr_setdetachstate
+
+	int pthread_attr_getdetachstate( const pthread_attr_t *attr, int *detachstate ) libcfa_public __THROW {	// copy the stored detach state to *detachstate; always returns 0
+		*detachstate = get( attr )->detachstate;
+		return 0;
+	} // pthread_attr_getdetachstate
+
+	int pthread_attr_setstacksize( pthread_attr_t *attr, size_t stacksize ) libcfa_public __THROW {	// store stacksize with no minimum-size check; always returns 0
+		get( attr )->stacksize = stacksize;
+		return 0;
+	} // pthread_attr_setstacksize
+
+	int pthread_attr_getstacksize( const pthread_attr_t *attr, size_t *stacksize ) libcfa_public __THROW {	// copy the stored stack size to *stacksize; always returns 0
+		*stacksize = get( attr )->stacksize;
+		return 0;
+	} // pthread_attr_getstacksize
+
+	int pthread_attr_getguardsize( const pthread_attr_t * /* attr */, size_t * /* guardsize */ ) libcfa_public __THROW {	// stub: reports success; NOTE(review): *guardsize is never written, so the caller's variable keeps its prior contents
+		return 0;
+	} // pthread_attr_getguardsize
+
+	int pthread_attr_setguardsize( pthread_attr_t * /* attr */, size_t /* guardsize */ ) libcfa_public __THROW {	// stub: the requested guard size is ignored and success is reported
+		return 0;
+	} // pthread_attr_setguardsize
+
+	int pthread_attr_setstackaddr( pthread_attr_t *attr, void *stackaddr ) libcfa_public __THROW {	// store stackaddr; always returns 0; the cfaPthread constructor shown in this file does not read it
+		get( attr )->stackaddr = stackaddr;
+		return 0;
+	} // pthread_attr_setstackaddr
+
+	int pthread_attr_getstackaddr( const pthread_attr_t *attr, void **stackaddr ) libcfa_public __THROW {	// copy the stored stack address to *stackaddr; always returns 0
+		*stackaddr = get( attr )->stackaddr;
+		return 0;
+	} // pthread_attr_getstackaddr
+
+	int pthread_attr_setstack( pthread_attr_t *attr, void *stackaddr, size_t stacksize ) libcfa_public __THROW {	// store both stack address and size without validation; always returns 0
+		get( attr )->stackaddr = stackaddr;
+		get( attr )->stacksize = stacksize;
+		return 0;
+	} // pthread_attr_setstack
+
+	int pthread_attr_getstack( const pthread_attr_t *attr, void **stackaddr, size_t *stacksize ) libcfa_public __THROW {	// copy the stored stack address and size to the out-parameters; always returns 0
+		*stackaddr = get( attr )->stackaddr;
+		*stacksize = get( attr )->stacksize;
+		return 0;
+	} // pthread_attr_getstack
+
+	// Initialize thread attribute *attr with attributes corresponding to the
+	// already running thread threadID. It shall be called on uninitialized attr
+	// and destroyed with pthread_attr_destroy when no longer needed.
+	int pthread_getattr_np( pthread_t threadID, pthread_attr_t *attr ) libcfa_public __THROW { // GNU extension
+		check_nonnull(attr);						// only attr is checked; threadID is passed to lookup() unvalidated
+
+		// copy the thread's whole attribute record into the caller's attr by assignment
+		*get(attr) = lookup( threadID )->attr;
+
+		return 0;									// always reports success
+	} // pthread_getattr_np
+
+
+	//######################### Threads #########################
+
+	int pthread_create(pthread_t * _thread, const pthread_attr_t * attr, void *(*start_routine)(void *), void * arg) libcfa_public __THROW {
+		cfaPthread *t = alloc();					// heap-allocate the thread object; pthread_join/pthread_tryjoin_np delete it
+		(*t){_thread, attr, start_routine, arg};	// construct in place; presumably this stores the id in *_thread and starts the thread (constructor not in view)
+		return 0;									// always reports success; allocation failure is not checked here
+	}
+
+
+	int pthread_join(pthread_t _thread, void **value_ptr) libcfa_public __THROW {
+		// reject a null id (EINVAL) and an attempt to join oneself (EDEADLK)
+		if (_thread == NULL) return EINVAL;
+		if (_thread == pthread_self()) return EDEADLK;
+
+		// map the pthread id back to the cfaPthread object
+		cfaPthread* p = lookup(_thread);
+		try {
+			join(*p);								// wait for the target thread to finish
+		}
+		// a target that ended via pthread_exit cancels its stack; swallow the resulting ThreadCancelled resumption
+		catchResume (ThreadCancelled(cfaPthread) * cancel) {}
+
+		// hand back the value recorded in joinval, if the caller asked for it
+		if (value_ptr != NULL ) *value_ptr = p->joinval;
+		delete(p);									// the joiner frees the thread object, so each thread can be joined only once
+		return 0;
+	}
+
+	int pthread_tryjoin_np(pthread_t _thread, void **value_ptr) libcfa_public __THROW {
+		// reject a null id (EINVAL) and an attempt to join oneself (EDEADLK)
+		if (_thread == NULL) return EINVAL;
+		if (_thread == pthread_self()) return EDEADLK;
+
+		cfaPthread* p = lookup(_thread);			// map the pthread id back to the cfaPthread object
+
+		// non-blocking part: report EBUSY unless the target has set its isTerminated flag
+		if (!p->isTerminated) return EBUSY;
+
+		join( *p );									// NOTE(review): unlike pthread_join, no catchResume for ThreadCancelled surrounds this join - confirm that is intended
+
+		if (value_ptr != NULL ) *value_ptr = p->joinval;
+		delete(p);									// the joiner frees the thread object
+		return 0;
+	}
+
+	pthread_t pthread_self(void) libcfa_public __THROW {	// the id is the address obtained by stepping back from the running thread$
+		return (pthread_t)((uintptr_t)active_thread() - (sizeof(cfaPthread) - sizeof(thread$)));
+	}	// NOTE(review): assumes the thread$ is the last member of cfaPthread and that the caller is a cfaPthread - confirm
+
+	void pthread_exit(void * status) libcfa_public __THROW {
+		pthread_t pid = pthread_self();
+		cfaPthread* _thread = (cfaPthread*)pid;	// the id is used directly as the cfaPthread address
+		_thread->joinval = status;  // set return value (read by pthread_join / pthread_tryjoin_np)
+		_thread->isTerminated = 1;  // set terminated flag (polled by pthread_tryjoin_np)
+		cancel_stack((pthread_exit_exp){&exp_vt});	// end the thread by cancelling its stack with a pthread_exit_exp exception
+	}   //pthread_exit_
+
+	int pthread_yield( void ) libcfa_public __THROW {	// GNU extension; marked libcfa_public like the other exported pthread_* routines
+		yield();									// yield the calling user-level thread
+		return 0;									// always reports success
+	}
+
+
+	//######################### Mutex #########################
+
+	int pthread_mutex_init(pthread_mutex_t *_mutex, const pthread_mutexattr_t *attr) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		init(_mutex);								// attr is never read: every mutex is initialized the same way
+		return 0;
+	}   //pthread_mutex_init_
+
+
+	int pthread_mutex_destroy(pthread_mutex_t *_mutex) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		simple_owner_lock* _lock = get(_mutex);	// the underlying lock behind the pthread_mutex_t
+		if (_lock->owner != NULL){					// refuse to destroy a mutex that is currently held
+			return EBUSY;
+		}
+		destroy(_mutex);
+		return 0;
+	}   //pthread_mutex_destroy_
+
+	int pthread_mutex_lock(pthread_mutex_t *_mutex) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		mutex_check(_mutex);						// helper defined outside this view; presumably handles statically initialized mutexes - TODO confirm
+		simple_owner_lock* _lock = get(_mutex);
+		lock(*_lock);								// acquire the underlying lock
+		return 0;									// always reports success
+	}   //pthread_mutex_lock_
+
+	int pthread_mutex_unlock(pthread_mutex_t *_mutex) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		simple_owner_lock* _lock = get(_mutex);
+		if (_lock->owner != active_thread()){		// only the owning thread may unlock
+			return EPERM;
+		} // current thread does not hold the mutex
+		unlock(*_lock);
+		return 0;
+	}   //pthread_mutex_unlock_
+
+	int pthread_mutex_trylock(pthread_mutex_t *_mutex) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		simple_owner_lock* _lock = get(_mutex);
+		if (_lock->owner != active_thread() && _lock->owner != NULL){	// EBUSY only when another thread owns it; an owner re-locks below
+			return EBUSY;
+		}   // if mutex is owned
+		lock(*_lock);								// NOTE(review): owner is read without synchronization, so this lock() can still block if another thread acquires in between
+		return 0;
+	}   //pthread_mutex_trylock_
+
+	//######################### Conditional Variable #########################
+
+	/* conditional variable routines */
+	int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) libcfa_public __THROW {
+		check_nonnull(cond);
+		init(cond);									// attr is never read: every condition variable is initialized the same way
+		return 0;
+	}  //pthread_cond_init
+
+	int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *_mutex) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		check_nonnull(cond);
+		wait(*get(cond), *get(_mutex));				// block on the condition, passing the underlying lock of _mutex
+		return 0;									// always reports success
+	} // pthread_cond_wait
+
+	int pthread_cond_timedwait(pthread_cond_t * cond, pthread_mutex_t * _mutex, const struct timespec * abstime) libcfa_public __THROW {
+		check_nonnull(_mutex);
+		check_nonnull(cond);						// abstime itself is dereferenced below without a null check
+		wait(*get(cond), *get(_mutex), *abstime);	// NOTE(review): any result of the timed wait is discarded
+		return 0;									// so 0 is returned even after a timeout; ETIMEDOUT is never reported - confirm intended
+	} // pthread_cond_timedwait
+
+	int pthread_cond_signal(pthread_cond_t *cond) libcfa_public __THROW {	// wake at most one waiter; returns 0 on success
+		check_nonnull(cond);
+		notify_one(*get(cond)); return 0;			// notify_one's result is not an errno value, so it must not be returned as the status
+	} // pthread_cond_signal
+
+	int pthread_cond_broadcast(pthread_cond_t *cond) libcfa_public __THROW {	// wake all waiters; returns 0 on success
+		check_nonnull(cond);
+		notify_all(*get(cond)); return 0;			// notify_all's result is not an errno value, so it must not be returned as the status
+	} // pthread_cond_broadcast
+
+	int pthread_cond_destroy(pthread_cond_t *cond) libcfa_public __THROW {
+		check_nonnull(cond);
+		destroy(cond);								// no check is made here for threads still waiting on cond
+		return 0;
+	} // pthread_cond_destroy
+
+
+
+	//######################### Local storage #########################
+
+	int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) libcfa_public __THROW {
+		static_assert(sizeof(pthread_once_t) >= sizeof(int),"sizeof(pthread_once_t) < sizeof(int)");
+		check_nonnull(once_control);
+		check_nonnull(init_routine);
+		lock(once_lock);							// one shared lock serializes every pthread_once call, whatever the control object
+		if ( *((int *)once_control) == 0 ) {		// the control object is treated as an int flag: 0 means "not yet run"
+			init_routine();							// runs while once_lock is held
+			*((int *)once_control) = 1;				// mark as done only after init_routine returns
+		} // if
+		unlock(once_lock);
+		return 0;
+	} // pthread_once
+
+	int pthread_key_create( pthread_key_t *key, void (*destructor)( void * ) ) libcfa_public __THROW {
+		lock(key_lock);								// key_lock guards the cfa_pthread_keys table
+		for ( int i = 0; i < PTHREAD_KEYS_MAX; i += 1 ) {	// linear scan for the first free slot
+			if ( ! cfa_pthread_keys[i].in_use ) {
+				cfa_pthread_keys[i].in_use = true;
+				cfa_pthread_keys[i].destructor = destructor;
+				unlock( key_lock );
+				*key = i;							// the key handed to the caller is the slot index
+				return 0;
+			} // if
+		} // for
+		unlock(key_lock);
+		return EAGAIN;								// every slot is in use
+	}   // pthread_key_create
+
+	int pthread_key_delete( pthread_key_t key ) libcfa_public __THROW {
+		lock(key_lock);
+		if ( key >= PTHREAD_KEYS_MAX || ! cfa_pthread_keys[key].in_use ) {	// out of range or never created
+			unlock( key_lock );
+			return EINVAL;
+		} // if
+		cfa_pthread_keys[key].in_use = false;		// free the slot for reuse by pthread_key_create
+		cfa_pthread_keys[key].destructor = NULL;	// the destructor is dropped without being called on existing values
+
+		// Remove key from all threads with a value.
+		pthread_values& p;
+		Sequence(pthread_values)& head = cfa_pthread_keys[key].threads;
+		for ( SeqIter(pthread_values) iter = { head }; iter | p; ) {
+			remove(head, p);						// unlink the per-thread entry from this key's list
+			p.in_use = false;						// so pthread_getspecific returns NULL for it afterwards
+		}
+		unlock(key_lock);
+		return 0;
+	}   // pthread_key_delete
+
+	int pthread_setspecific( pthread_key_t key, const void *value ) libcfa_public __THROW {
+		// get current thread
+		cfaPthread* t = lookup(pthread_self());
+		// if current thread's pthreadData is NULL; initialize it
+		pthread_values* values;
+		if (t->pthreadData == NULL){
+			values = anew( PTHREAD_KEYS_MAX);		// one entry per possible key, allocated on the thread's first call
+			t->pthreadData = values;
+			for (int i = 0;i < PTHREAD_KEYS_MAX; i++){
+				t->pthreadData[i].in_use = false;	// every entry starts without a value
+			}   // for
+		}   else {
+			values = t->pthreadData;
+		}   // if
+		// find corresponding key and set value
+		lock(key_lock);
+		// if invalid key (the per-thread table allocated above is kept even when the key is rejected)
+		if ( key >= PTHREAD_KEYS_MAX || ! cfa_pthread_keys[key].in_use ) {
+			unlock( key_lock );
+			return EINVAL;
+		} // if
+		pthread_values &entry = values[key];
+		if ( ! entry.in_use ) {
+			entry.in_use = true;
+			add(cfa_pthread_keys[key].threads, entry);	// first value for this key: link the entry so pthread_key_delete can find it
+		} // if
+		entry.value = (void *)value;				// the const qualifier is cast away for storage
+		unlock(key_lock);
+		return 0;
+	} //pthread_setspecific
+
+	void* pthread_getspecific(pthread_key_t key) libcfa_public __THROW {
+		if (key >= PTHREAD_KEYS_MAX || ! cfa_pthread_keys[key].in_use) return NULL;	// this first validity check reads the key table without key_lock
+
+		// get current thread
+		cfaPthread* t = lookup(pthread_self());
+		if (t->pthreadData == NULL) return NULL;	// this thread has never called pthread_setspecific
+		lock(key_lock);
+		pthread_values &entry = ((pthread_values *)t->pthreadData)[key];
+		if ( ! entry.in_use ) {						// no value set for this key, or the key was deleted since
+			unlock( key_lock );
+			return NULL;
+		} // if
+		void *value = entry.value;					// read under the lock, return after releasing it
+		unlock(key_lock);
+
+		return value;
+	}   //pthread_get_specific
+
+	//######################### Parallelism #########################
+	void pthread_delete_kernel_threads_() __THROW {	// see uMain::~uMain
+		Pthread_kernel_threads& p;
+		for ( StackIter(Pthread_kernel_threads) iter = {cfa_pthreads_kernel_threads}; iter | p; ) {
+			delete(&p);								// frees each node; nothing is popped and the counter is untouched, so the stack must not be used afterwards
+		} // for
+	} // pthread_delete_kernel_threads_
+
+	int pthread_getconcurrency( void ) libcfa_public __THROW {	// XOPEN extension; exported like its pair pthread_setconcurrency
+		return cfa_pthreads_kernel_threads_zero ? 0 : cfa_pthreads_no_kernel_threads;	// 0 if the last request was level 0, otherwise the tracked count
+	} // pthread_getconcurrency
+
+	int pthread_setconcurrency( int new_level ) libcfa_public __THROW { // XOPEN extension
+		if ( new_level < 0 ) return EINVAL;
+		if ( new_level == 0 ) {
+			cfa_pthreads_kernel_threads_zero = true;	// remember set to zero, but ignore
+			return 0;					// do not do kernel thread management
+		} // exit
+		cfa_pthreads_kernel_threads_zero = false;	// written before concurrency_lock is taken
+		lock( concurrency_lock );
+		for ( ; new_level > cfa_pthreads_no_kernel_threads; cfa_pthreads_no_kernel_threads += 1 ) { // add processors ?
+			push(cfa_pthreads_kernel_threads, *new() );	// grow: allocate one node per missing level
+		} // for
+		for ( ; new_level < cfa_pthreads_no_kernel_threads; cfa_pthreads_no_kernel_threads -= 1 ) { // remove processors ?
+			delete(&pop(cfa_pthreads_kernel_threads));	// shrink: pop and free the most recently added node
+		} // for
+		unlock( concurrency_lock );
+		return 0;
+	} // pthread_setconcurrency
+
+	//######################### Signal #########################
+
+
+	 int pthread_sigmask( int /* how */, const sigset_t * /* set */, sigset_t * /* oset */ ) libcfa_public __THROW {
+		abort( "pthread_sigmask : not implemented" );	// stub: any call aborts
+		return 0;
+	 } // pthread_sigmask
+
+	int pthread_kill( pthread_t _thread __attribute__(( unused )), int sig ) libcfa_public __THROW {
+		if ( sig == 0 ) {							// signal 0 is accepted as a no-op; the thread id is not examined
+			return 0;
+		} else {
+			abort( "pthread_kill : not implemented" );	// delivering any real signal is unsupported
+		} // if
+		return 0;
+	} // pthread_kill
+
+	int pthread_sigqueue(pthread_t , int sig, const union sigval) libcfa_public __THROW {
+		abort( "pthread_sigqueue : not implemented" );	// stub: any call aborts
+		return 0;
+	} // pthread_sigqueue
+
+	//######################### Scheduling #########################
+	int pthread_detach( pthread_t threadID ) __THROW {	// NOTE(review): no libcfa_public here, unlike the neighbouring routines - confirm intended
+		abort( "pthread_detach : not implemented" );	// stub: any call aborts
+		return 0;
+	} // pthread_detach
+
+	int pthread_setschedparam( pthread_t /* thread */, int /* policy */, const struct sched_param * /* param */ ) libcfa_public __THROW {
+		abort( "pthread_setschedparam : not implemented" );	// stub: any call aborts
+		return 0;
+	} // pthread_setschedparam
+
+	int pthread_getschedparam( pthread_t /* thread */, int */* policy */, struct sched_param * /* param */ ) libcfa_public __THROW {
+		abort( "pthread_getschedparam : not implemented" );	// stub: any call aborts
+		return 0;
+	} // pthread_getschedparam
+
+	 //######################### Mutex Attr #########################
+
+	int pthread_mutexattr_init( pthread_mutexattr_t * /* attr */ ) libcfa_public __THROW {
+		return 0;									// no-op: mutex attributes carry no state in this implementation
+	} // pthread_mutexattr_init
+
+	int pthread_mutexattr_destroy( pthread_mutexattr_t * /* attr */ ) libcfa_public __THROW {
+		return 0;									// no-op
+	} // pthread_mutexattr_destroy
+
+	int pthread_mutexattr_setpshared( pthread_mutexattr_t * /* attr */, int /* pshared */ ) libcfa_public __THROW {
+		return 0;									// setting accepted and ignored
+	} // pthread_mutexattr_setpshared
+
+	int pthread_mutexattr_getpshared( const pthread_mutexattr_t * /* attr */, int * /* pshared */ ) libcfa_public __THROW {
+		return 0;									// reports success but never writes the out-parameter
+	} // pthread_mutexattr_getpshared
+
+	int pthread_mutexattr_setprotocol( pthread_mutexattr_t * /* attr */, int /* protocol */ ) libcfa_public __THROW {
+		return 0;									// setting accepted and ignored
+	} // pthread_mutexattr_setprotocol
+
+	int pthread_mutexattr_getprotocol( const pthread_mutexattr_t * /* attr */, int * /* protocol */ ) libcfa_public __THROW {
+		return 0;									// reports success but never writes the out-parameter
+	} // pthread_mutexattr_getprotocol
+
+	int pthread_mutexattr_setprioceiling( pthread_mutexattr_t * /* attr */, int /* prioceiling */ ) libcfa_public __THROW {
+		return 0;									// setting accepted and ignored
+	} // pthread_mutexattr_setprioceiling
+
+	int pthread_mutexattr_getprioceiling( const pthread_mutexattr_t * /* attr */, int * /* ceiling */ ) libcfa_public __THROW {
+		return 0;									// reports success but never writes the out-parameter
+	} // pthread_mutexattr_getprioceiling
+
+	int pthread_mutex_setprioceiling( pthread_mutex_t * /* mutex */, int /* prioceiling */, int * /* old_ceiling */ ) libcfa_public __THROW {
+		return 0;									// setting ignored; old_ceiling is never written
+	} // pthread_mutex_setprioceiling
+
+	int pthread_mutex_getprioceiling( const pthread_mutex_t * /* mutex */, int * /* ceiling */ ) libcfa_public __THROW {
+		return 0;									// reports success but never writes the out-parameter
+	} // pthread_mutex_getprioceiling
+
+	int pthread_mutexattr_gettype( __const pthread_mutexattr_t * __restrict /* __attr */, int * __restrict /* __kind */ ) libcfa_public __THROW {
+		return 0;									// reports success but never writes the out-parameter
+	} // pthread_mutexattr_gettype
+
+	int pthread_mutexattr_settype( pthread_mutexattr_t * /* __attr */, int /* __kind */ ) libcfa_public __THROW {
+		return 0;									// requested mutex kind is accepted and ignored
+	} // pthread_mutexattr_settype
+
+	//######################### Mutex #########################
+
+	int pthread_mutex_timedlock( pthread_mutex_t *__restrict /* __mutex */, __const struct timespec *__restrict /* __abstime */ ) libcfa_public __THROW {
+		abort( "pthread_mutex_timedlock" );			// unimplemented stub: any call aborts, so no value is returned
+	} // pthread_mutex_timedlock
+
+	//######################### Condition #########################
+
+	int pthread_condattr_getclock( __const pthread_condattr_t * __restrict /* __attr */, __clockid_t *__restrict /* __clock_id */ ) libcfa_public __THROW {
+		abort( "pthread_condattr_getclock" );		// unimplemented stub: any call aborts
+	} // pthread_condattr_getclock
+
+	int pthread_condattr_setclock( pthread_condattr_t * /* __attr */, __clockid_t /* __clock_id */ ) libcfa_public __THROW {
+		abort( "pthread_condattr_setclock" );		// unimplemented stub: any call aborts
+	} // pthread_condattr_setclock
+
+	//######################### Spinlock #########################
+
+	int pthread_spin_init( pthread_spinlock_t * /* __lock */, int /*__pshared */ ) libcfa_public __THROW {
+		abort( "pthread_spin_init" );				// spinlocks are unimplemented: every routine in this section aborts
+	} // pthread_spin_init
+
+	int pthread_spin_destroy( pthread_spinlock_t * /* __lock */ ) libcfa_public __THROW {
+		abort( "pthread_spin_destroy" );			// unimplemented stub
+	} // pthread_spin_destroy
+
+	int pthread_spin_lock( pthread_spinlock_t * /* __lock */ ) libcfa_public __THROW {
+		abort( "pthread_spin_lock" );				// unimplemented stub
+	} // pthread_spin_lock
+
+	int pthread_spin_trylock( pthread_spinlock_t * /* __lock */ ) libcfa_public __THROW {
+		abort( "pthread_spin_trylock" );			// unimplemented stub
+	} // pthread_spin_trylock
+
+	int pthread_spin_unlock( pthread_spinlock_t * /* __lock */ ) libcfa_public __THROW {
+		abort( "pthread_spin_unlock" );				// unimplemented stub
+	} // pthread_spin_unlock
+
+	//######################### Barrier #########################
+
+	int pthread_barrier_init( pthread_barrier_t *__restrict /* __barrier */, __const pthread_barrierattr_t *__restrict /* __attr */, unsigned int /* __count */ ) libcfa_public __THROW {
+		abort( "pthread_barrier_init" );			// barriers are unimplemented: every routine in this section aborts
+	} // pthread_barrier_init
+
+	int pthread_barrier_destroy( pthread_barrier_t * /* __barrier */ ) libcfa_public  __THROW {
+		abort( "pthread_barrier_destroy" );			// unimplemented stub
+	} // pthread_barrier_destroy
+
+	int pthread_barrier_wait( pthread_barrier_t * /* __barrier */ ) libcfa_public __THROW {
+		abort( "pthread_barrier_wait" );			// unimplemented stub
+	} // pthread_barrier_wait
+
+	int pthread_barrierattr_init( pthread_barrierattr_t * /* __attr */ ) libcfa_public __THROW {
+		abort( "pthread_barrierattr_init" );		// unimplemented stub
+	} // pthread_barrierattr_init
+
+	int pthread_barrierattr_destroy( pthread_barrierattr_t * /* __attr */ ) libcfa_public __THROW {
+		abort( "pthread_barrierattr_destroy" );		// unimplemented stub
+	} // pthread_barrierattr_destroy
+
+	int pthread_barrierattr_getpshared( __const pthread_barrierattr_t * __restrict /* __attr */, int *__restrict /* __pshared */ ) libcfa_public __THROW {
+		abort( "pthread_barrierattr_getpshared" );	// unimplemented stub
+	} // pthread_barrierattr_getpshared
+
+	int pthread_barrierattr_setpshared( pthread_barrierattr_t * /* __attr */, int /* __pshared */ ) libcfa_public __THROW {
+		abort( "pthread_barrierattr_setpshared" );	// unimplemented stub
+	} // pthread_barrierattr_setpshared
+
+	//######################### Clock #########################
+
+	int pthread_getcpuclockid( pthread_t /* __thread_id */, __clockid_t * /* __clock_id */ ) libcfa_public __THROW {
+		abort( "pthread_getcpuclockid" );			// unimplemented stub: any call aborts
+	} // pthread_getcpuclockid
+
+	// pthread_atfork()
+
+// UNIX98
+
+	//######################### Read/Write #########################
+
+	int pthread_rwlock_init( pthread_rwlock_t *__restrict /* __rwlock */, __const pthread_rwlockattr_t *__restrict /* __attr */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_init" );				// read/write locks are unimplemented: every routine in this section aborts
+	} // pthread_rwlock_init
+
+	int pthread_rwlock_destroy( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_destroy" );			// unimplemented stub
+	} // pthread_rwlock_destroy
+
+	int pthread_rwlock_rdlock( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_rdlock" );			// unimplemented stub
+	} // pthread_rwlock_rdlock
+
+	int pthread_rwlock_tryrdlock( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_tryrdlock" );		// unimplemented stub
+	} // pthread_rwlock_tryrdlock
+
+	int pthread_rwlock_wrlock( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_wrlock" );			// unimplemented stub
+	} // pthread_rwlock_wrlock
+
+	int pthread_rwlock_trywrlock( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_trywrlock" );		// unimplemented stub
+	} // pthread_rwlock_trywrlock
+
+	int pthread_rwlock_unlock( pthread_rwlock_t * /* __rwlock */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_unlock" );			// unimplemented stub
+	} // pthread_rwlock_unlock
+
+	int pthread_rwlockattr_init( pthread_rwlockattr_t * /* __attr */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_init" );			// unimplemented stub
+	} // pthread_rwlockattr_init
+
+	int pthread_rwlockattr_destroy( pthread_rwlockattr_t * /*__attr */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_destroy" );		// unimplemented stub
+	} // pthread_rwlockattr_destroy
+
+	int pthread_rwlockattr_getpshared( __const pthread_rwlockattr_t * __restrict /* __attr */, int *__restrict /* __pshared */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_getpshared" );	// unimplemented stub
+	} // pthread_rwlockattr_getpshared
+
+	int pthread_rwlockattr_setpshared( pthread_rwlockattr_t * /* __attr */, int /* __pshared */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_setpshared" );	// unimplemented stub
+	} // pthread_rwlockattr_setpshared
+
+	int pthread_rwlockattr_getkind_np( __const pthread_rwlockattr_t * /* __attr */, int * /* __pref */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_getkind_np" );	// unimplemented stub
+	} // pthread_rwlockattr_getkind_np
+
+	int pthread_rwlockattr_setkind_np( pthread_rwlockattr_t * /* __attr */, int /* __pref */ ) libcfa_public __THROW {
+		abort( "pthread_rwlockattr_setkind_np" );	// unimplemented stub
+	} // pthread_rwlockattr_setkind_np
+
+// UNIX98 + XOPEN
+
+	int pthread_rwlock_timedrdlock( pthread_rwlock_t *__restrict  /* __rwlock */, __const struct timespec *__restrict /* __abstime */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_timedrdlock" );		// unimplemented stub: any call aborts
+	} // pthread_rwlock_timedrdlock
+
+	int pthread_rwlock_timedwrlock( pthread_rwlock_t *__restrict  /* __rwlock */, __const struct timespec *__restrict /* __abstime */ ) libcfa_public __THROW {
+		abort( "pthread_rwlock_timedwrlock" );		// unimplemented stub: any call aborts
+	} // pthread_rwlock_timedwrlock
+
+// GNU
+
+	//######################### Parallelism #########################
+
+	int pthread_setaffinity_np( pthread_t /* __th */, size_t /* __cpusetsize */, __const cpu_set_t * /* __cpuset */ ) libcfa_public __THROW {
+		abort( "pthread_setaffinity_np" );			// CPU affinity is unimplemented: every routine in this section aborts
+	} // pthread_setaffinity_np
+
+	int pthread_getaffinity_np( pthread_t /* __th */, size_t /* __cpusetsize */, cpu_set_t * /* __cpuset */ ) libcfa_public __THROW {
+		abort( "pthread_getaffinity_np" );			// unimplemented stub
+	} // pthread_getaffinity_np
+
+	int pthread_attr_setaffinity_np( pthread_attr_t * /* __attr */, size_t /* __cpusetsize */, __const cpu_set_t * /* __cpuset */ ) libcfa_public __THROW {
+		abort( "pthread_attr_setaffinity_np" );		// unimplemented stub
+	} // pthread_attr_setaffinity_np
+
+	int pthread_attr_getaffinity_np( __const pthread_attr_t * /* __attr */, size_t /* __cpusetsize */, cpu_set_t * /* __cpuset */ ) libcfa_public __THROW {
+		abort( "pthread_attr_getaffinity_np" );		// unimplemented stub
+	} // pthread_attr_getaffinity_np
+
+	//######################### Cancellation #########################
+
+	void _pthread_cleanup_push_defer( struct _pthread_cleanup_buffer * /* __buffer */, void( * /* __routine */ )( void * ), void * /* __arg */ ) libcfa_public __THROW {
+		abort( "_pthread_cleanup_push_defer" );		// cancellation support is unimplemented: every routine in this section aborts
+	} // _pthread_cleanup_push_defer
+
+	void _pthread_cleanup_pop_restore( struct _pthread_cleanup_buffer * /* __buffer */, int /* __execute */ ) libcfa_public __THROW {
+		abort( "_pthread_cleanup_pop_restore" );	// unimplemented stub
+	} // _pthread_cleanup_pop_restore
+
+	int pthread_cancel( pthread_t threadID ) libcfa_public __THROW {
+		abort("pthread cancel not implemented");	// unimplemented stub; threadID is never examined
+		return 0;
+	} // pthread_cancel
+
+	int pthread_setcancelstate( int state, int *oldstate ) libcfa_public __THROW {
+		abort("pthread_setcancelstate not implemented");	// unimplemented stub; oldstate is never written
+		return 0;
+	} // pthread_setcancelstate
+
+	int pthread_setcanceltype( int type, int *oldtype ) libcfa_public __THROW {
+		abort("pthread_setcanceltype not implemented");	// unimplemented stub; oldtype is never written
+		return 0;
+	} // pthread_setcanceltype
+} // extern "C"
+
+#pragma GCC diagnostic pop
+
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/thread.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -50,4 +50,5 @@
 	random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl();
 	#if defined( __CFA_WITH_VERIFY__ )
+		executing = 0p;
 		canary = 0x0D15EA5E0D15EA5Ep;
 	#endif
@@ -177,4 +178,50 @@
 
 //-----------------------------------------------------------------------------
+bool migrate( thread$ * thrd, struct cluster & cl ) {
+	// Move thrd onto cluster cl. Returns false if thrd is already on cl, true once it has been re-registered there.
+	monitor$ * tmon = get_monitor(thrd);
+	monitor$ * __monitors[] = { tmon };
+	monitor_guard_t __guard = { __monitors, 1 };
+	// __guard is declared over thrd's own monitor and lives until the function returns
+
+	{
+		// if nothing needs to be done, return false
+		if( thrd->curr_cluster == &cl ) return false;
+
+		// are we migrating ourself?
+		const bool local = thrd == active_thread();
+
+		/* paranoid */ verify( !local || &cl != active_cluster() );
+		/* paranoid */ verify( !local || thrd->curr_cluster == active_cluster() );
+		/* paranoid */ verify( !local || thrd->curr_cluster == active_processor()->cltr );
+		/* paranoid */ verify( local || tmon->signal_stack.top );	// check top is non-null first ...
+		/* paranoid */ verify( local || tmon->signal_stack.top->owner->waiting_thread == thrd );	// ... then dereference it (same order as the exit checks)
+
+		// make sure we aren't interrupted while doing this
+		// not as important if we aren't local
+		disable_interrupts();
+
+		// actually move the thread
+		unregister( thrd->curr_cluster, *thrd );
+		thrd->curr_cluster = &cl;
+		doregister( thrd->curr_cluster, *thrd );
+
+		// restore interrupts
+		enable_interrupts();
+
+		// if this is the local thread, we are still running on the old cluster
+		if(local) yield();
+
+		/* paranoid */ verify( !local || &cl == active_cluster() );
+		/* paranoid */ verify( !local || thrd->curr_cluster == active_cluster() );
+		/* paranoid */ verify( !local || thrd->curr_cluster == active_processor()->cltr );
+		/* paranoid */ verify(  local || tmon->signal_stack.top );
+		/* paranoid */ verify(  local || tmon->signal_stack.top->owner->waiting_thread == thrd );
+
+		return true;
+	}
+}
+
+//-----------------------------------------------------------------------------
 #define GENERATOR LCG
 
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/concurrency/thread.hfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -132,4 +132,12 @@
 
 //----------
+// misc
+bool migrate( thread$ * thrd, struct cluster & cl );	// move thrd to cluster cl; defined in thread.cfa
+// Convenience overload for any type satisfying is_thread: takes the thread as a mutex parameter and forwards to the thread$ version.
+forall( T & | is_thread(T) )
+static inline bool migrate( T & mutex thrd, struct cluster & cl ) { return migrate( &(thread&)thrd, cl ); }
+
+
+//----------
 // prng
 static inline {
Index: libcfa/src/containers/array.hfa
===================================================================
--- libcfa/src/containers/array.hfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/containers/array.hfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -1,2 +1,4 @@
+#pragma once
+
 #include <assert.h>
 
@@ -18,7 +20,12 @@
     // About the choice of integral types offered as subscript overloads:
     // Intent is to cover these use cases:
+    //    a[0]                                                // i : zero_t
+    //    a[1]                                                // i : one_t
+    //    a[2]                                                // i : int
     //    float foo( ptrdiff_t i ) { return a[i]; }           // i : ptrdiff_t
+    //    float foo( size_t i ) { return a[i]; }              // i : size_t
     //    forall( [N] ) ... for( i; N ) { total += a[i]; }    // i : typeof( sizeof(42) )
     //    for( i; 5 ) { total += a[i]; }                      // i : int
+    //
     // It gets complicated by:
     // -  CFA does overloading on concrete types, like int and unsigned int, not on typedefed
@@ -27,10 +34,29 @@
     // -  Given bug of Trac #247, CFA gives sizeof expressions type unsigned long int, when it
     //    should give them type size_t.
-    //    
-    //                          gcc -m32         cfa -m32 given bug         gcc -m64
+    //
+    //                          gcc -m32         cfa -m32 given bug         gcc -m64 (and cfa)
     // ptrdiff_t                int              int                        long int
     // size_t                   unsigned int     unsigned int               unsigned long int
     // typeof( sizeof(42) )     unsigned int     unsigned long int          unsigned long int
     // int                      int              int                        int
+    //
+    // So the solution must support types {zero_t, one_t, int, unsigned int, long int, unsigned long int}
+    //
+    // The solution cannot rely on implicit conversions (e.g. just have one overload for ptrdiff_t)
+    // because assertion satisfaction requires types to match exactly.  Both higher-dimensional
+    // subscripting and operations on slices use asserted subscript operators.  The test case
+    // array-container/array-sbscr-cases covers the combinations.  Mike believes that commenting out
+    // any of the current overloads leads to one of those cases failing, either on 64- or 32-bit.
+    // Mike is open to being shown a smaller set of overloads that still passes the test.
+
+    static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, zero_t ) {	// subscript by the literal 0
+        assert( 0 < N );	// element 0 exists only in a non-empty array
+        return (Timmed &) a.strides[0];
+    }
+
+    static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, one_t ) {	// subscript by the literal 1
+        assert( 1 < N );	// element 1 exists only when there are at least two elements
+        return (Timmed &) a.strides[1];
+    }
 
     static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, int i ) {
@@ -39,4 +65,9 @@
     }
 
+    static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, int i ) {	// read-only subscript, int index
+        assert( i < N );	// upper-bound check against the array length
+        return (Timmed &) a.strides[i];	// the cast names a non-const reference, but the result is returned as const
+    }
+
     static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, unsigned int i ) {
         assert( i < N );
@@ -44,4 +75,9 @@
     }
 
+    static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, unsigned int i ) {	// read-only subscript, unsigned int index
+        assert( i < N );	// upper-bound check against the array length
+        return (Timmed &) a.strides[i];	// the cast names a non-const reference, but the result is returned as const
+    }
+
     static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, long int i ) {
         assert( i < N );
@@ -49,5 +85,15 @@
     }
 
+    static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, long int i ) {	// read-only subscript, long int index
+        assert( i < N );	// upper-bound check against the array length
+        return (Timmed &) a.strides[i];	// the cast names a non-const reference, but the result is returned as const
+    }
+
     static inline Timmed & ?[?]( arpk(N, S, Timmed, Tbase) & a, unsigned long int i ) {
+        assert( i < N );
+        return (Timmed &) a.strides[i];
+    }
+
+    static inline const Timmed & ?[?]( const arpk(N, S, Timmed, Tbase) & a, unsigned long int i ) {
         assert( i < N );
         return (Timmed &) a.strides[i];
@@ -57,4 +103,6 @@
         return N;
     }
+
+    static inline void __taglen( tag(arpk(N, S, Timmed, Tbase)), tag(N) ) {}	// empty body: exists only so an assertion can tie an arpk type to its length N
 
     // workaround #226 (and array relevance thereof demonstrated in mike102/otype-slow-ndims.cfa)
@@ -83,5 +131,5 @@
     // Make a FOREACH macro
     #define FE_0(WHAT)
-    #define FE_1(WHAT, X) WHAT(X) 
+    #define FE_1(WHAT, X) WHAT(X)
     #define FE_2(WHAT, X, ...) WHAT(X)FE_1(WHAT, __VA_ARGS__)
     #define FE_3(WHAT, X, ...) WHAT(X)FE_2(WHAT, __VA_ARGS__)
@@ -90,5 +138,5 @@
     //... repeat as needed
 
-    #define GET_MACRO(_0,_1,_2,_3,_4,_5,NAME,...) NAME 
+    #define GET_MACRO(_0,_1,_2,_3,_4,_5,NAME,...) NAME
     #define FOR_EACH(action,...) \
     GET_MACRO(_0,__VA_ARGS__,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)(action,__VA_ARGS__)
@@ -115,5 +163,5 @@
 }
 
-#else 
+#else
 
 // Workaround form.  Listing all possibilities up to 4 dims.
@@ -135,4 +183,11 @@
 
 #endif
+
+// Available for users to work around Trac #265
+// If `a[...0...]` isn't working, try `a[...ix0...]` instead.
+
+#define ix0 ((ptrdiff_t)0)
+
+
 
 //
@@ -155,5 +210,5 @@
 
 // Wrapper
-struct all_t {} all;
+extern struct all_t {} all;
 forall( [N], S & | sized(S), Te &, result &, Tbase & | { tag(result) enq_( tag(Tbase), tag(N), tag(S), tag(Te) ); } )
 static inline result & ?[?]( arpk(N, S, Te, Tbase) & this, all_t ) {
@@ -165,6 +220,24 @@
 //
 
-trait ar(A &, Tv &) {
-    Tv& ?[?]( A&, ptrdiff_t );
-    size_t ?`len( A& );
-};
+// desired:
+// trait ar(A &, Tv &, [N]) {
+//     Tv& ?[?]( A&, zero_t );
+//     Tv& ?[?]( A&, one_t  );
+//     Tv& ?[?]( A&, int    );
+//                   ...
+//     size_t ?`len( A& );
+//     void __taglen( tag(A), tag(N) );
+// };
+
+// working around N's not being accepted as arguments to traits
+
+#define ar(A, Tv, N) {                 \
+    Tv& ?[?]( A&, zero_t );            \
+    Tv& ?[?]( A&, one_t );             \
+    Tv& ?[?]( A&, int );               \
+    Tv& ?[?]( A&, unsigned int );      \
+    Tv& ?[?]( A&, long int );          \
+    Tv& ?[?]( A&, unsigned long int ); \
+    size_t ?`len( A& );                \
+    void __taglen( tag(A), tag(N) );   \
+}
Index: libcfa/src/containers/lockfree.hfa
===================================================================
--- libcfa/src/containers/lockfree.hfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ libcfa/src/containers/lockfree.hfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,250 @@
+#pragma once
+
+#include <assert.h>
+
+#include <stdint.h>
+#include <bits/defs.hfa>
+
+forall( T &) {
+	//------------------------------------------------------------
+	// Queue based on the MCS lock.
+	// It is a Multi-Producer/Single-Consumer queue: threads pushing
+	// elements must hold on to the elements they push.
+	// Not appropriate for an async message queue, for example.
+	struct mcs_queue {
+		T * volatile tail;
+	};
+
+	static inline void ?{}(mcs_queue(T) & this) { this.tail = 0p; }
+	static inline bool empty(const mcs_queue(T) & this) { return !this.tail; }
+
+ 	static inline forall(| { T * volatile & ?`next ( T * ); })
+	{
+		// Adds an element to the list
+		// Multi-Thread Safe, Lock-Free
+		T * push(mcs_queue(T) & this, T * elem) __attribute__((artificial));
+		T * push(mcs_queue(T) & this, T * elem) {
+			/* paranoid */ verify(!(elem`next));
+			// Race to add to the tail
+			T * prev = __atomic_exchange_n(&this.tail, elem, __ATOMIC_SEQ_CST);
+			// If we aren't the first, link the previous tail to the new element;
+			// a null prev means there was no previous tail, so nothing to link
+			if (prev) prev`next = elem;
+			return prev;
+		}
+
+		// Advances the head of the list, dropping the element given.
+		// Passing an element that is not the head is undefined behavior
+		// NOT Multi-Thread Safe, concurrent pushes are safe
+		T * advance(mcs_queue(T) & this, T * elem) __attribute__((artificial));
+		T * advance(mcs_queue(T) & this, T * elem) {
+			T * expected = elem;
+			// Check if this is already the last item
+			if (__atomic_compare_exchange_n(&this.tail, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) return 0p;
+
+			// If not wait for next item to show-up, filled by push
+			while (!(elem`next)) Pause();
+
+			// we need to return if the next link was empty
+			T * ret = elem`next;
+
+			// invalidate link to reset to initial state
+			elem`next = 0p;
+			return ret;
+		}
+	}
+
+	//------------------------------------------------------------
+	// Queue based on the MCS lock
+	// Extension of the above lock which supports 'blind' pops.
+	// i.e., popping a value from the head without knowing what the head is
+	// has no extra guarantees beyond the mcs_queue
+	struct mpsc_queue {
+		inline mcs_queue(T);
+		T * volatile head;
+	};
+
+	static inline void ?{}(mpsc_queue(T) & this) {
+		((mcs_queue(T)&)this){};
+		this.head = 0p;
+	}
+
+	static inline forall(| { T * volatile & ?`next ( T * ); })
+	{
+		// Added a new element to the queue
+		// Multi-Thread Safe, Lock-Free
+		T * push(mpsc_queue(T) & this, T * elem) __attribute__((artificial));
+		T * push(mpsc_queue(T) & this, T * elem) {
+			T * prev = push((mcs_queue(T)&)this, elem);
+			if (!prev) this.head = elem;
+			return prev;
+		}
+
+		// Pop an element from the queue
+		// return the element that was removed
+		// next is set to the new head of the queue
+		// NOT Multi-Thread Safe
+		T * pop(mpsc_queue(T) & this, T *& next) __attribute__((artificial));
+		T * pop(mpsc_queue(T) & this, T *& next) {
+			T * elem = this.head;
+			// If head is empty just return
+			if (!elem) return 0p;
+
+			// If there is already someone in the list, then it's easy
+			if (elem`next) {
+				this.head = next = elem`next;
+				// force memory sync
+				__atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+				// invalidate link to reset to initial state
+				elem`next = 0p;
+			}
+			// Otherwise, there might be a race where it only looks but someone is enqueuing
+			else {
+				// null out head here, because we linearize with push
+				// at the CAS in advance and therefore can write to head
+				// after that point, it could overwrite the write in push
+				this.head = 0p;
+				next = advance((mcs_queue(T)&)this, elem);
+
+				// Only write to the head if there is a next element
+				// it is the only way we can guarantee we are not overwriting
+				// a write made in push
+				if (next) this.head = next;
+			}
+
+			// return removed element
+			return elem;
+		}
+
+		// Same as previous function
+		T * pop(mpsc_queue(T) & this) {
+			T * _ = 0p;
+			return pop(this, _);
+		}
+	}
+
+	//------------------------------------------------------------
+	// Queue based on the MCS lock, with poisoning.
+	// It is a Multi-Producer/Single-Consumer queue: threads pushing
+	// elements must hold on to the elements they push.
+	// Not appropriate for an async message queue, for example.
+	// Poisoning the queue prevents any new elements from being pushed.
+	// enum(void*) poison_state {
+	// 	EMPTY = 0p,
+	// 	POISON = 1p,
+	// 	IN_PROGRESS = 1p
+	// };
+
+	struct poison_list {
+		T * volatile head;
+	};
+
+	static inline void ?{}(poison_list(T) & this) { this.head = 0p; }
+
+ 	static inline forall(| { T * volatile & ?`next ( T * ); })
+	{
+		// Adds an element to the list
+		// Multi-Thread Safe, Lock-Free
+		T * push(poison_list(T) & this, T * elem) __attribute__((artificial));
+		T * push(poison_list(T) & this, T * elem) {
+			/* paranoid */ verify(0p == (elem`next));
+			__atomic_store_n( &elem`next, (T*)1p, __ATOMIC_RELAXED );
+
+			// read the head up-front
+			T * expected = this.head;
+			for() {
+				// check if it's poisoned
+				if(expected == 1p) return 0p;
+
+				// try to CAS the elem in
+				if(__atomic_compare_exchange_n(&this.head, &expected, elem, true, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) {
+					// We managed to exchange in, we are done
+
+					// The CAS should never succeed if the list is poisoned.
+					/* paranoid */ verify( expected != 1p );
+
+					// Replace the 1p placeholder stored above with the real link to the
+					// previous head (0p if there was none); advance() spins on the placeholder
+					elem`next = expected;
+					return expected;
+				}
+			}
+		}
+
+		// Advances the head of the list, dropping the element given.
+		// Passing an element that is not the head is undefined behavior
+		// NOT Multi-Thread Safe, concurrent pushes are safe
+		T * advance(T * elem) __attribute__((artificial));
+		T * advance(T * elem) {
+			T * ret;
+
+			// Wait for next item to show-up, filled by push
+			while (1p == (ret = __atomic_load_n(&elem`next, __ATOMIC_RELAXED))) Pause();
+
+			return ret;
+		}
+
+		// Poison the queue, preventing new pushes and returning the head
+		T * poison(poison_list(T) & this) __attribute__((artificial));
+		T * poison(poison_list(T) & this) {
+			T * ret = __atomic_exchange_n( &this.head, (T*)1p, __ATOMIC_SEQ_CST );
+			/* paranoid */ verify( ret != (T*)1p );
+			return ret;
+		}
+	}
+}
+
+forall( T & )
+union Link {
+	struct {											// 32/64-bit x 2
+		T * volatile top;								// pointer to stack top
+		uintptr_t count;								// count each push
+	};
+	#if __SIZEOF_INT128__ == 16
+	__int128											// gcc, 128-bit integer
+	#else
+	uint64_t											// 64-bit integer
+	#endif // __SIZEOF_INT128__ == 16
+	atom;
+}; // Link
+
+forall( T | sized(T) | { Link(T) * ?`next( T * ); } ) {
+	struct StackLF {
+		Link(T) stack;
+	}; // StackLF
+
+	static inline {
+		void ?{}( StackLF(T) & this ) with(this) { stack.atom = 0; }
+
+		T * top( StackLF(T) & this ) with(this) { return stack.top; }
+
+		void push( StackLF(T) & this, T & n ) with(this) {
+			*( &n )`next = stack;						// atomic assignment unnecessary, or use CAA
+			for () {									// busy wait
+			  if ( __atomic_compare_exchange_n( &stack.atom, &( &n )`next->atom, (Link(T))@{ {&n, ( &n )`next->count + 1} }.atom, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) break; // attempt to update top node
+			} // for
+		} // push
+
+		T * pop( StackLF(T) & this ) with(this) {
+			Link(T) t @= stack;							// atomic assignment unnecessary, or use CAA
+			for () {									// busy wait
+			  if ( t.top == 0p ) return 0p;				// empty stack ?
+			  if ( __atomic_compare_exchange_n( &stack.atom, &t.atom, (Link(T))@{ {( t.top )`next->top, t.count} }.atom, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) return t.top; // attempt to update top node
+			} // for
+		} // pop
+
+		bool unsafe_remove( StackLF(T) & this, T * node ) with(this) {
+			Link(T) * link = &stack;
+			for() {
+				T * next = link->top;
+				if( next == node ) {
+					link->top = ( node )`next->top;
+					return true;
+				}
+				if( next == 0p ) return false;
+				link = ( next )`next;
+			}
+		}
+	} // distribution
+} // distribution
Index: libcfa/src/containers/queueLockFree.hfa
===================================================================
--- libcfa/src/containers/queueLockFree.hfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ 	(revision )
@@ -1,125 +1,0 @@
-#pragma once
-
-#include <assert.h>
-
-#include <bits/defs.hfa>
-
-forall( T &) {
-	//------------------------------------------------------------
-	// Queue based on the MCS lock
-	// It is a Multi-Producer/Single-Consumer queue threads pushing
-	// elements must hold on to the elements they push
-	// Not appropriate for an async message queue for example,
-	struct mcs_queue {
-		T * volatile tail;
-	};
-
-	static inline void ?{}(mcs_queue(T) & this) { this.tail = 0p; }
-	static inline bool empty(const mcs_queue(T) & this) { return !this.tail; }
-
- 	static inline forall(| { T * volatile & ?`next ( T * ); })
-	{
-		// Adds an element to the list
-		// Multi-Thread Safe, Lock-Free
-		T * push(mcs_queue(T) & this, T * elem) __attribute__((artificial));
-		T * push(mcs_queue(T) & this, T * elem) {
-			/* paranoid */ verify(!(elem`next));
-			// Race to add to the tail
-			T * prev = __atomic_exchange_n(&this.tail, elem, __ATOMIC_SEQ_CST);
-			// If we aren't the first, we need to tell the person before us
-			// No need to
-			if (prev) prev`next = elem;
-			return prev;
-		}
-
-		// Advances the head of the list, dropping the element given.
-		// Passing an element that is not the head is undefined behavior
-		// NOT Multi-Thread Safe, concurrent pushes are safe
-		T * advance(mcs_queue(T) & this, T * elem) __attribute__((artificial));
-		T * advance(mcs_queue(T) & this, T * elem) {
-			T * expected = elem;
-			// Check if this is already the last item
-			if (__atomic_compare_exchange_n(&this.tail, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) return 0p;
-
-			// If not wait for next item to show-up, filled by push
-			while (!(elem`next)) Pause();
-
-			// we need to return if the next link was empty
-			T * ret = elem`next;
-
-			// invalidate link to reset to initial state
-			elem`next = 0p;
-			return ret;
-		}
-	}
-
-	//------------------------------------------------------------
-	// Queue based on the MCS lock
-	// Extension of the above lock which supports 'blind' pops.
-	// i.e., popping a value from the head without knowing what the head is
-	// has no extra guarantees beyond the mcs_queue
-	struct mpsc_queue {
-		inline mcs_queue(T);
-		T * volatile head;
-	};
-
-	static inline void ?{}(mpsc_queue(T) & this) {
-		((mcs_queue(T)&)this){};
-		this.head = 0p;
-	}
-
-	static inline forall(| { T * volatile & ?`next ( T * ); })
-	{
-		// Added a new element to the queue
-		// Multi-Thread Safe, Lock-Free
-		T * push(mpsc_queue(T) & this, T * elem) __attribute__((artificial));
-		T * push(mpsc_queue(T) & this, T * elem) {
-			T * prev = push((mcs_queue(T)&)this, elem);
-			if (!prev) this.head = elem;
-			return prev;
-		}
-
-		// Pop an element from the queue
-		// return the element that was removed
-		// next is set to the new head of the queue
-		// NOT Multi-Thread Safe
-		T * pop(mpsc_queue(T) & this, T *& next) __attribute__((artificial));
-		T * pop(mpsc_queue(T) & this, T *& next) {
-			T * elem = this.head;
-			// If head is empty just return
-			if (!elem) return 0p;
-
-			// If there is already someone in the list, then it's easy
-			if (elem`next) {
-				this.head = next = elem`next;
-				// force memory sync
-				__atomic_thread_fence(__ATOMIC_SEQ_CST);
-
-				// invalidate link to reset to initial state
-				elem`next = 0p;
-			}
-			// Otherwise, there might be a race where it only looks but someone is enqueuing
-			else {
-				// null out head here, because we linearize with push
-				// at the CAS in advance and therefore can write to head
-				// after that point, it could overwrite the write in push
-				this.head = 0p;
-				next = advance((mcs_queue(T)&)this, elem);
-
-				// Only write to the head if there is a next element
-				// it is the only way we can guarantee we are not overwriting
-				// a write made in push
-				if (next) this.head = next;
-			}
-
-			// return removed element
-			return elem;
-		}
-
-		// Same as previous function
-		T * pop(mpsc_queue(T) & this) {
-			T * _ = 0p;
-			return pop(this, _);
-		}
-	}
-}
Index: libcfa/src/containers/stackLockFree.hfa
===================================================================
--- libcfa/src/containers/stackLockFree.hfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ 	(revision )
@@ -1,76 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// stackLockFree.hfa --
-//
-// Author           : Peter A. Buhr
-// Created On       : Wed May 13 20:58:58 2020
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Jan 20 20:40:03 2021
-// Update Count     : 67
-//
-
-#pragma once
-
-#include <stdint.h>
-
-forall( T & )
-union Link {
-	struct {											// 32/64-bit x 2
-		T * volatile top;								// pointer to stack top
-		uintptr_t count;								// count each push
-	};
-	#if __SIZEOF_INT128__ == 16
-	__int128											// gcc, 128-bit integer
-	#else
-	uint64_t											// 64-bit integer
-	#endif // __SIZEOF_INT128__ == 16
-	atom;
-}; // Link
-
-forall( T | sized(T) | { Link(T) * ?`next( T * ); } ) {
-	struct StackLF {
-		Link(T) stack;
-	}; // StackLF
-
-	static inline {
-		void ?{}( StackLF(T) & this ) with(this) { stack.atom = 0; }
-
-		T * top( StackLF(T) & this ) with(this) { return stack.top; }
-
-		void push( StackLF(T) & this, T & n ) with(this) {
-			*( &n )`next = stack;						// atomic assignment unnecessary, or use CAA
-			for () {									// busy wait
-			  if ( __atomic_compare_exchange_n( &stack.atom, &( &n )`next->atom, (Link(T))@{ {&n, ( &n )`next->count + 1} }.atom, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) break; // attempt to update top node
-			} // for
-		} // push
-
-		T * pop( StackLF(T) & this ) with(this) {
-			Link(T) t @= stack;							// atomic assignment unnecessary, or use CAA
-			for () {									// busy wait
-			  if ( t.top == 0p ) return 0p;				// empty stack ?
-			  if ( __atomic_compare_exchange_n( &stack.atom, &t.atom, (Link(T))@{ {( t.top )`next->top, t.count} }.atom, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) ) return t.top; // attempt to update top node
-			} // for
-		} // pop
-
-		bool unsafe_remove( StackLF(T) & this, T * node ) with(this) {
-			Link(T) * link = &stack;
-			for() {
-				T * next = link->top;
-				if( next == node ) {
-					link->top = ( node )`next->top;
-					return true;
-				}
-				if( next == 0p ) return false;
-				link = ( next )`next;
-			}
-		}
-	} // distribution
-} // distribution
-
-
-// Local Variables: //
-// tab-width: 4 //
-// End: //
Index: libcfa/src/device/cpu.cfa
===================================================================
--- libcfa/src/device/cpu.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/device/cpu.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -359,17 +359,32 @@
 		int idxs = count_cache_indexes();
 
+		// Do we actually have a cache?
+		if(idxs == 0) {
+			// if not just fake the data structure, it makes things easier.
+			cpu_info.hthrd_count = cpus_c;
+			cpu_info.llc_count = 0;
+			struct cpu_map_entry_t * entries = alloc(cpu_info.hthrd_count);
+			for(i; cpu_info.hthrd_count) {
+				entries[i].self  = i;
+				entries[i].start = 0;
+				entries[i].count = cpu_info.hthrd_count;
+				entries[i].cache = 0;
+			}
+			cpu_info.llc_map = entries;
+			return;
+		}
+
 		// Count actual cache levels
 		unsigned cache_levels = 0;
 		unsigned llc = 0;
-		if (idxs != 0) {
-			unsigned char prev = -1u;
-			void first(unsigned idx, unsigned char level, const char * map, size_t len) {
-				/* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level);
-				llc = max(llc, level);
-				prev = level;
-				cache_levels++;
-			}
-			foreach_cacheidx(0, idxs, first);
-		}
+
+		unsigned char prev = -1u;
+		void first(unsigned idx, unsigned char level, const char * map, size_t len) {
+			/* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level);
+			llc = max(llc, level);
+			prev = level;
+			cache_levels++;
+		}
+		foreach_cacheidx(0, idxs, first);
 
 		// Read in raw data
Index: libcfa/src/heap.cfa
===================================================================
--- libcfa/src/heap.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/heap.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -211,5 +211,5 @@
 #if BUCKETLOCK == SPINLOCK
 #elif BUCKETLOCK == LOCKFREE
-#include <stackLockFree.hfa>
+#include <containers/lockfree.hfa>
 #else
 	#error undefined lock type for bucket lock
@@ -505,5 +505,5 @@
 				freeLists[j].blockSize = bucketSizes[j];
 			} // for
-	
+
 			heapBuffer = 0p;
 			heapReserve = 0;
Index: libcfa/src/interpose.cfa
===================================================================
--- libcfa/src/interpose.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/interpose.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -42,8 +42,30 @@
 
 typedef void (* generic_fptr_t)(void);
+static generic_fptr_t do_interpose_symbol( void * library, const char symbol[], const char version[] ) {
+	const char * error;
+
+	union { generic_fptr_t fptr; void * ptr; } originalFunc;
+
+	#if defined( _GNU_SOURCE )
+		if ( version ) {
+			originalFunc.ptr = dlvsym( library, symbol, version );
+		} else {
+			originalFunc.ptr = dlsym( library, symbol );
+		}
+	#else
+		originalFunc.ptr = dlsym( library, symbol );
+	#endif // _GNU_SOURCE
+
+	error = dlerror();
+	if ( error ) abort( "interpose_symbol : internal error, %s\n", error );
+
+	return originalFunc.fptr;
+}
+
 static generic_fptr_t interpose_symbol( const char symbol[], const char version[] ) {
 	const char * error;
 
 	static void * library;
+	static void * pthread_library;
 	if ( ! library ) {
 		#if defined( RTLD_NEXT )
@@ -58,21 +80,18 @@
 		#endif
 	} // if
-
-	union { generic_fptr_t fptr; void * ptr; } originalFunc;
-
-	#if defined( _GNU_SOURCE )
-		if ( version ) {
-			originalFunc.ptr = dlvsym( library, symbol, version );
-		} else {
-			originalFunc.ptr = dlsym( library, symbol );
-		}
-	#else
-		originalFunc.ptr = dlsym( library, symbol );
-	#endif // _GNU_SOURCE
-
-	error = dlerror();
-	if ( error ) abort( "interpose_symbol : internal error, %s\n", error );
-
-	return originalFunc.fptr;
+	if ( ! pthread_library ) {
+		#if defined( RTLD_NEXT )
+			pthread_library = RTLD_NEXT;
+		#else
+			// missing RTLD_NEXT => must hard-code library name, assuming libpthread
+			pthread_library = dlopen( "libpthread.so", RTLD_LAZY );
+			error = dlerror();
+			if ( error ) {
+				abort( "interpose_symbol : failed to open libpthread, %s\n", error );
+			}
+		#endif
+	} // if
+
+	return do_interpose_symbol(library, symbol, version);
 }
 
@@ -97,4 +116,5 @@
 
 extern "C" {
+	void __cfathreadabi_interpose_startup( generic_fptr_t (*do_interpose_symbol)( void * library, const char symbol[], const char version[] ) ) __attribute__((weak));
 	void __cfaabi_interpose_startup( void ) {
 		const char *version = 0p;
@@ -108,4 +128,6 @@
 		INTERPOSE_LIBC( exit , version );
 #pragma GCC diagnostic pop
+
+		if(__cfathreadabi_interpose_startup) __cfathreadabi_interpose_startup( do_interpose_symbol );
 
 		// As a precaution (and necessity), errors that result in termination are delivered on a separate stack because
Index: libcfa/src/interpose_thread.cfa
===================================================================
--- libcfa/src/interpose_thread.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ libcfa/src/interpose_thread.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,137 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2022 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// interpose_thread.cfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Wed Sep 21 11:55:16 2022
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include <stdarg.h>										// va_start, va_end
+#include <stdio.h>
+#include <string.h>										// strlen
+#include <signal.h>
+#include <pthread.h>
+extern "C" {
+#include <dlfcn.h>										// dlopen, dlsym
+#include <execinfo.h>									// backtrace, messages
+}
+
+#include "bits/debug.hfa"
+#include "bits/defs.hfa"
+#include <assert.h>
+
+//=============================================================================================
+// Interposing helpers
+//=============================================================================================
+
+typedef void (* generic_fptr_t)(void);
+
+generic_fptr_t interpose_symbol(
+	generic_fptr_t (*do_interpose_symbol)( void * library, const char symbol[], const char version[] ),
+	const char symbol[],
+	const char version[]
+) libcfa_public {
+	const char * error;
+
+	static void * library;
+	if ( ! library ) {
+		#if defined( RTLD_NEXT )
+			library = RTLD_NEXT;
+		#else
+			// missing RTLD_NEXT => must hard-code library name, assuming libpthread
+			library = dlopen( "libpthread.so", RTLD_LAZY );
+			error = dlerror();
+			if ( error ) {
+				abort( "interpose_symbol : failed to open libpthread, %s\n", error );
+			}
+		#endif
+	} // if
+
+	return do_interpose_symbol(library, symbol, version);
+}
+
+#define INTERPOSE( x, ver ) __cabi_libpthread.x = (typeof(__cabi_libpthread.x))interpose_symbol( do_interpose_symbol, #x, ver )
+
+//=============================================================================================
+// Interposition Startup logic
+//=============================================================================================
+
+static struct {
+	int (*pthread_create)(pthread_t *_thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
+	int (*pthread_join)(pthread_t _thread, void **retval);
+	pthread_t (*pthread_self)(void);
+	int (*pthread_attr_init)(pthread_attr_t *attr);
+	int (*pthread_attr_destroy)(pthread_attr_t *attr);
+	int (*pthread_attr_setstack)( pthread_attr_t *attr, void *stackaddr, size_t stacksize );
+	int (*pthread_attr_getstacksize)( const pthread_attr_t *attr, size_t *stacksize );
+	int (*pthread_sigmask)(int how, const sigset_t *set, sigset_t *oldset);
+	int (*pthread_sigqueue)(pthread_t _thread, int sig, const union sigval value);
+	int (*pthread_once)(pthread_once_t *once_control, void (*init_routine)(void));
+} __cabi_libpthread;
+
+extern "C" {
+	void __cfathreadabi_interpose_startup( generic_fptr_t (*do_interpose_symbol)( void * library, const char symbol[], const char version[] ) ) libcfa_public {
+		const char *version = 0p;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdiscarded-qualifiers"
+		INTERPOSE( pthread_create , version );
+		INTERPOSE( pthread_join , version );
+		INTERPOSE( pthread_self , version );
+		INTERPOSE( pthread_attr_init , version );
+		INTERPOSE( pthread_attr_destroy , version );
+		INTERPOSE( pthread_attr_setstack , version );
+		INTERPOSE( pthread_attr_getstacksize , version );
+		INTERPOSE( pthread_sigmask , version );
+		INTERPOSE( pthread_sigqueue , version );
+		INTERPOSE( pthread_once , version );
+#pragma GCC diagnostic pop
+	}
+
+	int __cfaabi_pthread_create(pthread_t *_thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg){
+		return __cabi_libpthread.pthread_create(_thread, attr, start_routine, arg);
+	}
+
+	int __cfaabi_pthread_join(pthread_t _thread, void **retval){
+		return __cabi_libpthread.pthread_join(_thread, retval);
+	}
+
+	pthread_t __cfaabi_pthread_self(void){
+		return __cabi_libpthread.pthread_self();
+	}
+
+	int __cfaabi_pthread_attr_init(pthread_attr_t *attr){
+		return __cabi_libpthread.pthread_attr_init(attr);
+	}
+
+	int __cfaabi_pthread_attr_destroy(pthread_attr_t *attr){
+		return __cabi_libpthread.pthread_attr_destroy(attr);
+	}
+
+	int __cfaabi_pthread_attr_setstack( pthread_attr_t *attr, void *stackaddr, size_t stacksize ){
+		return __cabi_libpthread.pthread_attr_setstack(attr, stackaddr, stacksize);
+	}
+
+	int __cfaabi_pthread_attr_getstacksize( const pthread_attr_t *attr, size_t *stacksize ){ // forwards to the libpthread symbol resolved at interpose startup
+		return __cabi_libpthread.pthread_attr_getstacksize(attr, stacksize);
+	}
+
+	int __cfaabi_pthread_sigmask(int how, const sigset_t *set, sigset_t *oldset){
+		return __cabi_libpthread.pthread_sigmask(how, set, oldset);
+	}
+
+	int __cfaabi_pthread_sigqueue(pthread_t _thread, int sig, const union sigval value) {
+		return __cabi_libpthread.pthread_sigqueue(_thread, sig, value);
+	}
+
+	int __cfaabi_pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) {
+		return __cabi_libpthread.pthread_once(once_control, init_routine);
+	}
+}
Index: libcfa/src/parseargs.cfa
===================================================================
--- libcfa/src/parseargs.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/parseargs.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -50,10 +50,12 @@
 extern char ** cfa_args_envp __attribute__((weak));
 
-static void usage(char * cmd, cfa_option options[], size_t opt_count, const char * usage, FILE * out)  __attribute__ ((noreturn));
+forall([N])
+static void usage(char * cmd, const array( cfa_option, N ) & options, const char * usage, FILE * out)  __attribute__ ((noreturn));
 //-----------------------------------------------------------------------------
 // checking
-static void check_args(cfa_option options[], size_t opt_count) {
-	for(i; opt_count) {
-		for(j; opt_count) {
+forall([N])
+static void check_args( const array( cfa_option, N ) & options ) {
+	for(i; N) {
+		for(j; N) {
 			if(i == j) continue;
 
@@ -70,94 +72,101 @@
 //-----------------------------------------------------------------------------
 // Parsing args
-void parse_args( cfa_option options[], size_t opt_count, const char * usage, char ** & left ) {
-	if( 0p != &cfa_args_argc ) {
-		parse_args(cfa_args_argc, cfa_args_argv, options, opt_count, usage, left );
-	}
-	else {
-		char * temp = "";
-		parse_args(0, &temp, options, opt_count, usage, left );
-	}
-}
-
-void parse_args(
-	int argc,
-	char * argv[],
-	cfa_option options[],
-	size_t opt_count,
-	const char * usage,
-	char ** & left
-) {
-	check_args(options, opt_count);
-
-	int maxv = 'h';
-	char optstring[(opt_count * 3) + 2] = { '\0' };
-	{
-		int idx = 0;
-		for(i; opt_count) {
-			if (options[i].short_name) {
-				maxv = max(options[i].short_name, maxv);
-				optstring[idx] = options[i].short_name;
-				idx++;
-				if(    ((intptr_t)options[i].parse) != ((intptr_t)parse_settrue)
-				&& ((intptr_t)options[i].parse) != ((intptr_t)parse_setfalse) ) {
-					optstring[idx] = ':';
+forall([opt_count]) {
+	void parse_args( const array( cfa_option, opt_count ) & options, const char * usage, char ** & left ) {
+		if( 0p != &cfa_args_argc ) {
+			parse_args(cfa_args_argc, cfa_args_argv, options, usage, left );
+		}
+		else {
+			char * temp = "";
+			parse_args(0, &temp, options, usage, left );
+		}
+	}
+
+	void parse_args(
+		int argc,
+		char * argv[],
+		const array( cfa_option, opt_count ) & options,
+		const char * usage,
+		char ** & left
+	) {
+		check_args(options);
+
+		int maxv = 'h';
+		char optstring[(opt_count * 3) + 2] = { '\0' };
+		{
+			int idx = 0;
+			for(i; opt_count) {
+				if (options[i].short_name) {
+					maxv = max(options[i].short_name, maxv);
+					optstring[idx] = options[i].short_name;
+					idx++;
+					if(    ((intptr_t)options[i].parse) != ((intptr_t)parse_settrue)
+					&& ((intptr_t)options[i].parse) != ((intptr_t)parse_setfalse) ) {
+						optstring[idx] = ':';
+						idx++;
+					}
+				}
+			}
+			optstring[idx+0] = 'h';
+			optstring[idx+1] = '\0';
+		}
+
+		struct option optarr[opt_count + 2];
+		{
+			int idx = 0;
+			for(i; opt_count) {
+				if(options[i].long_name) {
+					// we don't have the mutable keyword here, which is really what we would want
+					int & val_ref = (int &)(const int &)options[i].val;
+					val_ref = (options[i].short_name != '\0') ? ((int)options[i].short_name) : ++maxv;
+
+					optarr[idx].name = options[i].long_name;
+					optarr[idx].flag = 0p;
+					optarr[idx].val  = options[i].val;
+					if(    ((intptr_t)options[i].parse) == ((intptr_t)parse_settrue)
+					|| ((intptr_t)options[i].parse) == ((intptr_t)parse_setfalse) ) {
+						optarr[idx].has_arg = no_argument;
+					} else {
+						optarr[idx].has_arg = required_argument;
+					}
 					idx++;
 				}
 			}
-		}
-		optstring[idx+0] = 'h';
-		optstring[idx+1] = '\0';
-	}
-
-	struct option optarr[opt_count + 2];
-	{
-		int idx = 0;
-		for(i; opt_count) {
-			if(options[i].long_name) {
-				options[i].val = (options[i].short_name != '\0') ? ((int)options[i].short_name) : ++maxv;
-				optarr[idx].name = options[i].long_name;
-				optarr[idx].flag = 0p;
-				optarr[idx].val  = options[i].val;
-				if(    ((intptr_t)options[i].parse) == ((intptr_t)parse_settrue)
-				    || ((intptr_t)options[i].parse) == ((intptr_t)parse_setfalse) ) {
-					optarr[idx].has_arg = no_argument;
-				} else {
-					optarr[idx].has_arg = required_argument;
-				}
-				idx++;
+			optarr[idx+0].[name, has_arg, flag, val] = ["help", no_argument, 0, 'h'];
+			optarr[idx+1].[name, has_arg, flag, val] = [0, no_argument, 0, 0];
+		}
+
+		FILE * out = stderr;
+		NEXT_ARG:
+		for() {
+			int idx = 0;
+			int opt = getopt_long(argc, argv, optstring, optarr, &idx);
+			switch(opt) {
+				case -1:
+					if(&left != 0p) left = argv + optind;
+					return;
+				case 'h':
+					out = stdout;
+				case '?':
+					usage(argv[0], options, usage, out);
+				default:
+					for(i; opt_count) {
+						if(opt == options[i].val) {
+							const char * arg = optarg ? optarg : "";
+							if( arg[0] == '=' ) { arg++; }
+							// work around for some weird bug
+							void * variable = options[i].variable;
+							bool (*parse_func)(const char *, void * ) = options[i].parse;
+							bool success = parse_func( arg, variable );
+							if(success) continue NEXT_ARG;
+
+							fprintf(out, "Argument '%s' for option %c could not be parsed\n\n", arg, (char)opt);
+							usage(argv[0], options, usage, out);
+						}
+					}
+					abort("Internal parse arg error\n");
 			}
-		}
-		optarr[idx+0].[name, has_arg, flag, val] = ["help", no_argument, 0, 'h'];
-		optarr[idx+1].[name, has_arg, flag, val] = [0, no_argument, 0, 0];
-	}
-
-	FILE * out = stderr;
-	NEXT_ARG:
-	for() {
-		int idx = 0;
-		int opt = getopt_long(argc, argv, optstring, optarr, &idx);
-		switch(opt) {
-			case -1:
-				if(&left != 0p) left = argv + optind;
-				return;
-			case 'h':
-				out = stdout;
-			case '?':
-				usage(argv[0], options, opt_count, usage, out);
-			default:
-				for(i; opt_count) {
-					if(opt == options[i].val) {
-						const char * arg = optarg ? optarg : "";
-						if( arg[0] == '=' ) { arg++; }
-						bool success = options[i].parse( arg, options[i].variable );
-						if(success) continue NEXT_ARG;
-
-						fprintf(out, "Argument '%s' for option %c could not be parsed\n\n", arg, (char)opt);
-						usage(argv[0], options, opt_count, usage, out);
-					}
-				}
-				abort("Internal parse arg error\n");
-		}
-
+
+		}
 	}
 }
@@ -222,15 +231,28 @@
 
 void print_args_usage(cfa_option options[], size_t opt_count, const char * usage, bool error)  __attribute__ ((noreturn)) {
-	usage(cfa_args_argv[0], options, opt_count, usage, error ? stderr : stdout);
+	const array( cfa_option, opt_count ) & arr = (const array( cfa_option, opt_count ) &) *options;
+	usage(cfa_args_argv[0], arr, usage, error ? stderr : stdout);
 }
 
 void print_args_usage(int , char * argv[], cfa_option options[], size_t opt_count, const char * usage, bool error)  __attribute__ ((noreturn)) {
-	usage(argv[0], options, opt_count, usage, error ? stderr : stdout);
-}
-
-static void usage(char * cmd, cfa_option options[], size_t opt_count, const char * help, FILE * out) __attribute__((noreturn)) {
+	const array( cfa_option, opt_count ) & arr = (const array( cfa_option, opt_count ) &) *options;
+	usage(argv[0], arr, usage, error ? stderr : stdout);
+}
+
+forall( [N] ) {
+	void print_args_usage( const array(cfa_option, N ) & options, const char * usage, bool error) {
+		usage(cfa_args_argv[0], options, usage, error ? stderr : stdout);
+	}
+
+	void print_args_usage(int argc, char * argv[], const array( cfa_option, N ) & options, const char * usage, bool error) {
+		usage(argv[0], options, usage, error ? stderr : stdout);
+	}
+}
+
+forall([N])
+static void usage(char * cmd, const array( cfa_option, N ) & options, const char * help, FILE * out) __attribute__((noreturn)) {
 	int width = 0;
 	{
-		for(i; opt_count) {
+		for(i; N) {
 			if(options[i].long_name) {
 				int w = strlen(options[i].long_name);
@@ -251,5 +273,5 @@
 	fprintf(out, "Usage:\n  %s %s\n", cmd, help);
 
-	for(i; opt_count) {
+	for(i; N) {
 		printopt(out, width, max_width, options[i].short_name, options[i].long_name, options[i].help);
 	}
Index: libcfa/src/parseargs.hfa
===================================================================
--- libcfa/src/parseargs.hfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ libcfa/src/parseargs.hfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -16,11 +16,13 @@
 #pragma once
 
+#include <array.hfa>
+
 struct cfa_option {
-      int val; // reserved
-      char short_name;
-      const char * long_name;
-      const char * help;
-      void * variable;
-      bool (*parse)(const char *, void * );
+	int val; // reserved
+	char short_name;
+	const char * long_name;
+	const char * help;
+	void * variable;
+	bool (*parse)(const char *, void * );
 };
 
@@ -31,20 +33,20 @@
 forall(T & | { bool parse(const char *, T & ); })
 static inline void ?{}( cfa_option & this, char short_name, const char * long_name, const char * help, T & variable ) {
-      this.val        = 0;
-      this.short_name = short_name;
-      this.long_name  = long_name;
-      this.help       = help;
-      this.variable   = (void*)&variable;
-      this.parse      = (bool (*)(const char *, void * ))parse;
+	this.val        = 0;
+	this.short_name = short_name;
+	this.long_name  = long_name;
+	this.help       = help;
+	this.variable   = (void*)&variable;
+	this.parse      = (bool (*)(const char *, void * ))parse;
 }
 
 forall(T &)
 static inline void ?{}( cfa_option & this, char short_name, const char * long_name, const char * help, T & variable, bool (*parse)(const char *, T & )) {
-      this.val        = 0;
-      this.short_name = short_name;
-      this.long_name  = long_name;
-      this.help       = help;
-      this.variable   = (void*)&variable;
-      this.parse      = (bool (*)(const char *, void * ))parse;
+	this.val        = 0;
+	this.short_name = short_name;
+	this.long_name  = long_name;
+	this.help       = help;
+	this.variable   = (void*)&variable;
+	this.parse      = (bool (*)(const char *, void * ))parse;
 }
 
@@ -52,6 +54,16 @@
 void parse_args( int argc, char * argv[], cfa_option options[], size_t opt_count, const char * usage, char ** & left );
 
+forall( [N] ) {
+	void parse_args( const array( cfa_option, N ) & options, const char * usage, char ** & left );
+	void parse_args( int argc, char * argv[], const array( cfa_option, N ) & options, const char * usage, char ** & left );
+}
+
 void print_args_usage(cfa_option options[], size_t opt_count, const char * usage, bool error)  __attribute__ ((noreturn));
 void print_args_usage(int argc, char * argv[], cfa_option options[], size_t opt_count, const char * usage, bool error)  __attribute__ ((noreturn));
+
+forall( [N] ) {
+	void print_args_usage( const array(cfa_option, N ) & options, const char * usage, bool error)  __attribute__ ((noreturn));
+	void print_args_usage(int argc, char * argv[], const array( cfa_option, N ) & options, const char * usage, bool error)  __attribute__ ((noreturn));
+}
 
 bool parse_yesno    (const char *, bool & );
Index: src/AST/Type.cpp
===================================================================
--- src/AST/Type.cpp	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/AST/Type.cpp	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -147,4 +147,10 @@
 // --- TypeInstType
 
+bool TypeInstType::operator==( const TypeInstType & other ) const {
+	return base == other.base
+		&& formal_usage == other.formal_usage
+		&& expr_id == other.expr_id;
+}
+
 TypeInstType::TypeInstType( const TypeDecl * b,
 	CV::Qualifiers q, std::vector<ptr<Attribute>> && as )
@@ -157,4 +163,33 @@
 
 bool TypeInstType::isComplete() const { return base->sized; }
+
+std::string TypeInstType::TypeEnvKey::typeString() const {
+	return std::string("_") + std::to_string(formal_usage)
+		+ "_" + std::to_string(expr_id) + "_" + base->name;
+}
+
+bool TypeInstType::TypeEnvKey::operator==(
+		const TypeInstType::TypeEnvKey & other ) const {
+	return base == other.base
+		&& formal_usage == other.formal_usage
+		&& expr_id == other.expr_id;
+}
+
+bool TypeInstType::TypeEnvKey::operator<(
+		const TypeInstType::TypeEnvKey & other ) const {
+	// TypeEnvKey ordering is an arbitrary total ordering.
+	// It doesn't mean anything but allows for a sorting.
+	if ( base < other.base ) {
+		return true;
+	} else if ( other.base < base ) {
+		return false;
+	} else if ( formal_usage < other.formal_usage ) {
+		return true;
+	} else if ( other.formal_usage < formal_usage ) {
+		return false;
+	} else {
+		return expr_id < other.expr_id;
+	}
+}
 
 // --- TupleType
Index: src/AST/Type.hpp
===================================================================
--- src/AST/Type.hpp	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/AST/Type.hpp	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -408,11 +408,14 @@
 
 		TypeEnvKey() = default;
-		TypeEnvKey(const TypeDecl * base, int formal_usage = 0, int expr_id = 0): base(base), formal_usage(formal_usage), expr_id(expr_id) {}
-		TypeEnvKey(const TypeInstType & inst): base(inst.base), formal_usage(inst.formal_usage), expr_id(inst.expr_id) {}
-		std::string typeString() const { return std::string("_") + std::to_string(formal_usage) + "_" + std::to_string(expr_id) + "_" + base->name; }
-		bool operator==(const TypeEnvKey & other) const { return base == other.base && formal_usage == other.formal_usage && expr_id == other.expr_id; }
+		TypeEnvKey(const TypeDecl * base, int formal_usage = 0, int expr_id = 0)
+		: base(base), formal_usage(formal_usage), expr_id(expr_id) {}
+		TypeEnvKey(const TypeInstType & inst)
+		: base(inst.base), formal_usage(inst.formal_usage), expr_id(inst.expr_id) {}
+		std::string typeString() const;
+		bool operator==(const TypeEnvKey & other) const;
+		bool operator<(const TypeEnvKey & other) const;
 	};
 
-	bool operator==(const TypeInstType & other) const { return base == other.base && formal_usage == other.formal_usage && expr_id == other.expr_id; }
+	bool operator==(const TypeInstType & other) const;
 
 	TypeInstType(
Index: src/AST/Vector.hpp
===================================================================
--- src/AST/Vector.hpp	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ src/AST/Vector.hpp	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,34 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// Vector.hpp -- Short hand for vector of ast pointers.
+//
+// Author           : Andrew Beach
+// Created On       : Thu Oct 20  9:46:00 2022
+// Last Modified By : Andrew Beach
+// Last Modified On : Thu Oct 20 10:16:00 2022
+// Update Count     : 0
+//
+
+#pragma once
+
+#include <vector>
+
+#include "AST/Node.hpp"
+
+namespace ast {
+
+/// Short hand for a vector of ast::ptr types.
+template<typename T, typename Alloc = std::allocator<ptr<T>> >
+using vector = std::vector<ptr<T>, Alloc>;
+
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// mode: c++ //
+// compile-command: "make install" //
+// End: //
Index: src/AST/module.mk
===================================================================
--- src/AST/module.mk	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/AST/module.mk	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -67,4 +67,5 @@
 	AST/Util.cpp \
 	AST/Util.hpp \
+	AST/Vector.hpp \
 	AST/Visitor.hpp
 
Index: src/GenPoly/GenPoly.cc
===================================================================
--- src/GenPoly/GenPoly.cc	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/GenPoly/GenPoly.cc	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -10,6 +10,6 @@
 // Created On       : Mon May 18 07:44:20 2015
 // Last Modified By : Andrew Beach
-// Last Modified On : Fri Oct  7 15:25:00 2022
-// Update Count     : 16
+// Last Modified On : Mon Oct 24 15:19:00 2022
+// Update Count     : 17
 //
 
@@ -194,5 +194,5 @@
 
 	if ( auto inst = dynamic_cast< const ast::TypeInstType * >( type ) ) {
-		if ( typeVars.find( inst->typeString() ) != typeVars.end() ) return type;
+		if ( typeVars.find( *inst ) != typeVars.end() ) return type;
 	} else if ( auto array = dynamic_cast< const ast::ArrayType * >( type ) ) {
 		return isPolyType( array->base, subst );
@@ -227,5 +227,5 @@
 
 	if ( auto inst = dynamic_cast<ast::TypeInstType const *>( type ) ) {
-		auto var = typeVars.find( inst->name );
+		auto var = typeVars.find( *inst );
 		if ( var != typeVars.end() && var->second.isComplete ) {
 
@@ -784,5 +784,5 @@
 
 void addToTypeVarMap( const ast::TypeInstType * type, TypeVarMap & typeVars ) {
-	typeVars.insert( type->typeString(), ast::TypeDecl::Data( type->base ) );
+	typeVars.insert( *type, ast::TypeDecl::Data( type->base ) );
 }
 
@@ -816,11 +816,4 @@
 	}
 
-void printTypeVarMap( std::ostream &os, const TypeVarMap & typeVars ) {
-	for ( auto const & pair : typeVars ) {
-		os << pair.first << " (" << pair.second << ") ";
-	} // for
-	os << std::endl;
-}
-
 } // namespace GenPoly
 
Index: src/GenPoly/GenPoly.h
===================================================================
--- src/GenPoly/GenPoly.h	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/GenPoly/GenPoly.h	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -10,6 +10,6 @@
 // Created On       : Mon May 18 07:44:20 2015
 // Last Modified By : Andrew Beach
-// Last Modified On : Fri Oct  7 15:06:00 2022
-// Update Count     : 9
+// Last Modified On : Mon Oct 24 15:18:00 2022
+// Update Count     : 11
 //
 
@@ -22,4 +22,5 @@
 #include "AST/Decl.hpp"           // for TypeDecl::Data
 #include "AST/Fwd.hpp"            // for ApplicationExpr, BaseInstType, Func...
+#include "AST/Type.hpp"           // for TypeInstType::TypeEnvKey
 #include "SymTab/Mangler.h"       // for Mangler
 #include "SynTree/Declaration.h"  // for TypeDecl::Data, AggregateDecl, Type...
@@ -28,10 +29,10 @@
 namespace GenPoly {
 
-	// TODO Via some tricks this works for ast::TypeDecl::Data as well.
 	typedef ErasableScopedMap< std::string, TypeDecl::Data > TyVarMap;
-	using TypeVarMap = ErasableScopedMap< std::string, ast::TypeDecl::Data >;
+	using TypeVarMap = ErasableScopedMap< ast::TypeInstType::TypeEnvKey, ast::TypeDecl::Data >;
 
 	/// Replaces a TypeInstType by its referrent in the environment, if applicable
 	Type* replaceTypeInst( Type* type, const TypeSubstitution* env );
+	const ast::Type * replaceTypeInst( const ast::Type *, const ast::TypeSubstitution * );
 
 	/// returns polymorphic type if is polymorphic type, NULL otherwise; will look up substitution in env if provided
@@ -53,4 +54,5 @@
 	/// true iff function has dynamic-layout return type under the type variable map generated from its forall-parameters
 	ReferenceToType *isDynRet( FunctionType *function );
+	const ast::BaseInstType *isDynRet( const ast::FunctionType * func );
 
 	/// A function needs an adapter if it returns a dynamic-layout value or if any of its parameters have dynamic-layout type
@@ -112,5 +114,4 @@
 	/// Prints type variable map
 	void printTyVarMap( std::ostream &os, const TyVarMap &tyVarMap );
-	void printTypeVarMap( std::ostream &os, const TypeVarMap & typeVars );
 
 	/// Gets the mangled name of this type; alias for SymTab::Mangler::mangleType().
@@ -128,4 +129,7 @@
 	/// Gets the name of the layout function for a given aggregate type, given its declaration
 	inline std::string layoutofName( AggregateDecl *decl ) { return std::string( "_layoutof_" ) + decl->get_name(); }
+	inline std::string layoutofName( ast::AggregateDecl const * decl ) {
+		return std::string( "_layoutof_" ) + decl->name;
+	}
 
 } // namespace GenPoly
Index: src/GenPoly/InstantiateGenericNew.cpp
===================================================================
--- src/GenPoly/InstantiateGenericNew.cpp	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/GenPoly/InstantiateGenericNew.cpp	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -26,4 +26,5 @@
 #include "AST/Pass.hpp"                // for Pass, WithGuard, WithShortCi...
 #include "AST/TranslationUnit.hpp"     // for TranslationUnit
+#include "AST/Vector.hpp"              // for vector
 #include "CodeGen/OperatorTable.h"     // for isAssignment
 #include "Common/ScopedMap.h"          // for ScopedMap
@@ -39,17 +40,17 @@
 // Utilities:
 
-using type_vector = std::vector< ast::ptr< ast::TypeExpr > >;
+using type_vector = ast::vector< ast::TypeExpr >;
 
 /// Abstracts type equality for a list of parameter types.
 struct TypeList {
 	TypeList() : params() {}
-	TypeList( std::vector< ast::ptr< ast::Type > > const & params ) :
+	TypeList( ast::vector< ast::Type > const & params ) :
 		params( params ) {}
-	TypeList( std::vector< ast::ptr< ast::Type > > && params ) :
+	TypeList( ast::vector< ast::Type > && params ) :
 		params( std::move( params ) ) {}
 	TypeList( TypeList const & that ) : params( that.params ) {}
 	TypeList( TypeList && that ) : params( std::move( that.params ) ) {}
 
-	TypeList( std::vector< ast::ptr< ast::TypeExpr > > const & exprs ) :
+	TypeList( ast::vector< ast::TypeExpr > const & exprs ) :
 			params() {
 		for ( auto expr : exprs ) {
@@ -82,5 +83,5 @@
 	}
 
-	std::vector<ast::ptr<ast::Type>> params;
+	ast::vector<ast::Type> params;
 };
 
@@ -103,5 +104,6 @@
 	/// returns null if no such value exists.
 	ast::AggregateDecl const * lookup(
-			ast::AggregateDecl const * key, type_vector const & params ) const {
+			ast::AggregateDecl const * key,
+			type_vector const & params ) const {
 		// This type repackaging is used for the helpers.
 		ast::ptr<ast::AggregateDecl> ptr = key;
@@ -150,5 +152,5 @@
 }
 
-bool isDtypeStatic( std::vector<ast::ptr<ast::TypeDecl>> const & baseParams ) {
+bool isDtypeStatic( ast::vector<ast::TypeDecl> const & baseParams ) {
 	return std::all_of( baseParams.begin(), baseParams.end(),
 		[]( ast::TypeDecl const * td ){ return !td->isComplete(); }
@@ -161,7 +163,7 @@
 /// least one parameter type, and dynamic if there is no concrete instantiation.
 GenericType makeSubstitutions(
-		std::vector<ast::ptr<ast::TypeExpr>> & out,
-		std::vector<ast::ptr<ast::TypeDecl>> const & baseParams,
-		std::vector<ast::ptr<ast::Expr>> const & params ) {
+		ast::vector<ast::TypeExpr> & out,
+		ast::vector<ast::TypeDecl> const & baseParams,
+		ast::vector<ast::Expr> const & params ) {
 	GenericType gt = GenericType::dtypeStatic;
 
@@ -214,9 +216,9 @@
 /// Substitutes types of members according to baseParams => typeSubs,
 /// returning the result in a new vector.
-std::vector<ast::ptr<ast::Decl>> substituteMembers(
-		std::vector<ast::ptr<ast::Decl>> const & members,
-		std::vector<ast::ptr<ast::TypeDecl>> const & baseParams,
-		std::vector<ast::ptr<ast::TypeExpr>> const & typeSubs ) {
-	std::vector<ast::ptr<ast::Decl>> out;
+ast::vector<ast::Decl> substituteMembers(
+		ast::vector<ast::Decl> const & members,
+		ast::vector<ast::TypeDecl> const & baseParams,
+		ast::vector<ast::TypeExpr> const & typeSubs ) {
+	ast::vector<ast::Decl> out;
 	ast::TypeSubstitution subs( baseParams, typeSubs );
 	for ( ast::ptr<ast::Decl> const & member : members ) {
@@ -235,7 +237,7 @@
 /// modifying them in-place.
 void substituteMembersHere(
-		std::vector<ast::ptr<ast::Decl>> & members,
-		std::vector<ast::ptr<ast::TypeDecl>> const & baseParams,
-		std::vector<ast::ptr<ast::TypeExpr>> const & typeSubs ) {
+		ast::vector<ast::Decl> & members,
+		ast::vector<ast::TypeDecl> const & baseParams,
+		ast::vector<ast::TypeExpr> const & typeSubs ) {
 	ast::TypeSubstitution subs( baseParams, typeSubs );
 	for ( ast::ptr<ast::Decl> & member : members ) {
@@ -285,5 +287,5 @@
 
 	ast::Expr const * fixMemberExpr(
-		std::vector<ast::ptr<ast::TypeDecl>> const & baseParams,
+		ast::vector<ast::TypeDecl> const & baseParams,
 		ast::MemberExpr const * memberExpr );
 
@@ -349,5 +351,5 @@
 
 ast::Expr const * FixDtypeStatic::fixMemberExpr(
-		std::vector<ast::ptr<ast::TypeDecl>> const & baseParams,
+		ast::vector<ast::TypeDecl> const & baseParams,
 		ast::MemberExpr const * memberExpr ) {
 	// Need to cast dtype-static member expressions to their actual type
@@ -461,5 +463,5 @@
 		type_vector const & typeSubs, ast::UnionDecl const * decl );
 
-	void replaceParametersWithConcrete( std::vector<ast::ptr<ast::Expr>> & params );
+	void replaceParametersWithConcrete( ast::vector<ast::Expr> & params );
 	ast::Type const * replaceWithConcrete( ast::Type const * type, bool doClone );
 
@@ -470,6 +472,6 @@
 	/// marks it as stripped.
 	void stripDtypeParams( ast::AggregateDecl * base,
-		std::vector<ast::ptr<ast::TypeDecl>> & baseParams,
-		std::vector<ast::ptr<ast::TypeExpr>> const & typeSubs );
+		ast::vector<ast::TypeDecl> & baseParams,
+		ast::vector<ast::TypeExpr> const & typeSubs );
 };
 
@@ -511,5 +513,5 @@
 	// and put substitutions in typeSubs.
 	assertf( inst->base, "Base data-type has parameters." );
-	std::vector<ast::ptr<ast::TypeExpr>> typeSubs;
+	ast::vector<ast::TypeExpr> typeSubs;
 	GenericType gt = makeSubstitutions( typeSubs, inst->base->params, inst->params );
 	switch ( gt ) {
@@ -570,5 +572,5 @@
 		ast::AggregateDecl const * aggr =
 			expr->aggregate->result.strict_as<ast::BaseInstType>()->aggr();
-		std::vector<ast::ptr<ast::Decl>> const & members = aggr->members;
+		ast::vector<ast::Decl> const & members = aggr->members;
 		auto it = std::find( members.begin(), members.end(), expr->member );
 		memberIndex = std::distance( members.begin(), it );
@@ -643,5 +645,5 @@
 
 void GenericInstantiator::replaceParametersWithConcrete(
-		std::vector<ast::ptr<ast::Expr>> & params ) {
+		ast::vector<ast::Expr> & params ) {
 	for ( ast::ptr<ast::Expr> & param : params ) {
 		auto paramType = param.as<ast::TypeExpr>();
@@ -673,6 +675,6 @@
 void GenericInstantiator::stripDtypeParams(
 		ast::AggregateDecl * base,
-		std::vector<ast::ptr<ast::TypeDecl>> & baseParams,
-		std::vector<ast::ptr<ast::TypeExpr>> const & typeSubs ) {
+		ast::vector<ast::TypeDecl> & baseParams,
+		ast::vector<ast::TypeExpr> const & typeSubs ) {
 	substituteMembersHere( base->members, baseParams, typeSubs );
 
Index: src/GenPoly/ScrubTyVars.cc
===================================================================
--- src/GenPoly/ScrubTyVars.cc	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/GenPoly/ScrubTyVars.cc	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -20,5 +20,5 @@
 #include "GenPoly/ErasableScopedMap.h"  // for ErasableScopedMap<>::const_it...
 #include "ScrubTyVars.h"
-#include "SymTab/Mangler.h"             // for mangle, typeMode
+#include "SymTab/Mangler.h"             // for mangleType
 #include "SynTree/Declaration.h"        // for TypeDecl, TypeDecl::Data, Typ...
 #include "SynTree/Expression.h"         // for Expression (ptr only), NameExpr
@@ -195,5 +195,5 @@
 	}
 
-	auto typeVar = typeVars->find( type->name );
+	auto typeVar = typeVars->find( *type );
 	if ( typeVar == typeVars->end() ) {
 		return type;
@@ -227,5 +227,5 @@
 	if ( dynType ) {
 		return new ast::NameExpr( expr->location,
-			sizeofName( Mangle::mangle( dynType, Mangle::typeMode() ) ) );
+			sizeofName( Mangle::mangleType( dynType ) ) );
 	} else {
 		return expr;
@@ -237,5 +237,5 @@
 	if ( dynType ) {
 		return new ast::NameExpr( expr->location,
-			alignofName( Mangle::mangle( dynType, Mangle::typeMode() ) ) );
+			alignofName( Mangle::mangleType( dynType ) ) );
 	} else {
 		return expr;
Index: src/Parser/ParseNode.h
===================================================================
--- src/Parser/ParseNode.h	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/Parser/ParseNode.h	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -10,6 +10,6 @@
 // Created On       : Sat May 16 13:28:16 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Oct 18 14:15:37 2022
-// Update Count     : 936
+// Last Modified On : Tue Oct 18 16:22:15 2022
+// Update Count     : 937
 //
 
@@ -468,5 +468,5 @@
 		cur = dynamic_cast< const NodeType * >( temp );	// should not return nullptr
 		if ( ! cur && temp ) {							// non-homogeneous nodes ?
-			SemanticError( cur->location, "internal error, non-homogeneous nodes founds in buildList processing." );
+			SemanticError( temp->location, "internal error, non-homogeneous nodes founds in buildList processing." );
 		} // if
 	} // while
Index: src/ResolvExpr/SatisfyAssertions.cpp
===================================================================
--- src/ResolvExpr/SatisfyAssertions.cpp	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/ResolvExpr/SatisfyAssertions.cpp	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -268,5 +268,5 @@
 		ast::ptr< ast::Type > resType = cand.expr->result;
 		cand.env.apply( resType );
-		return Mangle::mangle( resType, Mangle::typeMode() );
+		return Mangle::mangleType( resType );
 	}
 
Index: src/SymTab/Mangler.cc
===================================================================
--- src/SymTab/Mangler.cc	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/SymTab/Mangler.cc	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -9,7 +9,7 @@
 // Author           : Richard C. Bilson
 // Created On       : Sun May 17 21:40:29 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Jan 11 21:56:06 2021
-// Update Count     : 74
+// Last Modified By : Andrew Beach
+// Last Modified On : Fri Oct 21 16:18:00 2022
+// Update Count     : 75
 //
 #include "Mangler.h"
@@ -418,5 +418,6 @@
 			void postvisit( const ast::QualifiedType * qualType );
 
-			std::string get_mangleName() { return mangleName; }
+			/// The result is the current constructed mangled name.
+			std::string result() const { return mangleName; }
 		  private:
 			std::string mangleName;         ///< Mangled name being constructed
@@ -444,9 +445,6 @@
 	} // namespace
 
-
 	std::string mangle( const ast::Node * decl, Mangle::Mode mode ) {
-		ast::Pass<Mangler_new> mangler( mode );
-		maybeAccept( decl, mangler );
-		return mangler.core.get_mangleName();
+		return ast::Pass<Mangler_new>::read( decl, mode );
 	}
 
@@ -689,8 +687,7 @@
 					} // for
 					for ( auto & assert : ptype->assertions ) {
-						ast::Pass<Mangler_new> sub_mangler(
-							mangleOverridable, typeMode, mangleGenericParams, nextVarNum, varNums );
-						assert->var->accept( sub_mangler );
-						assertionNames.push_back( sub_mangler.core.get_mangleName() );
+						assertionNames.push_back( ast::Pass<Mangler_new>::read(
+							assert->var.get(),
+							mangleOverridable, typeMode, mangleGenericParams, nextVarNum, varNums ) );
 						acount++;
 					} // for
Index: src/SymTab/Mangler.h
===================================================================
--- src/SymTab/Mangler.h	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/SymTab/Mangler.h	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -9,7 +9,7 @@
 // Author           : Richard C. Bilson
 // Created On       : Sun May 17 21:44:03 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Jul 22 09:45:30 2017
-// Update Count     : 15
+// Last Modified By : Andrew Beach
+// Last Modified On : Thu Oct 27 11:58:00 2022
+// Update Count     : 16
 //
 
@@ -22,5 +22,4 @@
 
 #include "AST/Bitfield.hpp"
-#include "AST/Fwd.hpp"
 #include "SynTree/SynTree.h"  // for Types
 #include "SynTree/Visitor.h"  // for Visitor, maybeAccept
@@ -33,4 +32,7 @@
 // * Currently name compression is not implemented.
 
+namespace ast {
+	class Node;
+}
 namespace ResolvExpr {
 	class TypeEnvironment;
@@ -101,8 +103,11 @@
 	using Mode = bitfield<mangle_flags>;
 
-	static inline Mode typeMode() { return NoOverrideable | Type; }
+	/// Mangle declaration name.
+	std::string mangle( const ast::Node * decl, Mode mode = {} );
 
-	/// Mangle declaration name
-	std::string mangle( const ast::Node * decl, Mode mode = {} );
+	/// Most common mangle configuration for types.
+	static inline std::string mangleType( const ast::Node * type ) {
+		return mangle( type, { NoOverrideable | Type } );
+	}
 
 	namespace Encoding {
Index: src/SynTree/AddressExpr.cc
===================================================================
--- src/SynTree/AddressExpr.cc	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/SynTree/AddressExpr.cc	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -50,4 +50,5 @@
 				set_result( addrType( refType->base ) );
 			} else {
+				if(!arg->result->location.isSet()) arg->result->location = arg->location;
 				SemanticError( arg->result, "Attempt to take address of non-lvalue expression: " );
 			} // if
Index: src/Virtual/ExpandCasts.cc
===================================================================
--- src/Virtual/ExpandCasts.cc	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ src/Virtual/ExpandCasts.cc	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -295,6 +295,5 @@
 	// returns the previous declaration for error messages.
 	ast::ObjectDecl const * insert( ast::ObjectDecl const * typeIdDecl ) {
-		std::string const & mangledName =
-				Mangle::mangle( typeIdDecl->type, Mangle::typeMode() );
+		std::string mangledName = Mangle::mangleType( typeIdDecl->type );
 		ast::ObjectDecl const *& value = instances[ mangledName ];
 		if ( value ) {
@@ -310,6 +309,5 @@
 
 	ast::ObjectDecl const * lookup( ast::Type const * typeIdType ) {
-		std::string const & mangledName =
-				Mangle::mangle( typeIdType, Mangle::typeMode() );
+		std::string mangledName = Mangle::mangleType( typeIdType );
 		auto const it = instances.find( mangledName );
 		return ( instances.end() == it ) ? nullptr : it->second;
Index: tests/array-container/.expect/array-sbscr-types.txt
===================================================================
--- tests/array-container/.expect/array-sbscr-types.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/array-container/.expect/array-sbscr-types.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,162 @@
+Simple array
+
+100.3
+100.3
+
+100.0
+100.1
+100.2
+100.3
+100.4
+
+100.0
+100.1
+100.2
+100.3
+
+Via trait
+
+100.3
+100.3
+
+100.0
+100.1
+100.2
+100.3
+100.4
+
+100.0
+100.1
+100.2
+100.3
+
+Simple array, multidim
+
+3.3
+3.3
+3.3
+3.3
+
+0.3
+1.3
+2.3
+3.3
+4.3
+
+0.3
+1.3
+2.3
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+Via trait, multidim
+
+3.3
+3.3
+3.3
+3.3
+
+0.3
+1.3
+2.3
+3.3
+4.3
+
+0.3
+1.3
+2.3
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+Transposed, Via trait, multidim
+
+3.3
+3.3
+3.3
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+3.0
+3.1
+3.2
+3.3
+
+0.3
+1.3
+2.3
+3.3
+4.3
+
+0.3
+1.3
+2.3
+3.3
+
+Slice giving Simple array
+
+2.3
+2.3
+
+2.0
+2.1
+2.2
+2.3
+
+2.0
+2.1
+2.2
+2.3
+
+Same slice Via trait
+
+2.3
+2.3
+
+2.0
+2.1
+2.2
+2.3
+
+2.0
+2.1
+2.2
+2.3
+
+Strided slice Via trait
+
+3.2
+3.2
+
+0.2
+1.2
+2.2
+3.2
+4.2
+
+0.2
+1.2
+2.2
+3.2
+
Index: tests/array-container/array-basic.cfa
===================================================================
--- tests/array-container/array-basic.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ tests/array-container/array-basic.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -78,8 +78,8 @@
 }
 
-forall( A & | ar(A, float) )
+forall( [N], A & | ar(A, float, N) )
 float total1d_hi( A & a ) {
     float total = 0.0f;
-    for (i; a`len)
+    for (i; N)
         total += a[i];
     return total;
Index: tests/array-container/array-sbscr-types.cfa
===================================================================
--- tests/array-container/array-sbscr-types.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/array-container/array-sbscr-types.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,135 @@
+#include <containers/array.hfa>
+
+// Shows support for many required ways a user can index into a new array.
+//
+// A successful run of this test on 32 bit is necessary before concluding
+// that a relevant change has good quality, even though the test has no
+// differentiated 64/32-bit versions.
+//
+// Repetition, within this test, between indexing directly into an `array(...)`
+// and indexing into a `A`, as in `forall(A...|ar(A...))`, represents indexing
+// into a (statically programmer-known) contiguous view, and a (potentially)
+// noncontiguous view, respectively.  Users obtain noncontiguous views by
+// slicing or transposing higher-dimensional arrays.  The limited uses of
+// `a[..., all, ...]` within this test create such situations.  Working via
+// the `ar` trait is the first of two ways that users depend on the array
+// implementation tunneling subscript operators through the CFA assertion
+// system.
+//
+// This test uses the `a[i,j]` form for subscripting higher-dimensional arrays,
+// which is the "new" form, compared with the C-style `a[i][j]` form.  The
+// "new" subscripting form is the second of two ways that users depend on the
+// array implementation tunneling subscript operators through the CFA
+// assertion system.
+//
+// This test covers types and syntactic forms that can convey a numeric value
+// to `a[-]` or `a[-,-,-]`.  The array-md-sbscr-cases test covers combinations
+// of `a[i][j,k]` vs `a[i,j,k]` and `a[all,3][42]` vs `a[42,3]`, though
+// generally using ptrdiff_t-typed variables to convey numeric values.
+
+
+#define show( expr ) printf( "%.1f\n", expr )  // print expr with one decimal place, one value per line
+// Shared function body for the 1-D tests; expects `a`, `i1`, `i2` and `N` to be in scope where it is expanded.
+#define singleDimTestBody(testName) {                      \
+                                                           \
+    printf(testName "\n\n");                               \
+                                                           \
+    assert( 3 < N );                                       \
+                                                           \
+    show( a[i1] );                                         \
+    show( a[i2] );                                         \
+    printf("\n");                                          \
+                                                           \
+    for( i_dynbounded; N ) show( a[i_dynbounded] );        \
+    printf("\n");                                          \
+                                                           \
+    for( i_stabounded; 4 ) show( a[i_stabounded] );        \
+    printf("\n");                                          \
+}
+
+forall( [N] )  // N: length of the array argument
+void test_common_arg_types(array(float, N) & a, ptrdiff_t i1, size_t i2)  // indexes a concrete array(float, N) with a signed (i1) and an unsigned (i2) index
+    singleDimTestBody("Simple array")  // the macro expands to the entire function body
+
+forall( [N], A& | ar(A, float, N) )  // A: any type satisfying ar(A, float, N)
+void test_common_arg_types__via_trait(A & a, ptrdiff_t i1, size_t i2)  // same checks as test_common_arg_types, but subscripting goes through the `ar` trait
+    singleDimTestBody("Via trait")  // the macro expands to the entire function body
+
+void do1dimTest() {  // runs the 1-D checks on a 5-element array, once directly and once through the trait
+    array(float, 5) a;
+    a[0] = 100.0;  // element k holds 100.k, so each printed value identifies the index that was read
+    a[1] = 100.1;
+    a[2] = 100.2;
+    a[3] = 100.3;
+    a[4] = 100.4;
+
+    test_common_arg_types(a, 3, 3);  // both index arguments are 3; the callee receives them as ptrdiff_t and size_t
+    test_common_arg_types__via_trait(a, 3, 3);
+}
+// Shared function body for the 2-D tests; expects `a`, `x1`, `x2`, `M` and `N` to be in scope where it is expanded.
+#define multiDimTestBody(testName) {                         \
+                                                             \
+    printf(testName "\n\n");                                 \
+                                                             \
+    assert( 3 < M );                                         \
+    assert( 3 < N );                                         \
+                                                             \
+    show(( a[x1,x1] ));                                      \
+    show(( a[x1,x2] ));                                      \
+    show(( a[x2,x1] ));                                      \
+    show(( a[x2,x2] ));                                      \
+    printf("\n");                                            \
+                                                             \
+    for( i_dynbounded; M ) show(( a[i_dynbounded, 3] ));     \
+    printf("\n");                                            \
+                                                             \
+    for( i_stabounded; 4 ) show(( a[i_stabounded, 3] ));     \
+    printf("\n");                                            \
+                                                             \
+    for( j_dynbounded; N ) show(( a[3, j_dynbounded] ));     \
+    printf("\n");                                            \
+                                                             \
+    for( j_stabounded; 4 ) show(( a[3, j_stabounded] ));     \
+    printf("\n");                                            \
+}
+
+forall( [M], [N] )  // M, N: the two dimensions of the array argument
+void test_common_arg_types__md(array(float, M, N) & a, ptrdiff_t x1, size_t x2)  // 2-D subscripting on a concrete array with a signed (x1) and an unsigned (x2) index
+    multiDimTestBody("Simple array, multidim")  // the macro expands to the entire function body
+
+
+forall( [M], [N], A_outer &, A_inner & | ar(A_outer, A_inner, M) | ar(A_inner, float, N) )  // outer view of M inner views, each holding N floats
+void test_common_arg_types__md__via_trait(A_outer & a, ptrdiff_t x1, size_t x2)  // same checks as test_common_arg_types__md, but through two nested `ar` assertions
+    multiDimTestBody("Via trait, multidim")  // the macro expands to the entire function body
+
+
+void doMdimTest() {  // runs the 2-D checks on a 5x4 array, then the trait/1-D checks on views sliced from it
+
+    array(float, 5, 4) b;
+    b[ix0,ix0] = 0.0; b[ix0,1] = 0.1; b[ix0,2] = 0.2; b[ix0,3] = 0.3;  // b[r,c] holds r.c; NOTE(review): ix0 is not defined in this file, presumably supplied by <containers/array.hfa> (confirm)
+    b[  1,ix0] = 1.0; b[  1,1] = 1.1; b[  1,2] = 1.2; b[  1,3] = 1.3;
+    b[  2,ix0] = 2.0; b[  2,1] = 2.1; b[  2,2] = 2.2; b[  2,3] = 2.3;
+    b[  3,ix0] = 3.0; b[  3,1] = 3.1; b[  3,2] = 3.2; b[  3,3] = 3.3;
+    b[  4,ix0] = 4.0; b[  4,1] = 4.1; b[  4,2] = 4.2; b[  4,3] = 4.3;
+
+    test_common_arg_types__md(b, 3, 3);
+    test_common_arg_types__md__via_trait(b, 3, 3);
+    printf("Transposed, ");  // prefix only; the callee prints the rest of the heading
+    test_common_arg_types__md__via_trait(b[all], 3, 3);  // the expected output for this call has the row and column loops swapped
+
+    printf("Slice giving ");
+    test_common_arg_types(b[2], 3, 3);  // row 2 of b, accepted as a plain 1-D array
+
+    printf("Same slice ");
+    test_common_arg_types__via_trait(b[2], 3, 3);  // the same row, this time through the trait
+
+    printf("Strided slice ");
+    test_common_arg_types__via_trait(b[all,2], 3, 3);  // column 2 of b (the x.2 values)
+}
+
+int main() {  // runs the 1-D tests, then the multi-dimensional tests
+
+    // can't be inlined in same func due to Trac #175.
+    do1dimTest();
+    doMdimTest();
+}
Index: tests/collections/atomic_mpsc.cfa
===================================================================
--- tests/collections/atomic_mpsc.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ tests/collections/atomic_mpsc.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -1,4 +1,4 @@
 #include <fstream.hfa>
-#include <queueLockFree.hfa>
+#include <containers/lockfree.hfa>
 #include <thread.hfa>
 
Index: tests/concurrent/.expect/migrate.txt
===================================================================
--- tests/concurrent/.expect/migrate.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/.expect/migrate.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,1 @@
+done
Index: tests/concurrent/migrate.cfa
===================================================================
--- tests/concurrent/migrate.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/migrate.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,85 @@
+#include <fstream.hfa>
+#include <kernel.hfa>
+#include <thread.hfa>
+
+#include <stdatomic.h>
+#include <assert.h>
+
+struct cluster_wrapper {  // a cluster bundled with a guard value and two counters
+	cluster self;
+	const uint64_t canary;  // stamped once by the constructor; asserted unchanged in the destructor and before each migration
+	struct {
+		volatile uint64_t want;  // zeroed by the constructor; not otherwise used in this file
+		volatile uint64_t have;  // zeroed by the constructor; not otherwise used in this file
+	} checksum;
+};
+
+void ?{}( cluster_wrapper & this ) {  // constructor: default-constructs the cluster, stamps the canary, zeroes both counters
+	(this.self){};
+	(*(uint64_t *)&this.canary) = 0xDEAD2BADDEAD2BAD;  // the cast strips const so the field can be initialised here
+	this.checksum.want = 0;
+	this.checksum.have = 0;
+}
+
+void ^?{}( cluster_wrapper & this ) {  // destructor: only verifies the canary
+	assert(this.canary == 0xDEAD2BADDEAD2BAD);  // fails if the stamp written by the constructor was overwritten
+}
+
+static cluster_wrapper * the_clusters;  // set by main() to its local array; indexed by the worker threads
+static unsigned cluster_cnt;  // number of entries in the_clusters; main() sets it to 3
+
+thread MyThread {  // worker thread type; declares no fields of its own
+
+};
+
+void ?{}( MyThread & this ) {}  // constructor: nothing to initialise
+
+void checkcl( MyThread & this, cluster * cl) {  // aborts unless the thread's curr_cluster equals cl; not called anywhere in this file
+	if(((thread&)this).curr_cluster != cl) {
+		abort | "Thread has unexpected cluster";
+	}
+}
+
+void main( MyThread & this ) {  // thread body: accept one external migrate, then self-migrate to 100 pseudo-randomly chosen clusters
+	waitfor( migrate : this ) {  // presumably blocks until migrate() is called on this thread, as main() does (confirm waitfor semantics)
+		assert( ((thread&)this).curr_cluster == active_cluster() );
+		assert( ((thread&)this).curr_cluster == active_processor()->cltr );
+	}
+
+	struct cluster_wrapper * curr = (struct cluster_wrapper *)&the_clusters[0];  // NOTE(review): curr is never read below
+
+	for(100) {
+		unsigned idx = prng( this, cluster_cnt );  // presumably yields a value in [0, cluster_cnt) (confirm prng's range)
+
+		struct cluster_wrapper * next = &the_clusters[ idx ];
+		assert(next->canary == 0xDEAD2BADDEAD2BAD);  // the target wrapper must still carry the constructor's stamp
+
+		// NOTE(review): an unfinished fragment "next->" was left here; the checksum fields are never updated
+
+		migrate( this, next->self );
+
+		assert( active_cluster() == &next->self );  // once migrate() returns, execution must already be on the target cluster
+		assert( ((thread&)this).curr_cluster == active_cluster() );
+		assert( ((thread&)this).curr_cluster == active_processor()->cltr );
+	}
+}
+
+int main() {  // builds 3 clusters, starts 17 worker threads, migrates each to cluster 0, then prints "done"
+	cluster_cnt = 3;
+	cluster_wrapper cl[cluster_cnt];
+	the_clusters = cl;  // publish the array before any worker thread is created
+
+	{
+		set_concurrency( cl[0].self, 2 );  // presumably the processor count per cluster (confirm against the kernel API)
+		set_concurrency( cl[1].self, 2 );
+		set_concurrency( cl[2].self, 1 );
+
+		MyThread threads[17];
+		for(i;17) {
+			migrate( threads[i], cl[0].self );  // the call each worker's waitfor( migrate : this ) is waiting for
+		}
+
+	}  // NOTE(review): presumably the workers are joined when `threads` leaves scope here (confirm)
+	// non-empty .expect file
+	printf( "done\n" );
+}
Index: tests/concurrent/pthread/.expect/bounded_buffer.txt
===================================================================
--- tests/concurrent/pthread/.expect/bounded_buffer.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/.expect/bounded_buffer.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,2 @@
+producer total value is 23426
+consumer total value is 23426
Index: tests/concurrent/pthread/.expect/pthread_attr_test.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_attr_test.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/.expect/pthread_attr_test.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,1 @@
+stack size is 123456789
Index: tests/concurrent/pthread/.expect/pthread_cond_test.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_cond_test.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/.expect/pthread_cond_test.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,1001 @@
+S1 done 0
+S1 done 1
+S1 done 2
+S1 done 3
+S1 done 4
+S1 done 5
+S1 done 6
+S1 done 7
+S1 done 8
+S1 done 9
+S1 done 10
+S1 done 11
+S1 done 12
+S1 done 13
+S1 done 14
+S1 done 15
+S1 done 16
+S1 done 17
+S1 done 18
+S1 done 19
+S1 done 20
+S1 done 21
+S1 done 22
+S1 done 23
+S1 done 24
+S1 done 25
+S1 done 26
+S1 done 27
+S1 done 28
+S1 done 29
+S1 done 30
+S1 done 31
+S1 done 32
+S1 done 33
+S1 done 34
+S1 done 35
+S1 done 36
+S1 done 37
+S1 done 38
+S1 done 39
+S1 done 40
+S1 done 41
+S1 done 42
+S1 done 43
+S1 done 44
+S1 done 45
+S1 done 46
+S1 done 47
+S1 done 48
+S1 done 49
+S1 done 50
+S1 done 51
+S1 done 52
+S1 done 53
+S1 done 54
+S1 done 55
+S1 done 56
+S1 done 57
+S1 done 58
+S1 done 59
+S1 done 60
+S1 done 61
+S1 done 62
+S1 done 63
+S1 done 64
+S1 done 65
+S1 done 66
+S1 done 67
+S1 done 68
+S1 done 69
+S1 done 70
+S1 done 71
+S1 done 72
+S1 done 73
+S1 done 74
+S1 done 75
+S1 done 76
+S1 done 77
+S1 done 78
+S1 done 79
+S1 done 80
+S1 done 81
+S1 done 82
+S1 done 83
+S1 done 84
+S1 done 85
+S1 done 86
+S1 done 87
+S1 done 88
+S1 done 89
+S1 done 90
+S1 done 91
+S1 done 92
+S1 done 93
+S1 done 94
+S1 done 95
+S1 done 96
+S1 done 97
+S1 done 98
+S1 done 99
+S1 done 100
+S1 done 101
+S1 done 102
+S1 done 103
+S1 done 104
+S1 done 105
+S1 done 106
+S1 done 107
+S1 done 108
+S1 done 109
+S1 done 110
+S1 done 111
+S1 done 112
+S1 done 113
+S1 done 114
+S1 done 115
+S1 done 116
+S1 done 117
+S1 done 118
+S1 done 119
+S1 done 120
+S1 done 121
+S1 done 122
+S1 done 123
+S1 done 124
+S1 done 125
+S1 done 126
+S1 done 127
+S1 done 128
+S1 done 129
+S1 done 130
+S1 done 131
+S1 done 132
+S1 done 133
+S1 done 134
+S1 done 135
+S1 done 136
+S1 done 137
+S1 done 138
+S1 done 139
+S1 done 140
+S1 done 141
+S1 done 142
+S1 done 143
+S1 done 144
+S1 done 145
+S1 done 146
+S1 done 147
+S1 done 148
+S1 done 149
+S1 done 150
+S1 done 151
+S1 done 152
+S1 done 153
+S1 done 154
+S1 done 155
+S1 done 156
+S1 done 157
+S1 done 158
+S1 done 159
+S1 done 160
+S1 done 161
+S1 done 162
+S1 done 163
+S1 done 164
+S1 done 165
+S1 done 166
+S1 done 167
+S1 done 168
+S1 done 169
+S1 done 170
+S1 done 171
+S1 done 172
+S1 done 173
+S1 done 174
+S1 done 175
+S1 done 176
+S1 done 177
+S1 done 178
+S1 done 179
+S1 done 180
+S1 done 181
+S1 done 182
+S1 done 183
+S1 done 184
+S1 done 185
+S1 done 186
+S1 done 187
+S1 done 188
+S1 done 189
+S1 done 190
+S1 done 191
+S1 done 192
+S1 done 193
+S1 done 194
+S1 done 195
+S1 done 196
+S1 done 197
+S1 done 198
+S1 done 199
+S1 done 200
+S1 done 201
+S1 done 202
+S1 done 203
+S1 done 204
+S1 done 205
+S1 done 206
+S1 done 207
+S1 done 208
+S1 done 209
+S1 done 210
+S1 done 211
+S1 done 212
+S1 done 213
+S1 done 214
+S1 done 215
+S1 done 216
+S1 done 217
+S1 done 218
+S1 done 219
+S1 done 220
+S1 done 221
+S1 done 222
+S1 done 223
+S1 done 224
+S1 done 225
+S1 done 226
+S1 done 227
+S1 done 228
+S1 done 229
+S1 done 230
+S1 done 231
+S1 done 232
+S1 done 233
+S1 done 234
+S1 done 235
+S1 done 236
+S1 done 237
+S1 done 238
+S1 done 239
+S1 done 240
+S1 done 241
+S1 done 242
+S1 done 243
+S1 done 244
+S1 done 245
+S1 done 246
+S1 done 247
+S1 done 248
+S1 done 249
+S1 done 250
+S1 done 251
+S1 done 252
+S1 done 253
+S1 done 254
+S1 done 255
+S1 done 256
+S1 done 257
+S1 done 258
+S1 done 259
+S1 done 260
+S1 done 261
+S1 done 262
+S1 done 263
+S1 done 264
+S1 done 265
+S1 done 266
+S1 done 267
+S1 done 268
+S1 done 269
+S1 done 270
+S1 done 271
+S1 done 272
+S1 done 273
+S1 done 274
+S1 done 275
+S1 done 276
+S1 done 277
+S1 done 278
+S1 done 279
+S1 done 280
+S1 done 281
+S1 done 282
+S1 done 283
+S1 done 284
+S1 done 285
+S1 done 286
+S1 done 287
+S1 done 288
+S1 done 289
+S1 done 290
+S1 done 291
+S1 done 292
+S1 done 293
+S1 done 294
+S1 done 295
+S1 done 296
+S1 done 297
+S1 done 298
+S1 done 299
+S1 done 300
+S1 done 301
+S1 done 302
+S1 done 303
+S1 done 304
+S1 done 305
+S1 done 306
+S1 done 307
+S1 done 308
+S1 done 309
+S1 done 310
+S1 done 311
+S1 done 312
+S1 done 313
+S1 done 314
+S1 done 315
+S1 done 316
+S1 done 317
+S1 done 318
+S1 done 319
+S1 done 320
+S1 done 321
+S1 done 322
+S1 done 323
+S1 done 324
+S1 done 325
+S1 done 326
+S1 done 327
+S1 done 328
+S1 done 329
+S1 done 330
+S1 done 331
+S1 done 332
+S1 done 333
+S1 done 334
+S1 done 335
+S1 done 336
+S1 done 337
+S1 done 338
+S1 done 339
+S1 done 340
+S1 done 341
+S1 done 342
+S1 done 343
+S1 done 344
+S1 done 345
+S1 done 346
+S1 done 347
+S1 done 348
+S1 done 349
+S1 done 350
+S1 done 351
+S1 done 352
+S1 done 353
+S1 done 354
+S1 done 355
+S1 done 356
+S1 done 357
+S1 done 358
+S1 done 359
+S1 done 360
+S1 done 361
+S1 done 362
+S1 done 363
+S1 done 364
+S1 done 365
+S1 done 366
+S1 done 367
+S1 done 368
+S1 done 369
+S1 done 370
+S1 done 371
+S1 done 372
+S1 done 373
+S1 done 374
+S1 done 375
+S1 done 376
+S1 done 377
+S1 done 378
+S1 done 379
+S1 done 380
+S1 done 381
+S1 done 382
+S1 done 383
+S1 done 384
+S1 done 385
+S1 done 386
+S1 done 387
+S1 done 388
+S1 done 389
+S1 done 390
+S1 done 391
+S1 done 392
+S1 done 393
+S1 done 394
+S1 done 395
+S1 done 396
+S1 done 397
+S1 done 398
+S1 done 399
+S1 done 400
+S1 done 401
+S1 done 402
+S1 done 403
+S1 done 404
+S1 done 405
+S1 done 406
+S1 done 407
+S1 done 408
+S1 done 409
+S1 done 410
+S1 done 411
+S1 done 412
+S1 done 413
+S1 done 414
+S1 done 415
+S1 done 416
+S1 done 417
+S1 done 418
+S1 done 419
+S1 done 420
+S1 done 421
+S1 done 422
+S1 done 423
+S1 done 424
+S1 done 425
+S1 done 426
+S1 done 427
+S1 done 428
+S1 done 429
+S1 done 430
+S1 done 431
+S1 done 432
+S1 done 433
+S1 done 434
+S1 done 435
+S1 done 436
+S1 done 437
+S1 done 438
+S1 done 439
+S1 done 440
+S1 done 441
+S1 done 442
+S1 done 443
+S1 done 444
+S1 done 445
+S1 done 446
+S1 done 447
+S1 done 448
+S1 done 449
+S1 done 450
+S1 done 451
+S1 done 452
+S1 done 453
+S1 done 454
+S1 done 455
+S1 done 456
+S1 done 457
+S1 done 458
+S1 done 459
+S1 done 460
+S1 done 461
+S1 done 462
+S1 done 463
+S1 done 464
+S1 done 465
+S1 done 466
+S1 done 467
+S1 done 468
+S1 done 469
+S1 done 470
+S1 done 471
+S1 done 472
+S1 done 473
+S1 done 474
+S1 done 475
+S1 done 476
+S1 done 477
+S1 done 478
+S1 done 479
+S1 done 480
+S1 done 481
+S1 done 482
+S1 done 483
+S1 done 484
+S1 done 485
+S1 done 486
+S1 done 487
+S1 done 488
+S1 done 489
+S1 done 490
+S1 done 491
+S1 done 492
+S1 done 493
+S1 done 494
+S1 done 495
+S1 done 496
+S1 done 497
+S1 done 498
+S1 done 499
+S1 done 500
+S1 done 501
+S1 done 502
+S1 done 503
+S1 done 504
+S1 done 505
+S1 done 506
+S1 done 507
+S1 done 508
+S1 done 509
+S1 done 510
+S1 done 511
+S1 done 512
+S1 done 513
+S1 done 514
+S1 done 515
+S1 done 516
+S1 done 517
+S1 done 518
+S1 done 519
+S1 done 520
+S1 done 521
+S1 done 522
+S1 done 523
+S1 done 524
+S1 done 525
+S1 done 526
+S1 done 527
+S1 done 528
+S1 done 529
+S1 done 530
+S1 done 531
+S1 done 532
+S1 done 533
+S1 done 534
+S1 done 535
+S1 done 536
+S1 done 537
+S1 done 538
+S1 done 539
+S1 done 540
+S1 done 541
+S1 done 542
+S1 done 543
+S1 done 544
+S1 done 545
+S1 done 546
+S1 done 547
+S1 done 548
+S1 done 549
+S1 done 550
+S1 done 551
+S1 done 552
+S1 done 553
+S1 done 554
+S1 done 555
+S1 done 556
+S1 done 557
+S1 done 558
+S1 done 559
+S1 done 560
+S1 done 561
+S1 done 562
+S1 done 563
+S1 done 564
+S1 done 565
+S1 done 566
+S1 done 567
+S1 done 568
+S1 done 569
+S1 done 570
+S1 done 571
+S1 done 572
+S1 done 573
+S1 done 574
+S1 done 575
+S1 done 576
+S1 done 577
+S1 done 578
+S1 done 579
+S1 done 580
+S1 done 581
+S1 done 582
+S1 done 583
+S1 done 584
+S1 done 585
+S1 done 586
+S1 done 587
+S1 done 588
+S1 done 589
+S1 done 590
+S1 done 591
+S1 done 592
+S1 done 593
+S1 done 594
+S1 done 595
+S1 done 596
+S1 done 597
+S1 done 598
+S1 done 599
+S1 done 600
+S1 done 601
+S1 done 602
+S1 done 603
+S1 done 604
+S1 done 605
+S1 done 606
+S1 done 607
+S1 done 608
+S1 done 609
+S1 done 610
+S1 done 611
+S1 done 612
+S1 done 613
+S1 done 614
+S1 done 615
+S1 done 616
+S1 done 617
+S1 done 618
+S1 done 619
+S1 done 620
+S1 done 621
+S1 done 622
+S1 done 623
+S1 done 624
+S1 done 625
+S1 done 626
+S1 done 627
+S1 done 628
+S1 done 629
+S1 done 630
+S1 done 631
+S1 done 632
+S1 done 633
+S1 done 634
+S1 done 635
+S1 done 636
+S1 done 637
+S1 done 638
+S1 done 639
+S1 done 640
+S1 done 641
+S1 done 642
+S1 done 643
+S1 done 644
+S1 done 645
+S1 done 646
+S1 done 647
+S1 done 648
+S1 done 649
+S1 done 650
+S1 done 651
+S1 done 652
+S1 done 653
+S1 done 654
+S1 done 655
+S1 done 656
+S1 done 657
+S1 done 658
+S1 done 659
+S1 done 660
+S1 done 661
+S1 done 662
+S1 done 663
+S1 done 664
+S1 done 665
+S1 done 666
+S1 done 667
+S1 done 668
+S1 done 669
+S1 done 670
+S1 done 671
+S1 done 672
+S1 done 673
+S1 done 674
+S1 done 675
+S1 done 676
+S1 done 677
+S1 done 678
+S1 done 679
+S1 done 680
+S1 done 681
+S1 done 682
+S1 done 683
+S1 done 684
+S1 done 685
+S1 done 686
+S1 done 687
+S1 done 688
+S1 done 689
+S1 done 690
+S1 done 691
+S1 done 692
+S1 done 693
+S1 done 694
+S1 done 695
+S1 done 696
+S1 done 697
+S1 done 698
+S1 done 699
+S1 done 700
+S1 done 701
+S1 done 702
+S1 done 703
+S1 done 704
+S1 done 705
+S1 done 706
+S1 done 707
+S1 done 708
+S1 done 709
+S1 done 710
+S1 done 711
+S1 done 712
+S1 done 713
+S1 done 714
+S1 done 715
+S1 done 716
+S1 done 717
+S1 done 718
+S1 done 719
+S1 done 720
+S1 done 721
+S1 done 722
+S1 done 723
+S1 done 724
+S1 done 725
+S1 done 726
+S1 done 727
+S1 done 728
+S1 done 729
+S1 done 730
+S1 done 731
+S1 done 732
+S1 done 733
+S1 done 734
+S1 done 735
+S1 done 736
+S1 done 737
+S1 done 738
+S1 done 739
+S1 done 740
+S1 done 741
+S1 done 742
+S1 done 743
+S1 done 744
+S1 done 745
+S1 done 746
+S1 done 747
+S1 done 748
+S1 done 749
+S1 done 750
+S1 done 751
+S1 done 752
+S1 done 753
+S1 done 754
+S1 done 755
+S1 done 756
+S1 done 757
+S1 done 758
+S1 done 759
+S1 done 760
+S1 done 761
+S1 done 762
+S1 done 763
+S1 done 764
+S1 done 765
+S1 done 766
+S1 done 767
+S1 done 768
+S1 done 769
+S1 done 770
+S1 done 771
+S1 done 772
+S1 done 773
+S1 done 774
+S1 done 775
+S1 done 776
+S1 done 777
+S1 done 778
+S1 done 779
+S1 done 780
+S1 done 781
+S1 done 782
+S1 done 783
+S1 done 784
+S1 done 785
+S1 done 786
+S1 done 787
+S1 done 788
+S1 done 789
+S1 done 790
+S1 done 791
+S1 done 792
+S1 done 793
+S1 done 794
+S1 done 795
+S1 done 796
+S1 done 797
+S1 done 798
+S1 done 799
+S1 done 800
+S1 done 801
+S1 done 802
+S1 done 803
+S1 done 804
+S1 done 805
+S1 done 806
+S1 done 807
+S1 done 808
+S1 done 809
+S1 done 810
+S1 done 811
+S1 done 812
+S1 done 813
+S1 done 814
+S1 done 815
+S1 done 816
+S1 done 817
+S1 done 818
+S1 done 819
+S1 done 820
+S1 done 821
+S1 done 822
+S1 done 823
+S1 done 824
+S1 done 825
+S1 done 826
+S1 done 827
+S1 done 828
+S1 done 829
+S1 done 830
+S1 done 831
+S1 done 832
+S1 done 833
+S1 done 834
+S1 done 835
+S1 done 836
+S1 done 837
+S1 done 838
+S1 done 839
+S1 done 840
+S1 done 841
+S1 done 842
+S1 done 843
+S1 done 844
+S1 done 845
+S1 done 846
+S1 done 847
+S1 done 848
+S1 done 849
+S1 done 850
+S1 done 851
+S1 done 852
+S1 done 853
+S1 done 854
+S1 done 855
+S1 done 856
+S1 done 857
+S1 done 858
+S1 done 859
+S1 done 860
+S1 done 861
+S1 done 862
+S1 done 863
+S1 done 864
+S1 done 865
+S1 done 866
+S1 done 867
+S1 done 868
+S1 done 869
+S1 done 870
+S1 done 871
+S1 done 872
+S1 done 873
+S1 done 874
+S1 done 875
+S1 done 876
+S1 done 877
+S1 done 878
+S1 done 879
+S1 done 880
+S1 done 881
+S1 done 882
+S1 done 883
+S1 done 884
+S1 done 885
+S1 done 886
+S1 done 887
+S1 done 888
+S1 done 889
+S1 done 890
+S1 done 891
+S1 done 892
+S1 done 893
+S1 done 894
+S1 done 895
+S1 done 896
+S1 done 897
+S1 done 898
+S1 done 899
+S1 done 900
+S1 done 901
+S1 done 902
+S1 done 903
+S1 done 904
+S1 done 905
+S1 done 906
+S1 done 907
+S1 done 908
+S1 done 909
+S1 done 910
+S1 done 911
+S1 done 912
+S1 done 913
+S1 done 914
+S1 done 915
+S1 done 916
+S1 done 917
+S1 done 918
+S1 done 919
+S1 done 920
+S1 done 921
+S1 done 922
+S1 done 923
+S1 done 924
+S1 done 925
+S1 done 926
+S1 done 927
+S1 done 928
+S1 done 929
+S1 done 930
+S1 done 931
+S1 done 932
+S1 done 933
+S1 done 934
+S1 done 935
+S1 done 936
+S1 done 937
+S1 done 938
+S1 done 939
+S1 done 940
+S1 done 941
+S1 done 942
+S1 done 943
+S1 done 944
+S1 done 945
+S1 done 946
+S1 done 947
+S1 done 948
+S1 done 949
+S1 done 950
+S1 done 951
+S1 done 952
+S1 done 953
+S1 done 954
+S1 done 955
+S1 done 956
+S1 done 957
+S1 done 958
+S1 done 959
+S1 done 960
+S1 done 961
+S1 done 962
+S1 done 963
+S1 done 964
+S1 done 965
+S1 done 966
+S1 done 967
+S1 done 968
+S1 done 969
+S1 done 970
+S1 done 971
+S1 done 972
+S1 done 973
+S1 done 974
+S1 done 975
+S1 done 976
+S1 done 977
+S1 done 978
+S1 done 979
+S1 done 980
+S1 done 981
+S1 done 982
+S1 done 983
+S1 done 984
+S1 done 985
+S1 done 986
+S1 done 987
+S1 done 988
+S1 done 989
+S1 done 990
+S1 done 991
+S1 done 992
+S1 done 993
+S1 done 994
+S1 done 995
+S1 done 996
+S1 done 997
+S1 done 998
+S1 done 999
+S2 statement done!
Index: tests/concurrent/pthread/.expect/pthread_demo_create_join.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_demo_create_join.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/.expect/pthread_demo_create_join.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,1 @@
+final res is 190
Index: tests/concurrent/pthread/.expect/pthread_demo_lock.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_demo_lock.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/.expect/pthread_demo_lock.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,6 @@
+lock res is 2000000000
+in trylocktest1 res1 is 0
+in trylocktest1 res2 is 0
+in trylocktest2 res1 is 16
+in trylocktest2 res2 is 16
+cnt_trylock is 200000000
Index: tests/concurrent/pthread/.expect/pthread_key_test.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_key_test.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/.expect/pthread_key_test.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,6 @@
+Destructor function invoked
+Destructor function invoked
+Destructor function invoked
+Destructor function invoked
+Destructor function invoked
+total value is 207, total value by pthread_getspecific is 207
Index: tests/concurrent/pthread/.expect/pthread_once_test.txt
===================================================================
--- tests/concurrent/pthread/.expect/pthread_once_test.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/.expect/pthread_once_test.txt	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,1 @@
+in once_fn
Index: tests/concurrent/pthread/bounded_buffer.cfa
===================================================================
--- tests/concurrent/pthread/bounded_buffer.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/bounded_buffer.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,156 @@
+#include <stdlib.h>										// prototype: rand
+#include <fstream.hfa>
+#include <thread.hfa>
+#include <pthread.h>
+#include <errno.h>
+// tested pthread mutex related routines, pthread cond related routines
+// tested pthread_create/join
+
+enum { BufferSize = 50 };
+
+volatile int producer_val_total;
+volatile int consumer_val_total;
+
+pthread_mutex_t producer_cnt_lock, consumer_cnt_lock;
+
+
+forall( T ){  // generic bounded FIFO built on one pthread mutex and two condition variables
+    struct Buffer
+    {
+        int front, back, count;                         // remove index, insert index, number of stored elements
+        T elements[BufferSize];                         // circular storage; capacity is BufferSize
+        pthread_mutex_t _mutex;                         // held by insert/remove while they touch the fields above
+        pthread_cond_t Full, Empty;                     // waiting consumers & producers
+    };
+
+    void ?{}( Buffer(T) & buffer ) with( buffer ) {     // constructor: empty buffer, default-attribute mutex and conditions
+        [front, back, count] = 0;
+        pthread_mutex_init(&_mutex, NULL);
+        pthread_cond_init(&Full, NULL);
+        pthread_cond_init(&Empty, NULL);
+    }
+
+    void ^?{}( Buffer(T) & buffer ) with( buffer ){     // destructor: releases the mutex and both conditions
+        pthread_mutex_destroy(&_mutex);
+        pthread_cond_destroy(&Full);
+        pthread_cond_destroy(&Empty);
+    }
+
+    int query( Buffer(T) & buffer ) { return buffer.count; } // read-only, no mutual exclusion
+
+    void insert( Buffer(T) & buffer, T elem ) with(buffer) { // appends elem at the back; blocks while the buffer is full
+        pthread_mutex_lock(&_mutex);
+        while ( count == BufferSize ) pthread_cond_wait( &Empty, &_mutex ); // block producer
+        elements[back] = elem;
+        back = ( back + 1 ) % BufferSize;               // wrap at the declared capacity, not a separate hard-coded bound
+        count += 1;
+        pthread_cond_signal( &Full );                   // unblock consumer
+        pthread_mutex_unlock(&_mutex);
+    }
+
+    T remove(Buffer(T) & buffer) with(buffer) {         // removes and returns the front element; blocks while the buffer is empty
+        pthread_mutex_lock(&_mutex);
+        while ( count == 0 ) pthread_cond_wait( &Full, &_mutex ); // block consumer
+        T elem = elements[front];
+        front = ( front + 1 ) % BufferSize;             // must wrap at the same bound insert uses
+        count -= 1;
+        pthread_cond_signal( &Empty );                  // unblock producer
+        pthread_mutex_unlock(&_mutex);
+        return elem;
+    }
+
+}
+
+void *producer( void *arg ) {  // pthread entry point: arg is the shared Buffer(int); always returns NULL
+	Buffer(int) &buf = *(Buffer(int)*)arg;
+	const int NoOfItems = rand() % 40;  // this producer inserts between 0 and 39 items
+	int item;
+	for ( int i = 1; i <= NoOfItems; i += 1 ) {			// produce a bunch of items
+		item = rand() % 100 + 1;						// produce a random number in 1..100
+		//sout | "Producer:" | pthread_self() | " value:" | item;
+		insert( buf,item );								// insert element into queue
+        pthread_mutex_lock(&producer_cnt_lock);  // the running total is shared by all producers
+        producer_val_total += item;
+        pthread_mutex_unlock(&producer_cnt_lock);
+	} // for
+	//sout | "Producer:" | pthread_self() | " is finished";
+	return NULL;
+} // producer
+
+void *consumer( void *arg ) {  // pthread entry point: arg is the shared Buffer(int); always returns NULL
+	Buffer(int) &buf = *(Buffer(int) *)arg;
+	int item;
+	for ( ;; ) {										// consume until the -1 sentinel appears
+		item = remove(buf);							// remove from front of queue
+		//sout | "Consumer:" | pthread_self() | " value:" | item;
+	  if ( item == -1 ) break;  // the sentinel is not added to the total
+        pthread_mutex_lock(&consumer_cnt_lock);  // the running total is shared by all consumers
+        consumer_val_total += item;
+        pthread_mutex_unlock(&consumer_cnt_lock);
+	} // for
+	//sout | "Consumer:" | pthread_self() | " is finished";
+	return NULL;
+} // consumer
+
+int main() {  // starts 20 consumers and 30 producers on one buffer, joins them, prints both totals
+	const int NoOfCons = 20, NoOfProds = 30;
+	Buffer(int) buf;								// shared bounded buffer (pthread mutex/condition based)
+	pthread_t cons[NoOfCons];							// consumer thread handles
+	pthread_t prods[NoOfProds];							// producer thread handles
+    pthread_mutex_init(&producer_cnt_lock, NULL);  // NOTE(review): neither counter lock is destroyed before main returns
+    pthread_mutex_init(&consumer_cnt_lock, NULL);
+	// fixed seed, then extra processors for parallelism
+    srandom( 1003 );  // NOTE(review): the producers call rand(); confirm that srandom() seeds the same generator here
+
+	processor p[5];
+    {
+        // create/join and mutex/condition test
+        //sout | "create/join and mutex/condition test";
+        for ( int i = 0; i < NoOfCons; i += 1 ) {			// create consumers
+            if ( pthread_create( &cons[i], NULL, consumer, (void*)&buf ) != 0 ) {
+                sout | "create thread failure, errno:" | errno;  // NOTE(review): pthread_create reports errors through its return value, which is discarded here
+                exit( EXIT_FAILURE );
+            } // if
+        } // for
+        for ( int i = 0; i < NoOfProds; i += 1 ) {			// 	create producers
+            if ( pthread_create( &prods[i], NULL, producer, (void*)&buf ) != 0 ) {
+                sout | "create thread failure";
+                exit( EXIT_FAILURE );
+            } // if
+        } // for
+
+        void *result;
+        for ( int i = 0; i < NoOfProds; i += 1 ) {			// wait for producers to end
+            if ( pthread_join( prods[i], &result ) != 0 ) {
+                sout | " producers join thread failure";
+                exit( EXIT_FAILURE );
+            } // if
+            if ( (uint64_t)result != 0 ) {  // producer() returns NULL, so anything else is an error
+                sout | "producers" | prods[i] |" bad return value " | result;
+                exit( EXIT_FAILURE );
+            } // if
+            //sout | "join prods[" | i | "]:" | prods[i] | " result:" | result;
+        } // for
+
+        for ( int i = 0; i < NoOfCons; i += 1 ) {			// terminate each consumer
+            insert(buf, -1 );  // one -1 sentinel per consumer, queued after every producer has finished
+        } // for
+
+        for ( int i = 0; i < NoOfCons; i += 1 ) {			// wait for consumer to end
+            if ( pthread_join( cons[i], &result ) != 0 ) {
+                sout| "consumers join thread failure" ;
+                exit( EXIT_FAILURE );
+            } // if
+            if ( (uint64_t)result != 0 ) {  // consumer() returns NULL, so anything else is an error
+                sout| "consumers bad return value" | result;
+                exit( EXIT_FAILURE );
+            } // if
+        } // for
+        sout | "producer total value is " | producer_val_total;  // the expected output has the two totals equal
+        sout | "consumer total value is " | consumer_val_total;
+    }
+
+	
+
+	
+}
Index: tests/concurrent/pthread/pthread_attr_test.cfa
===================================================================
--- tests/concurrent/pthread/pthread_attr_test.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/pthread_attr_test.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,29 @@
+/* test attr init; set stack; get stack */
+
+#include <fstream.hfa>
+#include <thread.hfa>
+
+// Thread body: _attr points at a pthread_attr_t; print the stack size recorded in it, or exit with 1 if the query fails.
+void* foo(void* _attr){
+    pthread_attr_t* attr = (pthread_attr_t*)_attr;
+    size_t size;
+    if (pthread_attr_getstacksize(attr, &size) != 0){
+        sout | "error return code";
+        exit(1);
+    }
+    sout | "stack size is " | size;
+    return NULL;
+}
+
+// Set a stack size on an attribute object, start foo with it (foo reads the size back from the same object), then join. Exits with 1 if a pthread call fails.
+int main(int argc, char const *argv[]) {
+    pthread_attr_t attr;
+    if (pthread_attr_init(&attr) != 0) exit(1);
+    if (pthread_attr_setstacksize(&attr, 123456789) != 0) exit(1);
+    pthread_t thr;
+    void* res;
+    if (pthread_create(&thr, &attr, foo, (void*)&attr) != 0) exit(1);   // thr is not valid after a failed create, so never join it
+    if (pthread_join(thr, &res) != 0) exit(1);
+    pthread_attr_destroy(&attr);                                        // only after the join: foo reads attr through its argument
+    return 0;
+}
Index: tests/concurrent/pthread/pthread_cond_test.cfa
===================================================================
--- tests/concurrent/pthread/pthread_cond_test.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/pthread_cond_test.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,46 @@
+/* small test of pthread cond */
+
+#include <fstream.hfa>
+#include <thread.hfa>
+#include <pthread.h>
+
+int done_flag = 0;          // set to 1 by S1 while it holds _mutex; S2 waits on cond while it is 0
+pthread_mutex_t _mutex;     // locked by S1 and S2 around their accesses to done_flag
+pthread_cond_t cond;        // signalled by S1 after it has set done_flag
+
+extern "C"{
+    void* S1(void* arg){   // signaller: sets done_flag under _mutex, then signals cond; arg is unused
+        pthread_mutex_lock(&_mutex);
+        for (int i = 0; i < 1000; i++) sout | "S1 done " | i;   // all 1000 lines are printed with _mutex held
+        done_flag = 1;
+        pthread_mutex_unlock(&_mutex);
+        pthread_cond_signal(&cond);     // issued after the mutex has been released
+        return NULL;
+    }
+
+    void* S2(void* arg){   // waiter: blocks until done_flag is set, then prints one line; arg is unused
+        pthread_mutex_lock(&_mutex);
+        while (!done_flag) pthread_cond_wait(&cond, &_mutex);   // loop, not if: a wait may return spuriously, so re-test the predicate
+        sout | "S2 statement done!";
+        pthread_mutex_unlock(&_mutex);
+        return NULL;
+    }
+}
+
+
+
+// Driver: set up the shared mutex and condition, run S1 (signaller) and S2 (waiter) to completion, then tear both down.
+int main(int argc, char const *argv[]) {
+    pthread_t signaller, waiter;
+    void* ignored = NULL;
+
+    pthread_mutex_init(&_mutex, NULL);
+    pthread_cond_init(&cond, NULL);
+    pthread_create(&signaller, NULL, S1, NULL);
+    pthread_create(&waiter, NULL, S2, NULL);
+    pthread_join(signaller, &ignored);
+    pthread_join(waiter, &ignored);
+    pthread_mutex_destroy(&_mutex);
+    pthread_cond_destroy(&cond);
+    return 0;
+}
Index: tests/concurrent/pthread/pthread_demo_create_join.cfa
===================================================================
--- tests/concurrent/pthread/pthread_demo_create_join.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/pthread_demo_create_join.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,41 @@
+#include <fstream.hfa>
+#include <thread.hfa>
+/* test pthread create/join/exit */
+
+int arr[20];
+
+// Thread body: idx carries an array index by value; arr[idx] is handed back to the joiner through pthread_exit.
+void* fetch(void* idx){
+    int res = arr[(uint64_t)idx];
+    pthread_exit((void*)(uint64_t)res);     // widen first: int and void* need not have the same size
+    sout | "it should not be here";         // reached only if pthread_exit were to return
+    exit(1);
+}
+
+// Seed the shared table so that arr[i] == i for all 20 slots;
+// main compares each thread's result against its index, which relies on this.
+void arr_init(){
+    for (int slot = 0; slot < 20; slot += 1) arr[slot] = slot;
+}
+
+// Start 20 threads, thread i running fetch on index i; join each one and check that it handed back i.
+// Exit codes: 1 = create failed, 2 = join failed, 3 = a thread returned the wrong value.
+int main(int argc, char const *argv[]) {
+    pthread_t threads[20];
+    arr_init();
+    int status;
+    for (int i = 0; i < 20; i++){
+        status = pthread_create(&threads[i], NULL, fetch, (void*)(uint64_t)i);  // index travels by value; widen before the pointer cast
+        if (status != 0) exit(1);
+    }
+    int res = 0;
+    for (int i = 0; i < 20; i++){
+        void* _res = NULL;
+        status = pthread_join(threads[i], &_res);
+        if (status != 0) exit(2);
+        if (((uint64_t)_res) != (uint64_t)i) exit(3);   // compare in one type instead of mixing signed and unsigned
+        res += (int)(uint64_t)_res;
+    }
+    sout | "final res is" | res;
+    return 0;
+}
Index: tests/concurrent/pthread/pthread_demo_lock.cfa
===================================================================
--- tests/concurrent/pthread/pthread_demo_lock.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/pthread_demo_lock.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,124 @@
+#include <fstream.hfa>
+#include <thread.hfa>
+/* 
+    test that pthread_mutex provides mutual exclusion
+    test that pthread_mutex_trylock does not block when the lock is held by another thread, and that it can acquire a free lock
+*/
+volatile int cnt_nolock = 0;    // incremented by inc_unlock without taking any lock
+volatile int cnt_lock = 0;      // incremented by inc_lock while _mutex is held
+volatile int cnt_trylock = 0;   // incremented by trylock_test1 and trylock_test2
+extern "C"{
+    static pthread_mutex_t _mutex;  // the single mutex used by test_lock and test_trylock and their thread routines
+}
+
+/* mutex pthread routine  */
+// unlocked increment
+// Thread body: add 1 to cnt_nolock `cnt` times (count passed by value in the pointer), with no locking.
+void* inc_unlock(void* cnt){
+    uint64_t rounds = (uint64_t)cnt;
+    for (int i = 0; i < rounds; i++) cnt_nolock++;
+    return NULL;
+}
+// locked increment
+// Thread body: add 1 to cnt_lock `cnt` times, holding _mutex across the whole loop.
+void* inc_lock(void* cnt){
+    uint64_t rounds = (uint64_t)cnt;
+    pthread_mutex_lock(&_mutex);
+    for (int i = 0; i < rounds; i++) cnt_lock++;
+    pthread_mutex_unlock(&_mutex);
+    return NULL;
+}
+
+/* test lock vs unlock */
+void test_unlock(){     // 20 unsynchronized incrementers; prints the resulting total, then zeroes the counter
+    pthread_t workers[20];
+    void * res = NULL;
+    for (int t = 0; t < 20; t += 1){
+        pthread_create(&workers[t], NULL, inc_unlock, (void*)100000000);
+    }
+    for (int t = 0; t < 20; t += 1){
+        pthread_join(workers[t], &res);
+    }
+    sout | "unlock res is" | cnt_nolock;
+    cnt_nolock = 0;
+}
+extern "C"{
+    // 20 threads each add 100000000 to cnt_lock under _mutex; exits with 1 unless the total is exactly 20 times that.
+    void test_lock(){
+        pthread_mutex_init(&_mutex, NULL);
+        pthread_t workers[20];
+        for (int t = 0; t < 20; t += 1){
+            pthread_create(&workers[t], NULL, inc_lock, (void*)100000000);
+        }
+        for (int t = 0; t < 20; t += 1){
+            void * res = NULL;
+            pthread_join(workers[t], &res);
+        }
+        sout | "lock res is" | cnt_lock;
+        pthread_mutex_destroy(&_mutex);
+        if (cnt_lock != 100000000 * 20) {
+            sout | "pthread mutex not working";
+            exit(1);
+        }
+        cnt_lock = 0;
+    }
+}
+
+
+/* mutex trylock pthread routine  */
+void* trylock_test2(void* arg){     // started by trylock_test1 before that thread releases _mutex
+    int res = pthread_mutex_trylock(&_mutex);
+    sout | "in trylocktest2 res1 is" | res;     // prints the raw trylock return code
+    res = pthread_mutex_trylock(&_mutex);
+    sout | "in trylocktest2 res2 is" | res;
+    pthread_mutex_lock(&_mutex);                // NOTE(review): taken unconditionally, even if a trylock above returned 0 — confirm this is intended
+    for (int i = 0; i < (uint64_t)arg; i++) cnt_trylock++;   // adds `arg` to cnt_trylock after the blocking lock
+    pthread_mutex_unlock(&_mutex);
+    return NULL;
+}
+
+void* trylock_test1(void* arg){     // run by test_trylock; starts trylock_test2 and joins it before returning
+    int res = pthread_mutex_trylock(&_mutex);
+    sout | "in trylocktest1 res1 is" | res;
+    res = pthread_mutex_trylock(&_mutex);       // second attempt by the same thread; the return code is printed, not checked
+    sout | "in trylocktest1 res2 is" | res;
+    pthread_t task2;
+    pthread_create(&task2, NULL, trylock_test2, (void*)100000000);
+
+    // inc cnt then release the lock
+    for (int i = 0; i < (uint64_t)arg; i++) cnt_trylock++;
+    pthread_mutex_unlock(&_mutex);
+    pthread_mutex_unlock(&_mutex);              // NOTE(review): two unlocks mirror the two trylock calls; this balances only if both succeeded — confirm against the mutex implementation
+    void * dummy = NULL;
+    pthread_join(task2, &dummy);
+    sout | "cnt_trylock is " | cnt_trylock;     // printed after trylock_test2 has been joined
+    return NULL;
+}
+
+// trylock test
+void test_trylock(){    // runs trylock_test1 to completion; exits with 1 unless cnt_trylock ends at 100000000 * 2
+    pthread_t task1;
+    void * dummy = NULL;
+    pthread_mutex_init(&_mutex, NULL);
+    pthread_create(&task1, NULL, trylock_test1, (void*)100000000);
+    pthread_join(task1,&dummy);
+    pthread_mutex_destroy(&_mutex);
+    if (cnt_trylock != 100000000 * 2) {
+        sout | "pthread try mutex not working";
+        exit(1);
+    }
+    cnt_trylock = 0;
+}
+
+
+
+int main(int argc, char const *argv[])
+{
+    // test_unlock() is defined above but not called here; only the locked counter is exercised
+    // mutex-protected increment
+    test_lock();
+    // test trylock
+    test_trylock();
+    
+    return 0;
+}
Index: tests/concurrent/pthread/pthread_key_test.cfa
===================================================================
--- tests/concurrent/pthread/pthread_key_test.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/pthread_key_test.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,126 @@
+
+/* test pthread_key_create/set_specific/get_specific
+    get specific == set specific
+    dtor is invoked (no mem leak)
+*/
+
+
+extern "C"{
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <errno.h>
+    #include <pthread.h>
+
+    #define THREADS 5
+    #define BUFFSZ  48
+    pthread_key_t   key;
+    volatile int total_value,total_value_getspec;
+    pthread_mutex_t value_mutex;
+
+    // Thread body (parm points at this thread's int number). Stores a small integer under `key`, adds
+    // the stored value and the value read back to the two shared totals, then swaps in a heap buffer
+    // and checks that it reads back unchanged. Exits the thread with 0 on success; returns 12 or 68 on failure.
+    void            *threadfunc(void *parm)
+    {
+        int        status;
+        void      *value;
+        int        threadnum;
+        int       *tnum;
+        void      *getvalue;
+        char       Buffer[BUFFSZ];          // only its size is used, as the allocation size below
+
+        tnum = (int*)parm;
+        threadnum = *tnum;
+
+        value = (void *)(rand()%100);
+        status = pthread_setspecific(key, (void *) value);
+        if ( status !=  0) {
+            printf("pthread_setspecific failed, thread %d, errno %d",
+                                                        threadnum, status);
+            return (void*)12;
+        }
+        // total_value gets what was stored, total_value_getspec what was read back (matches the final report)
+        pthread_mutex_lock(&value_mutex);
+        total_value += (int)value;
+        total_value_getspec += (int)pthread_getspecific(key);
+        pthread_mutex_unlock(&value_mutex);
+
+        // replace the integer with a heap buffer; destr_fn (registered with the key) frees it
+        if (!(value = malloc(sizeof(Buffer))))
+            printf("Thread %d could not allocate storage, errno = %d\n",
+                                                        threadnum, errno);
+        status = pthread_setspecific(key, (void *) value);
+        if ( status !=  0) {
+            printf("pthread_setspecific failed, thread %d, errno %d",
+                                                        threadnum, status);
+            return (void*)12;
+        }
+
+        getvalue = pthread_getspecific(key);
+        if (getvalue != value) {
+            printf("getvalue not valid, getvalue=%p", getvalue);
+            return (void*)68;
+        }
+
+        pthread_exit((void *)0);
+    }
+
+    // Destructor passed to pthread_key_create in main: logs that it ran and frees the value it is given.
+    void  destr_fn(void *parm)
+    {
+        printf("Destructor function invoked\n");
+        free(parm);
+    }
+
+
+    // Driver: creates the key (destructor destr_fn), runs THREADS copies of threadfunc, joins them, and prints
+    // both totals. pthread calls report failure as a non-zero return value, so that value is tested and printed.
+    int main() {
+        int          status;
+        int          i;
+        int          threadparm[THREADS];
+        pthread_t    threadid[THREADS];
+        void*          thread_stat[THREADS];
+
+        // rand seed for testing
+        srand(1003);
+        pthread_mutex_init(&value_mutex, NULL);
+
+        // testing getspec and setspec
+        total_value = 0;
+        total_value_getspec = 0;
+
+        if ((status = pthread_key_create(&key, destr_fn )) != 0) {
+            printf("pthread_key_create failed, errno=%d", status);
+            exit(1);
+        }
+
+        // create THREADS threads, pass each its number
+        for (i=0; i<THREADS; i++) {
+            threadparm[i] = i+1;
+            status = pthread_create( &threadid[i],
+                                    NULL,
+                                    threadfunc,
+                                    (void *)&threadparm[i]);
+            if ( status != 0) {
+                printf("pthread_create failed, errno=%d", status);
+                exit(2);
+            }
+        }
+
+        for ( i=0; i<THREADS; i++) {
+            status = pthread_join( threadid[i], (void **)&thread_stat[i]);
+            // thread_stat[i] is only written by a successful join, so inspect it on that path alone
+            if ( status != 0) {
+                printf("pthread_join failed, thread %d, errno=%d\n", i+1, status);
+            } else if (thread_stat[i] != 0)   {
+                printf("bad thread status, thread %d, status=%d\n", i+1,
+                                                        (int)(u_int64_t)thread_stat[i]);
+            }
+        }
+        printf("total value is %d, total value by pthread_getspecific is %d\n", total_value, total_value_getspec);
+        exit(0);
+    }   // main
+}
+
Index: tests/concurrent/pthread/pthread_once_test.cfa
===================================================================
--- tests/concurrent/pthread/pthread_once_test.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
+++ tests/concurrent/pthread/pthread_once_test.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -0,0 +1,96 @@
+// tested pthread once,create,join
+
+                                                          
+                                                                                
+                                                            
+#include <fstream.hfa>
+#include <thread.hfa>                                                                    
+#define THREADS 20                                                                                                                                                    
+
+extern "C"{
+    #include <pthread.h>
+    #include <stdio.h>
+    #include <errno.h> 
+    int             once_counter=0;                                                 
+    pthread_once_t  once_control = PTHREAD_ONCE_INIT;                               
+                                                                                    
+    // Init routine handed to pthread_once: logs the call and counts it in once_counter.
+    void  once_fn(void) {
+        puts("in once_fn");
+        once_counter += 1;
+    }
+                                                                                    
+    // Thread body (parm points at this thread's int number): routes the thread through pthread_once with
+    // once_fn as the init routine, reports a failed call, and always returns NULL.
+    void            *threadfunc(void *parm)
+    {
+        int        status;
+        int        threadnum;
+        int        *tnum;
+
+        tnum = (int *)parm;
+        threadnum = *tnum;
+
+        // pthread_once reports failure as a non-zero error number in its return value, not in errno
+        status = pthread_once(&once_control, once_fn);
+        if ( status != 0)
+            printf("pthread_once failed, thread %d, errno=%d\n", threadnum,
+                                                                status);
+
+        return NULL;
+    }
+
+
+    void once_rtn(){        // prints a fixed message; not referenced anywhere in the visible part of this file
+        printf("in once init\n");
+    }
+    // Starts THREADS threads running threadfunc while 10 `processor` objects are declared, joins them,
+    // and verifies that once_fn ran exactly once (once_counter == 1).
+    // Exits with 2 if a thread cannot be created, 1 if the count is wrong, otherwise 0; it never returns.
+    // pthread calls report failure as a non-zero return value, so that value is tested and printed.
+    void test(){
+        processor p[10];
+
+        int          status;
+        int          i;
+        int          threadparm[THREADS];
+        pthread_t    threadid[THREADS];
+        void*          thread_stat[THREADS];
+
+        for (i=0; i<THREADS; i++) {
+            threadparm[i] = i+1;
+            status = pthread_create( &threadid[i],
+                                    NULL,
+                                    threadfunc,
+                                    (void *)&threadparm[i]);
+            if ( status != 0) {
+                printf("pthread_create failed, errno=%d", status);
+                exit(2);
+            }
+        }
+
+        for ( i=0; i<THREADS; i++) {
+            status = pthread_join( threadid[i], (void **)&thread_stat[i]);
+            if ( status != 0)
+                printf("pthread_join failed, thread %d, errno=%d\n", i+1, status);
+            else if (thread_stat[i] != 0)    // thread_stat[i] is only written by a successful join
+                printf("bad thread status, thread %d, status=%d\n", i+1,
+                                                        (int)(size_t)thread_stat[i]);
+        }
+
+        if (once_counter != 1) {
+            printf("once_fn did not get control once, counter=%d",once_counter);
+            exit(1);
+        }
+
+        exit(0);
+    }
+}
+
+
+
+int main(int argc, char const *argv[])
+{
+    test();         // every path through test() visible in this file ends in exit(), so control does not come back here
+    return 0;
+}
Index: tests/configs/parsebools.cfa
===================================================================
--- tests/configs/parsebools.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ tests/configs/parsebools.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -15,8 +15,10 @@
 //
 
-#include <parseargs.hfa>
 #include <fstream.hfa>
 
 #include "../meta/fork+exec.hfa"
+
+// included last as a workaround for a parser bug
+#include <parseargs.hfa>
 
 int main(int argc, char * argv[]) {
@@ -30,16 +32,14 @@
 	bool sf = true;
 
-	cfa_option options[] = {
-		{'e', "yesno",     "test yes/no",     YN, parse_yesno},
-		{'y', "YN",        "test yes/no",     Yn, parse_yesno},
-		{'n', "yn",        "test yes/no",     yn, parse_yesno},
-		{'t', "truefalse", "test true/false", tf, parse_truefalse},
-		{'s', "settrue",   "test set true",   st, parse_settrue},
-		{'u', "setfalse",  "test set false",  sf, parse_setfalse},
-	};
-	int options_cnt = sizeof(options) / sizeof(cfa_option);
+	array( cfa_option, 6 ) options;
+	options[0] = (cfa_option){'e', "yesno",     "test yes/no",     YN, parse_yesno};
+	options[1] = (cfa_option){'y', "YN",        "test yes/no",     Yn, parse_yesno};
+	options[2] = (cfa_option){'n', "yn",        "test yes/no",     yn, parse_yesno};
+	options[3] = (cfa_option){'t', "truefalse", "test true/false", tf, parse_truefalse};
+	options[4] = (cfa_option){'s', "settrue",   "test set true",   st, parse_settrue};
+	options[5] = (cfa_option){'u', "setfalse",  "test set false",  sf, parse_setfalse};
 
 	char **left;
-	parse_args( options, options_cnt, "[OPTIONS]...\ntesting bool parameters", left);
+	parse_args( options, "[OPTIONS]...\ntesting bool parameters", left);
 
 	sout | "yes/no     :" | YN;
Index: tests/configs/parsenums.cfa
===================================================================
--- tests/configs/parsenums.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ tests/configs/parsenums.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -15,8 +15,10 @@
 //
 
+#include <fstream.hfa>
+
+#include "../meta/fork+exec.hfa"
+
+// included last as a workaround for a parser bug
 #include <parseargs.hfa>
-#include <fstream.hfa>
-
-#include "../meta/fork+exec.hfa"
 
 #if __SIZEOF_LONG__ == 4
@@ -42,15 +44,13 @@
 
 
-	cfa_option options[] = {
-		{ 'i', "int",              "test int",                i   },
-		{ 'u', "unsigned",         "test unsigned",           u   },
-		{ 'l', "unsignedlong",     "test unsigned long",      ul  },
-		{ 'L', "unsignedlonglong", "test unsigned long long", ull },
-		{ 'd', "double",           "test double",             d   },
-	};
-	int options_cnt = sizeof(options) / sizeof(cfa_option);
+	array( cfa_option, 5 ) options;
+	options[0] = (cfa_option){ 'i', "int",              "test int",                i   };
+	options[1] = (cfa_option){ 'u', "unsigned",         "test unsigned",           u   };
+	options[2] = (cfa_option){ 'l', "unsignedlong",     "test unsigned long",      ul  };
+	options[3] = (cfa_option){ 'L', "unsignedlonglong", "test unsigned long long", ull };
+	options[4] = (cfa_option){ 'd', "double",           "test double",             d   };
 
 	char **left;
-	parse_args( options, options_cnt, "[OPTIONS]...\ntesting bool parameters", left);
+	parse_args( options, "[OPTIONS]...\ntesting bool parameters", left);
 
 	sout | "int                :" | i;
Index: tests/configs/usage.cfa
===================================================================
--- tests/configs/usage.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ tests/configs/usage.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -15,8 +15,8 @@
 //
 
+#include <fstream.hfa>
+#include "../meta/fork+exec.hfa"
 #include <parseargs.hfa>
-#include <fstream.hfa>
 
-#include "../meta/fork+exec.hfa"
 
 int main() {
@@ -25,6 +25,6 @@
 	sout | "No args, no errors";
 	if(pid_t child = strict_fork(); child == 0) {
-		cfa_option opts[0];
-		print_args_usage(1, fake_argv, opts, 0, "Test usage", false);
+		array( cfa_option, 0 ) opts;
+		print_args_usage(1, fake_argv, opts, "Test usage", false);
 	}
 	else {
@@ -35,6 +35,6 @@
 	sout | "No args, with errors";
 	if(pid_t child = strict_fork(); child == 0) {
-		cfa_option opts[0];
-		print_args_usage(1, fake_argv, opts, 0, "Test usage", true);
+		array( cfa_option, 0 ) opts;
+		print_args_usage(1, fake_argv, opts, "Test usage", true);
 	}
 	else {
@@ -46,10 +46,9 @@
 	if(pid_t child = strict_fork(); child == 0) {
 		int a, b, c;
-		cfa_option opts[] = {
-			{'a', "", "First arg", a },
-			{'b', "", "Second arg", b },
-			{'c', "", "Third arg", c },
-		};
-		print_args_usage(1, fake_argv, opts, 3, "Test usage", false);
+		array( cfa_option, 3 ) opts;
+		opts[0] = (cfa_option){'a', "", "First arg", a };
+		opts[1] = (cfa_option){'b', "", "Second arg", b };
+		opts[2] = (cfa_option){'c', "", "Third arg", c };
+		print_args_usage(1, fake_argv, opts, "Test usage", false);
 	}
 	else {
@@ -61,10 +60,9 @@
 	if(pid_t child = strict_fork(); child == 0) {
 		int a, b, c;
-		cfa_option opts[] = {
-			{'\0', "AA", "First arg", a },
-			{'\0', "BB", "Second arg", b },
-			{'\0', "CC", "Third arg", c },
-		};
-		print_args_usage(1, fake_argv, opts, 3, "Test usage", false);
+		array( cfa_option, 3 ) opts;
+		opts[0] = (cfa_option){'\0', "AA", "First arg", a };
+		opts[1] = (cfa_option){'\0', "BB", "Second arg", b };
+		opts[2] = (cfa_option){'\0', "CC", "Third arg", c };
+		print_args_usage(1, fake_argv, opts, "Test usage", false);
 	}
 	else {
@@ -76,10 +74,9 @@
 	if(pid_t child = strict_fork(); child == 0) {
 		int a, b, c;
-		cfa_option opts[] = {
-			{'a', "", "First arg", a },
-			{'b', "BBBB", "Second arg", b },
-			{'\0', "CC", "Third arg", c },
-		};
-		print_args_usage(1, fake_argv, opts, 3, "Test usage", false);
+		array( cfa_option, 3 ) opts;
+		opts[0] = (cfa_option){'a', "", "First arg", a };
+		opts[1] = (cfa_option){'b', "BBBB", "Second arg", b };
+		opts[2] = (cfa_option){'\0', "CC", "Third arg", c };
+		print_args_usage(1, fake_argv, opts, "Test usage", false);
 	}
 	else {
@@ -91,10 +88,9 @@
 	if(pid_t child = strict_fork(); child == 0) {
 		int a, b, c;
-		cfa_option opts[] = {
-			{'a', "", "First arg", a },
-			{'b', "BBBB", "", b },
-			{'\0', "CC", "Third arg", c },
-		};
-		print_args_usage(1, fake_argv, opts, 3, "Test usage", false);
+		array( cfa_option, 3 ) opts;
+		opts[0] = (cfa_option){'a', "", "First arg", a };
+		opts[1] = (cfa_option){'b', "BBBB", "", b };
+		opts[2] = (cfa_option){'\0', "CC", "Third arg", c };
+		print_args_usage(1, fake_argv, opts, "Test usage", false);
 	}
 	else {
@@ -106,10 +102,9 @@
 	if(pid_t child = strict_fork(); child == 0) {
 		int a, b, c;
-		cfa_option opts[] = {
-			{'a', "", "First arg\nThe description has multiple lines,\n...for some reason", a },
-			{'b', "BBBB", "12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890", b },
-			{'\0', "CC", "Third arg", c },
-		};
-		print_args_usage(1, fake_argv, opts, 3, "Test usage", false);
+		array( cfa_option, 3 ) opts;
+		opts[0] = (cfa_option){'a', "", "First arg\nThe description has multiple lines,\n...for some reason", a };
+		opts[1] = (cfa_option){'b', "BBBB", "12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890", b };
+		opts[2] = (cfa_option){'\0', "CC", "Third arg", c };
+		print_args_usage(1, fake_argv, opts, "Test usage", false);
 	}
 	else {
Index: tests/device/cpu.cfa
===================================================================
--- tests/device/cpu.cfa	(revision e874605d8b0f66ad6050e4d03825b57912afe8ac)
+++ tests/device/cpu.cfa	(revision 93d2219a70113e973cd8ff2d973d1fdec40ac805)
@@ -117,4 +117,5 @@
 unsigned find_idx() {
 	int idxs = count_cache_indexes();
+	if( 0 == idxs ) return 0;
 
 	unsigned found_level = 0;
@@ -179,5 +180,5 @@
 	unsigned idx = find_idx();
 	// For all procs check mapping is consistent
-	for(cpu_me; cpu_info.hthrd_count) {
+	if( idx > 0 ) for(cpu_me; cpu_info.hthrd_count) {
 		char buf_me[32];
 		size_t len_me = read_cpuidxinfo_into(cpu_me, idx, "shared_cpu_list", buf_me, 32);
