Index: src/Concurrency/Waitfor.cc
===================================================================
--- src/Concurrency/Waitfor.cc	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/Concurrency/Waitfor.cc	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -416,5 +416,5 @@
 				makeAccStatement( acceptables, index, "is_dtor", detectIsDtor( clause.target.function )                                    , indexer ),
 				makeAccStatement( acceptables, index, "func"   , new CastExpr( clause.target.function, fptr_t )                            , indexer ),
-				makeAccStatement( acceptables, index, "list"   , new VariableExpr( monitors )                                              , indexer ),
+				makeAccStatement( acceptables, index, "data"   , new VariableExpr( monitors )                                              , indexer ),
 				makeAccStatement( acceptables, index, "size"   , new ConstantExpr( Constant::from_ulong( clause.target.arguments.size() ) ), indexer ),
 				setter->clone()
Index: src/GenPoly/InstantiateGeneric.cc
===================================================================
--- src/GenPoly/InstantiateGeneric.cc	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/GenPoly/InstantiateGeneric.cc	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -210,5 +210,5 @@
 		PassVisitor<GenericInstantiator> instantiator;
 
-		mutateAll( translationUnit, fixer );
+		// mutateAll( translationUnit, fixer );
 		mutateAll( translationUnit, instantiator );
 	}
Index: src/benchmark/Makefile.am
===================================================================
--- src/benchmark/Makefile.am	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/benchmark/Makefile.am	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -94,5 +94,7 @@
 	ctxswitch-cfa_thread.run	\
 	ctxswitch-upp_coroutine.run	\
-	ctxswitch-upp_thread.run
+	ctxswitch-upp_thread.run	\
+	ctxswitch-goroutine.run		\
+	ctxswitch-java_thread.run
 
 ctxswitch-cfa_coroutine$(EXEEXT):
@@ -111,16 +113,30 @@
 	@@BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
 
+ctxswitch-goroutine$(EXEEXT):
+	@go build -o a.out ctxswitch/goroutine.go
+
+ctxswitch-java_thread$(EXEEXT):
+	@javac ctxswitch/JavaThread.java
+	@echo "#!/bin/sh" > a.out
+	@echo "cd ctxswitch && java JavaThread" >> a.out
+	@chmod a+x a.out
+
 ## =========================================================================================================
 mutex$(EXEEXT) :\
 	mutex-function.run	\
+	mutex-fetch_add.run	\
 	mutex-pthread_lock.run	\
 	mutex-upp.run		\
 	mutex-cfa1.run		\
 	mutex-cfa2.run		\
-	mutex-cfa4.run
+	mutex-cfa4.run		\
+	mutex-java_thread.run
 
 mutex-function$(EXEEXT):
 	@@BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
 
+mutex-fetch_add$(EXEEXT):
+	@@BACKEND_CC@ mutex/fetch_add.c   -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+
 mutex-pthread_lock$(EXEEXT):
 	@@BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
@@ -137,4 +153,10 @@
 mutex-cfa4$(EXEEXT):
 	@${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+
+mutex-java_thread$(EXEEXT):
+	@javac mutex/JavaThread.java
+	@echo "#!/bin/sh" > a.out
+	@echo "cd mutex && java JavaThread" >> a.out
+	@chmod a+x a.out
 
 ## =========================================================================================================
@@ -143,5 +165,6 @@
 	signal-cfa1.run		\
 	signal-cfa2.run		\
-	signal-cfa4.run
+	signal-cfa4.run		\
+	signal-java_thread.run
 
 signal-upp$(EXEEXT):
@@ -156,4 +179,11 @@
 signal-cfa4$(EXEEXT):
 	@${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+
+signal-java_thread$(EXEEXT):
+	@javac schedint/JavaThread.java
+	@echo "#!/bin/sh" > a.out
+	@echo "cd schedint && java JavaThread" >> a.out
+	@chmod a+x a.out
+
 
 ## =========================================================================================================
@@ -183,5 +213,7 @@
 	creation-cfa_thread.run			\
 	creation-upp_coroutine.run		\
-	creation-upp_thread.run
+	creation-upp_thread.run			\
+	creation-goroutine.run			\
+	creation-java_thread.run
 
 creation-cfa_coroutine$(EXEEXT):
@@ -202,4 +234,13 @@
 creation-pthread$(EXEEXT):
 	@@BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+
+creation-goroutine$(EXEEXT):
+	@go build -o a.out creation/goroutine.go
+
+creation-java_thread$(EXEEXT):
+	@javac creation/JavaThread.java
+	@echo "#!/bin/sh" > a.out
+	@echo "cd creation && java JavaThread" >> a.out
+	@chmod a+x a.out
 
 ## =========================================================================================================
Index: src/benchmark/Makefile.in
===================================================================
--- src/benchmark/Makefile.in	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/benchmark/Makefile.in	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -507,5 +507,7 @@
 	ctxswitch-cfa_thread.run	\
 	ctxswitch-upp_coroutine.run	\
-	ctxswitch-upp_thread.run
+	ctxswitch-upp_thread.run	\
+	ctxswitch-goroutine.run		\
+	ctxswitch-java_thread.run
 
 ctxswitch-cfa_coroutine$(EXEEXT):
@@ -524,15 +526,29 @@
 	@@BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
 
+ctxswitch-goroutine$(EXEEXT):
+	@go build -o a.out ctxswitch/goroutine.go
+
+ctxswitch-java_thread$(EXEEXT):
+	@javac ctxswitch/JavaThread.java
+	@echo "#!/bin/sh" > a.out
+	@echo "cd ctxswitch && java JavaThread" >> a.out
+	@chmod a+x a.out
+
 mutex$(EXEEXT) :\
 	mutex-function.run	\
+	mutex-fetch_add.run	\
 	mutex-pthread_lock.run	\
 	mutex-upp.run		\
 	mutex-cfa1.run		\
 	mutex-cfa2.run		\
-	mutex-cfa4.run
+	mutex-cfa4.run		\
+	mutex-java_thread.run
 
 mutex-function$(EXEEXT):
 	@@BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
 
+mutex-fetch_add$(EXEEXT):
+	@@BACKEND_CC@ mutex/fetch_add.c   -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+
 mutex-pthread_lock$(EXEEXT):
 	@@BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
@@ -549,4 +565,10 @@
 mutex-cfa4$(EXEEXT):
 	@${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+
+mutex-java_thread$(EXEEXT):
+	@javac mutex/JavaThread.java
+	@echo "#!/bin/sh" > a.out
+	@echo "cd mutex && java JavaThread" >> a.out
+	@chmod a+x a.out
 
 signal$(EXEEXT) :\
@@ -567,4 +589,10 @@
 signal-cfa4$(EXEEXT):
 	@${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+
+signal-java_thread$(EXEEXT):
+	@javac schedint/JavaThread.java
+	@echo "#!/bin/sh" > a.out
+	@echo "cd schedint && java JavaThread" >> a.out
+	@chmod a+x a.out
 
 waitfor$(EXEEXT) :\
@@ -592,5 +620,7 @@
 	creation-cfa_thread.run			\
 	creation-upp_coroutine.run		\
-	creation-upp_thread.run
+	creation-upp_thread.run			\
+	creation-goroutine.run			\
+	creation-java_thread.run
 
 creation-cfa_coroutine$(EXEEXT):
@@ -611,4 +641,13 @@
 creation-pthread$(EXEEXT):
 	@@BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+
+creation-goroutine$(EXEEXT):
+	@go build -o a.out creation/goroutine.go
+
+creation-java_thread$(EXEEXT):
+	@javac creation/JavaThread.java
+	@echo "#!/bin/sh" > a.out
+	@echo "cd creation && java JavaThread" >> a.out
+	@chmod a+x a.out
 
 compile$(EXEEXT) :\
Index: src/benchmark/bench.h
===================================================================
--- src/benchmark/bench.h	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/benchmark/bench.h	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -1,5 +1,5 @@
 #pragma once
 
-#if defined(__CFORALL__)
+#if defined(__cforall)
 extern "C" {
 #endif
@@ -8,5 +8,5 @@
 	#include <sys/times.h>					// times
 	#include <time.h>
-#if defined(__CFORALL__)
+#if defined(__cforall)
 }
 #endif
Index: src/benchmark/creation/JavaThread.java
===================================================================
--- src/benchmark/creation/JavaThread.java	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
+++ src/benchmark/creation/JavaThread.java	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -0,0 +1,18 @@
+public class JavaThread { // Benchmark: average time to create, start and join one Java thread
+	public static class MyThread extends Thread { // thread with an empty body, so no work is done inside run()
+		@Override
+		public void run() {}
+	}
+
+	public static void main(String[] args) throws InterruptedException {
+		int NoOfTimes = 50000; // number of threads created, one after another
+		long start = System.nanoTime();
+		for(int i = 1; i <= NoOfTimes; i += 1) {
+			JavaThread.MyThread m = new JavaThread.MyThread();
+        		m.start();
+			m.join(); // wait for this thread to finish before creating the next one
+		}
+		long end = System.nanoTime();
+		System.out.println( (end - start) / NoOfTimes); // average nanoseconds per iteration (integer division)
+	}
+}
Index: src/benchmark/creation/goroutine.go
===================================================================
--- src/benchmark/creation/goroutine.go	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
+++ src/benchmark/creation/goroutine.go	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -0,0 +1,27 @@
+package main
+
+import (
+    "fmt"
+    "time"
+)
+
+var shake chan bool = make( chan bool )	// unbuffered: a send blocks until main receives
+
+func noop() {
+	shake <- true   // indicate completion
+}
+
+//=======================================
+// benchmark driver
+//=======================================
+
+func main() {
+	const NoOfTimes = 500000
+	start := time.Now()
+	for i := 1; i <= NoOfTimes; i += 1 {
+		go noop()		// creation
+		<- shake		// join: receive the completion signal so no goroutine is left blocked on the send
+	}
+	end := time.Now()
+	fmt.Printf("%d\n", end.Sub(start) / time.Duration(NoOfTimes))	// average nanoseconds per create+complete
+}
Index: src/benchmark/ctxswitch/JavaThread.java
===================================================================
--- src/benchmark/ctxswitch/JavaThread.java	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
+++ src/benchmark/ctxswitch/JavaThread.java	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -0,0 +1,11 @@
+public class JavaThread { // Benchmark: average time of one Thread.yield() call made from the main thread
+	public static void main(String[] args) {
+		int NoOfTimes = 5000000; // number of yields timed
+		long start = System.nanoTime();
+		for(int i = 1; i <= NoOfTimes; i += 1) {
+			Thread.yield(); // scheduler hint; this program itself starts no other threads
+		}
+		long end = System.nanoTime();
+		System.out.println( (end - start) / NoOfTimes); // average nanoseconds per yield (integer division)
+	}
+}
Index: src/benchmark/ctxswitch/goroutine.go
===================================================================
--- src/benchmark/ctxswitch/goroutine.go	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
+++ src/benchmark/ctxswitch/goroutine.go	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -0,0 +1,33 @@
+package main
+
+import (
+    "fmt"
+    "runtime"
+    "time"
+)
+
+//=======================================
+// time context switch
+//=======================================
+
+var shake chan bool = make( chan bool )	// unbuffered channel used to tell main the measurement is done
+
+func ContextSwitch(N int) {	// times N calls to runtime.Gosched() and prints the average per call
+	start := time.Now()
+	for i := 1; i <= N; i += 1 {
+		runtime.Gosched()	// yield the processor so other goroutines may run
+	}
+	end := time.Now()
+	fmt.Printf("%d\n", end.Sub(start) / time.Duration(N))	// %d prints the Duration as its integer nanosecond count
+	shake <- true   // indicate completion
+}
+
+//=======================================
+// benchmark driver
+//=======================================
+
+func main() {
+	const NoOfTimes = 10000000
+	go ContextSwitch( NoOfTimes )		// context switch
+	<- shake	// block until ContextSwitch has printed its result
+}
Index: src/benchmark/mutex/JavaThread.java
===================================================================
--- src/benchmark/mutex/JavaThread.java	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
+++ src/benchmark/mutex/JavaThread.java	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -0,0 +1,14 @@
+public class JavaThread { // Benchmark: average time of one call to an empty synchronized method
+	public synchronized void noop() {} // empty body: the call only acquires and releases this object's monitor
+
+	public static void main(String[] args) {
+		int NoOfTimes = 5000000; // number of synchronized calls timed
+		JavaThread j = new JavaThread();
+		long start = System.nanoTime();
+		for(int i = 1; i <= NoOfTimes; i += 1) {
+			j.noop(); // only the main thread calls this, so the lock is never contended here
+		}
+		long end = System.nanoTime();
+		System.out.println( (end - start) / NoOfTimes); // average nanoseconds per call (integer division)
+	}
+}
Index: src/benchmark/mutex/fetch_add.c
===================================================================
--- src/benchmark/mutex/fetch_add.c	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
+++ src/benchmark/mutex/fetch_add.c	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -0,0 +1,22 @@
+#include <stdio.h>
+
+#include "bench.h"
+
+volatile int value;	// counter modified by the atomic operations in do_call
+
+void __attribute__((noinline)) do_call() {	// one atomic increment then one atomic decrement of value; noinline keeps it a real call
+	__atomic_add_fetch( &value, 1, __ATOMIC_SEQ_CST );
+	asm volatile ("");	// empty asm statement placed between the two atomic operations
+	__atomic_sub_fetch( &value, 1, __ATOMIC_SEQ_CST );
+}
+
+int main(int argc, char* argv[]) {
+	BENCH(	// macro from bench.h (not visible here); presumably supplies n and stores the measurement in result -- confirm
+		for (size_t i = 0; i < n; i++) {
+			do_call();
+		},
+		result
+	)
+
+	printf("%llu\n", result);	// NOTE(review): assumes result is an unsigned long long -- confirm against bench.h
+}
Index: src/benchmark/schedint/JavaThread.java
===================================================================
--- src/benchmark/schedint/JavaThread.java	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
+++ src/benchmark/schedint/JavaThread.java	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -0,0 +1,42 @@
+class Monitor { // instance is used as the Java monitor (lock and wait set) shared by both threads
+	public static volatile boolean go = false; // handshake flag: true while the signaller should keep notifying
+}
+
+class Signaller extends Thread { // notifies the shared monitor in a loop until the flag is cleared
+	Monitor m;
+	Signaller(Monitor m) {
+		this.m = m;
+	}
+
+	public void run() {
+		Monitor.go = true; // tell main that the signaller is running
+		while( Monitor.go ) {
+			synchronized(this.m) {
+				this.m.notify();
+			}
+		}
+	}
+}
+
+public class JavaThread { // Benchmark: average time for main to return from one wait() on the shared monitor
+	public static void main(String[] args) throws InterruptedException {
+		int NoOfTimes = 50000; // number of wait() calls timed
+		Monitor m = new Monitor();
+		long start, end;
+		Signaller s = new Signaller(m);
+		synchronized(m) {
+			s.start();
+			while( !Monitor.go ) { // spin until the signaller has set the flag
+				Thread.yield();
+			}
+			start = System.nanoTime();
+			for(int i = 1; i <= NoOfTimes; i += 1) {
+				m.wait(); // releases m while waiting, which lets the signaller enter and notify
+			}
+			end = System.nanoTime();
+		}
+		Monitor.go = false; // makes the signaller's loop terminate
+		s.join();
+		System.out.println( (end - start) / NoOfTimes); // average nanoseconds per wait (integer division)
+	}
+}
Index: src/libcfa/Makefile.am
===================================================================
--- src/libcfa/Makefile.am	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/Makefile.am	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -100,4 +100,5 @@
 	math 				\
 	gmp 				\
+	bits/containers.h		\
 	bits/defs.h 		\
 	bits/locks.h 		\
Index: src/libcfa/Makefile.in
===================================================================
--- src/libcfa/Makefile.in	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/Makefile.in	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -264,6 +264,6 @@
 	containers/result containers/vector concurrency/coroutine \
 	concurrency/thread concurrency/kernel concurrency/monitor \
-	${shell echo stdhdr/*} math gmp bits/defs.h bits/locks.h \
-	concurrency/invoke.h libhdr.h libhdr/libalign.h \
+	${shell echo stdhdr/*} math gmp bits/containers.h bits/defs.h \
+	bits/locks.h concurrency/invoke.h libhdr.h libhdr/libalign.h \
 	libhdr/libdebug.h libhdr/libtools.h
 HEADERS = $(nobase_cfa_include_HEADERS)
@@ -437,4 +437,5 @@
 	math 				\
 	gmp 				\
+	bits/containers.h		\
 	bits/defs.h 		\
 	bits/locks.h 		\
Index: src/libcfa/bits/containers.h
===================================================================
--- src/libcfa/bits/containers.h	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/bits/containers.h	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -15,7 +15,52 @@
 #pragma once
 
-#include <stddef.h>
+#include "bits/defs.h"
+#include "libhdr.h"
 
-#include "libhdr.h"
+//-----------------------------------------------------------------------------
+// Array
+//-----------------------------------------------------------------------------
+
+#ifdef __cforall	// Cforall: the struct is generic over the element type T
+	forall(dtype T)
+#else
+	#define T void	// plain C: no generics, so data is declared as void *
+#endif
+struct __small_array {
+	T *           data;	// pointer to the first element
+	__lock_size_t size;	// number of elements (end() is computed as data + size)
+};
+#undef T
+
+#ifdef __cforall
+	#define __small_array_t(T) __small_array(T)
+#else
+	#define __small_array_t(T) struct __small_array
+#endif
+
+#ifdef __cforall
+	// forall(otype T | sized(T))
+	// static inline void ?{}(__small_array(T) & this) {}
+
+	forall(dtype T | sized(T))
+	static inline T& ?[?]( __small_array(T) & this, __lock_size_t idx) {	// subscript: reference to element idx; idx is not range-checked
+		return ((typeof(this.data))this.data)[idx];	// NOTE(review): data is cast to its own type; reason not visible here -- confirm
+	}
+
+	forall(dtype T | sized(T))
+	static inline T& ?[?]( const __small_array(T) & this, __lock_size_t idx) {	// same subscript for a const array; still returns T&
+		return ((typeof(this.data))this.data)[idx];
+	}
+
+	forall(dtype T | sized(T))
+	static inline T* begin( const __small_array(T) & this ) {	// pointer to the first element
+		return ((typeof(this.data))this.data);
+	}
+
+	forall(dtype T | sized(T))
+	static inline T* end( const __small_array(T) & this ) {	// pointer one past the last element (data + size)
+		return ((typeof(this.data))this.data) + this.size;
+	}
+#endif
 
 //-----------------------------------------------------------------------------
@@ -23,5 +68,5 @@
 //-----------------------------------------------------------------------------
 
-#ifdef __CFORALL__
+#ifdef __cforall
 	trait is_node(dtype T) {
 		T*& get_next( T& );
@@ -32,5 +77,5 @@
 // Stack
 //-----------------------------------------------------------------------------
-#ifdef __CFORALL__
+#ifdef __cforall
 	forall(dtype TYPE | is_node(TYPE))
 	#define T TYPE
@@ -41,6 +86,7 @@
 	T * top;
 };
+#undef T
 
-#ifdef __CFORALL__
+#ifdef __cforall
 #define __stack_t(T) __stack(T)
 #else
@@ -48,12 +94,12 @@
 #endif
 
-#ifdef __CFORALL__
+#ifdef __cforall
 	forall(dtype T | is_node(T))
-	void ?{}( __stack(T) & this ) {
-		this.top = NULL;
+	static inline void ?{}( __stack(T) & this ) {
+		(this.top){ NULL };
 	}
 
 	forall(dtype T | is_node(T) | sized(T))
-	void push( __stack(T) & this, T * val ) {
+	static inline void push( __stack(T) & this, T * val ) {
 		verify( !get_next( *val ) );
 		get_next( *val ) = this.top;
@@ -62,5 +108,5 @@
 
 	forall(dtype T | is_node(T) | sized(T))
-	T * pop( __stack(T) & this ) {
+	static inline T * pop( __stack(T) & this ) {
 		T * top = this.top;
 		if( top ) {
@@ -75,6 +121,6 @@
 // Queue
 //-----------------------------------------------------------------------------
-#ifdef __CFORALL__
-	forall(dtype T | is_node(T))
+#ifdef __cforall
+	forall(dtype TYPE | is_node(TYPE))
 	#define T TYPE
 #else
@@ -85,14 +131,21 @@
 	T ** tail;
 };
+#undef T
 
-#ifdef __CFORALL__
+#ifdef __cforall
+#define __queue_t(T) __queue(T)
+#else
+#define __queue_t(T) struct __queue
+#endif
+
+#ifdef __cforall
 	forall(dtype T | is_node(T))
-	void ?{}( __queue(T) & this ) {
-		this.head = NULL;
-		this.tail = &this.head;
+	static inline void ?{}( __queue(T) & this ) {
+		(this.head){ NULL };
+		(this.tail){ &this.head };
 	}
 
 	forall(dtype T | is_node(T) | sized(T))
-	void append( __queue(T) & this, T * val ) {
+	static inline void append( __queue(T) & this, T * val ) {
 		verify(this.tail != NULL);
 		*this.tail = val;
@@ -101,5 +154,5 @@
 
 	forall(dtype T | is_node(T) | sized(T))
-	T * pop_head( __queue(T) & this ) {
+	static inline T * pop_head( __queue(T) & this ) {
 		T * head = this.head;
 		if( head ) {
@@ -114,5 +167,5 @@
 
 	forall(dtype T | is_node(T) | sized(T))
-	T * remove( __queue(T) & this, T ** it ) {
+	static inline T * remove( __queue(T) & this, T ** it ) {
 		T * val = *it;
 		verify( val );
Index: src/libcfa/bits/defs.h
===================================================================
--- src/libcfa/bits/defs.h	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/bits/defs.h	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -17,4 +17,5 @@
 
 #include <stdbool.h>
+#include <stddef.h>
 #include <stdint.h>
 
@@ -22,2 +23,11 @@
 #define likely  (x)    __builtin_expect(!!(x), 1)
 #define thread_local _Thread_local
+
+typedef void (*fptr_t)();
+typedef int_fast16_t __lock_size_t;
+
+#ifdef __cforall
+#define __cfa_anonymous_object
+#else
+#define __cfa_anonymous_object __cfa_anonymous_object
+#endif
Index: src/libcfa/bits/locks.h
===================================================================
--- src/libcfa/bits/locks.h	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/bits/locks.h	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -56,5 +56,5 @@
 } __ALIGN__;
 
-#ifdef __CFORALL__
+#ifdef __cforall
 	extern void yield( unsigned int );
 	extern thread_local struct thread_desc *    volatile this_thread;
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/concurrency/invoke.h	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -14,8 +14,9 @@
 //
 
+#include "bits/containers.h"
 #include "bits/defs.h"
 #include "bits/locks.h"
 
-#ifdef __CFORALL__
+#ifdef __cforall
 extern "C" {
 #endif
@@ -25,26 +26,8 @@
 #define _INVOKE_H_
 
-	typedef void (*fptr_t)();
-	typedef int_fast16_t __lock_size_t;
-
-	struct __thread_queue_t {
-		struct thread_desc * head;
-		struct thread_desc ** tail;
-	};
-
-	struct __condition_stack_t {
-		struct __condition_criterion_t * top;
-	};
-
-	#ifdef __CFORALL__
+	#ifdef __cforall
 	extern "Cforall" {
-		void ?{}( struct __thread_queue_t & );
-		void append( struct __thread_queue_t &, struct thread_desc * );
-		struct thread_desc * pop_head( struct __thread_queue_t & );
-		struct thread_desc * remove( struct __thread_queue_t &, struct thread_desc ** );
-
-		void ?{}( struct __condition_stack_t & );
-		void push( struct __condition_stack_t &, struct __condition_criterion_t * );
-		struct __condition_criterion_t * pop( struct __condition_stack_t & );
+		static inline struct thread_desc             * & get_next( struct thread_desc             & this );
+		static inline struct __condition_criterion_t * & get_next( struct __condition_criterion_t & this );
 	}
 	#endif
@@ -100,8 +83,5 @@
 
 		// list of acceptable functions, null if any
-		struct __acceptable_t * clauses;
-
-		// number of acceptable functions
-		__lock_size_t size;
+		__small_array_t(struct __acceptable_t) __cfa_anonymous_object;
 	};
 
@@ -114,8 +94,8 @@
 
 		// queue of threads that are blocked waiting for the monitor
-		struct __thread_queue_t entry_queue;
+		__queue_t(struct thread_desc) entry_queue;
 
 		// stack of conditions to run next once we exit the monitor
-		struct __condition_stack_t signal_stack;
+		__stack_t(struct __condition_criterion_t) signal_stack;
 
 		// monitor routines can be called recursively, we need to keep track of that
@@ -131,8 +111,5 @@
 	struct __monitor_group_t {
 		// currently held monitors
-		struct monitor_desc ** list;
-
-		// number of currently held monitors
-		__lock_size_t size;
+		__small_array_t(monitor_desc*) __cfa_anonymous_object;
 
 		// last function that acquired monitors
@@ -159,12 +136,26 @@
      };
 
-     #ifdef __CFORALL__
+     #ifdef __cforall
      extern "Cforall" {
-		static inline monitor_desc * ?[?]( const __monitor_group_t & this, ptrdiff_t index ) {
-			return this.list[index];
+		static inline thread_desc * & get_next( thread_desc & this ) {	// reference to the thread's next pointer, so callers can read and relink it
+			return this.next;
+		}
+
+		static inline struct __condition_criterion_t * & get_next( struct __condition_criterion_t & this );
+
+		static inline void ?{}(__monitor_group_t & this) {	// default constructor: empty group (NULL data, size 0, NULL func)
+			(this.data){NULL};
+			(this.size){0};
+			(this.func){NULL};
+		}
+
+		static inline void ?{}(__monitor_group_t & this, struct monitor_desc ** data, __lock_size_t size, fptr_t func) {	// field-wise constructor
+			(this.data){data};	// stores the pointer only; the monitor array itself is not copied
+			(this.size){size};
+			(this.func){func};
+		}
 
 		static inline bool ?==?( const __monitor_group_t & lhs, const __monitor_group_t & rhs ) {
-			if( (lhs.list != 0) != (rhs.list != 0) ) return false;
+			if( (lhs.data != 0) != (rhs.data != 0) ) return false;
 			if( lhs.size != rhs.size ) return false;
 			if( lhs.func != rhs.func ) return false;
@@ -177,4 +168,10 @@
 
 			return true;
+		}
+
+		static inline void ?=?(__monitor_group_t & lhs, const __monitor_group_t & rhs) {	// assignment: copies the three fields one by one
+			lhs.data = rhs.data;	// pointer copy: both groups then refer to the same monitor array
+			lhs.size = rhs.size;
+			lhs.func = rhs.func;
 	}
@@ -210,5 +207,5 @@
 #endif //_INVOKE_PRIVATE_H_
 #endif //! defined(__CFA_INVOKE_PRIVATE__)
-#ifdef __CFORALL__
+#ifdef __cforall
 }
 #endif
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/concurrency/kernel	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -26,20 +26,8 @@
 //-----------------------------------------------------------------------------
 // Locks
-// // Lock the spinlock, spin if already acquired
-// void lock      ( spinlock * DEBUG_CTX_PARAM2 );
-
-// // Lock the spinlock, yield repeatedly if already acquired
-// void lock_yield( spinlock * DEBUG_CTX_PARAM2 );
-
-// // Lock the spinlock, return false if already acquired
-// bool try_lock  ( spinlock * DEBUG_CTX_PARAM2 );
-
-// // Unlock the spinlock
-// void unlock    ( spinlock * );
-
 struct semaphore {
 	__spinlock_t lock;
 	int count;
-	__thread_queue_t waiting;
+	__queue_t(thread_desc) waiting;
 };
 
@@ -57,5 +45,5 @@
 
 	// Ready queue for threads
-	__thread_queue_t ready_queue;
+	__queue_t(thread_desc) ready_queue;
 
 	// Preemption rate on this cluster
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/concurrency/kernel.c	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -164,5 +164,5 @@
 
 void ?{}(cluster & this) {
-	( this.ready_queue ){};
+	(this.ready_queue){};
 	( this.ready_queue_lock ){};
 
@@ -611,65 +611,4 @@
 }
 
-//-----------------------------------------------------------------------------
-// Queues
-void ?{}( __thread_queue_t & this ) {
-	this.head = NULL;
-	this.tail = &this.head;
-}
-
-void append( __thread_queue_t & this, thread_desc * t ) {
-	verify(this.tail != NULL);
-	*this.tail = t;
-	this.tail = &t->next;
-}
-
-thread_desc * pop_head( __thread_queue_t & this ) {
-	thread_desc * head = this.head;
-	if( head ) {
-		this.head = head->next;
-		if( !head->next ) {
-			this.tail = &this.head;
-		}
-		head->next = NULL;
-	}
-	return head;
-}
-
-thread_desc * remove( __thread_queue_t & this, thread_desc ** it ) {
-	thread_desc * thrd = *it;
-	verify( thrd );
-
-	(*it) = thrd->next;
-
-	if( this.tail == &thrd->next ) {
-		this.tail = it;
-	}
-
-	thrd->next = NULL;
-
-	verify( (this.head == NULL) == (&this.head == this.tail) );
-	verify( *this.tail == NULL );
-	return thrd;
-}
-
-void ?{}( __condition_stack_t & this ) {
-	this.top = NULL;
-}
-
-void push( __condition_stack_t & this, __condition_criterion_t * t ) {
-	verify( !t->next );
-	t->next = this.top;
-	this.top = t;
-}
-
-__condition_criterion_t * pop( __condition_stack_t & this ) {
-	__condition_criterion_t * top = this.top;
-	if( top ) {
-		this.top = top->next;
-		top->next = NULL;
-	}
-	return top;
-}
-
 // Local Variables: //
 // mode: c //
Index: src/libcfa/concurrency/monitor
===================================================================
--- src/libcfa/concurrency/monitor	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/concurrency/monitor	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -34,5 +34,5 @@
 	this.recursion     = 0;
 	this.mask.accepted = NULL;
-	this.mask.clauses  = NULL;
+	this.mask.data     = NULL;
 	this.mask.size     = 0;
 	this.dtor_node     = NULL;
@@ -40,9 +40,7 @@
 
 struct monitor_guard_t {
-	monitor_desc ** m;
-	__lock_size_t   count;
-	monitor_desc ** prev_mntrs;
-	__lock_size_t   prev_count;
-	fptr_t          prev_func;
+	monitor_desc ** 	m;
+	__lock_size_t   	count;
+	__monitor_group_t prev;
 };
 
@@ -51,8 +49,6 @@
 
 struct monitor_dtor_guard_t {
-	monitor_desc * m;
-	monitor_desc ** prev_mntrs;
-	__lock_size_t   prev_count;
-	fptr_t          prev_func;
+	monitor_desc *    m;
+	__monitor_group_t prev;
 };
 
@@ -83,4 +79,8 @@
 };
 
+static inline __condition_criterion_t * & get_next( __condition_criterion_t & this ) {	// reference to the criterion's next pointer
+	return this.next;
+}
+
 struct __condition_node_t {
 	// Thread that needs to be woken when all criteria are met
@@ -100,8 +100,7 @@
 };
 
-struct __condition_blocked_queue_t {
-	__condition_node_t * head;
-	__condition_node_t ** tail;
-};
+static inline __condition_node_t * & get_next( __condition_node_t & this ) {	// reference to the node's next pointer
+	return this.next;
+}
 
 void ?{}(__condition_node_t & this, thread_desc * waiting_thread, __lock_size_t count, uintptr_t user_info );
@@ -109,11 +108,7 @@
 void ?{}(__condition_criterion_t & this, monitor_desc * target, __condition_node_t * owner );
 
-void ?{}( __condition_blocked_queue_t & );
-void append( __condition_blocked_queue_t &, __condition_node_t * );
-__condition_node_t * pop_head( __condition_blocked_queue_t & );
-
 struct condition {
 	// Link list which contains the blocked threads as-well as the information needed to unblock them
-	__condition_blocked_queue_t blocked;
+	__queue_t(__condition_node_t) blocked;
 
 	// Array of monitor pointers (Monitors are NOT contiguous in memory)
Index: src/libcfa/concurrency/monitor.c
===================================================================
--- src/libcfa/concurrency/monitor.c	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/concurrency/monitor.c	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -280,5 +280,5 @@
 static inline void enter( __monitor_group_t monitors ) {
 	for( __lock_size_t i = 0; i < monitors.size; i++) {
-		__enter_monitor_desc( monitors.list[i], monitors );
+		__enter_monitor_desc( monitors[i], monitors );
 	}
 }
@@ -303,8 +303,8 @@
 
 	// Save previous thread context
-	this.[prev_mntrs, prev_count, prev_func] = this_thread->monitors.[list, size, func];
+	this.prev = this_thread->monitors;
 
 	// Update thread context (needed for conditions)
-	this_thread->monitors.[list, size, func] = [m, count, func];
+	(this_thread->monitors){m, count, func};
 
 	// LIB_DEBUG_PRINT_SAFE("MGUARD : enter %d\n", count);
@@ -328,5 +328,5 @@
 
 	// Restore thread context
-	this_thread->monitors.[list, size, func] = this.[prev_mntrs, prev_count, prev_func];
+	this_thread->monitors = this.prev;
 }
 
@@ -338,8 +338,8 @@
 
 	// Save previous thread context
-	this.[prev_mntrs, prev_count, prev_func] = this_thread->monitors.[list, size, func];
+	this.prev = this_thread->monitors;
 
 	// Update thread context (needed for conditions)
-	this_thread->monitors.[list, size, func] = [m, 1, func];
+	(this_thread->monitors){m, 1, func};
 
 	__enter_monitor_dtor( this.m, func );
@@ -352,5 +352,5 @@
 
 	// Restore thread context
-	this_thread->monitors.[list, size, func] = this.[prev_mntrs, prev_count, prev_func];
+	this_thread->monitors = this.prev;
 }
 
@@ -437,6 +437,6 @@
 
 		for(int i = 0; i < this.monitor_count; i++) {
-			if ( this.monitors[i] != this_thrd->monitors.list[i] ) {
-				abortf( "Signal on condition %p made with different monitor, expected %p got %i", &this, this.monitors[i], this_thrd->monitors.list[i] );
+			if ( this.monitors[i] != this_thrd->monitors[i] ) {	// the condition's i-th monitor must be the thread's i-th monitor
+				abortf( "Signal on condition %p made with different monitor, expected %p got %p", &this, this.monitors[i], this_thrd->monitors[i] );	// both monitors are pointers, so both print with %p
 			}
 		}
@@ -510,5 +510,5 @@
 		"Possible cause is not checking if the condition is empty before reading stored data."
 	);
-	return this.blocked.head->user_info;
+	return ((typeof(this.blocked.head))this.blocked.head)->user_info;
 }
 
@@ -554,9 +554,10 @@
 		if( next ) {
 			*mask.accepted = index;
-			if( mask.clauses[index].is_dtor ) {
+			__acceptable_t& accepted = mask[index];
+			if( accepted.is_dtor ) {
 				LIB_DEBUG_PRINT_BUFFER_LOCAL( "Kernel : dtor already there\n");
-				verifyf( mask.clauses[index].size == 1        , "ERROR: Accepted dtor has more than 1 mutex parameter." );
-
-				monitor_desc * mon2dtor = mask.clauses[index].list[0];
+				verifyf( accepted.size == 1,  "ERROR: Accepted dtor has more than 1 mutex parameter." );
+
+				monitor_desc * mon2dtor = accepted[0];
 				verifyf( mon2dtor->dtor_node, "ERROR: Accepted monitor has no dtor_node." );
 
@@ -596,5 +597,4 @@
 
 			LIB_DEBUG_PRINT_BUFFER_LOCAL( "Kernel : accepted %d\n", *mask.accepted);
-
 			return;
 		}
@@ -671,5 +671,5 @@
 static inline void reset_mask( monitor_desc * this ) {
 	this->mask.accepted = NULL;
-	this->mask.clauses = NULL;
+	this->mask.data = NULL;
 	this->mask.size = 0;
 }
@@ -697,5 +697,5 @@
 
 static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & group ) {
-	__acceptable_t * it = this->mask.clauses; // Optim
+	__acceptable_t * it = this->mask.data; // Optim
 	__lock_size_t count = this->mask.size;
 
@@ -820,10 +820,10 @@
 	if( !this.monitors ) {
 		// LIB_DEBUG_PRINT_SAFE("Branding\n");
-		assertf( thrd->monitors.list != NULL, "No current monitor to brand condition %p", thrd->monitors.list );
+		assertf( thrd->monitors.data != NULL, "No current monitor to brand condition %p", &this );
 		this.monitor_count = thrd->monitors.size;
 
 		this.monitors = (monitor_desc **)malloc( this.monitor_count * sizeof( *this.monitors ) );
 		for( int i = 0; i < this.monitor_count; i++ ) {
-			this.monitors[i] = thrd->monitors.list[i];
+			this.monitors[i] = thrd->monitors[i];
 		}
 	}
@@ -832,5 +832,5 @@
 static inline [thread_desc *, int] search_entry_queue( const __waitfor_mask_t & mask, monitor_desc * monitors [], __lock_size_t count ) {
 
-	__thread_queue_t & entry_queue = monitors[0]->entry_queue;
+	__queue_t(thread_desc) & entry_queue = monitors[0]->entry_queue;
 
 	// For each thread in the entry-queue
@@ -841,6 +841,7 @@
 		// For each acceptable check if it matches
 		int i = 0;
-		__acceptable_t * end = mask.clauses + mask.size;
-		for( __acceptable_t * it = mask.clauses; it != end; it++, i++ ) {
+		__acceptable_t * end   = end  (mask);
+		__acceptable_t * begin = begin(mask);
+		for( __acceptable_t * it = begin; it != end; it++, i++ ) {
 			// Check if we have a match
 			if( *it == (*thrd_it)->monitors ) {
@@ -872,5 +873,6 @@
 	__lock_size_t max = 0;
 	for( __lock_size_t i = 0; i < mask.size; i++ ) {
-		max += mask.clauses[i].size;
+		__acceptable_t & accepted = mask[i];
+		max += accepted.size;
 	}
 	return max;
@@ -880,7 +882,8 @@
 	__lock_size_t size = 0;
 	for( __lock_size_t i = 0; i < mask.size; i++ ) {
-		__libcfa_small_sort( mask.clauses[i].list, mask.clauses[i].size );
-		for( __lock_size_t j = 0; j < mask.clauses[i].size; j++) {
-			insert_unique( storage, size, mask.clauses[i].list[j] );
+		__acceptable_t & accepted = mask[i];
+		__libcfa_small_sort( accepted.data, accepted.size );
+		for( __lock_size_t j = 0; j < accepted.size; j++) {
+			insert_unique( storage, size, accepted[j] );
 		}
 	}
@@ -888,27 +891,4 @@
 	__libcfa_small_sort( storage, size );
 	return size;
-}
-
-void ?{}( __condition_blocked_queue_t & this ) {
-	this.head = NULL;
-	this.tail = &this.head;
-}
-
-void append( __condition_blocked_queue_t & this, __condition_node_t * c ) {
-	verify(this.tail != NULL);
-	*this.tail = c;
-	this.tail = &c->next;
-}
-
-__condition_node_t * pop_head( __condition_blocked_queue_t & this ) {
-	__condition_node_t * head = this.head;
-	if( head ) {
-		this.head = head->next;
-		if( !head->next ) {
-			this.tail = &this.head;
-		}
-		head->next = NULL;
-	}
-	return head;
 }
 
Index: src/libcfa/exception.h
===================================================================
--- src/libcfa/exception.h	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/exception.h	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -17,5 +17,5 @@
 
 
-#ifdef __CFORALL__
+#ifdef __cforall
 extern "C" {
 #endif
@@ -68,5 +68,5 @@
 struct __cfaehm__cleanup_hook {};
 
-#ifdef __CFORALL__
+#ifdef __cforall
 }
 #endif
Index: src/libcfa/stdhdr/assert.h
===================================================================
--- src/libcfa/stdhdr/assert.h	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/stdhdr/assert.h	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -4,7 +4,7 @@
 // The contents of this file are covered under the licence agreement in the
 // file "LICENCE" distributed with Cforall.
-// 
-// assert.h -- 
-// 
+//
+// assert.h --
+//
 // Author           : Peter A. Buhr
 // Created On       : Mon Jul  4 23:25:26 2016
@@ -12,9 +12,9 @@
 // Last Modified On : Mon Jul 31 23:09:32 2017
 // Update Count     : 13
-// 
+//
 
-#ifdef __CFORALL__
+#ifdef __cforall
 extern "C" {
-#endif //__CFORALL__
+#endif //__cforall
 
 #include_next <assert.h>
@@ -30,7 +30,7 @@
 #endif
 
-#ifdef __CFORALL__
+#ifdef __cforall
 } // extern "C"
-#endif //__CFORALL__
+#endif //__cforall
 
 // Local Variables: //
Index: src/libcfa/virtual.h
===================================================================
--- src/libcfa/virtual.h	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/libcfa/virtual.h	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -16,5 +16,5 @@
 #pragma once
 
-#ifdef __CFORALL__
+#ifdef __cforall
 extern "C" {
 #endif
@@ -35,5 +35,5 @@
 		struct __cfa__parent_vtable const * const * child );
 
-#ifdef __CFORALL__
+#ifdef __cforall
 }
 #endif
Index: src/tests/designations.c
===================================================================
--- src/tests/designations.c	(revision 8eb348a347301e6734e6bbd0827acd993a3db4f7)
+++ src/tests/designations.c	(revision 8a0a64d9a29f461d2e53a950f0b7b400ab1cfeaa)
@@ -17,5 +17,5 @@
 // In particular, since the syntax for designations in Cforall differs from that of C, preprocessor substitution
 // is used for the designation syntax
-#ifdef __CFORALL__
+#ifdef __cforall
 #define DES :
 #else
