Index: .gitignore
===================================================================
--- .gitignore	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ .gitignore	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -1,4 +1,5 @@
 # build files
 *.[ao]
+*.pyc
 
 # generated by configure
Index: doc/generic_types/evaluation/Makefile
===================================================================
--- doc/generic_types/evaluation/Makefile	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ doc/generic_types/evaluation/Makefile	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -1,2 +1,3 @@
+CC = gcc
 CFA = cfa
 DEPFLAGS = -MMD -MP
@@ -6,8 +7,9 @@
 endif
 CXXFLAGS = $(CFLAGS) --std=c++14
+MAKEFILE_NAME = ${firstword ${MAKEFILE_LIST}}
 
-.PHONY: all clean distclean run-c run-cpp run-cfa run
+.PHONY : all clean run-c run-cpp run-cppv run-cfa run
 
-all: c-bench cpp-bench cfa-bench cpp-vbench
+all : c-bench cpp-bench cpp-vbench cfa-bench
 
 # rewrite object generation to auto-determine deps
@@ -17,94 +19,79 @@
 
 c-%.o : c-%.c
-c-%.o : c-%.c c-%.d
 	$(COMPILE.c) $(OUTPUT_OPTION) -c $<
 
 cpp-%.o : cpp-%.cpp
-cpp-%.o : cpp-%.cpp cpp-%.d
 	$(COMPILE.cpp) $(OUTPUT_OPTION) -c $<
 
 cfa-%.o : cfa-%.c
-cfa-%.o : cfa-%.c cfa-%.d
 	$(COMPILE.cfa) $(OUTPUT_OPTION) -c $<
 
-COBJS = c-stack.o c-pair.o c-print.o
-CPPOBJS = 
-CPPVOBJS = cpp-vstack.o
-CFAOBJS = cfa-stack.o cfa-pair.o cfa-print.o
+COBJS = c-stack.o c-pair.o c-print.o c-bench.o
+CPPOBJS = cpp-bench.o
+CPPVOBJS = cpp-vstack.o cpp-vbench.o
+CFAOBJS = cfa-stack.o cfa-pair.o cfa-print.o cfa-bench.o
 
-CFILES = c-bench.c bench.h $(COBJS:.o=.h) $(COBJS:.o=.c)
-CPPFILES = cpp-bench.cpp bench.hpp cpp-stack.hpp cpp-pair.hpp cpp-print.hpp
-CPPVFILES = cpp-vbench.cpp bench.hpp object.hpp $(CPPVOBJS:.o=.hpp) $(CPPVOBJS:.o=.cpp) cpp-vprint.hpp
-CFAFILES = cfa-bench.c bench.h $(CFAOBJS:.o=.h) $(CFAOBJS:.o=.c)
+${COBJS} ${CPPOBJS} ${CPPVOBJS} ${CFAOBJS} : ${MAKEFILE_NAME}
 
-c-bench: c-bench.c c-bench.d $(COBJS)
-	$(COMPILE.c) -o $@ $< $(COBJS) $(LDFLAGS)
+CFILES = bench.h $(filter-out c-bench.h,$(patsubst %.o,%.h,$(COBJS))) $(patsubst %.o,%.c,$(COBJS))
+CPPFILES = bench.hpp cpp-stack.hpp cpp-pair.hpp cpp-print.hpp $(patsubst %.o,%.cpp,$(CPPOBJS))
+CPPVFILES = bench.hpp object.hpp cpp-vprint.hpp $(filter-out cpp-vbench.hpp,$(patsubst %.o,%.hpp,$(CPPVOBJS))) $(patsubst %.o,%.cpp,$(CPPVOBJS))
+CFAFILES = bench.h $(filter-out cfa-bench.h,$(patsubst %.o,%.h,$(CFAOBJS))) $(patsubst %.o,%.c,$(CFAOBJS))
 
-cpp-bench: cpp-bench.cpp cpp-bench.d $(CPPOBJS)
-	$(COMPILE.cpp) -o $@ $< $(CPPOBJS) $(LDFLAGS)
+c-bench : $(COBJS) c-bench.o
+	$(COMPILE.c) $(LDFLAGS) $^ -o $@
 
-cpp-vbench: cpp-vbench.cpp cpp-vbench.d $(CPPVOBJS)
-	$(COMPILE.cpp) -o $@ $< $(CPPVOBJS) $(LDFLAGS)
+cpp-bench : $(CPPOBJS) cpp-bench.o
+	$(COMPILE.cpp) $(LDFLAGS) $^ -o $@
 
-cfa-bench: cfa-bench.c cfa-bench.d $(CFAOBJS)
-	$(COMPILE.cfa) -o $@ $< $(CFAOBJS) $(LDFLAGS)
+cpp-vbench : $(CPPVOBJS) cpp-vbench.o
+	$(COMPILE.cpp) $(LDFLAGS) $^ -o $@
 
-clean:
-	-rm $(COBJS) c-bench
-	-rm $(CPPOBJS) cpp-bench
-	-rm $(CPPVOBJS) cpp-vbench
-	-rm $(CFAOBJS) cfa-bench
+cfa-bench : $(CFAOBJS) cfa-bench.o
+	$(COMPILE.cfa) $(LDFLAGS) $^ -o $@
 
-distclean: clean
-	-rm $(COBJS:.o=.d) c-bench.d
-	-rm $(CPPOBJS:.o=.d) cpp-bench.d
-	-rm $(CPPVOBJS:.o=.d) cpp-vbench.d
-	-rm $(CFAOBJS:.o=.d) cfa-bench.d
+# include dependency files
+-include $(COBJS:.o=.d)
+-include $(CPPOBJS:.o=.d)
+-include $(CPPVOBJS:.o=.d)
+-include $(CFAOBJS:.o=.d)
 
-run-c: c-bench
+clean :
+	rm -f $(COBJS) $(COBJS:.o=.d) c-bench
+	rm -f $(CPPOBJS) $(CPPOBJS:.o=.d) cpp-bench
+	rm -f $(CPPVOBJS) $(CPPVOBJS:.o=.d) cpp-vbench
+	rm -f $(CFAOBJS) $(CFAOBJS:.o=.d) cfa-bench
+
+run-c : c-bench
 	@echo
 	@echo '## C ##'
-	@/usr/bin/time -f 'max_memory:\t%M kilobytes' ./c-bench
+	@/usr/bin/time -f 'max_memory:\t%M kilobytes' ./$<
 	@printf 'source_size:\t%8d lines\n' `cat $(CFILES) | wc -l`
 	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CFILES) | fgrep '/***/' -c`
-	@printf 'binary_size:\t%8d bytes\n' `stat -c %s c-bench`
+	@printf 'binary_size:\t%8d bytes\n' `stat -c %s $<`
 
-run-cfa: cfa-bench
+run-cpp : cpp-bench
+	@echo
+	@echo '## C++ ##'
+	@/usr/bin/time -f 'max_memory:\t %M kilobytes' ./$<
+	@printf 'source_size:\t%8d lines\n' `cat $(CPPFILES) | wc -l`
+	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CPPFILES) | fgrep '/***/' -c`
+	@printf 'binary_size:\t%8d bytes\n' `stat -c %s $<`
+
+run-cppv : cpp-vbench
+	@echo
+	@echo '## C++obj ##'
+	@/usr/bin/time -f 'max_memory:\t%M kilobytes' ./$<
+	@printf 'source_size:\t%8d lines\n' `cat $(CPPVFILES) | wc -l`
+	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CPPVFILES) | fgrep '/***/' -c`
+	@printf 'binary_size:\t%8d bytes\n' `stat -c %s $<`
+
+run-cfa : cfa-bench
 	@echo
 	@echo '## Cforall ##'
-	@/usr/bin/time -f 'max_memory:\t %M kilobytes' ./cfa-bench
+	@/usr/bin/time -f 'max_memory:\t %M kilobytes' ./$<
 	@printf 'source_size:\t%8d lines\n' `cat $(CFAFILES) | wc -l`
 	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CFAFILES) | fgrep '/***/' -c`
-	@printf 'binary_size:\t%8d bytes\n' `stat -c %s cfa-bench`
+	@printf 'binary_size:\t%8d bytes\n' `stat -c %s $<`
 
-run-cpp: cpp-bench
-	@echo
-	@echo '## C++ ##'
-	@/usr/bin/time -f 'max_memory:\t %M kilobytes' ./cpp-bench
-	@printf 'source_size:\t%8d lines\n' `cat $(CPPFILES) | wc -l`
-	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CPPFILES) | fgrep '/***/' -c`
-	@printf 'binary_size:\t%8d bytes\n' `stat -c %s cpp-bench`
-
-run-cppv: cpp-vbench
-	@echo
-	@echo '## C++obj ##'
-	@/usr/bin/time -f 'max_memory:\t%M kilobytes' ./cpp-vbench
-	@printf 'source_size:\t%8d lines\n' `cat $(CPPVFILES) | wc -l`
-	@printf 'redundant_type_annotations:%8d lines\n' `cat $(CPPVFILES) | fgrep '/***/' -c`
-	@printf 'binary_size:\t%8d bytes\n' `stat -c %s cpp-vbench`
-
-run: run-c run-cfa run-cpp run-cppv
-
-# so make doesn't fail without dependency files
-%.d: ;
-
-# so make won't delete dependency files
-.PRECIOUS: %.d
-
-# include dependency files
--include: $(COBJS:.o=.d)
--include: $(CPPOBJS:.o=.d)
--include: $(CFAOBJS:.o=.d)
--include: c-bench.d
--include: cpp-bench.d
--include: cfa-bench.d
+run : run-c run-cfa run-cpp run-cppv
Index: doc/generic_types/evaluation/c-bench.c
===================================================================
--- doc/generic_types/evaluation/c-bench.c	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ doc/generic_types/evaluation/c-bench.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -39,5 +39,5 @@
 
 int main(int argc, char** argv) {
-	FILE * out = fopen("c-out.txt", "w");
+	FILE * out = fopen("/dev/null", "w");
 	int maxi = 0, vali = 42;
 	struct stack si = new_stack(), ti;
Index: doc/generic_types/evaluation/c-stack.c
===================================================================
--- doc/generic_types/evaluation/c-stack.c	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ doc/generic_types/evaluation/c-stack.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -11,10 +11,8 @@
 void copy_stack(struct stack* s, const struct stack* t, void* (*copy)(const void*)) {
 	struct stack_node** crnt = &s->head;
-	struct stack_node* next = t->head;
-	while ( next ) {
+	for ( struct stack_node* next = t->head; next; next = next->next ) {
 		*crnt = malloc(sizeof(struct stack_node)); /***/
 		**crnt = (struct stack_node){ copy(next->value) }; /***/
 		crnt = &(*crnt)->next;
-		next = next->next;
 	}
 	*crnt = 0;
@@ -22,6 +20,5 @@
 
 void clear_stack(struct stack* s, void (*free_el)(void*)) {
-	struct stack_node* next = s->head;
-	while ( next ) {
+	for ( struct stack_node* next = s->head; next; ) {
 		struct stack_node* crnt = next;
 		next = crnt->next;
Index: doc/generic_types/evaluation/cfa-bench.c
===================================================================
--- doc/generic_types/evaluation/cfa-bench.c	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ doc/generic_types/evaluation/cfa-bench.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -6,5 +6,5 @@
 
 int main( int argc, char *argv[] ) {
-	FILE * out = fopen( "cfa-out.txt", "w" );
+	FILE * out = fopen( "/dev/null", "w" );
 	int maxi = 0, vali = 42;
 	stack(int) si, ti;
Index: doc/generic_types/evaluation/cfa-stack.c
===================================================================
--- doc/generic_types/evaluation/cfa-stack.c	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ doc/generic_types/evaluation/cfa-stack.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -11,10 +11,8 @@
 forall(otype T) void ?{}(stack(T)* s, stack(T) t) {
 	stack_node(T)** crnt = &s->head;
-	stack_node(T)* next = t.head;
-	while ( next ) {
+	for ( stack_node(T)* next = t.head; next; next = next->next ) {
 		*crnt = ((stack_node(T)*)malloc()){ next->value }; /***/
 		stack_node(T)* acrnt = *crnt;
 		crnt = &acrnt->next;
-		next = next->next;
 	}
 	*crnt = 0;
@@ -46,6 +44,5 @@
 
 forall(otype T) void clear(stack(T)* s) {
-	stack_node(T)* next = s->head;
-	while ( next ) {
+	for ( stack_node(T)* next = s->head; next; ) {
 		stack_node(T)* crnt = next;
 		next = crnt->next;
Index: doc/generic_types/evaluation/cpp-bench.cpp
===================================================================
--- doc/generic_types/evaluation/cpp-bench.cpp	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ doc/generic_types/evaluation/cpp-bench.cpp	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -7,5 +7,5 @@
 
 int main(int argc, char** argv) {
-	std::ofstream out{"cpp-out.txt"};
+	std::ofstream out{"/dev/null"};
 	int maxi = 0, vali = 42;
 	stack<int> si, ti;
Index: doc/generic_types/evaluation/cpp-stack.hpp
===================================================================
--- doc/generic_types/evaluation/cpp-stack.hpp	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ doc/generic_types/evaluation/cpp-stack.hpp	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -13,9 +13,7 @@
 	void copy(const stack<T>& o) {
 		node** crnt = &head;
-		node* next = o.head;
-		while ( next ) {
+		for ( node* next = o.head; next; next = next->next ) {
 			*crnt = new node{ next->value }; /***/
 			crnt = &(*crnt)->next;
-			next = next->next;
 		}
 		*crnt = nullptr;
@@ -23,6 +21,5 @@
 public:
 	void clear() {
-		node* next = head;
-		while ( next ) {
+		for ( node* next = head; next; ) {
 			node* crnt = next;
 			next = crnt->next;
Index: doc/generic_types/evaluation/cpp-vbench.cpp
===================================================================
--- doc/generic_types/evaluation/cpp-vbench.cpp	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ doc/generic_types/evaluation/cpp-vbench.cpp	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -7,5 +7,5 @@
 
 int main(int argc, char** argv) {
-	std::ofstream out{"cpp-vout.txt"};
+	std::ofstream out{"/dev/null"};
 	integer maxi{ 0 }, vali{ 42 };
 	stack si, ti;
Index: doc/generic_types/evaluation/cpp-vstack.cpp
===================================================================
--- doc/generic_types/evaluation/cpp-vstack.cpp	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ doc/generic_types/evaluation/cpp-vstack.cpp	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -6,9 +6,7 @@
 void stack::copy(const stack& o) {
 	node** crnt = &head;
-	node* next = o.head;
-	while ( next ) {
+	for ( node* next = o.head; next; next = next->next ) {
 		*crnt = new node{ *next->value };
 		crnt = &(*crnt)->next;
-		next = next->next;
 	}
 	*crnt = nullptr;
@@ -35,6 +33,5 @@
 
 void stack::clear() {
-	node* next = head;
-	while ( next ) {
+	for ( node* next = head; next; ) {
 		node* crnt = next;
 		next = crnt->next;
Index: doc/generic_types/generic_types.tex
===================================================================
--- doc/generic_types/generic_types.tex	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ doc/generic_types/generic_types.tex	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -232,5 +232,5 @@
 int comp( const void * t1, const void * t2 ) { return *(double *)t1 < *(double *)t2 ? -1 :
 				*(double *)t2 < *(double *)t1 ? 1 : 0; }
-double key = 5.0, vals[10] = { /* 10 floating-point values */ };
+double key = 5.0, vals[10] = { /* 10 sorted floating-point values */ };
 double * val = (double *)bsearch( &key, vals, 10, sizeof(vals[0]), comp );	$\C{// search sorted array}$
 \end{lstlisting}
@@ -354,5 +354,5 @@
 One of the known shortcomings of standard C is that it does not provide reusable type-safe abstractions for generic data structures and algorithms.
 Broadly speaking, there are three approaches to implement abstract data-structures in C.
-One approach is to write bespoke data structures for each context in which they are needed.
+One approach is to write bespoke data-structures for each context in which they are needed.
 While this approach is flexible and supports integration with the C type-checker and tooling, it is also tedious and error-prone, especially for more complex data structures.
 A second approach is to use @void *@--based polymorphism, \eg the C standard-library functions @bsearch@ and @qsort@; an approach which does allow reuse of code for common functionality.
@@ -542,5 +542,5 @@
 \end{lstlisting}
 where the tuple variable-name serves the same purpose as the parameter name(s).
-Tuple variables can be composed of any types, except for array types, since array sizes are generally unknown.
+Tuple variables can be composed of any types, except for array types, since array sizes are generally unknown in C.
 
 One way to access the tuple-variable components is with assignment or composition:
@@ -552,9 +552,9 @@
 \begin{lstlisting}
 [int, int] * p = &qr;						$\C{// tuple pointer}$
-int rem = qr.1;								$\C{// access remainder}$
-int quo = div( 13, 5 ).0;					$\C{// access quotient}$
-p->0 = 5;									$\C{// change quotient}$
-bar( qr.1, qr );							$\C{// pass remainder and quotient/remainder}$
-rem = [42, div( 13, 5 )].0.1;				$\C{// access 2nd component of 1st component of tuple expression}$
+int rem = qr`.1`;							$\C{// access remainder}$
+int quo = div( 13, 5 )`.0`;					$\C{// access quotient}$
+p`->0` = 5;									$\C{// change quotient}$
+bar( qr`.1`, qr );							$\C{// pass remainder and quotient/remainder}$
+rem = [42, div( 13, 5 )]`.0.1`;				$\C{// access 2nd component of 1st component of tuple expression}$
 \end{lstlisting}
 
@@ -616,5 +616,5 @@
 This semantics means mass assignment differs from C cascading assignment (\eg @a = b = c@) in that conversions are applied in each individual assignment, which prevents data loss from the chain of conversions that can happen during a cascading assignment.
 For example, @[y, x] = 3.14@ performs the assignments @y = 3.14@ and @x = 3.14@, yielding @y == 3.14@ and @x == 3@;
-whereas C cascading assignment @y = x = 3.14@ performs the assignments @x = 3.14@ and @y = x@, yielding @3@ in @y@ and @x@.
+whereas, C cascading assignment @y = x = 3.14@ performs the assignments @x = 3.14@ and @y = x@, yielding @3@ in @y@ and @x@.
 Finally, tuple assignment is an expression where the result type is the type of the left-hand side of the assignment, just like all other assignment expressions in C.
 This example shows mass, multiple, and cascading assignment used in one expression:
@@ -742,5 +742,5 @@
 \end{lstlisting}
 Hence, function parameter and return lists are flattened for the purposes of type unification allowing the example to pass expression resolution.
-This relaxation is possible by extending the thunk scheme described by \citet{Bilson03}.
+This relaxation is possible by extending the thunk scheme described by~\citet{Bilson03}.
 Whenever a candidate's parameter structure does not exactly match the formal parameter's structure, a thunk is generated to specialize calls to the actual function:
 \begin{lstlisting}
@@ -748,5 +748,5 @@
 \end{lstlisting}
 so the thunk provides flattening and structuring conversions to inferred functions, improving the compatibility of tuples and polymorphism.
-These thunks take advantage of GCC C nested-functions to produce closures that have the usual function pointer signature.
+These thunks take advantage of GCC C nested-functions to produce closures that have the usual function-pointer signature.
 
 
@@ -829,5 +829,5 @@
 \subsection{Implementation}
 
-Tuples are implemented in the \CFA translator via a transformation into generic types.
+Tuples are implemented in the \CFA translator via a transformation into \emph{generic types}.
 For each $N$, the first time an $N$-tuple is seen in a scope a generic type with $N$ type parameters is generated, \eg:
 \begin{lstlisting}
@@ -1086,5 +1086,5 @@
 Finally, we demonstrate that \CFA performance for some idiomatic cases is better than C and close to \CC, showing the design is practically applicable.
 
-There is ongoing work on a wide range of \CFA feature extensions, including reference types, exceptions, concurrent primitives and modules.
+There is ongoing work on a wide range of \CFA feature extensions, including reference types, arrays with size, exceptions, concurrent primitives and modules.
 (While all examples in the paper compile and run, a public beta-release of \CFA will take another 8--12 months to finalize these additional extensions.)
 In addition, there are interesting future directions for the polymorphism design.
@@ -1092,5 +1092,5 @@
 \CFA polymorphic functions use dynamic virtual-dispatch; 
 the runtime overhead of this approach is low, but not as low as inlining, and it may be beneficial to provide a mechanism for performance-sensitive code.
-Two promising approaches are an @inline@ annotation at polymorphic function call sites to create a template-specialization of the function (provided the code is visible) or placing an @inline@ annotation on polymorphic function-definitions to instantiate a specialized version for some set of types.
+Two promising approaches are an @inline@ annotation at polymorphic function call sites to create a template-specialization of the function (provided the code is visible) or placing an @inline@ annotation on polymorphic function-definitions to instantiate a specialized version for some set of types (\CC template specialization).
 These approaches are not mutually exclusive and allow performance optimizations to be applied only when necessary, without suffering global code-bloat.
 In general, we believe separate compilation, producing smaller code, works well with loaded hardware-caches, which may offset the benefit of larger inlined-code.
@@ -1117,5 +1117,5 @@
 Throughout, @/***/@ designates a counted redundant type annotation.
 
-\medskip\noindent
+\smallskip\noindent
 \CFA
 \begin{lstlisting}[xleftmargin=2\parindentlnth,aboveskip=0pt,belowskip=0pt]
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/libcfa/concurrency/invoke.h	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -38,6 +38,6 @@
       };
 
-      struct __thread_stack_t {
-            struct thread_desc * top;
+      struct __condition_stack_t {
+            struct __condition_criterion_t * top;
       };
 
@@ -48,7 +48,7 @@
             struct thread_desc * pop_head( struct __thread_queue_t * );
 
-            void ?{}( struct __thread_stack_t * );
-            void push( struct __thread_stack_t *, struct thread_desc * );            
-            struct thread_desc * pop( struct __thread_stack_t * );
+            void ?{}( struct __condition_stack_t * );
+            void push( struct __condition_stack_t *, struct __condition_criterion_t * );
+            struct __condition_criterion_t * pop( struct __condition_stack_t * );
 
             void ?{}(spinlock * this);
@@ -82,5 +82,5 @@
             struct thread_desc * owner;               // current owner of the monitor
             struct __thread_queue_t entry_queue;      // queue of threads that are blocked waiting for the monitor
-            struct __thread_stack_t signal_stack;     // stack of threads to run next once we exit the monitor
+            struct __condition_stack_t signal_stack;  // stack of conditions to run next once we exit the monitor
             struct monitor_desc * stack_owner;        // if bulk acquiring was used we need to synchronize signals with an other monitor
             unsigned int recursion;                   // monitor routines can be called recursively, we need to keep track of that
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/libcfa/concurrency/kernel	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -55,9 +55,15 @@
 //-----------------------------------------------------------------------------
 // Processor
-enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule };
+enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule, Release_Multi, Release_Multi_Schedule };
+
+//TODO use union, many of these fields are mutually exclusive (i.e. MULTI vs NOMULTI)
 struct FinishAction {
 	FinishOpCode action_code;
 	thread_desc * thrd;
 	spinlock * lock;
+	spinlock ** locks;			//Array of locks unlocked by the Release_Multi and Release_Multi_Schedule actions
+	unsigned short lock_count;		//Number of entries in locks
+	thread_desc ** thrds;			//Array of threads scheduled by the Release_Multi_Schedule action
+	unsigned short thrd_count;		//Number of entries in thrds
 };
 static inline void ?{}(FinishAction * this) { 
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/libcfa/concurrency/kernel.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -235,4 +235,17 @@
 		ScheduleThread( this->finish.thrd );
 	}
+	else if( this->finish.action_code == Release_Multi ) {
+		for(int i = 0; i < this->finish.lock_count; i++) {
+			unlock( this->finish.locks[i] );
+		}
+	}
+	else if( this->finish.action_code == Release_Multi_Schedule ) {
+		for(int i = 0; i < this->finish.lock_count; i++) {
+			unlock( this->finish.locks[i] );
+		}
+		for(int i = 0; i < this->finish.thrd_count; i++) {
+			ScheduleThread( this->finish.thrds[i] );
+		}
+	}
 	else {
 		assert(this->finish.action_code == No_Action);
@@ -335,4 +348,20 @@
 	this_processor->finish.lock = lock;
 	this_processor->finish.thrd = thrd;
+	suspend();
+}
+
+void ScheduleInternal(spinlock ** locks, unsigned short count) {
+	this_processor->finish.action_code = Release_Multi;
+	this_processor->finish.locks = locks;
+	this_processor->finish.lock_count = count;
+	suspend();
+}
+
+void ScheduleInternal(spinlock ** locks, unsigned short lock_count, thread_desc ** thrds, unsigned short thrd_count) {
+	this_processor->finish.action_code = Release_Multi_Schedule;
+	this_processor->finish.locks = locks;
+	this_processor->finish.lock_count = lock_count;
+	this_processor->finish.thrds = thrds;
+	this_processor->finish.thrd_count = thrd_count;
 	suspend();
 }
@@ -529,16 +558,16 @@
 }
 
-void ?{}( __thread_stack_t * this ) {
+void ?{}( __condition_stack_t * this ) {
 	this->top = NULL;
 }
 
-void push( __thread_stack_t * this, thread_desc * t ) {
-	assert(t->next != NULL);
+void push( __condition_stack_t * this, __condition_criterion_t * t ) {
+	assert( !t->next );
 	t->next = this->top;
 	this->top = t;
 }
 
-thread_desc * pop( __thread_stack_t * this ) {
-	thread_desc * top = this->top;
+__condition_criterion_t * pop( __condition_stack_t * this ) {
+	__condition_criterion_t * top = this->top;
 	if( top ) {
 		this->top = top->next;
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/libcfa/concurrency/kernel_private.h	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -26,8 +26,10 @@
 thread_desc * nextThread(cluster * this);
 
-void ScheduleInternal();
+void ScheduleInternal(void);
 void ScheduleInternal(spinlock * lock);
 void ScheduleInternal(thread_desc * thrd);
 void ScheduleInternal(spinlock * lock, thread_desc * thrd);
+void ScheduleInternal(spinlock ** locks, unsigned short count);
+void ScheduleInternal(spinlock ** locks, unsigned short lock_count, thread_desc ** thrds, unsigned short thrd_count);
 
 //-----------------------------------------------------------------------------
Index: src/libcfa/concurrency/monitor
===================================================================
--- src/libcfa/concurrency/monitor	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/libcfa/concurrency/monitor	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -46,8 +46,32 @@
 //-----------------------------------------------------------------------------
 // Internal scheduling
+
+struct __condition_criterion_t {
+	bool ready;						//True once this criterion is met, false otherwise
+	monitor_desc * target;				//The monitor this criterion concerns
+	struct __condition_node_t * owner;		//The parent node to which this criterion belongs
+	__condition_criterion_t * next;		//Intrusive linked-list next field (the link followed by __condition_stack_t push/pop)
+};
+
+struct __condition_node_t {
+	thread_desc * waiting_thread;			//Thread that needs to be woken when all criteria are met
+	__condition_criterion_t * criteria; 	//Array of criteria (the entries are contiguous in memory)
+	unsigned short count;				//Number of entries in the criteria array
+	__condition_node_t * next;			//Intrusive linked-list next field
+};
+
+struct __condition_blocked_queue_t {
+	__condition_node_t * head;
+	__condition_node_t ** tail;
+};
+
+void ?{}( __condition_blocked_queue_t * );
+void append( __condition_blocked_queue_t *, __condition_node_t * );
+__condition_node_t * pop_head( __condition_blocked_queue_t * );
+
 struct condition {
-	__thread_queue_t blocked;
-	monitor_desc ** monitors;
-	unsigned short monitor_count;
+	__condition_blocked_queue_t blocked;	//Linked list of the blocked threads together with the information needed to unblock them
+	monitor_desc ** monitors;			//Array of monitor pointers (the monitors themselves are NOT contiguous in memory)
+	unsigned short monitor_count;			//Number of monitors in the array
 };
 
Index: src/libcfa/concurrency/monitor.c
===================================================================
--- src/libcfa/concurrency/monitor.c	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/libcfa/concurrency/monitor.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -20,21 +20,31 @@
 #include "libhdr.h"
 
-void set_owner( monitor_desc * this, thread_desc * owner ) {
-	//Pass the monitor appropriately
-	this->owner = owner;
-
-	//We are passing the monitor to someone else, which means recursion level is not 0
-	this->recursion = owner ? 1 : 0;
-}
+//-----------------------------------------------------------------------------
+// Forward declarations
+static inline void set_owner( monitor_desc * this, thread_desc * owner );
+static inline thread_desc * next_thread( monitor_desc * this );
+
+static inline void lock_all( spinlock ** locks, unsigned short count );
+static inline void lock_all( monitor_desc ** source, spinlock ** /*out*/ locks, unsigned short count );
+static inline void unlock_all( spinlock ** locks, unsigned short count );
+static inline void unlock_all( monitor_desc ** locks, unsigned short count );
+
+static inline void save_recursion   ( monitor_desc ** ctx, unsigned int * /*out*/ recursions, unsigned short count );
+static inline void restore_recursion( monitor_desc ** ctx, unsigned int * /*in */ recursions, unsigned short count );
+
+static inline thread_desc * check_condition( __condition_criterion_t * );
+static inline void brand_condition( condition * );
+static inline unsigned short insert_unique( thread_desc ** thrds, unsigned short end, thread_desc * val );
+
+//-----------------------------------------------------------------------------
+// Enter/Leave routines
+
 
 extern "C" {
-	void __enter_monitor_desc(monitor_desc * this, monitor_desc * leader) {
+	void __enter_monitor_desc(monitor_desc * this) {
 		lock( &this->lock );
 		thread_desc * thrd = this_thread();
 
-		// //Update the stack owner
-		// this->stack_owner = leader;
-
-		LIB_DEBUG_PRINT_SAFE("Entering %p (o: %p, r: %i)\n", this, this->owner, this->recursion);
+		LIB_DEBUG_PRINT_SAFE("%p Entering %p (o: %p, r: %i)\n", thrd, this, this->owner, this->recursion);
 
 		if( !this->owner ) {
@@ -61,25 +71,12 @@
 
 	// leave pseudo code :
-	// 	decrement level
-	// 	leve == 0 ?
-	// 		no : done
-	// 		yes :
-	// 			signal stack empty ?
-	//				has leader :
-	//					bulk acquiring means we don't own the signal stack
-	//					ignore it but don't release the monitor
-	// 				yes :
-	// 					next in entry queue is new owner
-	// 				no :
-	// 					top of the signal stack is the owner
-	//					context switch to him right away
-	//
-	void __leave_monitor_desc(monitor_desc * this, monitor_desc * leader) {
+	//	TODO
+	void __leave_monitor_desc(monitor_desc * this) {
 		lock( &this->lock );
 
-		LIB_DEBUG_PRINT_SAFE("Leaving %p (o: %p, r: %i)\n", this, this->owner, this->recursion);
-
 		thread_desc * thrd = this_thread();
-		assertf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i)", this->owner, thrd, this->recursion );
+
+		LIB_DEBUG_PRINT_SAFE("%p Leaving %p (o: %p, r: %i)\n", thrd, this, this->owner, this->recursion);
+		assertf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i)", thrd, this->owner, this->recursion );
 
 		//Leaving a recursion level, decrement the counter
@@ -89,38 +86,9 @@
 		//it means we don't need to do anything
 		if( this->recursion != 0) {
-			// this->stack_owner = leader;
 			unlock( &this->lock );
 			return;
 		}
-			
-		// //If we don't own the signal stack then just leave it to the owner
-		// if( this->stack_owner ) {
-		// 	this->stack_owner = leader;
-		// 	unlock( &this->lock );
-		// 	return;
-		// }
-
-		//We are the stack owner and have left the last recursion level.
-		//We are in charge of passing the monitor
-		thread_desc * new_owner = 0;
-
-		//Check the signaller stack
-		new_owner = pop( &this->signal_stack );
-		if( new_owner ) {
-			//The signaller stack is not empty,
-			//transfer control immediately
-			set_owner( this, new_owner );
-			// this->stack_owner = leader;
-			ScheduleInternal( &this->lock, new_owner );
-			return;
-		}
-		
-		// No signaller thread
-		// Get the next thread in the entry_queue
-		new_owner = pop_head( &this->entry_queue );
-		set_owner( this, new_owner );
-
-		// //Update the stack owner
-		// this->stack_owner = leader;
+
+		thread_desc * new_owner = next_thread( this );
 
 		//We can now let other threads in safely
@@ -133,14 +101,12 @@
 
 static inline void enter(monitor_desc ** monitors, int count) {
-	__enter_monitor_desc( monitors[0], NULL );
-	for(int i = 1; i < count; i++) {
-		__enter_monitor_desc( monitors[i], monitors[0] );
+	for(int i = 0; i < count; i++) {
+		__enter_monitor_desc( monitors[i] );
 	}
 }
 
 static inline void leave(monitor_desc ** monitors, int count) {
-	__leave_monitor_desc( monitors[0], NULL );
-	for(int i = count - 1; i >= 1; i--) {
-		__leave_monitor_desc( monitors[i], monitors[0] );
+	for(int i = count - 1; i >= 0; i--) {
+		__leave_monitor_desc( monitors[i] );
 	}
 }
@@ -169,53 +135,56 @@
 // Internal scheduling
 void wait( condition * this ) {
-	assertf(false, "NO SUPPORTED");
-	// LIB_DEBUG_FPRINTF("Waiting\n");
-	thread_desc * this_thrd = this_thread();
-
-	if( !this->monitors ) {
-		this->monitors = this_thrd->current_monitors;
-		this->monitor_count = this_thrd->current_monitor_count;
-	}
+	LIB_DEBUG_PRINT_SAFE("Waiting\n");
+
+	brand_condition( this );
+
+	//Check that everything is as expected
+	assertf( this->monitors != NULL, "Waiting with no monitors (%p)", this->monitors );
+	assertf( this->monitor_count != 0, "Waiting with 0 monitors (%i)", this->monitor_count );
 
 	unsigned short count = this->monitor_count;
-
-	//Check that everything is as expected
-	assert( this->monitors != NULL );
-	assert( this->monitor_count != 0 );
-
 	unsigned int recursions[ count ];		//Save the current recursion levels to restore them later
 	spinlock *   locks     [ count ];		//We need to pass-in an array of locks to ScheduleInternal
 
-	// LIB_DEBUG_FPRINTF("Getting ready to wait\n");
-
-	//Loop on all the monitors and release the owner
-	for( unsigned int i = 0; i < count; i++ ) {
-		monitor_desc * cur = this->monitors[i];
-
-		assert( cur );
-
-		// LIB_DEBUG_FPRINTF("cur %p lock %p\n", cur, &cur->lock);
-
-		//Store the locks for later
-		locks[i] = &cur->lock;
-
-		//Protect the monitors
-		lock( locks[i] );
-		{		
-			//Save the recursion levels
-			recursions[i] = cur->recursion;
-
-			//Release the owner
-			cur->recursion = 0;
-			cur->owner = NULL;
-		}
-		//Release the monitor
-		unlock( locks[i] );
-	}
-
-	// LIB_DEBUG_FPRINTF("Waiting now\n");
-
-	//Everything is ready to go to sleep
-	ScheduleInternal( locks, count );
+	LIB_DEBUG_PRINT_SAFE("count %i\n", count);
+
+	__condition_node_t waiter;
+	waiter.waiting_thread = this_thread();
+	waiter.count = count;
+	waiter.next = NULL;
+
+	__condition_criterion_t criteria[count];
+	for(int i = 0; i < count; i++) {
+		criteria[i].ready  = false;
+		criteria[i].target = this->monitors[i];
+		criteria[i].owner  = &waiter;
+		criteria[i].next   = NULL;
+		LIB_DEBUG_PRINT_SAFE( "Criterion %p\n", &criteria[i] );
+	}
+
+	waiter.criteria = criteria;
+	append( &this->blocked, &waiter );
+
+	lock_all( this->monitors, locks, count );
+	save_recursion( this->monitors, recursions, count );
+	//DON'T unlock, ask the kernel to do it
+
+	//Find the next thread(s) to run, each distinct thread is stored once, packed from index 0
+	unsigned short thread_count = 0;		//Number of entries of threads[] filled so far
+	thread_desc * threads[ count ];
+
+	for( int i = 0; i < count; i++) {
+		thread_desc * new_owner = next_thread( this->monitors[i] );
+		thread_count = insert_unique( threads, thread_count, new_owner );	//Compare against the stored entries only, not the loop index
+	}
+
+	LIB_DEBUG_PRINT_SAFE("Will unblock: ");
+	for(int i = 0; i < thread_count; i++) {
+		LIB_DEBUG_PRINT_SAFE("%p ", threads[i]);
+	}
+	LIB_DEBUG_PRINT_SAFE("\n");
+
+	// Everything is ready to go to sleep
+	ScheduleInternal( locks, count, threads, thread_count );
 
 
@@ -224,43 +193,184 @@
 
 	//We are back, restore the owners and recursions
-	for( unsigned int i = 0; i < count; i++ ) {
-		monitor_desc * cur = this->monitors[i];
-
-		//Protect the monitors
-		lock( locks[i] );
-		{
-			//Release the owner
-			cur->owner = this_thrd;
-			cur->recursion = recursions[i];
-		}
-		//Release the monitor
-		unlock( locks[i] );
-	}
-}
-
-static void __signal_internal( condition * this ) {
-	assertf(false, "NO SUPPORTED");
-	if( !this->blocked.head ) return;
+	lock_all( locks, count );
+	restore_recursion( this->monitors, recursions, count );
+	unlock_all( locks, count );
+}
+
+void signal( condition * this ) {
+	if( !this->blocked.head ) {
+		LIB_DEBUG_PRINT_SAFE("Nothing to signal\n");
+		return;
+	}
 
 	//Check that everything is as expected
 	assert( this->monitors );
 	assert( this->monitor_count != 0 );
+
+	unsigned short count = this->monitor_count;
 	
 	LIB_DEBUG_DO(
-		if ( this->monitors != this_thread()->current_monitors ) {
-			abortf( "Signal on condition %p made outside of the correct monitor(s)", this );
+		thread_desc * this_thrd = this_thread();
+		if ( this->monitor_count != this_thrd->current_monitor_count ) {
+			abortf( "Signal on condition %p made with different number of monitor(s), expected %i got %i", this, this->monitor_count, this_thrd->current_monitor_count );
 		} // if
+
+		for(int i = 0; i < this->monitor_count; i++) {		//Every monitor must match, position by position
+			if ( this->monitors[i] != this_thrd->current_monitors[i] ) {
+				abortf( "Signal on condition %p made with different monitor, expected %p got %p", this, this->monitors[i], this_thrd->current_monitors[i] );
+			} // if
+		}
 	);
 
-	monitor_desc * owner = this->monitors[0];
-	lock( &owner->lock );
-	{
-		thread_desc * unblock = pop_head( &this->blocked );
-		push( &owner->signal_stack, unblock );
-	}
-	unlock( &owner->lock );
-}
-
-void signal( condition * this ) {
-	__signal_internal( this );
-}
+	lock_all( this->monitors, NULL, count );
+	LIB_DEBUG_PRINT_SAFE("Signalling");
+
+	__condition_node_t * node = pop_head( &this->blocked );
+	for(int i = 0; i < count; i++) {
+		__condition_criterion_t * crit = &node->criteria[i];
+		LIB_DEBUG_PRINT_SAFE(" %p", crit->target);
+		assert( !crit->ready );
+		push( &crit->target->signal_stack, crit );
+	}
+
+	LIB_DEBUG_PRINT_SAFE("\n");
+
+	unlock_all( this->monitors, count );
+}
+
+//-----------------------------------------------------------------------------
+// Utilities
+
+static inline void set_owner( monitor_desc * this, thread_desc * owner ) {
+	//A monitor handed to a thread starts at recursion level 1,
+	//a monitor left without an owner goes back to level 0
+	this->recursion = owner ? 1 : 0;
+	//Record the new holder of the monitor (may be NULL)
+	this->owner = owner;
+}
+
+static inline thread_desc * next_thread( monitor_desc * this ) {
+	//Signalled criteria are served before the entry queue
+	__condition_criterion_t * signalled = pop( &this->signal_stack );
+	if( !signalled ) {
+		//Nothing on the signal stack,
+		//pass the monitor to whatever the entry queue yields
+		thread_desc * entrant = pop_head( &this->entry_queue );
+		set_owner( this, entrant );
+		return entrant;
+	}
+
+	//A criterion was signalled on this monitor:
+	//the monitor is marked as owned by the waiting thread
+	//whether or not that thread is ready to run yet
+	thread_desc * waiter = signalled->owner->waiting_thread;
+	set_owner( this, waiter );
+	//check_condition decides if the thread is returned or NULL
+	return check_condition( signalled );
+}
+
+//Acquires every spinlock of `locks`
+static inline void lock_all( spinlock ** locks, unsigned short count ) {
+	//Locks are taken in array order, first to last
+	for( int n = 0; n < count; n++ ) lock( locks[n] );
+}
+
+static inline void lock_all( monitor_desc ** source, spinlock ** /*out*/ locks, unsigned short count ) {
+	for( int n = 0; n < count; n++ ) {
+		lock( &source[n]->lock );
+		//`locks` is optional: when given, it receives the address of each lock taken
+		if( locks != NULL ) locks[n] = &source[n]->lock;
+	}
+}
+
+//Releases every spinlock of `locks`
+static inline void unlock_all( spinlock ** locks, unsigned short count ) {
+	//Locks are released in array order, first to last
+	for( int n = 0; n < count; n++ ) unlock( locks[n] );
+}
+
+//Releases the spinlock of every monitor of `locks`
+static inline void unlock_all( monitor_desc ** locks, unsigned short count ) {
+	//Despite its name, `locks` holds monitors, the lock is a field of each one
+	for( int n = 0; n < count; n++ ) unlock( &locks[n]->lock );
+}
+
+
+//Copies the recursion level of each monitor of `ctx` into the matching slot of `recursions`
+static inline void save_recursion   ( monitor_desc ** ctx, unsigned int * /*out*/ recursions, unsigned short count ) {
+	//Slot n of `recursions` mirrors monitor n of `ctx`
+	for( int n = 0; n < count; n++ ) recursions[n] = ctx[n]->recursion;
+}
+
+//Writes the levels stored in `recursions` back into the matching monitors of `ctx`
+static inline void restore_recursion( monitor_desc ** ctx, unsigned int * /*in */ recursions, unsigned short count ) {
+	//Monitor n of `ctx` receives slot n of `recursions`
+	for( int n = 0; n < count; n++ ) ctx[n]->recursion = recursions[n];
+}
+
+// Performs two steps in one call
+// 1 - Flags the criterion `target` as ready
+// 2 - Checks whether every criterion of the node owning `target` is ready,
+//     returns the node's waiting thread if so, NULL otherwise
+static inline thread_desc * check_condition( __condition_criterion_t * target ) {
+	__condition_node_t * waiter = target->owner;
+	__condition_criterion_t * crits = waiter->criteria;
+	unsigned short total = waiter->count;
+	bool all_ready = true;
+
+	for( int idx = 0; idx < total; idx++ ) {
+		__condition_criterion_t * cur = &crits[idx];
+		LIB_DEBUG_PRINT_SAFE( "Checking %p for %p\n", cur, target );
+		if( cur == target ) {
+			cur->ready = true;
+			LIB_DEBUG_PRINT_SAFE( "True\n" );
+		}
+		if( !cur->ready ) all_ready = false;
+	}
+
+	LIB_DEBUG_PRINT_SAFE( "Runing %i\n", all_ready );
+	if( !all_ready ) return NULL;
+	return waiter->waiting_thread;
+}
+
+//Binds the condition to the calling thread's current monitors the first time it is used
+static inline void brand_condition( condition * this ) {
+	thread_desc * thrd = this_thread();
+	if( this->monitors ) return;		//Already branded, keep the existing monitors
+	LIB_DEBUG_PRINT_SAFE("Branding\n");
+	assertf( thrd->current_monitors != NULL, "No current monitor to brand condition (%p)", thrd->current_monitors );
+	this->monitors = thrd->current_monitors;
+	this->monitor_count = thrd->current_monitor_count;
+}
+
+//Stores `val` at index `end` of `thrds` unless one of the first `end` entries already holds it; returns the new entry count
+static inline unsigned short insert_unique( thread_desc ** thrds, unsigned short end, thread_desc * val ) {
+	int at = 0;
+	while( at < end && thrds[at] != val ) at++;
+	if( at < end ) return end;		//Already present, nothing to add
+	thrds[end] = val;
+	return end + 1;
+}
+
+void ?{}( __condition_blocked_queue_t * this ) {
+	this->tail = &this->head;		//With nothing queued, the tail designates the head pointer itself
+	this->head = NULL;			//No node queued yet
+}
+
+void append( __condition_blocked_queue_t * this, __condition_node_t * c ) {		//Links node `c` at the end of the queue
+	assert(this->tail != NULL);
+	*this->tail = c;			//Store `c` in the slot the tail designates (the head pointer or the last node's next)
+	this->tail = &c->next;		//The tail now designates `c`'s own next pointer; c->next itself is not modified here
+}
+
+__condition_node_t * pop_head( __condition_blocked_queue_t * this ) {
+	__condition_node_t * first = this->head;
+	//Nothing queued, nothing to unlink
+	if( !first ) return NULL;
+	//Unlink the first node, the tail goes back to the head pointer once the queue is empty
+	__condition_node_t * rest = first->next;
+	this->head = rest;
+	if( !rest ) this->tail = &this->head;
+	first->next = NULL;
+	return first;
+}
Index: src/libcfa/interpose.c
===================================================================
--- src/libcfa/interpose.c	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/libcfa/interpose.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -26,7 +26,6 @@
 
 #include "libhdr/libdebug.h"
+#include "libhdr/libtools.h"
 #include "startup.h"
-
-void abortf( const char *fmt, ... ) __attribute__ ((__nothrow__, __leaf__, __noreturn__));
 
 void interpose_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_CORE ) ));
@@ -115,27 +114,29 @@
 static char abort_text[ abort_text_size ];
 
-void abortf( const char fmt[], ... ) __attribute__ ((__nothrow__, __leaf__, __noreturn__)) {
-	void * kernel_data = kernel_abort();
-	
-	int len;
-	
-	if( fmt ) {
-		va_list args;
-		va_start( args, fmt );
+extern "C" {
+	void abortf( const char fmt[], ... ) __attribute__ ((__nothrow__, __leaf__, __noreturn__)) {
+		void * kernel_data = kernel_abort();
 
-		len = vsnprintf( abort_text, abort_text_size, fmt, args );
+		int len;
 
-		va_end( args );
+		if( fmt ) {
+			va_list args;
+			va_start( args, fmt );
 
+			len = vsnprintf( abort_text, abort_text_size, fmt, args );
+
+			va_end( args );
+
+			__lib_debug_write( STDERR_FILENO, abort_text, len );
+			__lib_debug_write( STDERR_FILENO, "\n", 1 );
+		}
+
+		len = snprintf( abort_text, abort_text_size, "Cforall Runtime error (UNIX pid:%ld)\n", (long int)getpid() ); // use UNIX pid (versus getPid)
 		__lib_debug_write( STDERR_FILENO, abort_text, len );
-		__lib_debug_write( STDERR_FILENO, "\n", 1 );
-	}
-
-	len = snprintf( abort_text, abort_text_size, "Cforall Runtime error (UNIX pid:%ld)\n", (long int)getpid() ); // use UNIX pid (versus getPid)
-    	__lib_debug_write( STDERR_FILENO, abort_text, len );
 
 
-	kernel_abort_msg( kernel_data, abort_text, abort_text_size );
+		kernel_abort_msg( kernel_data, abort_text, abort_text_size );
 
-	libc_abort();
+		libc_abort();
+	}
 }
Index: src/libcfa/libhdr/libtools.h
===================================================================
--- src/libcfa/libhdr/libtools.h	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/libcfa/libhdr/libtools.h	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -22,6 +22,11 @@
 // } // libAbort
 
-#define abortf(...) abort();
-
+#ifdef __cforall
+extern "C" {
+#endif
+void abortf( const char fmt[], ... ) __attribute__ ((__nothrow__, __leaf__, __noreturn__));
+#ifdef __cforall
+}
+#endif
 
 #endif //__LIB_TOOLS_H__
Index: src/libcfa/rational
===================================================================
--- src/libcfa/rational	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/libcfa/rational	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -12,7 +12,8 @@
 // Created On       : Wed Apr  6 17:56:25 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed May  4 14:11:45 2016
-// Update Count     : 16
+// Last Modified On : Mon May  1 08:25:06 2017
+// Update Count     : 33
 //
+
 #ifndef RATIONAL_H
 #define RATIONAL_H
@@ -21,6 +22,7 @@
 
 // implementation
+typedef long int RationalImpl;
 struct Rational {
-	long int numerator, denominator;					// invariant: denominator > 0
+	RationalImpl numerator, denominator;					// invariant: denominator > 0
 }; // Rational
 
@@ -31,12 +33,14 @@
 // constructors
 void ?{}( Rational * r );
-void ?{}( Rational * r, long int n );
-void ?{}( Rational * r, long int n, long int d );
+void ?{}( Rational * r, RationalImpl n );
+void ?{}( Rational * r, RationalImpl n, RationalImpl d );
 
-// getter/setter for numerator/denominator
-long int numerator( Rational r );
-long int numerator( Rational r, long int n );
-long int denominator( Rational r );
-long int denominator( Rational r, long int d );
+// getter for numerator/denominator
+RationalImpl numerator( Rational r );
+RationalImpl denominator( Rational r );
+[ RationalImpl, RationalImpl ] ?=?( * [ RationalImpl, RationalImpl ] dest, Rational src );
+// setter for numerator/denominator
+RationalImpl numerator( Rational r, RationalImpl n );
+RationalImpl denominator( Rational r, RationalImpl d );
 
 // comparison
@@ -57,5 +61,5 @@
 // conversion
 double widen( Rational r );
-Rational narrow( double f, long int md );
+Rational narrow( double f, RationalImpl md );
 
 // I/O
Index: src/libcfa/rational.c
===================================================================
--- src/libcfa/rational.c	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/libcfa/rational.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -10,6 +10,6 @@
 // Created On       : Wed Apr  6 17:54:28 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Jul  9 11:18:04 2016
-// Update Count     : 40
+// Last Modified On : Thu Apr 27 17:05:06 2017
+// Update Count     : 51
 // 
 
@@ -30,7 +30,7 @@
 // Calculate greatest common denominator of two numbers, the first of which may be negative. Used to reduce rationals.
 // alternative: https://en.wikipedia.org/wiki/Binary_GCD_algorithm
-static long int gcd( long int a, long int b ) {
+static RationalImpl gcd( RationalImpl a, RationalImpl b ) {
 	for ( ;; ) {										// Euclid's algorithm
-		long int r = a % b;
+		RationalImpl r = a % b;
 	  if ( r == 0 ) break;
 		a = b;
@@ -40,5 +40,5 @@
 } // gcd
 
-static long int simplify( long int *n, long int *d ) {
+static RationalImpl simplify( RationalImpl *n, RationalImpl *d ) {
 	if ( *d == 0 ) {
 		serr | "Invalid rational number construction: denominator cannot be equal to 0." | endl;
@@ -56,10 +56,10 @@
 } // rational
 
-void ?{}( Rational * r, long int n ) {
+void ?{}( Rational * r, RationalImpl n ) {
 	r{ n, 1 };
 } // rational
 
-void ?{}( Rational * r, long int n, long int d ) {
-	long int t = simplify( &n, &d );					// simplify
+void ?{}( Rational * r, RationalImpl n, RationalImpl d ) {
+	RationalImpl t = simplify( &n, &d );				// simplify
 	r->numerator = n / t;
 	r->denominator = d / t;
@@ -67,13 +67,23 @@
 
 
-// getter/setter for numerator/denominator
-
-long int numerator( Rational r ) {
+// getter for numerator/denominator
+
+RationalImpl numerator( Rational r ) {
 	return r.numerator;
 } // numerator
 
-long int numerator( Rational r, long int n ) {
-	long int prev = r.numerator;
-	long int t = gcd( abs( n ), r.denominator );		// simplify
+RationalImpl denominator( Rational r ) {
+	return r.denominator;
+} // denominator
+
+[ RationalImpl, RationalImpl ] ?=?( * [ RationalImpl, RationalImpl ] dest, Rational src ) {
+	return *dest = src.[ numerator, denominator ];
+}
+
+// setter for numerator/denominator
+
+RationalImpl numerator( Rational r, RationalImpl n ) {
+	RationalImpl prev = r.numerator;
+	RationalImpl t = gcd( abs( n ), r.denominator );		// simplify
 	r.numerator = n / t;
 	r.denominator = r.denominator / t;
@@ -81,11 +91,7 @@
 } // numerator
 
-long int denominator( Rational r ) {
-	return r.denominator;
-} // denominator
-
-long int denominator( Rational r, long int d ) {
-	long int prev = r.denominator;
-	long int t = simplify( &r.numerator, &d );			// simplify
+RationalImpl denominator( Rational r, RationalImpl d ) {
+	RationalImpl prev = r.denominator;
+	RationalImpl t = simplify( &r.numerator, &d );			// simplify
 	r.numerator = r.numerator / t;
 	r.denominator = d / t;
@@ -170,5 +176,5 @@
 
 // http://www.ics.uci.edu/~eppstein/numth/frap.c
-Rational narrow( double f, long int md ) {
+Rational narrow( double f, RationalImpl md ) {
 	if ( md <= 1 ) {									// maximum fractional digits too small?
 		return (Rational){ f, 1};						// truncate fraction
@@ -176,10 +182,10 @@
 
 	// continued fraction coefficients
-	long int m00 = 1, m11 = 1, m01 = 0, m10 = 0;
-	long int ai, t;
+	RationalImpl m00 = 1, m11 = 1, m01 = 0, m10 = 0;
+	RationalImpl ai, t;
 
 	// find terms until denom gets too big
 	for ( ;; ) {
-		ai = (long int)f;
+		ai = (RationalImpl)f;
 	  if ( ! (m10 * ai + m11 <= md) ) break;
 		t = m00 * ai + m01;
@@ -202,5 +208,5 @@
 forall( dtype istype | istream( istype ) )
 istype * ?|?( istype *is, Rational *r ) {
-	long int t;
+	RationalImpl t;
 	is | &(r->numerator) | &(r->denominator);
 	t = simplify( &(r->numerator), &(r->denominator) );
Index: src/tests/.expect/32/attributes.txt
===================================================================
--- src/tests/.expect/32/attributes.txt	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.expect/32/attributes.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -1,3 +1,341 @@
-attributes.c:74 error: cannot redefine typedef: ptrdiff_t
-attributes.c:75 error: cannot redefine typedef: size_t
-make: *** [attributes] Error 1
+__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
+__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
+__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
+__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern int atexit(void (*__func)(void));
+__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(int __status);
+extern int printf(const char *__restrict __format, ...);
+int __la__Fi___1(){
+    int ___retval_la__i_1;
+    L: __attribute__ ((unused)) ((void)1);
+}
+__attribute__ ((unused)) struct __anonymous0 {
+};
+static inline void ___constructor__F_P13s__anonymous0_autogen___1(struct __anonymous0 *___dst__P13s__anonymous0_1);
+static inline void ___constructor__F_P13s__anonymous013s__anonymous0_autogen___1(struct __anonymous0 *___dst__P13s__anonymous0_1, struct __anonymous0 ___src__13s__anonymous0_1);
+static inline void ___destructor__F_P13s__anonymous0_autogen___1(struct __anonymous0 *___dst__P13s__anonymous0_1);
+static inline struct __anonymous0 ___operator_assign__F13s__anonymous0_P13s__anonymous013s__anonymous0_autogen___1(struct __anonymous0 *___dst__P13s__anonymous0_1, struct __anonymous0 ___src__13s__anonymous0_1);
+static inline void ___constructor__F_P13s__anonymous0_autogen___1(struct __anonymous0 *___dst__P13s__anonymous0_1){
+}
+static inline void ___constructor__F_P13s__anonymous013s__anonymous0_autogen___1(struct __anonymous0 *___dst__P13s__anonymous0_1, struct __anonymous0 ___src__13s__anonymous0_1){
+}
+static inline void ___destructor__F_P13s__anonymous0_autogen___1(struct __anonymous0 *___dst__P13s__anonymous0_1){
+}
+static inline struct __anonymous0 ___operator_assign__F13s__anonymous0_P13s__anonymous013s__anonymous0_autogen___1(struct __anonymous0 *___dst__P13s__anonymous0_1, struct __anonymous0 ___src__13s__anonymous0_1){
+    return ((struct __anonymous0 )___src__13s__anonymous0_1);
+}
+__attribute__ ((unused)) struct Agn1;
+__attribute__ ((unused)) struct Agn2 {
+};
+static inline void ___constructor__F_P5sAgn2_autogen___1(struct Agn2 *___dst__P5sAgn2_1);
+static inline void ___constructor__F_P5sAgn25sAgn2_autogen___1(struct Agn2 *___dst__P5sAgn2_1, struct Agn2 ___src__5sAgn2_1);
+static inline void ___destructor__F_P5sAgn2_autogen___1(struct Agn2 *___dst__P5sAgn2_1);
+static inline struct Agn2 ___operator_assign__F5sAgn2_P5sAgn25sAgn2_autogen___1(struct Agn2 *___dst__P5sAgn2_1, struct Agn2 ___src__5sAgn2_1);
+static inline void ___constructor__F_P5sAgn2_autogen___1(struct Agn2 *___dst__P5sAgn2_1){
+}
+static inline void ___constructor__F_P5sAgn25sAgn2_autogen___1(struct Agn2 *___dst__P5sAgn2_1, struct Agn2 ___src__5sAgn2_1){
+}
+static inline void ___destructor__F_P5sAgn2_autogen___1(struct Agn2 *___dst__P5sAgn2_1){
+}
+static inline struct Agn2 ___operator_assign__F5sAgn2_P5sAgn25sAgn2_autogen___1(struct Agn2 *___dst__P5sAgn2_1, struct Agn2 ___src__5sAgn2_1){
+    return ((struct Agn2 )___src__5sAgn2_1);
+}
+enum __attribute__ ((unused)) __anonymous1 {
+    __E1__C13e__anonymous1_1,
+};
+enum __attribute__ ((unused)) Agn3;
+enum __attribute__ ((packed)) Agn3 {
+    __E2__C5eAgn3_1,
+};
+__attribute__ ((unused)) struct __anonymous2;
+__attribute__ ((unused)) struct __anonymous3;
+struct Fdl {
+    __attribute__ ((unused)) int __f1__i_1;
+    __attribute__ ((unused)) int __f2__i_1;
+    __attribute__ ((unused,unused)) int __f3__i_1;
+    __attribute__ ((unused)) int __f4__i_1;
+    __attribute__ ((unused,unused)) int __f5__i_1;
+    __attribute__ ((used,packed)) int __f6__i_1;
+    __attribute__ ((used,unused,unused)) int __f7__i_1;
+    __attribute__ ((used,used,unused)) int __f8__i_1;
+    __attribute__ ((unused)) int __anonymous_object0;
+    __attribute__ ((unused,unused)) int *__f9__Pi_1;
+};
+static inline void ___constructor__F_P4sFdl_autogen___1(struct Fdl *___dst__P4sFdl_1);
+static inline void ___constructor__F_P4sFdl4sFdl_autogen___1(struct Fdl *___dst__P4sFdl_1, struct Fdl ___src__4sFdl_1);
+static inline void ___destructor__F_P4sFdl_autogen___1(struct Fdl *___dst__P4sFdl_1);
+static inline struct Fdl ___operator_assign__F4sFdl_P4sFdl4sFdl_autogen___1(struct Fdl *___dst__P4sFdl_1, struct Fdl ___src__4sFdl_1);
+static inline void ___constructor__F_P4sFdl_autogen___1(struct Fdl *___dst__P4sFdl_1){
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))) /* ?{} */);
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))) /* ?{} */);
+}
+static inline void ___constructor__F_P4sFdl4sFdl_autogen___1(struct Fdl *___dst__P4sFdl_1, struct Fdl ___src__4sFdl_1){
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))=___src__4sFdl_1.__f1__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))=___src__4sFdl_1.__f2__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))=___src__4sFdl_1.__f3__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))=___src__4sFdl_1.__f4__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))=___src__4sFdl_1.__f5__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))=___src__4sFdl_1.__f6__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))=___src__4sFdl_1.__f7__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))=___src__4sFdl_1.__f8__i_1) /* ?{} */);
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))=___src__4sFdl_1.__f9__Pi_1) /* ?{} */);
+}
+static inline void ___destructor__F_P4sFdl_autogen___1(struct Fdl *___dst__P4sFdl_1){
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))) /* ^?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))) /* ^?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))) /* ^?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))) /* ^?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))) /* ^?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))) /* ^?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))) /* ^?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))) /* ^?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))) /* ^?{} */);
+}
+static inline struct Fdl ___operator_assign__F4sFdl_P4sFdl4sFdl_autogen___1(struct Fdl *___dst__P4sFdl_1, struct Fdl ___src__4sFdl_1){
+    ((void)((*___dst__P4sFdl_1).__f1__i_1=___src__4sFdl_1.__f1__i_1));
+    ((void)((*___dst__P4sFdl_1).__f2__i_1=___src__4sFdl_1.__f2__i_1));
+    ((void)((*___dst__P4sFdl_1).__f3__i_1=___src__4sFdl_1.__f3__i_1));
+    ((void)((*___dst__P4sFdl_1).__f4__i_1=___src__4sFdl_1.__f4__i_1));
+    ((void)((*___dst__P4sFdl_1).__f5__i_1=___src__4sFdl_1.__f5__i_1));
+    ((void)((*___dst__P4sFdl_1).__f6__i_1=___src__4sFdl_1.__f6__i_1));
+    ((void)((*___dst__P4sFdl_1).__f7__i_1=___src__4sFdl_1.__f7__i_1));
+    ((void)((*___dst__P4sFdl_1).__f8__i_1=___src__4sFdl_1.__f8__i_1));
+    ((void)((*___dst__P4sFdl_1).__f9__Pi_1=___src__4sFdl_1.__f9__Pi_1));
+    return ((struct Fdl )___src__4sFdl_1);
+}
+static inline void ___constructor__F_P4sFdli_autogen___1(struct Fdl *___dst__P4sFdl_1, int __f1__i_1){
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))=__f1__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))) /* ?{} */);
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))) /* ?{} */);
+}
+static inline void ___constructor__F_P4sFdlii_autogen___1(struct Fdl *___dst__P4sFdl_1, int __f1__i_1, int __f2__i_1){
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))=__f1__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))=__f2__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))) /* ?{} */);
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))) /* ?{} */);
+}
+static inline void ___constructor__F_P4sFdliii_autogen___1(struct Fdl *___dst__P4sFdl_1, int __f1__i_1, int __f2__i_1, int __f3__i_1){
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))=__f1__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))=__f2__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))=__f3__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))) /* ?{} */);
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))) /* ?{} */);
+}
+static inline void ___constructor__F_P4sFdliiii_autogen___1(struct Fdl *___dst__P4sFdl_1, int __f1__i_1, int __f2__i_1, int __f3__i_1, int __f4__i_1){
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))=__f1__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))=__f2__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))=__f3__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))=__f4__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))) /* ?{} */);
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))) /* ?{} */);
+}
+static inline void ___constructor__F_P4sFdliiiii_autogen___1(struct Fdl *___dst__P4sFdl_1, int __f1__i_1, int __f2__i_1, int __f3__i_1, int __f4__i_1, int __f5__i_1){
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))=__f1__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))=__f2__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))=__f3__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))=__f4__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))=__f5__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))) /* ?{} */);
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))) /* ?{} */);
+}
+static inline void ___constructor__F_P4sFdliiiiii_autogen___1(struct Fdl *___dst__P4sFdl_1, int __f1__i_1, int __f2__i_1, int __f3__i_1, int __f4__i_1, int __f5__i_1, int __f6__i_1){
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))=__f1__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))=__f2__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))=__f3__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))=__f4__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))=__f5__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))=__f6__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))) /* ?{} */);
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))) /* ?{} */);
+}
+static inline void ___constructor__F_P4sFdliiiiiii_autogen___1(struct Fdl *___dst__P4sFdl_1, int __f1__i_1, int __f2__i_1, int __f3__i_1, int __f4__i_1, int __f5__i_1, int __f6__i_1, int __f7__i_1){
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))=__f1__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))=__f2__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))=__f3__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))=__f4__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))=__f5__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))=__f6__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))=__f7__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))) /* ?{} */);
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))) /* ?{} */);
+}
+static inline void ___constructor__F_P4sFdliiiiiiii_autogen___1(struct Fdl *___dst__P4sFdl_1, int __f1__i_1, int __f2__i_1, int __f3__i_1, int __f4__i_1, int __f5__i_1, int __f6__i_1, int __f7__i_1, int __f8__i_1){
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))=__f1__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))=__f2__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))=__f3__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))=__f4__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))=__f5__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))=__f6__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))=__f7__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))=__f8__i_1) /* ?{} */);
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))) /* ?{} */);
+}
+static inline void ___constructor__F_P4sFdliiiiiiiiPi_autogen___1(struct Fdl *___dst__P4sFdl_1, int __f1__i_1, int __f2__i_1, int __f3__i_1, int __f4__i_1, int __f5__i_1, int __f6__i_1, int __f7__i_1, int __f8__i_1, int *__f9__Pi_1){
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f1__i_1)))=__f1__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f2__i_1)))=__f2__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f3__i_1)))=__f3__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f4__i_1)))=__f4__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f5__i_1)))=__f5__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f6__i_1)))=__f6__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f7__i_1)))=__f7__i_1) /* ?{} */);
+    ((void)((*((int *)(&(*___dst__P4sFdl_1).__f8__i_1)))=__f8__i_1) /* ?{} */);
+    ((void)((*((int **)(&(*___dst__P4sFdl_1).__f9__Pi_1)))=__f9__Pi_1) /* ?{} */);
+}
+__attribute__ ((unused)) int __f__Fi___1() asm ( "xyz" );
+__attribute__ ((used,used)) const int __vd1__Ci_1;
+__attribute__ ((used,unused)) const int __vd2__Ci_1;
+__attribute__ ((used,used,used,used)) const int *__vd3__PCi_1;
+__attribute__ ((used,used,unused,used,unused)) const int *__vd4__PCi_1;
+__attribute__ ((used,used,used)) const int __vd5__A0Ci_1[((unsigned int )5)];
+__attribute__ ((used,used,unused,used)) const int __vd6__A0Ci_1[((unsigned int )5)];
+__attribute__ ((used,used,used,used)) const int (*__vd7__PFCi___1)();
+__attribute__ ((used,used,unused,used,used)) const int (*__vd8__PFCi___1)();
+__attribute__ ((unused,used)) int __f1__Fi___1();
+__attribute__ ((unused)) int __f1__Fi___1(){
+    int ___retval_f1__i_1;
+}
+__attribute__ ((unused,unused,unused,used)) int **const __f2__FCPPi___1();
+__attribute__ ((unused,unused,unused)) int **const __f2__FCPPi___1(){
+    int **const ___retval_f2__CPPi_1;
+}
+__attribute__ ((unused,used,unused)) int (*__f3__FPA0i_i__1(int __anonymous_object1))[];
+__attribute__ ((unused,unused)) int (*__f3__FPA0i_i__1(int __p__i_1))[]{
+    int (*___retval_f3__PA0i_1)[];
+}
+__attribute__ ((unused,used,unused)) int (*__f4__FPFi_i____1())(int __anonymous_object2);
+__attribute__ ((unused,unused)) int (*__f4__FPFi_i____1())(int __anonymous_object3){
+    int (*___retval_f4__PFi_i__1)(int __anonymous_object4);
+}
+int __vtr__Fi___1(){
+    int ___retval_vtr__i_1;
+    __attribute__ ((unused,unused,used)) int __t1__i_2;
+    __attribute__ ((unused,unused,unused,unused,unused)) int **__t2__PPi_2;
+    __attribute__ ((unused,unused,unused)) int __t3__A0i_2[((unsigned int )5)];
+    __attribute__ ((unused,unused,unused,unused,unused)) int **__t4__A0PPi_2[((unsigned int )5)];
+    __attribute__ ((unused,unused,unused)) int __t5__Fi___2();
+    __attribute__ ((unused,unused,unused,unused)) int *__t6__FPi___2();
+}
+int __ipd1__Fi_ii__1(__attribute__ ((unused,unused,unused)) int __p__i_1, __attribute__ ((unused,unused,unused)) int __q__i_1);
+int __ipd1__Fi_ii__1(__attribute__ ((unused,unused,unused)) int __p__i_1, __attribute__ ((unused,unused,unused)) int __q__i_1){
+    int ___retval_ipd1__i_1;
+}
+int __ipd2__Fi_PiPi__1(__attribute__ ((unused,unused,unused,unused)) int *__p__Pi_1, __attribute__ ((unused,unused,unused)) int *__q__Pi_1);
+int __ipd2__Fi_PiPi__1(__attribute__ ((unused,unused,unused,unused)) int *__p__Pi_1, __attribute__ ((unused,unused,unused)) int *__q__Pi_1){
+    int ___retval_ipd2__i_1;
+}
+int __ipd3__Fi_PiPi__1(__attribute__ ((unused,unused,unused)) int *__p__Pi_1, __attribute__ ((unused,unused,unused)) int *__q__Pi_1);
+int __ipd3__Fi_PiPi__1(__attribute__ ((unused,unused,unused)) int *__p__Pi_1, __attribute__ ((unused,unused,unused)) int *__q__Pi_1){
+    int ___retval_ipd3__i_1;
+}
+int __ipd4__Fi_PFi__PFi____1(__attribute__ ((unused,unused,unused)) int (*__p__PFi___1)(), __attribute__ ((unused,unused,unused)) int (*__q__PFi___1)());
+int __ipd4__Fi_PFi__PFi____1(__attribute__ ((unused,unused,unused)) int (*__p__PFi___1)(), __attribute__ ((unused,unused,unused)) int (*__q__PFi___1)()){
+    int ___retval_ipd4__i_1;
+}
+int __tpr1__Fi_i__1(__attribute__ ((unused,unused,unused)) int __Foo__i_1);
+int __tpr2__Fi_PPi__1(__attribute__ ((unused,unused,unused,unused,unused,unused)) int **__Foo__PPi_1);
+int __tpr3__Fi_Pi__1(__attribute__ ((unused,unused,unused)) int *__Foo__Pi_1);
+int __tpr4__Fi_PFi_Pi___1(__attribute__ ((unused,unused)) int (*__anonymous_object5)(__attribute__ ((unused,unused)) int __anonymous_object6[((unsigned int )5)]));
+int __tpr5__Fi_PFi____1(__attribute__ ((unused,unused,unused)) int (*__Foo__PFi___1)());
+int __tpr6__Fi_PFi____1(__attribute__ ((unused,unused,unused)) int (*__Foo__PFi___1)());
+int __tpr7__Fi_PFi_PFi_i____1(__attribute__ ((unused,unused)) int (*__anonymous_object7)(__attribute__ ((unused)) int (*__anonymous_object8)(__attribute__ ((unused,unused)) int __anonymous_object9)));
+int __ad__Fi___1(){
+    int ___retval_ad__i_1;
+    __attribute__ ((used,unused)) int __ad1__i_2;
+    __attribute__ ((unused,unused,unused)) int *__ad2__Pi_2;
+    __attribute__ ((unused,unused,unused)) int __ad3__A0i_2[((unsigned int )5)];
+    __attribute__ ((unused,unused,unused,unused,unused)) int (*__ad4__PA0i_2)[((unsigned int )10)];
+    __attribute__ ((unused,unused,unused,unused,used)) int __ad5__i_2;
+    __attribute__ ((unused,unused,unused,unused,unused)) int __ad6__Fi___2();
+    ((void)sizeof(__attribute__ ((unused,unused)) int ));
+    ((void)sizeof(__attribute__ ((unused,unused,unused,unused)) int **));
+    ((void)sizeof(__attribute__ ((unused,unused,unused)) int [5]));
+    ((void)sizeof(__attribute__ ((unused,unused,unused)) int (*)[10]));
+    ((void)sizeof(__attribute__ ((unused,unused,unused)) int ()));
+    __attribute__ ((unused)) struct __anonymous4 {
+        int __i__i_2;
+    };
+    inline void ___constructor__F_P13s__anonymous4_autogen___2(struct __anonymous4 *___dst__P13s__anonymous4_2){
+        ((void)((*((int *)(&(*___dst__P13s__anonymous4_2).__i__i_2)))) /* ?{} */);
+    }
+    inline void ___constructor__F_P13s__anonymous413s__anonymous4_autogen___2(struct __anonymous4 *___dst__P13s__anonymous4_2, struct __anonymous4 ___src__13s__anonymous4_2){
+        ((void)((*((int *)(&(*___dst__P13s__anonymous4_2).__i__i_2)))=___src__13s__anonymous4_2.__i__i_2) /* ?{} */);
+    }
+    inline void ___destructor__F_P13s__anonymous4_autogen___2(struct __anonymous4 *___dst__P13s__anonymous4_2){
+        ((void)((*((int *)(&(*___dst__P13s__anonymous4_2).__i__i_2)))) /* ^?{} */);
+    }
+    inline struct __anonymous4 ___operator_assign__F13s__anonymous4_P13s__anonymous413s__anonymous4_autogen___2(struct __anonymous4 *___dst__P13s__anonymous4_2, struct __anonymous4 ___src__13s__anonymous4_2){
+        ((void)((*___dst__P13s__anonymous4_2).__i__i_2=___src__13s__anonymous4_2.__i__i_2));
+        return ((struct __anonymous4 )___src__13s__anonymous4_2);
+    }
+    inline void ___constructor__F_P13s__anonymous4i_autogen___2(struct __anonymous4 *___dst__P13s__anonymous4_2, int __i__i_2){
+        ((void)((*((int *)(&(*___dst__P13s__anonymous4_2).__i__i_2)))=__i__i_2) /* ?{} */);
+    }
+    ((void)sizeof(struct __anonymous4 ));
+    enum __attribute__ ((unused)) __anonymous5 {
+        __R__C13e__anonymous5_2,
+    };
+    inline void ___constructor__F_P13e__anonymous5_intrinsic___2(enum __anonymous5 *___dst__P13e__anonymous5_2){
+    }
+    inline void ___constructor__F_P13e__anonymous513e__anonymous5_intrinsic___2(enum __anonymous5 *___dst__P13e__anonymous5_2, enum __anonymous5 ___src__13e__anonymous5_2){
+        ((void)((*___dst__P13e__anonymous5_2)=___src__13e__anonymous5_2));
+    }
+    inline void ___destructor__F_P13e__anonymous5_intrinsic___2(enum __anonymous5 *___dst__P13e__anonymous5_2){
+    }
+    inline enum __anonymous5 ___operator_assign__F13e__anonymous5_P13e__anonymous513e__anonymous5_intrinsic___2(enum __anonymous5 *___dst__P13e__anonymous5_2, enum __anonymous5 ___src__13e__anonymous5_2){
+        return ((enum __anonymous5 )((*___dst__P13e__anonymous5_2)=___src__13e__anonymous5_2));
+    }
+    ((void)sizeof(enum __anonymous5 ));
+}
+int __apd1__Fi_PiPi__1(__attribute__ ((unused,unused,unused)) int *__anonymous_object10, __attribute__ ((unused,unused,unused)) int *__anonymous_object11);
+int __apd2__Fi_PPiPPi__1(__attribute__ ((unused,unused,unused,unused)) int **__anonymous_object12, __attribute__ ((unused,unused,unused,unused)) int **__anonymous_object13);
+int __apd3__Fi_PiPi__1(__attribute__ ((unused,unused,unused)) int *__anonymous_object14, __attribute__ ((unused,unused,unused)) int *__anonymous_object15);
+int __apd4__Fi_PFi__PFi____1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object16)(), __attribute__ ((unused,unused,unused)) int (*__anonymous_object17)());
+int __apd5__Fi_PFi_i_PFi_i___1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object18)(__attribute__ ((unused)) int __anonymous_object19), __attribute__ ((unused,unused,unused)) int (*__anonymous_object20)(__attribute__ ((unused)) int __anonymous_object21));
+int __apd6__Fi_PFi__PFi____1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object22)(), __attribute__ ((unused,unused,unused)) int (*__anonymous_object23)());
+int __apd7__Fi_PFi_i_PFi_i___1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object24)(__attribute__ ((unused)) int __anonymous_object25), __attribute__ ((unused,unused,unused)) int (*__anonymous_object26)(__attribute__ ((unused)) int __anonymous_object27));
+struct Vad {
+    __attribute__ ((unused)) int __anonymous_object28;
+    __attribute__ ((unused,unused)) int *__anonymous_object29;
+    __attribute__ ((unused,unused)) int __anonymous_object30[((unsigned int )10)];
+    __attribute__ ((unused,unused)) int (*__anonymous_object31)();
+};
+static inline void ___constructor__F_P4sVad_autogen___1(struct Vad *___dst__P4sVad_1);
+static inline void ___constructor__F_P4sVad4sVad_autogen___1(struct Vad *___dst__P4sVad_1, struct Vad ___src__4sVad_1);
+static inline void ___destructor__F_P4sVad_autogen___1(struct Vad *___dst__P4sVad_1);
+static inline struct Vad ___operator_assign__F4sVad_P4sVad4sVad_autogen___1(struct Vad *___dst__P4sVad_1, struct Vad ___src__4sVad_1);
+static inline void ___constructor__F_P4sVad_autogen___1(struct Vad *___dst__P4sVad_1){
+}
+static inline void ___constructor__F_P4sVad4sVad_autogen___1(struct Vad *___dst__P4sVad_1, struct Vad ___src__4sVad_1){
+}
+static inline void ___destructor__F_P4sVad_autogen___1(struct Vad *___dst__P4sVad_1){
+}
+static inline struct Vad ___operator_assign__F4sVad_P4sVad4sVad_autogen___1(struct Vad *___dst__P4sVad_1, struct Vad ___src__4sVad_1){
+    return ((struct Vad )___src__4sVad_1);
+}
Index: src/tests/.expect/64/attributes.txt
===================================================================
--- src/tests/.expect/64/attributes.txt	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.expect/64/attributes.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -9,5 +9,5 @@
     L: __attribute__ ((unused)) ((void)1);
 }
-struct __attribute__ ((unused)) __anonymous0 {
+__attribute__ ((unused)) struct __anonymous0 {
 };
 static inline void ___constructor__F_P13s__anonymous0_autogen___1(struct __anonymous0 *___dst__P13s__anonymous0_1);
@@ -24,6 +24,6 @@
     return ((struct __anonymous0 )___src__13s__anonymous0_1);
 }
-struct __attribute__ ((unused)) Agn1;
-struct __attribute__ ((unused)) Agn2 {
+__attribute__ ((unused)) struct Agn1;
+__attribute__ ((unused)) struct Agn2 {
 };
 static inline void ___constructor__F_P5sAgn2_autogen___1(struct Agn2 *___dst__P5sAgn2_1);
@@ -47,6 +47,6 @@
     __E2__C5eAgn3_1,
 };
-struct __attribute__ ((unused)) __anonymous2;
-struct __attribute__ ((unused)) __anonymous3;
+__attribute__ ((unused)) struct __anonymous2;
+__attribute__ ((unused)) struct __anonymous3;
 struct Fdl {
     __attribute__ ((unused)) int __f1__i_1;
@@ -234,10 +234,4 @@
     int (*___retval_f4__PFi_i__1)(int __anonymous_object4);
 }
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(long unsigned int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern int atexit0(void (*__func)(void), void *__anonymous_object5, void *__anonymous_object6);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(int __status);
-__attribute__ ((format(printf, 1, 2))) extern int printf(const char *__restrict __format, ...);
 int __vtr__Fi___1(){
     int ___retval_vtr__i_1;
@@ -268,8 +262,8 @@
 int __tpr2__Fi_PPi__1(__attribute__ ((unused,unused,unused,unused,unused,unused)) int **__Foo__PPi_1);
 int __tpr3__Fi_Pi__1(__attribute__ ((unused,unused,unused)) int *__Foo__Pi_1);
-int __tpr4__Fi_PFi_Pi___1(__attribute__ ((unused,unused)) int (*__anonymous_object7)(__attribute__ ((unused,unused)) int __anonymous_object8[((long unsigned int )5)]));
+int __tpr4__Fi_PFi_Pi___1(__attribute__ ((unused,unused)) int (*__anonymous_object5)(__attribute__ ((unused,unused)) int __anonymous_object6[((long unsigned int )5)]));
 int __tpr5__Fi_PFi____1(__attribute__ ((unused,unused,unused)) int (*__Foo__PFi___1)());
 int __tpr6__Fi_PFi____1(__attribute__ ((unused,unused,unused)) int (*__Foo__PFi___1)());
-int __tpr7__Fi_PFi_PFi_i____1(__attribute__ ((unused,unused)) int (*__anonymous_object9)(__attribute__ ((unused)) int (*__anonymous_object10)(__attribute__ ((unused,unused)) int __anonymous_object11)));
+int __tpr7__Fi_PFi_PFi_i____1(__attribute__ ((unused,unused)) int (*__anonymous_object7)(__attribute__ ((unused)) int (*__anonymous_object8)(__attribute__ ((unused,unused)) int __anonymous_object9)));
 int __ad__Fi___1(){
     int ___retval_ad__i_1;
@@ -285,5 +279,5 @@
     ((void)sizeof(__attribute__ ((unused,unused,unused)) int (*)[10]));
     ((void)sizeof(__attribute__ ((unused,unused,unused)) int ()));
-    struct __attribute__ ((unused)) __anonymous4 {
+    __attribute__ ((unused)) struct __anonymous4 {
         int __i__i_2;
     };
@@ -320,16 +314,16 @@
     ((void)sizeof(enum __anonymous5 ));
 }
-int __apd1__Fi_PiPi__1(__attribute__ ((unused,unused,unused)) int *__anonymous_object12, __attribute__ ((unused,unused,unused)) int *__anonymous_object13);
-int __apd2__Fi_PPiPPi__1(__attribute__ ((unused,unused,unused,unused)) int **__anonymous_object14, __attribute__ ((unused,unused,unused,unused)) int **__anonymous_object15);
-int __apd3__Fi_PiPi__1(__attribute__ ((unused,unused,unused)) int *__anonymous_object16, __attribute__ ((unused,unused,unused)) int *__anonymous_object17);
-int __apd4__Fi_PFi__PFi____1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object18)(), __attribute__ ((unused,unused,unused)) int (*__anonymous_object19)());
-int __apd5__Fi_PFi_i_PFi_i___1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object20)(__attribute__ ((unused)) int __anonymous_object21), __attribute__ ((unused,unused,unused)) int (*__anonymous_object22)(__attribute__ ((unused)) int __anonymous_object23));
-int __apd6__Fi_PFi__PFi____1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object24)(), __attribute__ ((unused,unused,unused)) int (*__anonymous_object25)());
-int __apd7__Fi_PFi_i_PFi_i___1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object26)(__attribute__ ((unused)) int __anonymous_object27), __attribute__ ((unused,unused,unused)) int (*__anonymous_object28)(__attribute__ ((unused)) int __anonymous_object29));
+int __apd1__Fi_PiPi__1(__attribute__ ((unused,unused,unused)) int *__anonymous_object10, __attribute__ ((unused,unused,unused)) int *__anonymous_object11);
+int __apd2__Fi_PPiPPi__1(__attribute__ ((unused,unused,unused,unused)) int **__anonymous_object12, __attribute__ ((unused,unused,unused,unused)) int **__anonymous_object13);
+int __apd3__Fi_PiPi__1(__attribute__ ((unused,unused,unused)) int *__anonymous_object14, __attribute__ ((unused,unused,unused)) int *__anonymous_object15);
+int __apd4__Fi_PFi__PFi____1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object16)(), __attribute__ ((unused,unused,unused)) int (*__anonymous_object17)());
+int __apd5__Fi_PFi_i_PFi_i___1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object18)(__attribute__ ((unused)) int __anonymous_object19), __attribute__ ((unused,unused,unused)) int (*__anonymous_object20)(__attribute__ ((unused)) int __anonymous_object21));
+int __apd6__Fi_PFi__PFi____1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object22)(), __attribute__ ((unused,unused,unused)) int (*__anonymous_object23)());
+int __apd7__Fi_PFi_i_PFi_i___1(__attribute__ ((unused,unused,unused)) int (*__anonymous_object24)(__attribute__ ((unused)) int __anonymous_object25), __attribute__ ((unused,unused,unused)) int (*__anonymous_object26)(__attribute__ ((unused)) int __anonymous_object27));
 struct Vad {
-    __attribute__ ((unused)) int __anonymous_object30;
-    __attribute__ ((unused,unused)) int *__anonymous_object31;
-    __attribute__ ((unused,unused)) int __anonymous_object32[((long unsigned int )10)];
-    __attribute__ ((unused,unused)) int (*__anonymous_object33)();
+    __attribute__ ((unused)) int __anonymous_object28;
+    __attribute__ ((unused,unused)) int *__anonymous_object29;
+    __attribute__ ((unused,unused)) int __anonymous_object30[((long unsigned int )10)];
+    __attribute__ ((unused,unused)) int (*__anonymous_object31)();
 };
 static inline void ___constructor__F_P4sVad_autogen___1(struct Vad *___dst__P4sVad_1);
Index: src/tests/.expect/castError.txt
===================================================================
--- src/tests/.expect/castError.txt	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.expect/castError.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -39,3 +39,2 @@
 
 
-make: *** [castError] Error 1
Index: src/tests/.expect/completeTypeError.txt
===================================================================
--- src/tests/.expect/completeTypeError.txt	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.expect/completeTypeError.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -41,3 +41,2 @@
 
 
-make: *** [completeTypeError] Error 1
Index: src/tests/.expect/concurrent/sched-int-multi.txt
===================================================================
--- src/tests/.expect/concurrent/sched-int-multi.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
+++ src/tests/.expect/concurrent/sched-int-multi.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -0,0 +1,7 @@
+Waiting All
+Entering A
+Entering A & B
+Signal
+Leaving  A & B
+Leaving  A
+Done waiting
Index: src/tests/.expect/concurrent/sched-int-multi2.txt
===================================================================
--- src/tests/.expect/concurrent/sched-int-multi2.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
+++ src/tests/.expect/concurrent/sched-int-multi2.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -0,0 +1,12 @@
+Waiting 1
+Waiting 2
+Waiting 3
+Waiting 4
+Signaling ABC
+Signaling AB
+Signaling BC
+Signaling AC
+Waking 4
+Waking 3
+Waking 2
+Waking 1
Index: src/tests/.expect/concurrent/sched-int.txt
===================================================================
--- src/tests/.expect/concurrent/sched-int.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
+++ src/tests/.expect/concurrent/sched-int.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -0,0 +1,3 @@
+Step 1
+Step 2
+Step 3
Index: src/tests/.expect/constant0-1DP.txt
===================================================================
--- src/tests/.expect/constant0-1DP.txt	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.expect/constant0-1DP.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -31,3 +31,2 @@
 constant0-1.c:50 error: duplicate object definition for x: const pointer to pointer to signed int
 constant0-1.c:50 error: duplicate object definition for 0: pointer to pointer to signed int
-make: *** [constant0-1DP] Error 1
Index: src/tests/.expect/constant0-1NDDP.txt
===================================================================
--- src/tests/.expect/constant0-1NDDP.txt	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.expect/constant0-1NDDP.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -15,3 +15,2 @@
 constant0-1.c:67 error: duplicate object definition for x: const pointer to signed int
 constant0-1.c:67 error: duplicate object definition for 0: const pointer to signed int
-make: *** [constant0-1NDDP] Error 1
Index: src/tests/.expect/declarationErrors.txt
===================================================================
--- src/tests/.expect/declarationErrors.txt	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.expect/declarationErrors.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -67,3 +67,2 @@
 
 
-make: *** [declarationErrors] Error 1
Index: src/tests/.expect/dtor-early-exit-ERR1.txt
===================================================================
--- src/tests/.expect/dtor-early-exit-ERR1.txt	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.expect/dtor-early-exit-ERR1.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -1,3 +1,1 @@
 dtor-early-exit.c:142 error: jump to label 'L1' crosses initialization of y Branch (Goto)
-
-make: *** [dtor-early-exit-ERR1] Error 1
Index: src/tests/.expect/dtor-early-exit-ERR2.txt
===================================================================
--- src/tests/.expect/dtor-early-exit-ERR2.txt	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.expect/dtor-early-exit-ERR2.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -1,3 +1,1 @@
 dtor-early-exit.c:142 error: jump to label 'L2' crosses initialization of y Branch (Goto)
-
-make: *** [dtor-early-exit-ERR2] Error 1
Index: src/tests/.expect/memberCtors-ERR1.txt
===================================================================
--- src/tests/.expect/memberCtors-ERR1.txt	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.expect/memberCtors-ERR1.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -1,2 +1,1 @@
 memberCtors.c:62 error: in void ?{}(B *b), field a2 used before being constructed
-make: *** [memberCtors-ERR1] Error 1
Index: src/tests/.expect/rational.txt
===================================================================
--- src/tests/.expect/rational.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
+++ src/tests/.expect/rational.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -0,0 +1,37 @@
+constructor
+3/1 4/1 0/1
+1/2 5/7
+2/3 -3/2
+-2/3 3/2
+logical
+-2/1 -3/2
+1
+1
+1
+0
+0
+arithmetic
+-2/1 -3/2
+-7/2
+-1/2
+3/1
+4/3
+conversion
+0.75
+0.142857142857143
+3.14159292035398
+3/4
+1/7
+355/113
+decompose
+355/113 0 9
+more tests
+-3/2
+0
+1/2 1 1/2
+2/1 1 2/1
+0/1
+1/2 1 1/2
+2/2147483647
+5/2147483647
+2/3 4/5
Index: src/tests/.expect/scopeErrors.txt
===================================================================
--- src/tests/.expect/scopeErrors.txt	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.expect/scopeErrors.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -7,4 +7,2 @@
   with body 
     CompoundStmt
-
-make: *** [scopeErrors] Error 1
Index: src/tests/.gitignore
===================================================================
--- src/tests/.gitignore	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/.gitignore	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -1,1 +1,2 @@
 .out/
+.err/
Index: src/tests/.in/rational.txt
===================================================================
--- src/tests/.in/rational.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
+++ src/tests/.in/rational.txt	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -0,0 +1,1 @@
+2 3 4 5
Index: src/tests/Makefile.am
===================================================================
--- src/tests/Makefile.am	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/Makefile.am	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -22,12 +22,15 @@
 concurrent=yes
 quick_test+= coroutine thread monitor
+concurrent_test=coroutine thread monitor multi-monitor sched-int sched-int-multi sched-int-multi2 sched-ext sched-ext-multi preempt
 else
 concurrent=no
+concurrent_test=
 endif
-
 
 # applies to both programs
 EXTRA_FLAGS =
-CFLAGS = -g -Wall -Wno-unused-function @CFA_FLAGS@ ${EXTRA_FLAGS}
+BUILD_FLAGS = -g -Wall -Wno-unused-function @CFA_FLAGS@ ${EXTRA_FLAGS}
+TEST_FLAGS = $(if $(test), 2> .err/${@}.log, )
+CFLAGS = ${TEST_FLAGS} ${BUILD_FLAGS}
 CC = @CFA_BINDIR@/@CFA_NAME@
 
@@ -51,6 +54,9 @@
 	@+python test.py --list --concurrent=${concurrent}
 
+concurrency :
+	@+python test.py --debug=${debug} --concurrent=${concurrent} ${concurrent_test}
+
 .dummy : .dummy.c
-	${CC} ${CFLAGS} -XCFA -n ${<} -o ${@}
+	${CC} ${BUILD_FLAGS} -XCFA -n ${<} -o ${@}				#don't use CFLAGS, this rule is not a real test
 
 constant0-1DP : constant0-1.c
Index: src/tests/Makefile.in
===================================================================
--- src/tests/Makefile.in	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/Makefile.in	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -125,5 +125,5 @@
 CFA_NAME = @CFA_NAME@
 CFA_PREFIX = @CFA_PREFIX@
-CFLAGS = -g -Wall -Wno-unused-function @CFA_FLAGS@ ${EXTRA_FLAGS}
+CFLAGS = ${TEST_FLAGS} ${BUILD_FLAGS}
 CPP = @CPP@
 CPPFLAGS = @CPPFLAGS@
@@ -229,7 +229,11 @@
 @BUILD_CONCURRENCY_FALSE@concurrent = no
 @BUILD_CONCURRENCY_TRUE@concurrent = yes
+@BUILD_CONCURRENCY_FALSE@concurrent_test = 
+@BUILD_CONCURRENCY_TRUE@concurrent_test = coroutine thread monitor multi-monitor sched-int sched-int-multi sched-int-multi2 sched-ext sched-ext-multi preempt
 
 # applies to both programs
 EXTRA_FLAGS = 
+BUILD_FLAGS = -g -Wall -Wno-unused-function @CFA_FLAGS@ ${EXTRA_FLAGS}
+TEST_FLAGS = $(if $(test), 2> .err/${@}.log, )
 fstream_test_SOURCES = fstream_test.c
 vector_test_SOURCES = vector/vector_int.c vector/array.c vector/vector_test.c
@@ -669,6 +673,9 @@
 	@+python test.py --list --concurrent=${concurrent}
 
+concurrency :
+	@+python test.py --debug=${debug} --concurrent=${concurrent} ${concurrent_test}
+
 .dummy : .dummy.c
-	${CC} ${CFLAGS} -XCFA -n ${<} -o ${@}
+	${CC} ${BUILD_FLAGS} -XCFA -n ${<} -o ${@}				#don't use CFLAGS, this rule is not a real test
 
 constant0-1DP : constant0-1.c
Index: src/tests/attributes.c
===================================================================
--- src/tests/attributes.c	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/attributes.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -68,19 +68,4 @@
 __attribute__(( unused )) int (* __attribute__(( unused )) f4())(int) __attribute__(( used ));
 __attribute__(( unused )) int (* __attribute__(( unused )) f4())(int) {}
-
-#ifdef __CFA__
-extern "C" {
-#endif // __CFA__
-typedef long int ptrdiff_t;
-typedef long unsigned int size_t;
-extern void *malloc (size_t __size) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__malloc__)) ;
-extern void free (void *__ptr) __attribute__ ((__nothrow__ , __leaf__));
-extern void abort (void) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__noreturn__));
-extern int atexit0 (void (*__func) (void), void *, void *) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1)));
-extern void exit (int __status) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__noreturn__));
-extern int printf (__const char *__restrict __format, ...) __attribute__ ((format (printf, 1, 2)));
-#ifdef __CFA__
-}
-#endif // __CFA__
 
 
Index: src/tests/pybin/__init__.py
===================================================================
--- src/tests/pybin/__init__.py	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
+++ src/tests/pybin/__init__.py	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -0,0 +1,1 @@
+#This file is empty but needs to exist for python import to work
Index: src/tests/pybin/tools.py
===================================================================
--- src/tests/pybin/tools.py	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
+++ src/tests/pybin/tools.py	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -0,0 +1,96 @@
+import __main__
+import argparse
+import os
+import re
+import stat
+
+from subprocess import Popen, PIPE, STDOUT
+
+# helper functions to run terminal commands
+def sh(cmd, dry_run = False, print2stdout = True):
+	if dry_run : 	# dry run: only print the command that would be run, report success with no output
+		print("cmd: %s" % cmd)
+		return 0, None
+	else :			# otherwise run cmd through the shell; stdout is piped back only when print2stdout is False, stderr is merged into stdout
+		proc = Popen(cmd, stdout=None if print2stdout else PIPE, stderr=STDOUT, shell=True)
+		out, err = proc.communicate()
+		return proc.returncode, out
+
+# Remove 1 or more files silently (accepts a single path or an iterable of paths)
+def rm( files, dry_run = False ):
+	# a lone path is a string: iterating it would yield one character at a time
+	if isinstance(files, str) or not hasattr(files, "__iter__"):
+		files = [files]
+	for file in files:
+		sh("rm -f %s > /dev/null 2>&1" % file, dry_run)
+
+def chdir( dest = __main__.__file__ ):
+	abspath = os.path.abspath(dest)
+	dname = os.path.dirname(abspath)
+	os.chdir(dname)
+
+# helper function to replace patterns in a file
+def file_replace(fname, pat, s_after):
+    # first, see if the pattern is even in the file.
+    with open(fname) as f:
+        if not any(re.search(pat, line) for line in f):
+            return # pattern does not occur in file so we are done.
+
+    # pattern is in the file, so write the substituted lines to a temporary
+    # file; both handles are closed by the with-statement even if a write or
+    # substitution raises, and only then is the result moved over the original.
+    out_fname = fname + ".tmp"
+    with open(fname) as f, open(out_fname, "w") as out:
+        for line in f:
+            out.write(re.sub(pat, s_after, line))
+    os.rename(out_fname, fname)
+
+# helper function to check if a file contains only a specific string
+def fileContainsOnly(file, text) :
+	# surrounding whitespace is ignored on both the file contents and text
+	with open(file) as f:
+		contents = f.read().strip()
+
+	return contents == text.strip()
+
+# check whether or not a file is executable
+def fileIsExecutable(file) :
+	try :
+		fileinfo = os.stat(file)
+		return bool(fileinfo.st_mode & stat.S_IXUSR)
+	except Exception as inst:
+		print(type(inst))    # the exception instance
+		print(inst.args)     # arguments stored in .args
+		print(inst)
+		return False
+
+# check if argument is yes or no
+def yes_no(string):
+	if string == "yes" :
+		return True
+	if string == "no" :
+		return False
+	# any other value is rejected with an explanatory argparse error
+	raise argparse.ArgumentTypeError("expected 'yes' or 'no', got '%s'" % string)
+
+# diff two files
+def diff( lhs, rhs, dry_run ):
+	# diff the output of the files
+	diff_cmd = ("diff --ignore-all-space "
+				"--ignore-blank-lines "
+				"--old-group-format='\t\tmissing lines :\n"
+				"%%<' \\\n"
+				"--new-group-format='\t\tnew lines :\n"
+				"%%>' \\\n"
+				"--unchanged-group-format='%%=' \\"
+				"--changed-group-format='\t\texpected :\n"
+				"%%<\n"
+				"\t\tgot :\n"
+				"%%>' \\\n"
+				"--new-line-format='\t\t%%dn\t%%L' \\\n"
+				"--old-line-format='\t\t%%dn\t%%L' \\\n"
+				"--unchanged-line-format='' \\\n"
+				"%s %s")
+
+	# fetch return code and error from the diff command
+	return sh(diff_cmd % (lhs, rhs), dry_run, False)	
Index: src/tests/rational.c
===================================================================
--- src/tests/rational.c	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/rational.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -10,6 +10,6 @@
 // Created On       : Mon Mar 28 08:43:12 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Jul  5 18:29:37 2016
-// Update Count     : 25
+// Last Modified On : Thu Apr 27 17:05:19 2017
+// Update Count     : 40
 // 
 
@@ -36,5 +36,5 @@
 	b = (Rational){ -3, 2 };
 	sout | a | b | endl;
-	sout | a == 1 | endl;
+//	sout | a == 1 | endl; // FIX ME
 	sout | a != b | endl;
 	sout | a <  b | endl;
@@ -61,4 +61,10 @@
 	sout | narrow( 3.14159265358979, 256 ) | endl;
 
+	sout | "decompose" | endl;
+	RationalImpl n, d;
+	[n, d] = a;
+	sout | a | n | d | endl;
+
+	sout | "more tests" | endl;
 	Rational x = { 1, 2 }, y = { 2 };
 	sout | x - y | endl;
Index: src/tests/sched-int-multi.c
===================================================================
--- src/tests/sched-int-multi.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
+++ src/tests/sched-int-multi.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -0,0 +1,53 @@
+#include <fstream>
+#include <kernel>
+#include <monitor>
+#include <thread>
+
+monitor global_t {};	// monitor type with no fields
+
+global_t globalB;	// the two monitor instances handed to the mutex routines below
+global_t globalA;
+
+condition cond;	// waited on in signalee_action, signalled in signaler_action_inner
+
+thread Signalee {};	// thread types; each has a matching main( T* ) routine below
+thread Signaler {};
+
+void signalee_action( global_t * mutex a, global_t * mutex b ) {	// both parameters are declared mutex
+	sout | "Waiting All" | endl;
+	wait( &cond );	// wait on the file-level condition
+	sout | "Done waiting" | endl;	// printed once wait() has returned
+}
+
+void main( Signalee* this ) {	// main routine for Signalee: a single call with both global monitors
+	signalee_action( &globalA, &globalB );
+}
+
+void signaler_action_inner( global_t * mutex a, global_t * mutex b ) {	// both a and b are mutex at this level
+	sout | "Entering A & B" | endl;
+	sout | "Signal" | endl;
+	signal( &cond );	// signal the condition that signalee_action waits on
+	sout | "Leaving  A & B" | endl;
+}
+
+void signaler_action( global_t * mutex a, global_t * b ) {	// only a is mutex here; b is a plain pointer
+	sout | "Entering A" | endl;
+	signaler_action_inner( a, b );	// the inner routine declares both a and b mutex
+	sout | "Leaving  A" | endl;
+}
+
+void main( Signaler* this ) {	// main routine for Signaler
+	for(int i = 0; i < 10_000; i++) {	// 10_000 empty iterations before signalling (presumably a delay so Signalee waits first; confirm)
+		asm volatile ("" : : : "memory");	// empty asm statement with a "memory" clobber is the whole loop body
+	}
+
+	signaler_action( &globalA, &globalB );
+}
+
+int main(int argc, char* argv[]) {
+	processor p;	// declared for the whole of main
+	{
+		Signalee a;	// one thread object of each type, scoped to this inner block
+		Signaler b;
+	}	// NOTE(review): presumably both threads are joined when the block ends; confirm
+}
Index: src/tests/sched-int-multi2.c
===================================================================
--- src/tests/sched-int-multi2.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
+++ src/tests/sched-int-multi2.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -0,0 +1,119 @@
+#include <fstream>
+#include <kernel>
+#include <monitor>
+#include <thread>
+
+monitor global_t {};	// monitor type with no fields
+
+global_t globalA;	// three monitor instances, passed in pairs or all together below
+global_t globalB;
+global_t globalC;
+
+condition condAB, condAC, condBC, condABC;	// one condition per waiter thread type
+
+thread Signaler {};	// thread types; each has a matching main( T* ) routine below
+thread WaiterAB {};
+thread WaiterAC {};
+thread WaiterBC {};
+thread WaiterABC{};
+
+int state;	// changed by the wait() wrappers and polled by every thread main
+
+/*
+multi phase
+*/
+
+//----------------------------------------------------------------------------------------------------
+// Tools
+void signal( condition * cond, global_t * mutex a, global_t * mutex b ) {	// signal cond with a and b as mutex parameters
+	signal( cond );
+}
+
+void signal( condition * cond, global_t * mutex a, global_t * mutex b, global_t * mutex c ) {	// same, with three mutex parameters
+	signal( cond );
+}
+
+void wait( condition * cond, global_t * mutex a, global_t * mutex b ) {	// wait on cond with a and b as mutex parameters
+	state++;	// bump the shared counter before waiting; the thread mains poll it
+	sout | "Waiting" | state | endl;
+	wait( cond );
+	sout | "Waking" | state | endl;
+	state--;	// undo the increment once wait( cond ) has returned
+}
+
+void wait( condition * cond, global_t * mutex a, global_t * mutex b, global_t * mutex c ) {	// same, with three mutex parameters
+	state++;	// bump the shared counter before waiting; the thread mains poll it
+	sout | "Waiting" | state | endl;
+	wait( cond );
+	sout | "Waking" | state | endl;
+	state--;	// undo the increment once wait( cond ) has returned
+}
+
+//----------------------------------------------------------------------------------------------------
+// Signaler
+// signals, in order: ABC, AB, BC, AC
+void signalerABC( global_t * mutex a, global_t * mutex b, global_t * mutex c ) {	// a, b and c are all mutex here
+	sout | "Signaling ABC" | endl;
+	signal( &condABC, a, b, c );	// three-monitor signal overload
+	sout | "Signaling AB" | endl;
+	signal( &condAB , a, b );	// this and the remaining calls use the two-monitor overload
+	sout | "Signaling BC" | endl;
+	signal( &condBC , b, c );
+	sout | "Signaling AC" | endl;
+	signal( &condAC , a, c );
+}
+
+void signalerAB( global_t * mutex a, global_t * mutex b, global_t * c ) {	// a and b are mutex; c is a plain pointer
+	signalerABC(a, b, c);	// callee declares c mutex as well
+}
+
+void signalerA( global_t * mutex a, global_t * b, global_t * c ) {	// only a is mutex; b and c are plain pointers
+	signalerAB (a, b, c);	// callee declares b mutex as well
+}
+
+void main( Signaler* this ) {	// main routine for Signaler
+	while( state != 4 ) { yield(); }	// yield until state is 4; each wait() wrapper adds one before waiting
+	signalerA( &globalA, &globalB, &globalC );
+}
+
+//----------------------------------------------------------------------------------------------------
+// Waiter ABC
+void main( WaiterABC* this ) {	// main routine for WaiterABC
+	while( state != 0 ) { yield(); }	// yield until state is 0
+	wait( &condABC, &globalA, &globalB, &globalC );	// three-monitor wait wrapper
+}
+
+//----------------------------------------------------------------------------------------------------
+// Waiter AB
+void main( WaiterAB* this ) {	// main routine for WaiterAB
+	while( state != 1 ) { yield(); }	// yield until state is 1
+	wait( &condAB , &globalA, &globalB );	// two-monitor wait wrapper
+}
+
+//----------------------------------------------------------------------------------------------------
+// Waiter AC
+void main( WaiterAC* this ) {	// main routine for WaiterAC
+	while( state != 2 ) { yield(); }	// yield until state is 2
+	wait( &condAC , &globalA, &globalC );	// two-monitor wait wrapper
+}
+
+//----------------------------------------------------------------------------------------------------
+// Waiter BC
+void main( WaiterBC* this ) {	// main routine for WaiterBC
+	while( state != 3 ) { yield(); }	// yield until state is 3
+	wait( &condBC , &globalB, &globalC );	// two-monitor wait wrapper
+}
+
+//----------------------------------------------------------------------------------------------------
+// Main
+int main(int argc, char* argv[]) {
+	state = 0;	// WaiterABC's main yields until state is 0
+	processor p;	// declared for the whole of main
+	{
+		WaiterABC a;	// one thread object per thread type, scoped to this inner block
+		WaiterAB  b;
+		WaiterBC  c;
+		WaiterAC  d;
+		Signaler  e;
+	}	// NOTE(review): presumably all five threads are joined when the block ends; confirm
+}
Index: src/tests/sched-int.c
===================================================================
--- src/tests/sched-int.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
+++ src/tests/sched-int.c	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -0,0 +1,60 @@
+#include <fstream>
+#include <kernel>
+#include <monitor>
+#include <thread>
+
+monitor global_t {	// monitor type holding the step counter
+	int value;	// set to 1, 2, 3 by step1, step2, step3
+};
+
+global_t global;	// the single monitor instance used by every step
+
+condition cond;	// waited on in step1, signalled in step2 and step3
+
+thread Signalee {};	// thread types; each has a matching main( T* ) routine below
+thread Signaler {};
+
+void step1( global_t * mutex this ) {	// this is a mutex parameter
+	sout | "Step 1" | endl;
+	this->value = 1;	// step2 aborts unless it sees this value
+	wait( &cond );	// wait on the file-level condition
+}
+
+void step2( global_t * mutex this ) {	// this is a mutex parameter
+	if( this->value != 1) abort();	// step1 must have stored 1 already
+
+	sout | "Step 2" | endl;
+	this->value = 2;	// step3 aborts unless it sees this value
+	signal( &cond );	// signal the condition step1 waits on
+}
+
+void step3( global_t * mutex this ) {	// this is a mutex parameter
+	if( this->value != 2) abort();	// step2 must have stored 2 already
+
+	sout | "Step 3" | endl;
+	this->value = 3;	// final value, checked at the end of the program's main
+	signal( &cond );
+}
+
+void main( Signalee* this ) {	// main routine for Signalee
+	step1( &global );	// stores 1, then waits on cond
+	step3( &global );	// runs after step1 returns; expects value 2
+}
+
+void main( Signaler* this ) {	// main routine for Signaler
+	for(int i = 0; i < 10_000; i++) {	// 10_000 empty iterations before step2 (presumably a delay so step1 runs first; confirm)
+		asm volatile ("" : : : "memory");	// empty asm statement with a "memory" clobber is the whole loop body
+	}
+
+	step2( &global );
+}
+
+int main(int argc, char* argv[]) {
+	assert( global.__mon.entry_queue.tail != NULL );	// checks the monitor's entry-queue tail pointer before any thread is declared
+	processor p;	// declared for the whole of main
+	{
+		Signalee a;	// one thread object of each type, scoped to this inner block
+		Signaler b;
+	}
+	if( global.value != 3) abort();	// 3 is the value stored by step3
+}
Index: src/tests/sched_internal.c
===================================================================
--- src/tests/sched_internal.c	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ 	(revision )
@@ -1,56 +1,0 @@
-#include <kernel>
-#include <monitor>
-#include <thread>
-
-monitor global_t {
-	int value;
-};
-
-global_t global;
-
-condition cond;
-
-thread Signalee {};
-thread Signaler {};
-
-void step1( global_t * mutex this ) {
-	this->value = 1;
-	wait( &cond );
-}
-
-void step2( global_t * mutex this ) {
-	if( this->value != 1) abort();
-
-	this->value = 2;
-	signal( &cond );
-}
-
-void step3( global_t * mutex this ) {
-	if( this->value != 2) abort();
-
-	this->value = 2;
-	signal( &cond );
-}
-
-void main( Signalee* this ) {
-	step1( &global );
-	step3( &global );
-}
-
-void main( Signaler* this ) {
-	for(int i = 0; i < 10_000; i++) {
-		asm volatile ("" : : : "memory");
-	}
-
-	step2( &global );
-}
-
-int main(int argc, char* argv[]) {
-	assert( global.__mon.entry_queue.tail != NULL );
-	processor p;
-	{
-		Signalee a;
-		Signaler b;
-	}
-	if( global.value != 3) abort();
-}
Index: src/tests/test.py
===================================================================
--- src/tests/test.py	(revision 12d3187fedb1368cdd1434c418e7e1e50e159434)
+++ src/tests/test.py	(revision 20550989ac1a8e2e7bd901d050c04cd0baf675a6)
@@ -6,10 +6,11 @@
 from os import listdir, environ
 from os.path import isfile, join, splitext
-from subprocess import Popen, PIPE, STDOUT
+from pybin.tools import *
 
 import argparse
+import multiprocessing
 import os
 import re
-import stat
+import signal
 import sys
 
@@ -26,8 +27,16 @@
 def getMachineType():
 	sh('echo "void ?{}(int*a,int b){}int main(){return 0;}" > .dummy.c')
-	sh("make .dummy", print2stdout=False)
+	ret, out = sh("make .dummy -s", print2stdout=True)
+	
+	if ret != 0:
+		print("Failed to identify architecture:")
+		print(out)
+		print("Stopping")
+		rm( (".dummy.c",".dummy") )
+		sys.exit(1)
+
 	_, out = sh("file .dummy", print2stdout=False)
-	sh("rm -f .dummy.c > /dev/null 2>&1")
-	sh("rm -f .dummy > /dev/null 2>&1")
+	rm( (".dummy.c",".dummy") )
+
 	return re.search("ELF\s([0-9]+)-bit", out).group(1)
 
@@ -58,70 +67,99 @@
 	return generic_list + typed_list + concurrent_list;
 
-# helper functions to run terminal commands
-def sh(cmd, dry_run = False, print2stdout = True):
-	if dry_run : 	# if this is a dry_run, only print the commands that would be ran
-		print("cmd: %s" % cmd)
-		return 0, None
-	else :			# otherwise create a pipe and run the desired command
-		proc = Popen(cmd, stdout=None if print2stdout else PIPE, stderr=STDOUT, shell=True)
-		out, err = proc.communicate()
-		return proc.returncode, out
-
-# helper function to replace patterns in a file
-def file_replace(fname, pat, s_after):
-    # first, see if the pattern is even in the file.
-    with open(fname) as f:
-        if not any(re.search(pat, line) for line in f):
-            return # pattern does not occur in file so we are done.
-
-    # pattern is in the file, so perform replace operation.
-    with open(fname) as f:
-        out_fname = fname + ".tmp"
-        out = open(out_fname, "w")
-        for line in f:
-            out.write(re.sub(pat, s_after, line))
-        out.close()
-        os.rename(out_fname, fname)
-
-# tests output may differ depending on the depth of the makefile
-def fix_MakeLevel(file) :
-	if environ.get('MAKELEVEL') :
-		file_replace(file, "make\[%i\]" % int(environ.get('MAKELEVEL')), 'make' )
-
-# helper function to check if a files contains only a spacific string
-def fileContainsOnly(file, text) :
-	with open(file) as f:
-		ff = f.read().strip()
-		result = ff == text.strip()
-
-		return result;
-
-# check whether or not a file is executable
-def fileIsExecutable(file) :
-	try :
-		fileinfo = os.stat(file)
-		return bool(fileinfo.st_mode & stat.S_IXUSR)
-	except Exception as inst:
-		print(type(inst))    # the exception instance
-		print(inst.args)     # arguments stored in .args
-		print(inst)
-		return False
+# from the requested test names, keep the valid ones and report the others
+# on stderr; exits with status 1 when nothing valid is left
+def validTests( options ):
+	selected = []
+
+	for testname in options.tests :
+		# every known test carrying the requested name
+		matches = [t for t in allTests if t.name == testname]
+
+		if options.regenerate_expected :
+			# regenerating: exactly one known match is reused, anything else becomes a fresh Test
+			if testname.endswith( (".c", ".cc", ".cpp") ):
+				print('ERROR: "%s", tests are not allowed to end with a C/C++/CFA extension, ignoring it' % testname, file=sys.stderr)
+				continue
+
+			selected.append( matches[0] if len(matches) == 1 else Test(testname, testname) )
+
+		elif matches :
+			# checking: the first known test with that name is used
+			selected.append( matches[0] )
+
+		else :
+			print('ERROR: No expected file for test %s, ignoring it' % testname, file=sys.stderr)
+
+	# make sure we have at least some test to run
+	if not selected :
+		print('ERROR: No valid test to run', file=sys.stderr)
+		sys.exit(1)
+
+	return selected
+
+# build the command line parser, parse the arguments and sanity check them
+def getOptions():
+	# create a parser with the arguments for the tests script
+	parser = argparse.ArgumentParser(description='Script which runs cforall tests')
+	add = parser.add_argument
+	add('--debug', help='Run all tests in debug or release', type=yes_no, default='no')
+	add('--concurrent', help='Run concurrent tests', type=yes_no, default='yes')
+	add('--dry-run', help="Don't run the tests, only output the commands", action='store_true')
+	add('--list', help='List all test available', action='store_true')
+	add('--all', help='Run all test available', action='store_true')
+	add('--regenerate-expected', help='Regenerate the .expect by running the specified tets, can be used with --all option', action='store_true')
+	add('-j', '--jobs', help='Number of tests to run simultaneously', type=int, default='8')
+	add('--list-comp', help='List all valide arguments', action='store_true')
+	add('tests', metavar='test', type=str, nargs='*', help='a list of tests to run')
+
+	args = parser.parse_args()
+
+	# the script either lists, runs everything, or runs the named tests:
+	# collect one boolean per mode
+	modes = ( args.list or args.list_comp, args.all, len(args.tests) > 0 )
+
+	# exactly one mode must be selected
+	if sum( modes ) != 1 :
+		print("ERROR: must have option '--all', '--list' or non-empty test list", file=sys.stderr)
+		parser.print_help()
+		sys.exit(1)
+
+	return args
+
+def jobCount( options ):
+	# look for a make jobserver advertised through the MAKEFLAGS environment variable
+	flags = environ.get('MAKEFLAGS')
+	jobserver = re.search("--jobserver-(auth|fds)=\s*([0-9]+),([0-9]+)", flags) if flags else None
+	if jobserver is None :
+		options.jobs = multiprocessing.cpu_count()
+	else :
+		tokens = os.read(int(jobserver.group(2)), 1024)
+		options.jobs = len(tokens)
+		os.write(int(jobserver.group(3)), tokens)
+
+	# make sure we have a valid number of jobs that corresponds to user input
+	if options.jobs <= 0 :
+		print('ERROR: Invalid number of jobs', file=sys.stderr)
+		sys.exit(1)
+
+	return min( options.jobs, len(tests) ), bool(flags)	# (job count capped by the module-level tests list, MAKEFLAGS was set)
 
 ################################################################################
 #               running test functions
 ################################################################################
+# logic to run a single test and return the result (No handling of printing or other test framework logic)
 def run_single_test(test, generate, dry_run, debug):
 
 	# find the output file based on the test name and options flag
 	out_file = (".out/%s.log" % test.name) if not generate else (".expect/%s.txt" % test.path)
+	err_file = ".err/%s.log" % test.name
 
 	# remove any outputs from the previous tests to prevent side effects
-	sh("rm -f %s" % out_file, dry_run)
-	sh("rm -f %s > /dev/null 2>&1" % test.name, dry_run)
-
-	options = "-debug" if debug else "-nodebug";
+	rm( (out_file, test.name), dry_run )
+
+	options = "-debug" if debug else "-nodebug"
 
 	# build, skipping to next test on error
-	make_ret, _ = sh("""%s EXTRA_FLAGS="-quiet %s" %s 2> %s 1> /dev/null""" % (make_cmd, options, test.name, out_file), dry_run)
+	make_ret, _ = sh("""%s test=yes EXTRA_FLAGS="-quiet %s" %s 2> %s 1> /dev/null""" % (make_cmd, options, test.name, out_file), dry_run)
 
 	# if the make command succeds continue otherwise skip to diff
@@ -137,9 +175,10 @@
 			sh("cat %s > %s" % (test.name, out_file), dry_run)
 
+	else :
+		# command failed save the log to less temporary file
+		sh("mv %s %s" % (err_file, out_file), dry_run)
+
 	retcode = 0
 	error = None
-
-	# fix output to prevent make depth to cause issues
-	fix_MakeLevel(out_file)
 
 	if generate :
@@ -151,22 +190,7 @@
 
 	else :
-		# diff the output of the files
-		diff_cmd = ("diff --old-group-format='\t\tmissing lines :\n"
-					"%%<' \\\n"
-					"--new-group-format='\t\tnew lines :\n"
-					"%%>' \\\n"
-					"--unchanged-group-format='%%=' \\"
-					"--changed-group-format='\t\texpected :\n"
-					"%%<\n"
-					"\t\tgot :\n"
-					"%%>' \\\n"
-					"--new-line-format='\t\t%%dn\t%%L' \\\n"
-					"--old-line-format='\t\t%%dn\t%%L' \\\n"
-					"--unchanged-line-format='' \\\n"
-					".expect/%s.txt .out/%s.log")
-
 		# fetch return code and error from the diff command
-		retcode, error = sh(diff_cmd % (test.path, test.name), dry_run, False)
-
+		retcode, error = diff(".expect/%s.txt" % test.path, ".out/%s.log" % test.name, dry_run)
+	
 	# clean the executable
 	sh("rm -f %s > /dev/null 2>&1" % test.name, dry_run)
@@ -174,35 +198,35 @@
 	return retcode, error
 
-def run_test_instance(t, generate, dry_run, debug) :
-	try :
-		# print formated name
-		name_txt = "%20s  " % t.name
-
-		#run the test instance and collect the result
-		test_failed, error = run_single_test(t, generate, dry_run, debug)
-
-		# update output based on current action
-		if generate :
-			failed_txt = "ERROR"
-			success_txt = "Done"
-		else :
-			failed_txt = "FAILED"
-			success_txt = "PASSED"
-
-		#print result with error if needed
-		text = name_txt + (failed_txt if test_failed else success_txt)
-		out = sys.stdout
-		if error :
-			text = text + "\n" + error
-			out = sys.stderr
-
-		print(text, file = out);
-		sys.stdout.flush()
-		sys.stderr.flush()
-		return test_failed
-
-	except KeyboardInterrupt:
-		test_failed = True
-
+# run a single test and handle the errors, outputs, printing, exception handling, etc.
+def run_test_worker(t, generate, dry_run, debug) :
+	# use the default SIGINT behaviour while the test itself is running
+	signal.signal(signal.SIGINT, signal.SIG_DFL)
+	try :
+		# run the test instance and collect the result
+		test_failed, error = run_single_test(t, generate, dry_run, debug)
+
+		# wording depends on whether we regenerate expectations or check them
+		if generate :
+			failed_txt = "ERROR"
+			success_txt = "Done"
+		else :
+			failed_txt = "FAILED"
+			success_txt = "PASSED"
+
+		# right-aligned test name followed by the outcome
+		text = ("%20s  " % t.name) + (failed_txt if test_failed else success_txt)
+		out = sys.stdout
+		# when there is error text, append it and print to stderr instead
+		if error :
+			text = text + "\n" + error
+			out = sys.stderr
+
+		print(text, file = out)
+		sys.stdout.flush()
+		sys.stderr.flush()
+		return test_failed
+	finally :
+		# always go back to ignoring SIGINT, even if the test run raised
+		signal.signal(signal.SIGINT, signal.SIG_IGN)
 
 # run the given list of tests with the given parameters
@@ -211,14 +235,18 @@
 	sh("%s clean > /dev/null 2>&1" % make_cmd, dry_run)
 
-	#make sure the required folder are present
-	sh('mkdir -p .out .expect', dry_run)
+	# make sure the required folder are present
+	sh('mkdir -p .out .expect .err', dry_run)
 
 	if generate :
 		print( "Regenerate tests for: " )
 
+	# create the executor for our jobs and handle the signal properly
+	original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
+	pool = Pool(jobs)
+	signal.signal(signal.SIGINT, original_sigint_handler)
+
 	# for each test to run
-	pool = Pool(jobs)
 	try :
-		results = pool.map_async(partial(run_test_instance, generate=generate, dry_run=dry_run, debug=debug), tests ).get(9999)
+		results = pool.map_async(partial(run_test_worker, generate=generate, dry_run=dry_run, debug=debug), tests ).get(3600)
 	except KeyboardInterrupt:
 		pool.terminate()
@@ -226,5 +254,5 @@
 		sys.exit(1)
 
-	#clean the workspace
+	# clean the workspace
 	sh("%s clean > /dev/null 2>&1" % make_cmd, dry_run)
 
@@ -235,97 +263,43 @@
 	return 0
 
-def yes_no(string):
-	if string == "yes" :
-		return True
-	if string == "no" :
-		return False
-	raise argparse.ArgumentTypeError(msg)
-	return False
-
 
 ################################################################################
 #               main loop
 ################################################################################
-# create a parser with the arguments for the tests script
-parser = argparse.ArgumentParser(description='Script which runs cforall tests')
-parser.add_argument('--debug', help='Run all tests in debug or release', type=yes_no, default='no')
-parser.add_argument('--concurrent', help='Run concurrent tests', type=yes_no, default='yes')
-parser.add_argument('--dry-run', help='Don\'t run the tests, only output the commands', action='store_true')
-parser.add_argument('--list', help='List all test available', action='store_true')
-parser.add_argument('--all', help='Run all test available', action='store_true')
-parser.add_argument('--regenerate-expected', help='Regenerate the .expect by running the specified tets, can be used with --all option', action='store_true')
-parser.add_argument('-j', '--jobs', help='Number of tests to run simultaneously', type=int, default='8')
-parser.add_argument('tests', metavar='test', type=str, nargs='*', help='a list of tests to run')
-
-# parse the command line arguments
-options = parser.parse_args()
-
-# script must have at least some tests to run
-if (len(options.tests) > 0  and     options.all and not options.list) \
-or (len(options.tests) == 0 and not options.all and not options.list) :
-	print('ERROR: must have option \'--all\' or non-empty test list', file=sys.stderr)
-	parser.print_help()
-	sys.exit(1)
-
-# fetch the liest of all valid tests
-allTests = listTests( options.concurrent )
-
-# if user wants all tests than no other treatement of the test list is required
-if options.all or options.list :
-	tests = allTests
-
-else :
-	#otherwise we need to validate that the test list that was entered is valid
-	tests = []
-
-	# if we are regenerating the tests we need to find the information of the
-	# already existing tests and create new info for the new tests
-	if options.regenerate_expected :
-		for testname in options.tests :
-			if testname.endswith(".c") or testname.endswith(".cc") or testname.endswith(".cpp") :
-				print('ERROR: "%s", tests are not allowed to end with a C/C++/CFA extension, ignoring it' % testname, file=sys.stderr)
-			else :
-				found = [test for test in allTests if test.name == testname]
-				tests.append( found[0] if len(found) == 1 else Test(testname, testname) )
-
-	else :
-		# otherwise we only need to validate that all tests are present in the complete list
-		for testname in options.tests:
-			test = [t for t in allTests if t.name == testname]
-
-			if len(test) != 0 :
-				tests.append( test[0] )
-			else :
-				print('ERROR: No expected file for test %s, ignoring it' % testname, file=sys.stderr)
-
-	# make sure we have at least some test to run
-	if len(tests) == 0 :
-		print('ERROR: No valid test to run', file=sys.stderr)
-		sys.exit(1)
-
-# sort the test alphabetically for convenience
-tests.sort(key=lambda t: t.name)
-
-# check if the user already passed in a number of jobs for multi-threading
-make_flags = environ.get('MAKEFLAGS')
-make_jobs_fds = re.search("--jobserver-fds=\s*([0-9]+),([0-9]+)", make_flags) if make_flags else None
-if make_jobs_fds :
-	tokens = os.read(int(make_jobs_fds.group(1)), 1024)
-	options.jobs = len(tokens)
-	os.write(int(make_jobs_fds.group(2)), tokens)
-
-# make sure we have a valid number of jobs that corresponds to user input
-if options.jobs <= 0 :
-	print('ERROR: Invalid number of jobs', file=sys.stderr)
-	sys.exit(1)
-
-print('Running (%s) on %i cores' % ("debug" if options.debug else "no debug", options.jobs))
-make_cmd = "make" if make_flags else ("make -j%i" % options.jobs)
-
-# users may want to simply list the tests
-if options.list :
-	print("\n".join(map(lambda t: "%s (%s)" % (t.name, t.path), tests)))
-
-else :
-	# otherwise run all tests and make sure to return the correct error code
-	sys.exit( run_tests(tests, options.regenerate_expected, options.dry_run, options.jobs, options.debug) )
+if __name__ == "__main__":
+	# always run from the same folder
+	chdir()
+
+	# parse the command line arguments
+	options = getOptions()
+
+	# fetch the list of all valid tests
+	allTests = listTests( options.concurrent )
+
+	# listing modes and --all work on the complete list; otherwise the
+	# names given on the command line have to be validated first
+	if options.all or options.list or options.list_comp :
+		tests = allTests
+	else :
+		tests = validTests( options )
+
+	# sort the tests alphabetically for convenience
+	tests.sort(key=lambda t: t.name)
+
+	if options.list_comp :
+		# completion helper: every option followed by every test name, on one line
+		print("-h --help --debug --concurrent --dry-run --list --all --regenerate-expected -j --jobs ", end='')
+		print(" ".join("%s" % (t.name) for t in tests))
+	elif options.list :
+		# users may want to simply list the tests
+		print("\n".join("%s (%s)" % (t.name, t.path) for t in tests))
+
+	else :
+		options.jobs, forceJobs = jobCount( options )
+
+		print('Running (%s) on %i cores' % ("debug" if options.debug else "no debug", options.jobs))
+		# when forceJobs is set, plain "make" is used instead of adding -j
+		make_cmd = "make" if forceJobs else ("make -j%i" % options.jobs)
+
+		# otherwise run all tests and make sure to return the correct error code
+		sys.exit( run_tests(tests, options.regenerate_expected, options.dry_run, options.jobs, options.debug) )
