Index: src/InitTweak/FixInit.cc
===================================================================
--- src/InitTweak/FixInit.cc	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/InitTweak/FixInit.cc	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -385,5 +385,5 @@
 		void SelfAssignChecker::previsit( ApplicationExpr * appExpr ) {
 			DeclarationWithType * function = getFunction( appExpr );
-			if ( isAssignment( function ) ) {
+			if ( function->name == "?=?" ) { // doesn't use isAssignment, because ?+=?, etc. should not count as self-assignment
 				if ( appExpr->args.size() == 2 ) {
 					// check for structural similarity (same variable use, ignore casts, etc. - but does not look too deeply, anything looking like a function is off limits)
Index: src/ResolvExpr/Resolver.cc
===================================================================
--- src/ResolvExpr/Resolver.cc	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/ResolvExpr/Resolver.cc	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -492,4 +492,8 @@
 			}
 
+			if(clause.target.arguments.empty()) {
+				SemanticError( stmt->location, "Waitfor clause must have at least one mutex parameter");
+			}
+
 			// Find all alternatives for all arguments in canonical form
 			std::vector< AlternativeFinder > argAlternatives;
@@ -556,4 +560,6 @@
 							auto param_end = function->parameters.end();
 
+							int n_mutex_arg = 0;
+
 							// For every arguments of its set, check if it matches one of the parameter
 							// The order is important
@@ -564,15 +570,17 @@
 									// We ran out of parameters but still have arguments
 									// this function doesn't match
-									SemanticError( function, "candidate function not viable: too many mutex arguments\n" );
+									SemanticError( function, toString("candidate function not viable: too many mutex arguments, expected ", n_mutex_arg, "\n" ));
 								}
 
+								n_mutex_arg++;
+
 								// Check if the argument matches the parameter type in the current scope
-								if( ! unify( (*param)->get_type(), arg.expr->get_result(), resultEnv, resultNeed, resultHave, openVars, this->indexer ) ) {
+								if( ! unify( arg.expr->get_result(), (*param)->get_type(), resultEnv, resultNeed, resultHave, openVars, this->indexer ) ) {
 									// Type doesn't match
 									stringstream ss;
 									ss << "candidate function not viable: no known convertion from '";
+									(*param)->get_type()->print( ss );
+									ss << "' to '";
 									arg.expr->get_result()->print( ss );
-									ss << "' to '";
-									(*param)->get_type()->print( ss );
 									ss << "'\n";
 									SemanticError( function, ss.str() );
@@ -588,5 +596,5 @@
 								// We ran out of arguments but still have parameters left
 								// this function doesn't match
-								SemanticError( function, "candidate function not viable: too few mutex arguments\n" );
+								SemanticError( function, toString("candidate function not viable: too few mutex arguments, expected ", n_mutex_arg, "\n" ));
 							}
 
Index: src/SynTree/Statement.cc
===================================================================
--- src/SynTree/Statement.cc	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/SynTree/Statement.cc	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -453,6 +453,34 @@
 void WaitForStmt::print( std::ostream &os, Indenter indent ) const {
 	os << "Waitfor Statement" << endl;
-	os << indent << "... with block:" << endl << indent+1;
-	// block->print( os, indent + 4 );
+	indent += 1; // clauses, timeout and else sections are all dumped one level below the header line
+	for( auto & clause : clauses ) { // one group of labelled sections per waitfor clause
+		os << indent << "target function :";
+		if(clause.target.function) { clause.target.function->print(os, indent + 1); }
+		os << endl << indent << "with arguments :" << endl;
+		for( auto & thing : clause.target.arguments) {
+			if(thing) { thing->print(os, indent + 1); } // null arguments are skipped silently
+		}
+		os << indent << " with statement :" << endl;
+		if(clause.statement) { clause.statement->print(os, indent + 1); }
+
+		os << indent << " with condition :" << endl;
+		if(clause.condition) { clause.condition->print(os, indent + 1); }
+	}
+
+	os << indent << " timeout of :" << endl; // labels are always emitted; the sub-tree only when the pointer is set
+	if(timeout.time) { timeout.time->print(os, indent + 1); }
+
+	os << indent << " with statement :" << endl;
+	if(timeout.statement) { timeout.statement->print(os, indent + 1); }
+
+	os << indent << " with condition :" << endl;
+	if(timeout.condition) { timeout.condition->print(os, indent + 1); }
+
+	// else branch: its statement followed by its guard condition
+	os << indent << " else :" << endl;
+	if(orelse.statement) { orelse.statement->print(os, indent + 1); }
+
+	os << indent << " with condition :" << endl;
+	if(orelse.condition) { orelse.condition->print(os, indent + 1); }
 }
 
Index: src/libcfa/bits/locks.h
===================================================================
--- src/libcfa/bits/locks.h	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/libcfa/bits/locks.h	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -69,4 +69,11 @@
 	}
 
+
+	#ifdef __CFA_DEBUG__
+		void __cfaabi_dbg_record(__spinlock_t & this, const char * prev_name);
+	#else
+		#define __cfaabi_dbg_record(x, y)
+	#endif
+
 	// Lock the spinlock, return false if already acquired
 	static inline _Bool try_lock  ( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) {
@@ -74,8 +81,5 @@
 		if( result ) {
 			disable_interrupts();
-			// __cfaabi_dbg_debug_do(
-			// 	this.prev_name = caller;
-			// 	this.prev_thrd = TL_GET( this_thread );
-			// )
+			__cfaabi_dbg_record( this, caller );
 		}
 		return result;
@@ -105,8 +109,5 @@
 		}
 		disable_interrupts();
-		// __cfaabi_dbg_debug_do(
-		// 	this.prev_name = caller;
-		// 	this.prev_thrd = TL_GET( this_thread );
-		// )
+		__cfaabi_dbg_record( this, caller );
 	}
 
Index: src/libcfa/concurrency/coroutine
===================================================================
--- src/libcfa/concurrency/coroutine	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/libcfa/concurrency/coroutine	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -32,9 +32,15 @@
 //-----------------------------------------------------------------------------
 // Ctors and dtors
-void ?{}(coStack_t & this);
-void ?{}(coroutine_desc & this);
-void ?{}(coroutine_desc & this, const char * name);
-void ^?{}(coStack_t & this);
-void ^?{}(coroutine_desc & this);
+// void ?{}( coStack_t & this );
+// void ^?{}( coStack_t & this );
+
+void ?{}( coroutine_desc & this, const char * name, void * storage, size_t storageSize );
+void ^?{}( coroutine_desc & this );
+
+static inline void ?{}( coroutine_desc & this)                                       { this{ "Anonymous Coroutine", NULL, 0 }; }
+static inline void ?{}( coroutine_desc & this, size_t stackSize)                     { this{ "Anonymous Coroutine", NULL, stackSize }; }
+static inline void ?{}( coroutine_desc & this, void * storage, size_t storageSize )  { this{ "Anonymous Coroutine", storage, storageSize }; }
+static inline void ?{}( coroutine_desc & this, const char * name)                    { this{ name, NULL, 0 }; }
+static inline void ?{}( coroutine_desc & this, const char * name, size_t stackSize ) { this{ name, NULL, stackSize }; }
 
 //-----------------------------------------------------------------------------
Index: src/libcfa/concurrency/coroutine.c
===================================================================
--- src/libcfa/concurrency/coroutine.c	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/libcfa/concurrency/coroutine.c	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -39,47 +39,32 @@
 //-----------------------------------------------------------------------------
 // Coroutine ctors and dtors
-void ?{}(coStack_t& this) with( this ) {
-	size		= 65000;	// size of stack
-	storage	= NULL;	// pointer to stack
-	limit		= NULL;	// stack grows towards stack limit
-	base		= NULL;	// base of stack
-	context	= NULL;	// address of cfa_context_t
-	top		= NULL;	// address of top of storage
-	userStack	= false;
-}
-
-void ?{}(coStack_t& this, size_t size) {
-	this{};
-	this.size = size;
-
-	create_stack(&this, this.size);
-}
-
-void ?{}(coroutine_desc& this) {
-	this{ "Anonymous Coroutine" };
-}
-
-void ?{}(coroutine_desc& this, const char * name) with( this ) {
-	this.name = name;
-	errno_ = 0;
-	state = Start;
-	starter = NULL;
-	last = NULL;
-}
-
-void ?{}(coroutine_desc& this, size_t size) {
-	this{};
-	(this.stack){size};
+void ?{}( coStack_t & this, void * storage, size_t storageSize ) with( this ) {
+      size		 = storageSize == 0 ? 65000 : storageSize; // size of stack
+      this.storage = storage;                                // pointer to stack
+      limit		 = NULL;                                   // stack grows towards stack limit
+      base		 = NULL;                                   // base of stack
+      context	 = NULL;                                   // address of cfa_context_t
+      top		 = NULL;                                   // address of top of storage
+      userStack	 = storage != NULL;
 }
 
 void ^?{}(coStack_t & this) {
-	if ( ! this.userStack && this.storage ) {
-		__cfaabi_dbg_debug_do(
-			if ( mprotect( this.storage, pageSize, PROT_READ | PROT_WRITE ) == -1 ) {
-				abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
-			}
-		);
-		free( this.storage );
-	}
+      if ( ! this.userStack && this.storage ) {
+            __cfaabi_dbg_debug_do(
+                  if ( mprotect( this.storage, pageSize, PROT_READ | PROT_WRITE ) == -1 ) {
+                        abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
+                  }
+            );
+            free( this.storage );
+      }
+}
+
+void ?{}( coroutine_desc & this, const char * name, void * storage, size_t storageSize ) with( this ) {
+      (this.stack){storage, storageSize};
+      this.name = name;
+      errno_ = 0;
+      state = Start;
+      starter = NULL;
+      last = NULL;
 }
 
@@ -90,76 +75,80 @@
 forall(dtype T | is_coroutine(T))
 void prime(T& cor) {
-	coroutine_desc* this = get_coroutine(cor);
-	assert(this->state == Start);
+      coroutine_desc* this = get_coroutine(cor);
+      assert(this->state == Start);
 
-	this->state = Primed;
-	resume(cor);
+      this->state = Primed;
+      resume(cor);
 }
 
 // Wrapper for co
 void CoroutineCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
-	verify( TL_GET( preemption_state ).enabled || TL_GET( this_processor )->do_terminate );
-	disable_interrupts();
+      verify( TL_GET( preemption_state ).enabled || TL_GET( this_processor )->do_terminate );
+      disable_interrupts();
 
-	// set state of current coroutine to inactive
-	src->state = src->state == Halted ? Halted : Inactive;
+      // set state of current coroutine to inactive
+      src->state = src->state == Halted ? Halted : Inactive;
 
-	// set new coroutine that task is executing
-	TL_SET( this_coroutine, dst );
+      // set new coroutine that task is executing
+      TL_SET( this_coroutine, dst );
 
-	// context switch to specified coroutine
-	assert( src->stack.context );
-	CtxSwitch( src->stack.context, dst->stack.context );
-	// when CtxSwitch returns we are back in the src coroutine
+      // context switch to specified coroutine
+      assert( src->stack.context );
+      CtxSwitch( src->stack.context, dst->stack.context );
+      // when CtxSwitch returns we are back in the src coroutine
 
-	// set state of new coroutine to active
-	src->state = Active;
+      // set state of new coroutine to active
+      src->state = Active;
 
-	enable_interrupts( __cfaabi_dbg_ctx );
-	verify( TL_GET( preemption_state ).enabled || TL_GET( this_processor )->do_terminate );
+      enable_interrupts( __cfaabi_dbg_ctx );
+      verify( TL_GET( preemption_state ).enabled || TL_GET( this_processor )->do_terminate );
 } //ctxSwitchDirect
 
 void create_stack( coStack_t* this, unsigned int storageSize ) with( *this ) {
-	//TEMP HACK do this on proper kernel startup
-	if(pageSize == 0ul) pageSize = sysconf( _SC_PAGESIZE );
+      //TEMP HACK do this on proper kernel startup
+      if(pageSize == 0ul) pageSize = sysconf( _SC_PAGESIZE );
 
-	size_t cxtSize = libCeiling( sizeof(machine_context_t), 8 ); // minimum alignment
+      size_t cxtSize = libCeiling( sizeof(machine_context_t), 8 ); // minimum alignment
 
-	if ( (intptr_t)storage == 0 ) {
-		userStack = false;
-		size = libCeiling( storageSize, 16 );
-		// use malloc/memalign because "new" raises an exception for out-of-memory
+      if ( !storage ) { // no user-supplied storage: allocate the stack here
+            userStack = false;
 
-		// assume malloc has 8 byte alignment so add 8 to allow rounding up to 16 byte alignment
-		__cfaabi_dbg_debug_do( storage = memalign( pageSize, cxtSize + size + pageSize ) );
-		__cfaabi_dbg_no_debug_do( storage = malloc( cxtSize + size + 8 ) );
+            size = libCeiling( storageSize, 16 ); // round first so the trace below reports the size actually used
+            __cfaabi_dbg_print_safe("Kernel : Creating stack of size %zu for stack obj %p\n", cxtSize + size + 8, this);
+            // use malloc/memalign because "new" raises an exception for out-of-memory
 
-		__cfaabi_dbg_debug_do(
-			if ( mprotect( storage, pageSize, PROT_NONE ) == -1 ) {
-				abort( "(uMachContext &)%p.createContext() : internal error, mprotect failure, error(%d) %s.", this, (int)errno, strerror( (int)errno ) );
-			} // if
-		);
+            // assume malloc has 8 byte alignment so add 8 to allow rounding up to 16 byte alignment
+            __cfaabi_dbg_debug_do( storage = memalign( pageSize, cxtSize + size + pageSize ) );
+            __cfaabi_dbg_no_debug_do( storage = malloc( cxtSize + size + 8 ) );
 
-		if ( (intptr_t)storage == 0 ) {
-			abort( "Attempt to allocate %zd bytes of storage for coroutine or task execution-state but insufficient memory available.", size );
-		} // if
+            if ( (intptr_t)storage == 0 ) { // check the allocation before mprotect touches it, so out-of-memory is reported as such
+                  abort( "Attempt to allocate %zd bytes of storage for coroutine or task execution-state but insufficient memory available.", size );
+            } // if
 
-		__cfaabi_dbg_debug_do( limit = (char *)storage + pageSize );
-		__cfaabi_dbg_no_debug_do( limit = (char *)libCeiling( (unsigned long)storage, 16 ) ); // minimum alignment
+            __cfaabi_dbg_debug_do( // debug builds only: revoke access to the first page of the allocation
+                  if ( mprotect( storage, pageSize, PROT_NONE ) == -1 ) {
+                        abort( "(uMachContext &)%p.createContext() : internal error, mprotect failure, error(%d) %s.", this, (int)errno, strerror( (int)errno ) );
+                  } // if
+            );
 
-	} else {
-		assertf( ((size_t)storage & (libAlign() - 1)) != 0ul, "Stack storage %p for task/coroutine must be aligned on %d byte boundary.", storage, (int)libAlign() );
-		userStack = true;
-		size = storageSize - cxtSize;
+            __cfaabi_dbg_debug_do( limit = (char *)storage + pageSize );
+            __cfaabi_dbg_no_debug_do( limit = (char *)libCeiling( (unsigned long)storage, 16 ) ); // minimum alignment
 
-		if ( size % 16 != 0u ) size -= 8;
+      } else {
+            __cfaabi_dbg_print_safe("Kernel : stack obj %p using user stack %p(%u bytes)\n", this, storage, storageSize);
 
-		limit = (char *)libCeiling( (unsigned long)storage, 16 ); // minimum alignment
-	} // if
-	assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %d bytes for a stack.", size, MinStackSize );
+            assertf( ((size_t)storage & (libAlign() - 1)) == 0ul, "Stack storage %p for task/coroutine must be aligned on %d byte boundary.", storage, (int)libAlign() );
+            userStack = true;
+            size = storageSize - cxtSize;
 
-	base = (char *)limit + size;
-	context = base;
-	top = (char *)context + cxtSize;
+            if ( size % 16 != 0u ) size -= 8;
+
+            limit = (char *)libCeiling( (unsigned long)storage, 16 ); // minimum alignment
+      } // if
+      assertf( size >= MinStackSize, "Stack size %zd provides less than minimum of %d bytes for a stack.", size, MinStackSize );
+
+      base = (char *)limit + size; // the machine context sits directly above the usable stack area
+      context = base;
+      top = (char *)context + cxtSize;
 }
 
@@ -167,22 +156,22 @@
 // is not inline (We can't inline Cforall in C)
 extern "C" {
-	void __suspend_internal(void) {
-		suspend();
-	}
+      void __suspend_internal(void) {
+            suspend();
+      }
 
-	void __leave_coroutine(void) {
-		coroutine_desc * src = TL_GET( this_coroutine ); // optimization
+      void __leave_coroutine(void) {
+            coroutine_desc * src = TL_GET( this_coroutine ); // optimization
 
-		assertf( src->starter != 0,
-			"Attempt to suspend/leave coroutine \"%.256s\" (%p) that has never been resumed.\n"
-			"Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
-			src->name, src );
-		assertf( src->starter->state != Halted,
-			"Attempt by coroutine \"%.256s\" (%p) to suspend/leave back to terminated coroutine \"%.256s\" (%p).\n"
-			"Possible cause is terminated coroutine's main routine has already returned.",
-			src->name, src, src->starter->name, src->starter );
+            assertf( src->starter != 0,
+                  "Attempt to suspend/leave coroutine \"%.256s\" (%p) that has never been resumed.\n"
+                  "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
+                  src->name, src );
+            assertf( src->starter->state != Halted,
+                  "Attempt by coroutine \"%.256s\" (%p) to suspend/leave back to terminated coroutine \"%.256s\" (%p).\n"
+                  "Possible cause is terminated coroutine's main routine has already returned.",
+                  src->name, src, src->starter->name, src->starter );
 
-		CoroutineCtxSwitch( src, src->starter );
-	}
+            CoroutineCtxSwitch( src, src->starter );
+      }
 }
 
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/libcfa/concurrency/invoke.h	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -125,4 +125,7 @@
 		// pointer to monitor with sufficient lifetime for current monitors
 		struct monitor_desc *  self_mon_p;
+
+		// pointer to the cluster on which the thread is running
+		struct cluster * curr_cluster;
 
 		// monitors currently held by this thread
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/libcfa/concurrency/kernel	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -48,12 +48,20 @@
 	__queue_t(thread_desc) ready_queue;
 
+	// Name of the cluster
+	const char * name;
+
 	// Preemption rate on this cluster
 	Duration preemption_rate;
 };
 
+extern struct cluster * mainCluster;
 extern Duration default_preemption();
 
-void ?{} (cluster & this);
+void ?{} (cluster & this, const char * name, Duration preemption_rate);
 void ^?{}(cluster & this);
+
+static inline void ?{} (cluster & this)                           { this{"Anonymous Cluster", default_preemption()}; }
+static inline void ?{} (cluster & this, Duration preemption_rate) { this{"Anonymous Cluster", preemption_rate}; }
+static inline void ?{} (cluster & this, const char * name)        { this{name, default_preemption()}; }
 
 //-----------------------------------------------------------------------------
@@ -92,4 +100,7 @@
 	cluster * cltr;
 
+	// Name of the processor
+	const char * name;
+
 	// Handle to pthreads
 	pthread_t kernel_thread;
@@ -119,7 +130,10 @@
 };
 
-void  ?{}(processor & this);
-void  ?{}(processor & this, cluster * cltr);
+void  ?{}(processor & this, const char * name, cluster & cltr);
 void ^?{}(processor & this);
+
+static inline void  ?{}(processor & this)                    { this{ "Anonymous Processor", *mainCluster}; }
+static inline void  ?{}(processor & this, cluster & cltr)    { this{ "Anonymous Processor", cltr}; }
+static inline void  ?{}(processor & this, const char * name) { this{name, *mainCluster }; }
 
 // Local Variables: //
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/libcfa/concurrency/kernel.c	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -42,10 +42,9 @@
 KERNEL_STORAGE(cluster,           mainCluster);
 KERNEL_STORAGE(processor,         mainProcessor);
-KERNEL_STORAGE(processorCtx_t,    mainProcessorCtx);
 KERNEL_STORAGE(thread_desc,       mainThread);
 KERNEL_STORAGE(machine_context_t, mainThreadCtx);
 
-cluster *     mainCluster;
-processor *   mainProcessor;
+cluster     * mainCluster;
+processor   * mainProcessor;
 thread_desc * mainThread;
 
@@ -65,5 +64,5 @@
 
 //-----------------------------------------------------------------------------
-// Main thread construction
+// Struct to steal stack
 struct current_stack_info_t {
 	machine_context_t ctx;
@@ -90,4 +89,6 @@
 }
 
+//-----------------------------------------------------------------------------
+// Main thread construction
 void ?{}( coStack_t & this, current_stack_info_t * info) with( this ) {
 	size      = info->size;
@@ -111,4 +112,5 @@
 	self_cor{ info };
 	curr_cor = &self_cor;
+	curr_cluster = mainCluster;
 	self_mon.owner = &this;
 	self_mon.recursion = 1;
@@ -126,11 +128,6 @@
 //-----------------------------------------------------------------------------
 // Processor coroutine
-void ?{}(processorCtx_t & this) {}
-
-// Construct the processor context of the main processor
-void ?{}(processorCtx_t & this, processor * proc) {
-	(this.__cor){ "Processor" };
-	this.__cor.starter = NULL;
-	this.proc = proc;
+void ?{}(processorCtx_t & this) {
+
 }
 
@@ -141,10 +138,7 @@
 }
 
-void ?{}(processor & this) {
-	this{ mainCluster };
-}
-
-void ?{}(processor & this, cluster * cltr) with( this ) {
-	this.cltr = cltr;
+void ?{}(processor & this, const char * name, cluster & cltr) with( this ) {
+	this.name = name;
+	this.cltr = &cltr;
 	terminated{ 0 };
 	do_terminate = false;
@@ -154,17 +148,4 @@
 
 	start( &this );
-}
-
-void ?{}(processor & this, cluster * cltr, processorCtx_t & runner) with( this ) {
-	this.cltr = cltr;
-	terminated{ 0 };
-	do_terminate = false;
-	preemption_alarm = NULL;
-	pending_preemption = false;
-	kernel_thread = pthread_self();
-	runner.proc = &this;
-
-	__cfaabi_dbg_print_safe("Kernel : constructing main processor context %p\n", &runner);
-	runner{ &this };
 }
 
@@ -181,9 +162,9 @@
 }
 
-void ?{}(cluster & this) with( this ) {
+void ?{}(cluster & this, const char * name, Duration preemption_rate) with( this ) {
+	this.name = name;
+	this.preemption_rate = preemption_rate;
 	ready_queue{};
 	ready_queue_lock{};
-
-	preemption_rate = default_preemption();
 }
 
@@ -312,6 +293,5 @@
 	TL_SET( this_coroutine, NULL );
 	TL_SET( this_thread, NULL );
-	TL_GET( preemption_state ).enabled = false;
-	TL_GET( preemption_state ).disable_count = 1;
+	TL_GET( preemption_state ).[enabled, disable_count] = [false, 1];
 	// SKULLDUGGERY: We want to create a context for the processor coroutine
 	// which is needed for the 2-step context switch. However, there is no reason
@@ -402,5 +382,5 @@
 	verifyf( thrd->next == NULL, "Expected null got %p", thrd->next );
 
-	with( *TL_GET( this_processor )->cltr ) {
+	with( *thrd->curr_cluster ) {
 		lock  ( ready_queue_lock __cfaabi_dbg_ctx2 );
 		append( ready_queue, thrd );
@@ -430,6 +410,8 @@
 void BlockInternal( __spinlock_t * lock ) {
 	disable_interrupts();
-	TL_GET( this_processor )->finish.action_code = Release;
-	TL_GET( this_processor )->finish.lock        = lock;
+	with( *TL_GET( this_processor ) ) {
+		finish.action_code = Release;
+		finish.lock        = lock;
+	}
 
 	verify( ! TL_GET( preemption_state ).enabled );
@@ -442,6 +424,8 @@
 void BlockInternal( thread_desc * thrd ) {
 	disable_interrupts();
-	TL_GET( this_processor )->finish.action_code = Schedule;
-	TL_GET( this_processor )->finish.thrd        = thrd;
+	with( *TL_GET( this_processor ) ) {
+		finish.action_code = Schedule;
+		finish.thrd        = thrd;
+	}
 
 	verify( ! TL_GET( preemption_state ).enabled );
@@ -455,7 +439,9 @@
 	assert(thrd);
 	disable_interrupts();
-	TL_GET( this_processor )->finish.action_code = Release_Schedule;
-	TL_GET( this_processor )->finish.lock        = lock;
-	TL_GET( this_processor )->finish.thrd        = thrd;
+	with( *TL_GET( this_processor ) ) {
+		finish.action_code = Release_Schedule;
+		finish.lock        = lock;
+		finish.thrd        = thrd;
+	}
 
 	verify( ! TL_GET( preemption_state ).enabled );
@@ -468,7 +454,9 @@
 void BlockInternal(__spinlock_t * locks [], unsigned short count) {
 	disable_interrupts();
-	TL_GET( this_processor )->finish.action_code = Release_Multi;
-	TL_GET( this_processor )->finish.locks       = locks;
-	TL_GET( this_processor )->finish.lock_count  = count;
+	with( *TL_GET( this_processor ) ) {
+		finish.action_code = Release_Multi;
+		finish.locks       = locks;
+		finish.lock_count  = count;
+	}
 
 	verify( ! TL_GET( preemption_state ).enabled );
@@ -481,9 +469,11 @@
 void BlockInternal(__spinlock_t * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
 	disable_interrupts();
-	TL_GET( this_processor )->finish.action_code = Release_Multi_Schedule;
-	TL_GET( this_processor )->finish.locks       = locks;
-	TL_GET( this_processor )->finish.lock_count  = lock_count;
-	TL_GET( this_processor )->finish.thrds       = thrds;
-	TL_GET( this_processor )->finish.thrd_count  = thrd_count;
+	with( *TL_GET( this_processor ) ) {
+		finish.action_code = Release_Multi_Schedule;
+		finish.locks       = locks;
+		finish.lock_count  = lock_count;
+		finish.thrds       = thrds;
+		finish.thrd_count  = thrd_count;
+	}
 
 	verify( ! TL_GET( preemption_state ).enabled );
@@ -496,7 +486,9 @@
 void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
 	verify( ! TL_GET( preemption_state ).enabled );
-	TL_GET( this_processor )->finish.action_code = thrd ? Release_Schedule : Release;
-	TL_GET( this_processor )->finish.lock        = lock;
-	TL_GET( this_processor )->finish.thrd        = thrd;
+	with( *TL_GET( this_processor ) ) {
+		finish.action_code = thrd ? Release_Schedule : Release;
+		finish.lock        = lock;
+		finish.thrd        = thrd;
+	}
 
 	returnToKernel();
@@ -511,4 +503,10 @@
 	verify( ! TL_GET( preemption_state ).enabled );
 	__cfaabi_dbg_print_safe("Kernel : Starting\n");
+
+	// Initialize the main cluster
+	mainCluster = (cluster *)&storage_mainCluster;
+	(*mainCluster){"Main Cluster"};
+
+	__cfaabi_dbg_print_safe("Kernel : Main cluster ready\n");
 
 	// Start by initializing the main thread
@@ -521,14 +519,30 @@
 	__cfaabi_dbg_print_safe("Kernel : Main thread ready\n");
 
-	// Initialize the main cluster
-	mainCluster = (cluster *)&storage_mainCluster;
-	(*mainCluster){};
-
-	__cfaabi_dbg_print_safe("Kernel : main cluster ready\n");
+
+
+	// Construct the processor context of the main processor
+	void ?{}(processorCtx_t & this, processor * proc) {
+		(this.__cor){ "Processor" };
+		this.__cor.starter = NULL;
+		this.proc = proc;
+	}
+
+	void ?{}(processor & this) with( this ) {
+		name = "Main Processor";
+		cltr = mainCluster;
+		terminated{ 0 };
+		do_terminate = false;
+		preemption_alarm = NULL;
+		pending_preemption = false;
+		kernel_thread = pthread_self();
+
+		runner{ &this };
+		__cfaabi_dbg_print_safe("Kernel : constructed main processor context %p\n", &runner);
+	}
 
 	// Initialize the main processor and the main processor ctx
 	// (the coroutine that contains the processing control flow)
 	mainProcessor = (processor *)&storage_mainProcessor;
-	(*mainProcessor){ mainCluster, *(processorCtx_t *)&storage_mainProcessorCtx };
+	(*mainProcessor){};
 
 	//initialize the global state variables
@@ -725,4 +739,9 @@
 		thrd->dbg_next = NULL;
 	}
+
+	void __cfaabi_dbg_record(__spinlock_t & this, const char * prev_name) {
+		this.prev_name = prev_name;
+		this.prev_thrd = TL_GET( this_thread );
+	}
 )
 // Local Variables: //
Index: src/libcfa/concurrency/preemption.c
===================================================================
--- src/libcfa/concurrency/preemption.c	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/libcfa/concurrency/preemption.c	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -149,8 +149,10 @@
 	// Disable interrupts by incrementing the counter
 	void disable_interrupts() {
-		TL_GET( preemption_state ).enabled = false;
-		__attribute__((unused)) unsigned short new_val = TL_GET( preemption_state ).disable_count + 1;
-		TL_GET( preemption_state ).disable_count = new_val;
-		verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
+		with( TL_GET( preemption_state ) ) {
+			enabled = false;
+			__attribute__((unused)) unsigned short new_val = disable_count + 1;
+			disable_count = new_val;
+			verify( new_val < 65_000u );              // If this triggers someone is disabling interrupts without enabling them
+		}
 	}
 
@@ -161,14 +163,16 @@
 		thread_desc * thrd = TL_GET( this_thread );	  // Cache the thread now since interrupts can start happening after the atomic add
 
-		unsigned short prev = TL_GET( preemption_state ).disable_count;
-		TL_GET( preemption_state ).disable_count -= 1;
-		verify( prev != 0u );                     // If this triggers someone is enabled already enabled interruptsverify( prev != 0u );
-
-		// Check if we need to prempt the thread because an interrupt was missed
-		if( prev == 1 ) {
-			TL_GET( preemption_state ).enabled = true;
-			if( proc->pending_preemption ) {
-				proc->pending_preemption = false;
-				BlockInternal( thrd );
+		with( TL_GET( preemption_state ) ){
+			unsigned short prev = disable_count;
+			disable_count -= 1;
+			verify( prev != 0u );                     // If this triggers someone is enabling already enabled interrupts
+
+			// Check if we need to preempt the thread because an interrupt was missed
+			if( prev == 1 ) {
+				enabled = true;
+				if( proc->pending_preemption ) {
+					proc->pending_preemption = false;
+					BlockInternal( thrd );
+				}
 			}
 		}
@@ -328,5 +332,5 @@
 	if( !preemption_ready() ) { return; }
 
-	__cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p).\n", this_processor, this_thread);
+	__cfaabi_dbg_print_buffer_decl( " KERNEL: preempting core %p (%p).\n", TL_GET( this_processor ), TL_GET( this_thread ) );
 
 	TL_GET( preemption_state ).in_progress = true;  // Sync flag : prevent recursive calls to the signal handler
Index: src/libcfa/concurrency/thread
===================================================================
--- src/libcfa/concurrency/thread	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/libcfa/concurrency/thread	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -23,7 +23,5 @@
 
 //-----------------------------------------------------------------------------
-// Coroutine trait
-// Anything that implements this trait can be resumed.
-// Anything that is resumed is a coroutine.
+// thread trait
 trait is_thread(dtype T) {
       void ^?{}(T& mutex this);
@@ -52,5 +50,5 @@
 }
 
-//extern thread_local thread_desc * volatile this_thread;
+extern struct cluster * mainCluster;
 
 forall( dtype T | is_thread(T) )
@@ -59,6 +57,19 @@
 //-----------------------------------------------------------------------------
 // Ctors and dtors
-void ?{}(thread_desc& this);
-void ^?{}(thread_desc& this);
+void ?{}(thread_desc & this, const char * const name, struct cluster & cl, void * storage, size_t storageSize );
+void ^?{}(thread_desc & this);
+
+// Convenience constructors: each one forwards to the full constructor above, filling in the
+// arguments it does not take with defaults: name "Anonymous Thread", cluster *mainCluster,
+// storage NULL and size 0.
+static inline void ?{}(thread_desc & this)                                                                  { this{ "Anonymous Thread", *mainCluster, NULL, 0 }; }
+static inline void ?{}(thread_desc & this, size_t stackSize )                                               { this{ "Anonymous Thread", *mainCluster, NULL, stackSize }; }
+static inline void ?{}(thread_desc & this, void * storage, size_t storageSize )                             { this{ "Anonymous Thread", *mainCluster, storage, storageSize }; }
+static inline void ?{}(thread_desc & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, NULL, 0 }; }
+static inline void ?{}(thread_desc & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, NULL, stackSize }; }
+static inline void ?{}(thread_desc & this, struct cluster & cl, void * storage, size_t storageSize )        { this{ "Anonymous Thread", cl, storage, storageSize }; }
+static inline void ?{}(thread_desc & this, const char * const name)                                         { this{ name, *mainCluster, NULL, 0 }; }
+static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl )                   { this{ name, cl, NULL, 0 }; }
+static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, NULL, stackSize }; }
 
 //-----------------------------------------------------------------------------
Index: src/libcfa/concurrency/thread.c
===================================================================
--- src/libcfa/concurrency/thread.c	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/libcfa/concurrency/thread.c	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -30,12 +30,12 @@
 //-----------------------------------------------------------------------------
 // Thread ctors and dtors
-
-void ?{}(thread_desc& this) with( this ) {
-	self_cor{};
-	self_cor.name = "Anonymous Coroutine";
+void ?{}(thread_desc & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
+	self_cor{ name, storage, storageSize };
+	verify(&self_cor);
 	curr_cor = &self_cor;
 	self_mon.owner = &this;
 	self_mon.recursion = 1;
 	self_mon_p = &self_mon;
+	curr_cluster = &cl;
 	next = NULL;
 	__cfaabi_dbg_debug_do(
Index: src/tests/concurrent/examples/.expect/boundedBufferEXT.txt
===================================================================
--- src/tests/concurrent/examples/.expect/boundedBufferEXT.txt	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/tests/concurrent/examples/.expect/boundedBufferEXT.txt	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -1,79 +1,1 @@
-concurrent/examples/boundedBufferEXT.c:39:1 error: No alternatives for function in call to waitfor
-/u/pabuhr/software/cfa-cc/include/cfa/bits/containers.h:170:1 error: candidate function not viable: no mutex parameters
-forall
-  _6573_20_T: sized object type
-  ... with assertions
-    get_next: pointer to function
-    ... with parameters
-      reference to instance of type _6573_20_T (not function type) 
-    ... returning 
-      _retval_get_next: reference to pointer to instance of type _6573_20_T (not function type) 
-      ... with attributes: 
-        Attribute with name: unused
-
-
-
-  lvalue function
-... with parameters
-  this: reference to instance of struct __queue with body 1 
-  ... with parameters
-    instance of type _6573_20_T (not function type) 
-
-  it: pointer to pointer to instance of type _6573_20_T (not function type) 
-... returning 
-  _retval_remove: pointer to instance of type _6573_20_T (not function type) 
-  ... with attributes: 
-    Attribute with name: unused
-
-
-/usr/include/stdio.h:178:1 error: candidate function not viable: no mutex parameters
-lvalue function
-... with parameters
-  __filename: C pointer to const char
-... returning 
-  _retval_remove: signed int
-  ... with attributes: 
-    Attribute with name: unused
-
-
-concurrent/examples/boundedBufferEXT.c:47:1 error: No alternatives for function in call to waitfor
-concurrent/examples/boundedBufferEXT.c:37:1 error: candidate function not viable: too few mutex arguments
-forall
-  _6578_20_T: sized object type
-  ... with assertions
-    ?=?: pointer to function
-    ... with parameters
-      reference to instance of type _6578_20_T (not function type) 
-      instance of type _6578_20_T (not function type) 
-    ... returning 
-      _retval__operator_assign: instance of type _6578_20_T (not function type) 
-      ... with attributes: 
-        Attribute with name: unused
-
-
-    ?{}: pointer to function
-    ... with parameters
-      reference to instance of type _6578_20_T (not function type) 
-    ... returning nothing 
-
-    ?{}: pointer to function
-    ... with parameters
-      reference to instance of type _6578_20_T (not function type) 
-      instance of type _6578_20_T (not function type) 
-    ... returning nothing 
-
-    ^?{}: pointer to function
-    ... with parameters
-      reference to instance of type _6578_20_T (not function type) 
-    ... returning nothing 
-
-
-  lvalue function
-... with parameters
-  buffer: mutex reference to instance of struct Buffer with body 1 
-  ... with parameters
-    instance of type _6578_20_T (not function type) 
-
-  elem: instance of type _6578_20_T (not function type) 
-... returning nothing 
-
+total:400000
Index: src/tests/concurrent/examples/boundedBufferEXT.c
===================================================================
--- src/tests/concurrent/examples/boundedBufferEXT.c	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/tests/concurrent/examples/boundedBufferEXT.c	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -1,8 +1,8 @@
-// 
+//
 // The contents of this file are covered under the licence agreement in the
 // file "LICENCE" distributed with Cforall.
-// 
-// boundedBufferEXT.c -- 
-// 
+//
+// boundedBufferEXT.c --
+//
 // Author           : Peter A. Buhr
 // Created On       : Wed Apr 18 22:52:12 2018
@@ -10,5 +10,5 @@
 // Last Modified On : Fri Apr 20 22:25:14 2018
 // Update Count     : 6
-// 
+//
 
 #include <stdlib>										// random
@@ -39,5 +39,5 @@
 forall( otype T )
 void insert( Buffer(T) & mutex buffer, T elem ) with( buffer ) {
-	if ( count == BufferSize ) waitfor( remove );
+	if ( count == BufferSize ) waitfor( remove, buffer );
 	elements[back] = elem;
 	back = ( back + 1 ) % BufferSize;
@@ -47,5 +47,5 @@
 forall( otype T )
 T remove( Buffer(T) & mutex buffer ) with( buffer ) {
-	if ( count == 0 ) waitfor( insert );
+	if ( count == 0 ) waitfor( insert, buffer );
 	T elem = elements[front];
 	front = ( front + 1 ) % BufferSize;
Index: src/tests/concurrent/thread.c
===================================================================
--- src/tests/concurrent/thread.c	(revision 9997feea08f61a0a83b01b685254ad9d8e8cd771)
+++ src/tests/concurrent/thread.c	(revision 534d84ecf33237664da77b0cdc43872095a15533)
@@ -7,6 +7,6 @@
 thread Second { semaphore* lock; };
 
-void ?{}( First & this, semaphore & lock ) { this.lock = &lock; }
-void ?{}( Second & this, semaphore & lock ) { this.lock = &lock; }
+void ?{}( First  & this, semaphore & lock ) { ((thread&)this){"Thread 1"}; this.lock = &lock; } // construct the thread part with the name "Thread 1" (presumably via the thread_desc name ctor - confirm), then keep a pointer to lock
+void ?{}( Second & this, semaphore & lock ) { ((thread&)this){"Thread 2"}; this.lock = &lock; } // same as First, but the thread is named "Thread 2"
 
 void main(First& this) {
