Index: src/Concurrency/Keywords.cc
===================================================================
--- src/Concurrency/Keywords.cc	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/Concurrency/Keywords.cc	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -17,4 +17,5 @@
 #include "Concurrency/Keywords.h"
 
+#include "SymTab/AddVisit.h"
 #include "SynTree/Declaration.h"
 #include "SynTree/Expression.h"
@@ -29,4 +30,5 @@
 	namespace {
 		const std::list<Label> noLabels;
+		const std::list< Attribute * > noAttributes;
 		Type::StorageClasses noStorage;
 		Type::Qualifiers noQualifiers;
@@ -63,8 +65,24 @@
 	//                                           void main( MyCoroutine * this );
 	//
-	class CoroutineKeyword final : public Mutator {
+	class CoroutineKeyword final : public Visitor {
+	    template< typename Visitor >
+	    friend void SymTab::acceptAndAdd( std::list< Declaration * > &translationUnit, Visitor &visitor );   // friend of this class; presumably so it can read the private decl lists below -- TODO confirm
 	  public:
 
-		static void implement( std::list< Declaration * > & translationUnit ) {}
+		using Visitor::visit;
+		virtual void visit( StructDecl * decl ) override final;
+		// expansion steps: visit() calls handle(), which calls addField() and then addRoutines()
+		void handle( StructDecl * );
+		Declaration * addField( StructDecl * );
+		void addRoutines( StructDecl *, Declaration * );
+		// entry point: runs a fresh CoroutineKeyword over the translation unit through SymTab::acceptAndAdd
+		static void implement( std::list< Declaration * > & translationUnit ) {
+			CoroutineKeyword impl;
+			SymTab::acceptAndAdd( translationUnit, impl );
+		}
+
+	  private:
+		std::list< Declaration * > declsToAdd, declsToAddAfter;   // addRoutines() queues into declsToAddAfter; presumably drained by acceptAndAdd -- TODO confirm
+		StructDecl* coroutine_decl = nullptr;   // set by visit() when it meets the struct named "coroutine_desc"
 	};
 
@@ -97,6 +115,6 @@
 
 		using Visitor::visit;
-		virtual void visit( FunctionDecl *functionDecl ) override final;
-		virtual void visit(   StructDecl *functionDecl ) override final;
+		virtual void visit( FunctionDecl * decl ) override final;
+		virtual void visit(   StructDecl * decl ) override final;
 
 		std::list<DeclarationWithType*> findMutexArgs( FunctionDecl* );
@@ -111,4 +129,5 @@
 	  private:
 	  	StructDecl* monitor_decl = nullptr;
+		StructDecl* guard_decl = nullptr;
 	};
 
@@ -124,4 +143,112 @@
 
 	//=============================================================================================
+	// Coroutine keyword implementation
+	//=============================================================================================
+	void CoroutineKeyword::visit(StructDecl * decl) {
+		if( decl->get_name() == "coroutine_desc" ) {   // the struct literally named "coroutine_desc": remember it for handle()/addField()
+			assert( !coroutine_decl );   // must not have been recorded already
+			coroutine_decl = decl;
+		}
+		else if ( decl->is_coroutine() ) {   // presumably structs written with the coroutine keyword -- TODO confirm
+			handle( decl );
+		}
+		// every other struct is left as is
+	}
+
+	void CoroutineKeyword::handle( StructDecl * decl ) {
+		if( ! decl->has_body() ) return;   // nothing is generated when has_body() is false (presumably a forward declaration)
+		// coroutine_decl is only non-null once visit() has seen the struct named "coroutine_desc"
+		if( !coroutine_decl ) throw SemanticError( "coroutine keyword requires coroutines to be in scope, add #include <coroutine>", decl );
+		// add the "__cor" member, then queue the matching get_coroutine() accessor
+		Declaration * field = addField( decl );
+		addRoutines( decl, field );
+	}
+
+	Declaration * CoroutineKeyword::addField( StructDecl * decl ) {
+		Declaration * cor = new ObjectDecl(   // object declaration for the new member
+			"__cor",   // member name; the generated get_coroutine() body refers to it by this name
+			noStorage,
+			LinkageSpec::Cforall,
+			nullptr,
+			new StructInstType(   // struct instance type referring to coroutine_decl (the struct named "coroutine_desc")
+				noQualifiers,
+				coroutine_decl
+			),
+			nullptr
+		);
+		// append the new declaration to the end of the struct's member list
+		decl->get_members().push_back( cor );
+		// hand the new member back to the caller
+		return cor;
+	}
+
+	void CoroutineKeyword::addRoutines( StructDecl * decl, Declaration * field ) {   // NOTE(review): `field` is not used in this body
+		FunctionType * type = new FunctionType( noQualifiers, false );   // function type of the generated accessor
+		type->get_parameters().push_back(   // single parameter: "this", a pointer to the struct `decl`
+			new ObjectDecl(
+				"this",
+				noStorage,
+				LinkageSpec::Cforall,
+				nullptr,
+				new PointerType(
+					noQualifiers,
+					new StructInstType(
+						noQualifiers,
+						decl
+					)
+				),
+				nullptr
+			)
+		);
+		type->get_returnVals().push_back(   // single return value: "ret", a pointer to the coroutine_decl struct type
+			new ObjectDecl(
+				"ret",
+				noStorage,
+				LinkageSpec::Cforall,
+				nullptr,
+				new PointerType(
+					noQualifiers,
+					new StructInstType(
+						noQualifiers,
+						coroutine_decl
+					)
+				),
+				nullptr
+			)
+		);
+		// body: a single return statement built from the expression tree below
+		CompoundStmt * statement = new CompoundStmt( noLabels );
+		statement->push_back( 
+			new ReturnStmt(
+				noLabels,
+				new AddressExpr(   // address-of the member expression
+					new UntypedMemberExpr(   // member "__cor" of the aggregate expression that follows
+						new NameExpr( "__cor" ),
+						new UntypedExpr(   // "*?" applied to "this" (presumably the dereference operator -- TODO confirm)
+							new NameExpr( "*?" ),
+							{ new NameExpr( "this" ) }
+						)
+					)
+				)
+			)
+		);
+		// function declaration "get_coroutine" (Type::Static, Type::Inline) using the type and body built above
+		FunctionDecl * get_decl = new FunctionDecl(
+			"get_coroutine",
+			Type::Static,
+			LinkageSpec::Cforall,
+			type,
+			statement,
+			noAttributes,
+			Type::Inline
+		);
+		// queued in declsToAddAfter; presumably emitted after the struct by SymTab::acceptAndAdd -- TODO confirm
+		declsToAddAfter.push_back( get_decl );
+		// NOTE(review): assumed to give the new declaration its unique id -- confirm against FunctionDecl
+		get_decl->fixUniqueId();
+	}
+	
+
+	//=============================================================================================
 	// Mutex keyword implementation
 	//=============================================================================================
@@ -137,5 +264,7 @@
 		if( ! body ) return;
 
-		assert(monitor_decl);
+		if( !monitor_decl ) throw SemanticError( "mutex keyword requires monitors to be in scope, add #include <monitor>", decl );
+		if( !guard_decl ) throw SemanticError( "mutex keyword requires monitors to be in scope, add #include <monitor>", decl );
+
 		addStatments( body, mutexArgs );
 	}
@@ -146,4 +275,8 @@
 			monitor_decl = decl;
 		}
+		else if( decl->get_name() == "monitor_guard_t" ) {
+			assert( !guard_decl );
+			guard_decl = decl;
+		}
 	}
 
@@ -175,9 +308,8 @@
 
 		//Make sure that typed isn't mutex
-		if( ! base->get_mutex() ) throw SemanticError( "mutex keyword may only appear once per argument ", arg );
+		if( base->get_mutex() ) throw SemanticError( "mutex keyword may only appear once per argument ", arg );
 	}
 
 	void MutexKeyword::addStatments( CompoundStmt * body, const std::list<DeclarationWithType * > & args ) {
-
 		ObjectDecl * monitors = new ObjectDecl(
 			"__monitors",
@@ -199,8 +331,10 @@
 			),
 			new ListInit(
-				map_range < std::list<Initializer*> > ( args, [](DeclarationWithType * var ){
+				map_range < std::list<Initializer*> > ( args, [this](DeclarationWithType * var ){
+					Type * type = var->get_type()->clone();
+					type->set_mutex( false );
 					return new SingleInit( new UntypedExpr(
 						new NameExpr( "get_monitor" ),
-						{  new VariableExpr( var ) }
+						{  new CastExpr( new VariableExpr( var ), type ) }
 					) );
 				})
@@ -218,5 +352,5 @@
 				new StructInstType(
 					noQualifiers,
-					"monitor_guard_t"
+					guard_decl
 				),
 				new ListInit(
@@ -224,10 +358,12 @@
 						new SingleInit( new VariableExpr( monitors ) ),
 						new SingleInit( new ConstantExpr( Constant::from_ulong( args.size() ) ) )
-					}
+					},
+					noDesignators,
+					true
 				)
 			))
 		);
 
-		//monitor_desc * __monitors[] = { a, b };
+		//monitor_desc * __monitors[] = { get_monitor(a), get_monitor(b) };
 		body->push_front( new DeclStmt( noLabels, monitors) );
 	}
Index: src/InitTweak/FixInit.cc
===================================================================
--- src/InitTweak/FixInit.cc	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/InitTweak/FixInit.cc	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -534,5 +534,5 @@
 			} else {
 				// expr isn't a call expr, so create a new temporary variable to use to hold the value of the unique expression
-				unqExpr->set_object( new ObjectDecl( toString("_unq_expr_", unqExpr->get_id()), Type::StorageClasses(), LinkageSpec::C, nullptr, unqExpr->get_result()->clone(), nullptr ) );
+				unqExpr->set_object( new ObjectDecl( toString("_unq", unqExpr->get_id()), Type::StorageClasses(), LinkageSpec::C, nullptr, unqExpr->get_result()->clone(), nullptr ) );
 				unqExpr->set_var( new VariableExpr( unqExpr->get_object() ) );
 			}
@@ -764,8 +764,17 @@
 						}
 					} else {
-						stmtsToAddAfter.push_back( ctor );
+						ImplicitCtorDtorStmt * implicit = safe_dynamic_cast< ImplicitCtorDtorStmt * > ( ctor );
+						ExprStmt * ctorStmt = dynamic_cast< ExprStmt * >( implicit->get_callStmt() );
+						ApplicationExpr * ctorCall = nullptr;
+						if ( ctorStmt && (ctorCall = isIntrinsicCallExpr( ctorStmt->get_expr() )) && ctorCall->get_args().size() == 2 ) {
+							// clean up intrinsic copy constructor calls by making them into SingleInits
+							objDecl->set_init( new SingleInit( ctorCall->get_args().back() ) );
+							ctorCall->get_args().pop_back();
+						} else {
+							stmtsToAddAfter.push_back( ctor );
+							objDecl->set_init( NULL );
+							ctorInit->set_ctor( NULL );
+						}
 					} // if
-					objDecl->set_init( NULL );
-					ctorInit->set_ctor( NULL );
 				} else if ( Initializer * init = ctorInit->get_init() ) {
 					objDecl->set_init( init );
Index: src/Parser/ExpressionNode.cc
===================================================================
--- src/Parser/ExpressionNode.cc	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/Parser/ExpressionNode.cc	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -163,5 +163,5 @@
 ConstantExpr *build_constantStr( const std::string & str ) {
 	// string should probably be a primitive type
-	ArrayType *at = new ArrayType( emptyQualifiers, new BasicType( emptyQualifiers, BasicType::Char ),
+	ArrayType *at = new ArrayType( emptyQualifiers, new BasicType( Type::Qualifiers( Type::Const ), BasicType::Char ),
 				new ConstantExpr( Constant( new BasicType( emptyQualifiers, BasicType::UnsignedInt ),
 											toString( str.size()+1-2 ) ) ),  // +1 for '\0' and -2 for '"'
Index: src/Parser/lex.ll
===================================================================
--- src/Parser/lex.ll	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/Parser/lex.ll	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -202,5 +202,5 @@
 __const__		{ KEYWORD_RETURN(CONST); }				// GCC
 continue		{ KEYWORD_RETURN(CONTINUE); }
-_Coroutine		{ KEYWORD_RETURN(COROUTINE); }			// CFA
+coroutine		{ KEYWORD_RETURN(COROUTINE); }			// CFA
 default			{ KEYWORD_RETURN(DEFAULT); }
 disable			{ KEYWORD_RETURN(DISABLE); }			// CFA
Index: src/SynTree/Type.h
===================================================================
--- src/SynTree/Type.h	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/SynTree/Type.h	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -117,6 +117,8 @@
 		bool operator!=( Qualifiers other ) const { return (val & Mask) != (other.val & Mask); }
 		bool operator<=( Qualifiers other ) const {
-			return is_const <= other.is_const && is_volatile <= other.is_volatile &&
-				is_mutex >= other.is_mutex && is_atomic == other.is_atomic;
+			return is_const    <= other.is_const        //Any non-const converts to const without cost
+					&& is_volatile <= other.is_volatile     //Any non-volatile converts to volatile without cost
+					&& is_mutex    >= other.is_mutex        //Any mutex converts to non-mutex without cost
+					&& is_atomic   == other.is_atomic;      //No conversion from atomic to non atomic is free
 		}
 		bool operator<( Qualifiers other ) const { return *this != other && *this <= other; }
@@ -155,4 +157,5 @@
 	virtual Type * getComponent( unsigned i ) { assertf( size() == 1 && i == 0, "Type::getComponent was called with size %d and index %d\n", size(), i ); return this; }
 
+	/// return type without outer pointers and arrays
 	Type *stripDeclarator();
 
Index: src/Tuples/TupleExpansion.cc
===================================================================
--- src/Tuples/TupleExpansion.cc	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/Tuples/TupleExpansion.cc	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -194,5 +194,5 @@
 			}
 			BasicType * boolType = new BasicType( Type::Qualifiers(), BasicType::Bool );
-			ObjectDecl * finished = new ObjectDecl( toString( "_unq_expr_finished_", id ), Type::StorageClasses(), LinkageSpec::Cforall, nullptr, new BasicType( Type::Qualifiers(), BasicType::Bool ), new SingleInit( new ConstantExpr( Constant( boolType->clone(), "0" ) ), noDesignators ) );
+			ObjectDecl * finished = new ObjectDecl( toString( "_unq", id, "_finished_" ), Type::StorageClasses(), LinkageSpec::Cforall, nullptr, new BasicType( Type::Qualifiers(), BasicType::Bool ), new SingleInit( new ConstantExpr( Constant( boolType->clone(), "0" ) ), noDesignators ) );
 			addDeclaration( finished );
 			// (finished ? _unq_expr_N : (_unq_expr_N = <unqExpr->get_expr()>, finished = 1, _unq_expr_N))
@@ -225,5 +225,5 @@
 		if ( ! typeMap.count( tupleSize ) ) {
 			// generate struct type to replace tuple type based on the number of components in the tuple
-			StructDecl * decl = new StructDecl( toString( "_tuple_type_", tupleSize  ) );
+			StructDecl * decl = new StructDecl( toString( "_tuple", tupleSize, "_" ) );
 			decl->set_body( true );
 			for ( size_t i = 0; i < tupleSize; ++i ) {
Index: src/benchmark/CorCtxSwitch.c
===================================================================
--- src/benchmark/CorCtxSwitch.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/benchmark/CorCtxSwitch.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -24,5 +24,5 @@
 
 struct GreatSuspender {
-	coroutine_desc c;
+	coroutine_desc __cor;
 };
 
Index: src/benchmark/bench.c
===================================================================
--- src/benchmark/bench.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/benchmark/bench.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -86,5 +86,5 @@
 //=======================================
 
-struct CoroutineDummy { coroutine_desc c; };
+struct CoroutineDummy { coroutine_desc __cor; };
 DECL_COROUTINE(CoroutineDummy);
 void main(CoroutineDummy * this) {}
@@ -119,5 +119,5 @@
 struct CoroutineResume {
     int N;
-    coroutine_desc c;
+    coroutine_desc __cor;
 };
 
@@ -150,5 +150,5 @@
 //=======================================
 
-struct ThreadDummy { thread_desc t; };
+struct ThreadDummy { thread_desc __thrd; };
 DECL_THREAD(ThreadDummy);
 void main(ThreadDummy * this) {}
@@ -180,5 +180,5 @@
     int N;
     long long result;
-    thread_desc t;
+    thread_desc __thrd;
 };
 
Index: src/benchmark/csv-data.c
===================================================================
--- src/benchmark/csv-data.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/benchmark/csv-data.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -26,5 +26,5 @@
 
 struct GreatSuspender {
-	coroutine_desc c;
+	coroutine_desc __cor;
 };
 
Index: src/driver/Makefile.am
===================================================================
--- src/driver/Makefile.am	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/driver/Makefile.am	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -32,4 +32,5 @@
 
 install-exec-hook:
+	@test -z "$(CFA_BINDIR)" || $(MKDIR_P) "$(CFA_BINDIR)"
 	@echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) cfa '$(CFA_BINDIR)/$(CFA_NAME)'"; \
 	$(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) cfa $(CFA_BINDIR)/$(CFA_NAME) || exit $$?
Index: src/driver/Makefile.in
===================================================================
--- src/driver/Makefile.in	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/driver/Makefile.in	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -530,4 +530,5 @@
 
 install-exec-hook:
+	@test -z "$(CFA_BINDIR)" || $(MKDIR_P) "$(CFA_BINDIR)"
 	@echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) cfa '$(CFA_BINDIR)/$(CFA_NAME)'"; \
 	$(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) cfa $(CFA_BINDIR)/$(CFA_NAME) || exit $$?
Index: src/examples/multicore.c
===================================================================
--- src/examples/multicore.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/examples/multicore.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -2,5 +2,5 @@
 #include <thread>
 
-struct MyThread { thread_desc t; };
+struct MyThread { thread_desc __thrd; };
 
 DECL_THREAD(MyThread);
Index: src/libcfa/concurrency/coroutine
===================================================================
--- src/libcfa/concurrency/coroutine	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/libcfa/concurrency/coroutine	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -30,5 +30,5 @@
 };
 
-#define DECL_COROUTINE(X) static inline coroutine_desc* get_coroutine(X* this) { return &this->c; } void main(X* this)
+#define DECL_COROUTINE(X) static inline coroutine_desc* get_coroutine(X* this) { return &this->__cor; } void main(X* this)
 
 //-----------------------------------------------------------------------------
Index: src/libcfa/concurrency/invoke.c
===================================================================
--- src/libcfa/concurrency/invoke.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/libcfa/concurrency/invoke.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -29,5 +29,5 @@
 
 extern void __suspend_internal(void);
-extern void __thread_signal_termination(struct thread_desc*);
+extern void __leave_monitor_desc( struct monitor_desc * this );
 
 void CtxInvokeCoroutine(
@@ -56,4 +56,5 @@
 
 void CtxInvokeThread(
+      void (*dtor)(void *), 
       void (*main)(void *), 
       struct thread_desc *(*get_thread)(void *), 
@@ -63,5 +64,6 @@
 
       struct thread_desc* thrd = get_thread( this );
-      struct coroutine_desc* cor = &thrd->c;
+      struct coroutine_desc* cor = &thrd->cor;
+      struct monitor_desc* mon = &thrd->mon;
       cor->state = Active;
 
@@ -69,5 +71,5 @@
       main( this );
 
-      __thread_signal_termination(thrd);
+      __leave_monitor_desc( mon );
 
       //Final suspend, should never return
@@ -91,7 +93,7 @@
 	struct FakeStack {
 	    void *fixedRegisters[3];		  	// fixed registers ebx, edi, esi (popped on 1st uSwitch, values unimportant)
-	    uint32_t mxcr;                              // SSE Status and Control bits (control bits are preserved across function calls)
-            uint16_t fcw;                               // X97 FPU control word (preserved across function calls)
-	    void *rturn;                                // where to go on return from uSwitch
+	    uint32_t mxcr;                        // SSE Status and Control bits (control bits are preserved across function calls)
+          uint16_t fcw;                         // x87 FPU control word (preserved across function calls)
+	    void *rturn;                          // where to go on return from uSwitch
 	    void *dummyReturn;				// fake return compiler would have pushed on call to uInvoke
 	    void *argument[3];				// for 16-byte ABI, 16-byte alignment starts here
@@ -105,13 +107,15 @@
 	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->argument[0] = this;     // argument to invoke
 	((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->rturn = invoke;
+      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->mxcr = 0x1F80; //Vol. 2A 3-520
+      ((struct FakeStack *)(((struct machine_context_t *)stack->context)->SP))->fcw = 0x037F;  //Vol. 1 8-7 
 
 #elif defined( __x86_64__ )
 
       struct FakeStack {
-            void *fixedRegisters[5];			// fixed registers rbx, r12, r13, r14, r15
-            uint32_t mxcr;                              // SSE Status and Control bits (control bits are preserved across function calls)
-            uint16_t fcw;                               // X97 FPU control word (preserved across function calls)
-            void *rturn;                                // where to go on return from uSwitch
-            void *dummyReturn;				// NULL return address to provide proper alignment
+            void *fixedRegisters[5];            // fixed registers rbx, r12, r13, r14, r15
+            uint32_t mxcr;                      // SSE Status and Control bits (control bits are preserved across function calls)
+            uint16_t fcw;                       // x87 FPU control word (preserved across function calls)
+            void *rturn;                        // where to go on return from uSwitch
+            void *dummyReturn;                  // NULL return address to provide proper alignment
       };
 
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/libcfa/concurrency/invoke.h	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -28,5 +28,4 @@
       #define unlikely(x)    __builtin_expect(!!(x), 0)
       #define thread_local _Thread_local
-      #define SCHEDULER_CAPACITY 10
 
       struct spinlock {
@@ -39,10 +38,4 @@
       };
 
-      struct signal_once {
-            volatile bool condition;
-            struct spinlock lock;
-            struct simple_thread_list blocked;
-      };
-
       #ifdef __CFORALL__
       extern "Cforall" {
@@ -53,18 +46,15 @@
             void ?{}(spinlock * this);
             void ^?{}(spinlock * this);
-
-            void ?{}(signal_once * this);
-            void ^?{}(signal_once * this);
       }
       #endif
 
       struct coStack_t {
-            unsigned int size;		      // size of stack
-            void *storage;			      // pointer to stack
-            void *limit;			      // stack grows towards stack limit
-            void *base;				      // base of stack
-            void *context;			      // address of cfa_context_t
-            void *top;				      // address of top of storage
-            bool userStack;	
+            unsigned int size;                  // size of stack
+            void *storage;                      // pointer to stack
+            void *limit;                        // stack grows towards stack limit
+            void *base;                         // base of stack
+            void *context;                      // address of cfa_context_t
+            void *top;                          // address of top of storage
+            bool userStack;                     // whether or not the user allocated the stack
       };
 
@@ -72,16 +62,23 @@
 
       struct coroutine_desc {
-            struct coStack_t stack;
-            const char *name;			      // textual name for coroutine/task, initialized by uC++ generated code
-            int errno_;				      // copy of global UNIX variable errno
-            enum coroutine_state state;	      // current execution status for coroutine
-            struct coroutine_desc *starter;	      // first coroutine to resume this one
-            struct coroutine_desc *last;		      // last coroutine to resume this one
+            struct coStack_t stack;             // stack information of the coroutine
+            const char *name;                   // textual name for coroutine/task, initialized by uC++ generated code
+            int errno_;                         // copy of global UNIX variable errno
+            enum coroutine_state state;         // current execution status for coroutine
+            struct coroutine_desc *starter;     // first coroutine to resume this one
+            struct coroutine_desc *last;	      // last coroutine to resume this one
+      };
+
+      struct monitor_desc {
+            struct spinlock lock;
+            struct thread_desc * owner;
+            struct simple_thread_list entry_queue;
+            unsigned int recursion;
       };
 
       struct thread_desc {
-            struct coroutine_desc c;                 // coroutine body used to store context
-            struct signal_once terminated;      // indicate if execuation state is not halted
-            struct thread_desc * next;               // instrusive link field for threads
+            struct coroutine_desc cor;          // coroutine body used to store context
+            struct monitor_desc mon;            // monitor body used for mutual exclusion
+            struct thread_desc * next;          // intrusive link field for threads
       };
 
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/libcfa/concurrency/kernel	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -30,4 +30,13 @@
 void lock( spinlock * );
 void unlock( spinlock * );
+
+struct signal_once {
+	volatile bool condition;
+	struct spinlock lock;
+	struct simple_thread_list blocked;
+};
+
+void ?{}(signal_once * this);
+void ^?{}(signal_once * this);
 
 void wait( signal_once * );
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/libcfa/concurrency/kernel.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -107,5 +107,5 @@
 
 void ?{}( thread_desc * this, current_stack_info_t * info) {
-	(&this->c){ info };
+	(&this->cor){ info };
 }
 
@@ -113,5 +113,5 @@
 // Processor coroutine
 void ?{}(processorCtx_t * this, processor * proc) {
-	(&this->c){};
+	(&this->__cor){};
 	this->proc = proc;
 	proc->runner = this;
@@ -119,5 +119,5 @@
 
 void ?{}(processorCtx_t * this, processor * proc, current_stack_info_t * info) {
-	(&this->c){ info };
+	(&this->__cor){ info };
 	this->proc = proc;
 	proc->runner = this;
@@ -255,8 +255,8 @@
 	processorCtx_t proc_cor_storage = { proc, &info };
 
-	LIB_DEBUG_PRINTF("Coroutine : created stack %p\n", proc_cor_storage.c.stack.base);
+	LIB_DEBUG_PRINTF("Coroutine : created stack %p\n", proc_cor_storage.__cor.stack.base);
 
 	//Set global state
-	proc->current_coroutine = &proc->runner->c;
+	proc->current_coroutine = &proc->runner->__cor;
 	proc->current_thread = NULL;
 
@@ -268,7 +268,7 @@
 	// back to here. Instead directly call the main since we already are on the 
 	// appropriate stack.
-	proc_cor_storage.c.state = Active;
+	proc_cor_storage.__cor.state = Active;
       main( &proc_cor_storage );
-      proc_cor_storage.c.state = Halted;
+      proc_cor_storage.__cor.state = Halted;
 
 	// Main routine of the core returned, the core is now fully terminated
@@ -359,5 +359,5 @@
 	this_processor = systemProcessor;
 	this_processor->current_thread = mainThread;
-	this_processor->current_coroutine = &mainThread->c;
+	this_processor->current_coroutine = &mainThread->cor;
 
 	// SKULLDUGGERY: Force a context switch to the system processor to set the main thread's context to the current UNIX
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/libcfa/concurrency/kernel_private.h	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -35,5 +35,5 @@
 struct processorCtx_t {
 	processor * proc;
-	coroutine_desc c;
+	coroutine_desc __cor;
 };
 
Index: src/libcfa/concurrency/monitor
===================================================================
--- src/libcfa/concurrency/monitor	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/libcfa/concurrency/monitor	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -22,19 +22,8 @@
 #include "stdlib"
 
-struct monitor_desc {
-	spinlock lock;
-	thread_desc * owner;
-	simple_thread_list entry_queue;
-	unsigned int recursion;
-};
-
 static inline void ?{}(monitor_desc * this) {
 	this->owner = 0;
 	this->recursion = 0;
 }
-
-//Basic entering routine
-void enter(monitor_desc *);
-void leave(monitor_desc *);
 
 //Array entering routine
@@ -49,10 +38,4 @@
 static inline int ?<?(monitor_desc* lhs, monitor_desc* rhs) {
 	return ((intptr_t)lhs) < ((intptr_t)rhs);
-}
-
-static inline void ?{}( monitor_guard_t * this, monitor_desc ** m ) {
-	this->m = m;
-	this->count = 1;
-	enter( *this->m );
 }
 
Index: src/libcfa/concurrency/monitor.c
===================================================================
--- src/libcfa/concurrency/monitor.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/libcfa/concurrency/monitor.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -19,54 +19,56 @@
 #include "kernel_private.h"
 
-void enter(monitor_desc * this) {
-	lock( &this->lock );
-	thread_desc * thrd = this_thread();
+extern "C" {
+	void __enter_monitor_desc(monitor_desc * this) {
+		lock( &this->lock );
+		thread_desc * thrd = this_thread();
 
-	if( !this->owner ) {
-		//No one has the monitor, just take it
-		this->owner = thrd;
-		this->recursion = 1;
-	}
-	else if( this->owner == thrd) {
-		//We already have the monitor, just not how many times we took it
-		assert( this->recursion > 0 );
-		this->recursion += 1;
-	}
-	else {
-		//Some one else has the monitor, wait in line for it
-		append( &this->entry_queue, thrd );
-		ScheduleInternal( &this->lock );
+		if( !this->owner ) {
+			//No one has the monitor, just take it
+			this->owner = thrd;
+			this->recursion = 1;
+		}
+		else if( this->owner == thrd) {
+			//We already have the monitor, just note how many times we took it
+			assert( this->recursion > 0 );
+			this->recursion += 1;
+		}
+		else {
+			//Some one else has the monitor, wait in line for it
+			append( &this->entry_queue, thrd );
+			ScheduleInternal( &this->lock );
 
-		//ScheduleInternal will unlock spinlock, no need to unlock ourselves
-		return; 
+			//ScheduleInternal will unlock spinlock, no need to unlock ourselves
+			return; 
+		}
+
+		unlock( &this->lock );
 	}
 
-	unlock( &this->lock );
-}
+	void __leave_monitor_desc(monitor_desc * this) {
+		lock( &this->lock );
 
-void leave(monitor_desc * this) {
-	lock( &this->lock );
+		thread_desc * thrd = this_thread();
+		assert( thrd == this->owner );
 
-	thread_desc * thrd = this_thread();
-	assert( thrd == this->owner );
+		//Leaving a recursion level, decrement the counter
+		this->recursion -= 1;
 
-	//Leaving a recursion level, decrement the counter
-	this->recursion -= 1;
+		//If we left the last level of recursion it means we are changing who owns the monitor
+		thread_desc * new_owner = 0;
+		if( this->recursion == 0) {
+			//Get the next thread in the list
+			new_owner = this->owner = pop_head( &this->entry_queue );
 
-	//If we left the last level of recursion it means we are changing who owns the monitor
-	thread_desc * new_owner = 0;
-	if( this->recursion == 0) {
-		//Get the next thread in the list
-		new_owner = this->owner = pop_head( &this->entry_queue );
+			//We are passing the monitor to someone else, which means recursion level is not 0
+			this->recursion = new_owner ? 1 : 0;
+		}	
 
-		//We are passing the monitor to someone else, which means recursion level is not 0
-		this->recursion = new_owner ? 1 : 0;
-	}	
+		unlock( &this->lock );
 
-	unlock( &this->lock );
-
-	//If we have a new owner, we need to wake-up the thread
-	if( new_owner ) {
-		ScheduleThread( new_owner );
+		//If we have a new owner, we need to wake-up the thread
+		if( new_owner ) {
+			ScheduleThread( new_owner );
+		}
 	}
 }
@@ -74,6 +76,5 @@
 void enter(monitor_desc ** monitors, int count) {
 	for(int i = 0; i < count; i++) {
-		// printf("%d\n", i);
-		enter( monitors[i] );
+		__enter_monitor_desc( monitors[i] );
 	}
 }
@@ -81,6 +82,5 @@
 void leave(monitor_desc ** monitors, int count) {
 	for(int i = count - 1; i >= 0; i--) {
-		// printf("%d\n", i);
-		leave( monitors[i] );
+		__leave_monitor_desc( monitors[i] );
 	}
 }
Index: src/libcfa/concurrency/thread
===================================================================
--- src/libcfa/concurrency/thread	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/libcfa/concurrency/thread	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -22,4 +22,5 @@
 
 #include "coroutine"
+#include "monitor"
 
 //-----------------------------------------------------------------------------
@@ -28,17 +29,27 @@
 // Anything that is resumed is a coroutine.
 trait is_thread(dtype T) {
+      void ^?{}(T* mutex this);
       void main(T* this);
       thread_desc* get_thread(T* this);
 };
 
-#define DECL_THREAD(X) thread_desc* get_thread(X* this) { return &this->t; } void main(X* this)
+#define DECL_THREAD(X) thread_desc* get_thread(X* this) { return &this->__thrd; } void main(X* this)
 
 forall( dtype T | is_thread(T) )
 static inline coroutine_desc* get_coroutine(T* this) {
-	return &get_thread(this)->c;
+	return &get_thread(this)->cor;
 }
 
-static inline coroutine_desc* get_coroutine(thread_desc* this) {
-	return &this->c;
+forall( dtype T | is_thread(T) )
+static inline monitor_desc* get_monitor(T * this) {
+	return &get_thread(this)->mon;
+}
+
+static inline coroutine_desc* get_coroutine(thread_desc * this) {
+	return &this->cor;
+}
+
+static inline monitor_desc* get_monitor(thread_desc * this) {
+	return &this->mon;
 }
 
@@ -64,5 +75,5 @@
 void ?{}( scoped(T)* this, P params );
 
-forall( dtype T | sized(T) | is_thread(T) | { void ^?{}(T*); } )
+forall( dtype T | sized(T) | is_thread(T) )
 void ^?{}( scoped(T)* this );
 
Index: src/libcfa/concurrency/thread.c
===================================================================
--- src/libcfa/concurrency/thread.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/libcfa/concurrency/thread.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -35,19 +35,17 @@
 void start( T* this );
 
-forall( dtype T | is_thread(T) )
-void stop( T* this );
-
 //-----------------------------------------------------------------------------
 // Thread ctors and dtors
 
 void ?{}(thread_desc* this) {
-	(&this->c){};
-	this->c.name = "Anonymous Coroutine";
-	(&this->terminated){};
+	(&this->cor){};
+	this->cor.name = "Anonymous Coroutine";
+	this->mon.owner = this;
+	this->mon.recursion = 1;
 	this->next = NULL;
 }
 
 void ^?{}(thread_desc* this) {
-	^(&this->c){};
+	^(&this->cor){};
 }
 
@@ -64,7 +62,6 @@
 }
 
-forall( dtype T | sized(T) | is_thread(T) | { void ^?{}(T*); } )
+forall( dtype T | sized(T) | is_thread(T) )
 void ^?{}( scoped(T)* this ) {
-	stop(&this->handle);
 	^(&this->handle){};
 }
@@ -86,9 +83,4 @@
 
 	ScheduleThread(thrd_h);
-}
-
-forall( dtype T | is_thread(T) )
-void stop( T* this ) {
-	wait( & get_thread(this)->terminated );	
 }
 
@@ -116,14 +108,4 @@
 }
 
-// C Helper to signal the termination of a thread_desc
-// Used in invoke.c
-extern "C" {
-	void __thread_signal_termination( thread_desc * this ) {
-		this->c.state = Halted;
-		LIB_DEBUG_PRINTF("Thread end : %p\n", this);
-		signal( &this->terminated );	
-	}
-}
-
 // Local Variables: //
 // mode: c //
Index: src/libcfa/iostream.c
===================================================================
--- src/libcfa/iostream.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/libcfa/iostream.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -10,6 +10,6 @@
 // Created On       : Wed May 27 17:56:53 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Mar 21 20:58:48 2017
-// Update Count     : 347
+// Last Modified On : Tue Mar 21 22:05:57 2017
+// Update Count     : 348
 //
 
Index: src/main.cc
===================================================================
--- src/main.cc	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/main.cc	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -241,5 +241,5 @@
 		OPTPRINT( "fixNames" )
 		CodeGen::fixNames( translationUnit );
-		OPTPRINT( "tweakInit" )
+		OPTPRINT( "genInit" )
 		InitTweak::genInit( translationUnit );
 		OPTPRINT( "expandMemberTuples" );
Index: src/tests/avltree/avl.h
===================================================================
--- src/tests/avltree/avl.h	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/tests/avltree/avl.h	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -61,5 +61,5 @@
 void ?{}(tree(K, V) *t, K key, V value);
 
-forall(otype K | Comparable(K), otype V)
+forall(otype K, otype V)
 void ^?{}(tree(K, V) * t);
 
Index: src/tests/avltree/avl1.c
===================================================================
--- src/tests/avltree/avl1.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/tests/avltree/avl1.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -12,5 +12,5 @@
 }
 
-forall(otype K | Comparable(K), otype V)
+forall(otype K, otype V)
 void ^?{}(tree(K, V) * t){
   delete(t->left);
Index: src/tests/avltree/avl_test.c
===================================================================
--- src/tests/avltree/avl_test.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/tests/avltree/avl_test.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -25,5 +25,5 @@
 
   // int -> char *
-  tree(int, char *) * smap = create(-1, "baz");
+  tree(int, const char *) * smap = create(-1, "baz");
   insert(&smap, 12, "bar");
   insert(&smap, 2, "foo");
@@ -35,17 +35,17 @@
   delete(smap);
 
-  // char* -> char*
-  struct c_str { char *str; };  // wraps a C string
-  int ?<?(c_str a, c_str b) {
-    return strcmp(a.str,b.str) < 0;
+  // const char* -> const char*
+  int ?<?(const char * a, const char * b) {
+    return strcmp(a, b) < 0;
   }
-  tree(c_str, char *) * ssmap = create((c_str){"queso"}, "cheese");
-  insert(&ssmap, (c_str){"foo"}, "bar");
-  insert(&ssmap, (c_str){"hello"}, "world");
+
+  tree(const char *, const char *) * ssmap = create("queso", "cheese");
+  insert(&ssmap, "foo", "bar");
+  insert(&ssmap, "hello", "world");
   assert( height(ssmap) == 2 );
 
-  printf("%s %s %s\n", *find(ssmap, (c_str){"hello"}), *find(ssmap, (c_str){"foo"}), *find(ssmap, (c_str){"queso"}));
+  printf("%s %s %s\n", *find(ssmap, "hello"), *find(ssmap, "foo"), *find(ssmap, "queso"));
 
-  remove(&ssmap, (c_str){"foo"});
+  remove(&ssmap, "foo");
   delete(ssmap);
 }
Index: src/tests/completeTypeError.c
===================================================================
--- src/tests/completeTypeError.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/tests/completeTypeError.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -62,5 +62,5 @@
 
 forall(dtype T | sized(T))
-void qux(T * z) {
+void quux(T * z) {
 	// okay
 	bar(z);
Index: src/tests/coroutine.c
===================================================================
--- src/tests/coroutine.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/tests/coroutine.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -2,7 +2,6 @@
 #include <coroutine>
 
-struct Fibonacci {
+coroutine Fibonacci {
       int fn; // used for communication
-      coroutine_desc c;
 };
 
@@ -11,13 +10,5 @@
 }
 
-coroutine_desc* get_coroutine(Fibonacci* this) {
-      return &this->c;
-}
-
 void main(Fibonacci* this) {
-#ifdef MORE_DEBUG
-      sout | "Starting main of coroutine " | this | endl;
-      sout | "Started from " | this->c.last | endl;
-#endif
       int fn1, fn2; 		// retained between resumes
       this->fn = 0;
@@ -45,17 +36,4 @@
 int main() {
       Fibonacci f1, f2;
-#ifdef MORE_DEBUG      
-      Fibonacci *pf1 = &f1, *pf2 = &f2;
-      coroutine_desc *cf1 = &f1.c, *cf2 = &f2.c;
-      covptr_t  *vf1 = vtable(pf1), *vf2 = vtable(pf2);
-      coroutine_desc *cv1 = get_coroutine(vf1), *cv2 = get_coroutine(vf2);
-      Fibonacci *ov1 = (Fibonacci *)get_object(vf1), *ov2 = (Fibonacci *)get_object(vf2);
-
-      sout | "User coroutines : " | pf1 | ' ' | pf2 | endl;
-      sout | "Coroutine data  : " | cf1 | ' ' | cf2 | endl;
-      sout | "Vptr address    : " | vf1 | ' ' | vf2 | endl;
-      sout | "Vptr obj data   : " | ov1 | ' ' | ov2 | endl;
-      sout | "Vptr cor data   : " | cv1 | ' ' | cv2 | endl;
-#endif
       for ( int i = 1; i <= 10; i += 1 ) {
             sout | next(&f1) | ' ' | next(&f2) | endl;
Index: src/tests/dtor-early-exit.c
===================================================================
--- src/tests/dtor-early-exit.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/tests/dtor-early-exit.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -28,6 +28,6 @@
 // don't want these called
 void ?{}(A * a) { assert( false ); }
-void ?{}(A * a, char * name) { a->name = name; sout | "construct " | name | endl; a->x = (int*)malloc(); }
-void ?{}(A * a, char * name, int * ptr) { assert( false ); }
+void ?{}(A * a, const char * name) { a->name = name; sout | "construct " | name | endl; a->x = (int*)malloc(); }
+void ?{}(A * a, const char * name, int * ptr) { assert( false ); }
 
 A ?=?(A * a, A a) {  sout | "assign " | a->name | " " | a.name; return a; }
Index: src/tests/monitor.c
===================================================================
--- src/tests/monitor.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/tests/monitor.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -13,26 +13,31 @@
 }
 
+monitor_desc * get_monitor( global_t * this ) {
+	return &this->m;
+}
+
 static global_t global;
 
-void increment( /*mutex*/ global_t * this ) {
-	monitor_desc * mon = &this->m;
-	monitor_guard_t g1 = { &mon };
-	{
-		monitor_guard_t g2 = { &mon };
-		{
-			monitor_guard_t g3 = { &mon };
-			this->value += 1;
-		}
-	}
+void increment3( global_t * mutex this ) {
+	this->value += 1;
 }
 
-struct MyThread { thread_desc t; };
+void increment2( global_t * mutex this ) {
+	increment3( this );
+}
+
+void increment( global_t * mutex this ) {
+	increment2( this );
+}
+
+struct MyThread { thread_desc __thrd; };
 
 DECL_THREAD(MyThread);
 
 void ?{}( MyThread * this ) {}
+void ^?{}( MyThread * mutex this ) {}
 
 void main( MyThread* this ) {
-	for(int i = 0; i < 1000000; i++) {
+	for(int i = 0; i < 1_000_000; i++) {
 		increment( &global );
 	}
Index: src/tests/multi-monitor.c
===================================================================
--- src/tests/multi-monitor.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/tests/multi-monitor.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -6,14 +6,20 @@
 static int global12, global23, global13;
 
-static monitor_desc m1, m2, m3;
+struct monitor_t {
+	monitor_desc m;
+};
 
-void increment( /*mutex*/ monitor_desc * p1, /*mutex*/ monitor_desc * p2, int * value ) {
-	monitor_desc * mons[] = { p1, p2 };
-	monitor_guard_t g = { mons, 2 };
+monitor_desc * get_monitor( monitor_t * this ) {
+	return &this->m;
+}
+
+static monitor_t m1, m2, m3;
+
+void increment( monitor_t * mutex p1, monitor_t * mutex p2, int * value ) {
 	*value += 1;
 }
 
 struct MyThread { 
-	thread_desc t; 
+	thread_desc __thrd; 
 	int target;
 };
@@ -24,4 +30,6 @@
 	this->target = target;
 }
+
+void ^?{}( MyThread * mutex this ) {}
 
 void main( MyThread* this ) {
Index: src/tests/thread.c
===================================================================
--- src/tests/thread.c	(revision 829c907247aaf3211ea7e8c3d290193a6f2d15d5)
+++ src/tests/thread.c	(revision 87d13cd6f0b6eede5621fb0f5bd5b06911c656fe)
@@ -4,6 +4,6 @@
 #include <thread>
 
-struct First { thread_desc t; signal_once* lock; };
-struct Second { thread_desc t; signal_once* lock; };
+struct First { thread_desc __thrd; signal_once* lock; };
+struct Second { thread_desc __thrd; signal_once* lock; };
 
 DECL_THREAD(First);
@@ -12,4 +12,7 @@
 void ?{}( First * this, signal_once* lock ) { this->lock = lock; }
 void ?{}( Second * this, signal_once* lock ) { this->lock = lock; }
+
+void ^?{}( First  * mutex this ) {}
+void ^?{}( Second * mutex this ) {}
 
 void main(First* this) {
