Index: src/libcfa/concurrency/coroutines.c
===================================================================
--- src/libcfa/concurrency/coroutines.c	(revision 75f3522eb95aed7401634be95556b2b6f1ae5d97)
+++ src/libcfa/concurrency/coroutines.c	(revision db6f06a9b651d88b1993663fa278245d8bb77626)
@@ -61,10 +61,5 @@
 
 void ?{}(coroutine* this) {
-	this->name = "Anonymous Coroutine";
-	this->errno_ = 0;
-	this->state = Start;
-      this->notHalted = true;
-	this->starter = NULL;
-	this->last = NULL;
+	this{ "Anonymous Coroutine" };
 }
 
@@ -73,5 +68,5 @@
 	this->errno_ = 0;
 	this->state = Start;
-      this->notHalted = true;
+	this->notHalted = true;
 	this->starter = NULL;
 	this->last = NULL;
@@ -169,6 +164,4 @@
 	this->context = this->base;
 	this->top = (char *)this->context + cxtSize;
-
-	LIB_DEBUG_PRINTF("Coroutine : created stack %p\n", this->base);
 }
 
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision 75f3522eb95aed7401634be95556b2b6f1ae5d97)
+++ src/libcfa/concurrency/invoke.h	(revision db6f06a9b651d88b1993663fa278245d8bb77626)
@@ -30,7 +30,21 @@
       #define SCHEDULER_CAPACITY 10
 
+      struct spinlock {	// minimal busy-wait lock
+            volatile int lock;	// 0 when free (set by the constructor), 1 while held
+      };
+
       struct simple_thread_list {
             struct thread * head;
             struct thread ** tail;
+      };
+
+      // struct simple_lock {
+      // 	struct simple_thread_list blocked;
+      // };
+
+      struct signal_once {	// one-shot event: wait() blocks until signal() has been called
+            volatile bool condition;	// false at construction; set to true by signal()
+            struct spinlock lock;	// held by wait()/signal() while they touch the other fields
+            struct simple_thread_list blocked;	// threads parked in wait() until signal() runs
       };
 
@@ -40,4 +54,13 @@
             void append( struct simple_thread_list *, struct thread * );
             struct thread * pop_head( struct simple_thread_list * );
+
+            // void ?{}(simple_lock * this);
+            // void ^?{}(simple_lock * this);
+
+            void ?{}(spinlock * this);
+            void ^?{}(spinlock * this);
+
+            void ?{}(signal_once * this);
+            void ^?{}(signal_once * this);
       }
       #endif
@@ -60,17 +83,12 @@
             int errno_;				// copy of global UNIX variable errno
             enum coroutine_state state;	// current execution status for coroutine
-            bool notHalted;			// indicate if execuation state is not halted
-
+            bool notHalted;		      // indicate if execution state is not halted
             struct coroutine *starter;	// first coroutine to resume this one
             struct coroutine *last;		// last coroutine to resume this one
       };
 
-      struct simple_lock {
-      	struct simple_thread_list blocked;
-      };
-
       struct thread {
             struct coroutine c;
-            struct simple_lock lock;
+            struct signal_once terminated;	// signalled by __thread_signal_termination(); stop() waits on it
             struct thread * next;
       };
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 75f3522eb95aed7401634be95556b2b6f1ae5d97)
+++ src/libcfa/concurrency/kernel	(revision db6f06a9b651d88b1993663fa278245d8bb77626)
@@ -27,8 +27,20 @@
 
 //-----------------------------------------------------------------------------
+// Locks
+// void lock( simple_lock * );
+// void lock( simple_lock *, spinlock * );
+// void unlock( simple_lock * );
+
+void lock( spinlock * );
+void unlock( spinlock * );
+
+void wait( signal_once * );
+void signal( signal_once * );
+
+//-----------------------------------------------------------------------------
 // Cluster
 struct cluster {
 	simple_thread_list ready_queue;
-	// pthread_spinlock_t lock;
+	spinlock lock;
 };
 
@@ -38,8 +50,16 @@
 //-----------------------------------------------------------------------------
 // Processor
-enum ProcessorAction { 
-	Reschedule,
-	NoAction
+enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule };	// deferred action executed by finishRunning()
+struct FinishAction {
+	FinishOpCode action_code;	// which of the deferred actions to perform
+	thread * thrd;	// thread passed to ScheduleThread() for Schedule / Release_Schedule
+	spinlock * lock;	// lock passed to unlock() for Release / Release_Schedule
 };
+static inline void ?{}(FinishAction * this) { // default state: nothing to do, no thread, no lock
+	this->action_code = No_Action;
+	this->thrd = NULL;
+	this->lock = NULL;
+}
+static inline void ^?{}(FinishAction * this) {}	// nothing to release
 
 struct processor {
@@ -49,7 +69,9 @@
 	thread * current_thread;
 	pthread_t kernel_thread;
-	simple_lock lock;
-	volatile bool terminated;
-	ProcessorAction thread_action;
+	
+	signal_once terminated;
+	volatile bool is_terminated;
+
+	struct FinishAction finish;
 };
 
@@ -57,13 +79,4 @@
 void ?{}(processor * this, cluster * cltr);
 void ^?{}(processor * this);
-
-//-----------------------------------------------------------------------------
-// Locks
-
-void ?{}(simple_lock * this);
-void ^?{}(simple_lock * this);
-
-void lock( simple_lock * );
-void unlock( simple_lock * );
 
 #endif //KERNEL_H
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 75f3522eb95aed7401634be95556b2b6f1ae5d97)
+++ src/libcfa/concurrency/kernel.c	(revision db6f06a9b651d88b1993663fa278245d8bb77626)
@@ -141,6 +141,4 @@
 }
 
-void start(processor * this);
-
 void ?{}(processor * this) {
 	this{ systemCluster };
@@ -151,6 +149,6 @@
 	this->current_coroutine = NULL;
 	this->current_thread = NULL;
-	(&this->lock){};
-	this->terminated = false;
+	(&this->terminated){};
+	this->is_terminated = false;
 
 	start( this );
@@ -161,6 +159,6 @@
 	this->current_coroutine = NULL;
 	this->current_thread = NULL;
-	(&this->lock){};
-	this->terminated = false;
+	(&this->terminated){};
+	this->is_terminated = false;
 
 	this->runner = runner;
@@ -170,8 +168,8 @@
 
 void ^?{}(processor * this) {
-	if( ! this->terminated ) {
+	if( ! this->is_terminated ) {
 		LIB_DEBUG_PRINTF("Kernel : core %p signaling termination\n", this);
-		this->terminated = true;
-		lock( &this->lock );
+		this->is_terminated = true;
+		wait( &this->terminated );
 	}
 }
@@ -194,10 +192,6 @@
 	LIB_DEBUG_PRINTF("Kernel : core %p starting\n", this);
 
-	fenv_t envp;
-	fegetenv( &envp );
-	LIB_DEBUG_PRINTF("Kernel : mxcsr %x\n", envp.__mxcsr);
-
 	thread * readyThread = NULL;
-	for( unsigned int spin_count = 0; ! this->terminated; spin_count++ ) 
+	for( unsigned int spin_count = 0; ! this->is_terminated; spin_count++ ) 
 	{
 		readyThread = nextThread( this->cltr );
@@ -208,5 +202,5 @@
 
 			//Some actions need to be taken from the kernel
-			finishRunning(this, readyThread);
+			finishRunning(this);
 
 			spin_count = 0;
@@ -219,5 +213,5 @@
 
 	LIB_DEBUG_PRINTF("Kernel : core %p unlocking thread\n", this);
-	unlock( &this->lock );
+	signal( &this->terminated );
 	LIB_DEBUG_PRINTF("Kernel : core %p terminated\n", this);
 }
@@ -230,5 +224,5 @@
 	
 	//Reset the terminating actions here
-	this->thread_action = NoAction;
+	this->finish.action_code = No_Action;
 
 	//Update global state
@@ -242,7 +236,17 @@
 // Once a thread has finished running, some of 
 // its final actions must be executed from the kernel
-void finishRunning(processor * this, thread * thrd) {
-	if(this->thread_action == Reschedule) {
-		ScheduleThread( thrd );
+void finishRunning(processor * this) {	// performs the action recorded in this->finish
+	if( this->finish.action_code == Release ) {	// only release the recorded spinlock
+		unlock( this->finish.lock );
+	}
+	else if( this->finish.action_code == Schedule ) {	// only hand the recorded thread to ScheduleThread
+		ScheduleThread( this->finish.thrd );
+	}
+	else if( this->finish.action_code == Release_Schedule ) {	// both, lock released first
+		unlock( this->finish.lock );		
+		ScheduleThread( this->finish.thrd );
+	}
+	else {	// any other code must be No_Action
+		assert(this->finish.action_code == No_Action);
 	}
 }
@@ -310,19 +314,37 @@
 	assertf( thrd->next == NULL, "Expected null got %p", thrd->next );
 	
-	spin_lock( &lock );
+	lock( &systemProcessor->cltr->lock );
 	append( &systemProcessor->cltr->ready_queue, thrd );
-	spin_unlock( &lock );
+	unlock( &systemProcessor->cltr->lock );
+}
+
+thread * nextThread(cluster * this) {	// takes the head of the cluster's ready queue
+	lock( &this->lock );	// the cluster spinlock guards ready_queue
+	thread * head = pop_head( &this->ready_queue );	// presumably NULL when the queue is empty — confirm against pop_head
+	unlock( &this->lock );
+	return head;
 }
 
 void ScheduleInternal() {
-	get_this_processor()->thread_action = Reschedule;
 	suspend();
 }
 
-thread * nextThread(cluster * this) {
-	spin_lock( &lock );
-	thread * head = pop_head( &this->ready_queue );
-	spin_unlock( &lock );
-	return head;
+void ScheduleInternal( spinlock * lock ) {	// record a Release action for `lock`, then suspend
+	get_this_processor()->finish.action_code = Release;
+	get_this_processor()->finish.lock = lock;
+	suspend();
+}
+
+void ScheduleInternal( thread * thrd ) {	// record a Schedule action for `thrd`, then suspend
+	get_this_processor()->finish.action_code = Schedule;
+	get_this_processor()->finish.thrd = thrd;
+	suspend();
+}
+
+void ScheduleInternal( spinlock * lock, thread * thrd ) {	// record a Release_Schedule action for both, then suspend
+	get_this_processor()->finish.action_code = Release_Schedule;
+	get_this_processor()->finish.lock = lock;
+	get_this_processor()->finish.thrd = thrd;
+	suspend();
 }
 
@@ -374,5 +396,5 @@
 	// When its coroutine terminates, it return control to the mainThread
 	// which is currently here
-	systemProcessor->terminated = true;
+	systemProcessor->is_terminated = true;
 	suspend();
 
@@ -393,26 +415,83 @@
 //-----------------------------------------------------------------------------
 // Locks
-void ?{}( simple_lock * this ) {
-	( &this->blocked ){};
-}
-
-void ^?{}( simple_lock * this ) {
-
-}
-
-void lock( simple_lock * this ) {
+// void ?{}( simple_lock * this ) {
+// 	( &this->blocked ){};
+// }
+
+// void ^?{}( simple_lock * this ) {
+
+// }
+
+// void lock( simple_lock * this ) {
+// 	{
+// 		spin_lock( &lock );
+// 		append( &this->blocked, this_thread() );
+// 		spin_unlock( &lock );
+// 	}
+// 	ScheduleInternal();
+// }
+
+// void lock( simple_lock * this, spinlock * to_release ) {
+// 	{
+// 		spin_lock( &lock );
+// 		append( &this->blocked, this_thread() );
+// 		spin_unlock( &lock );
+// 	}
+// 	ScheduleInternal( to_release );
+// 	lock( to_release );
+// }
+
+// void unlock( simple_lock * this ) {
+// 	thread * it;
+// 	while( it = pop_head( &this->blocked) ) {
+// 		ScheduleThread( it );
+// 	}
+// }
+
+void ?{}( spinlock * this ) {	// constructor: the lock starts out free
+	this->lock = 0;
+}
+void ^?{}( spinlock * this ) {	// destructor: nothing to release
+
+}
+
+void lock( spinlock * this ) {	// busy-waits until the spinlock has been acquired
+	for ( unsigned int i = 1;; i += 1 ) {	// NOTE(review): i is incremented but never read
+	  	if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) break;	// plain read first; atomic exchange only when the lock looks free
+	}
+}
+
+void unlock( spinlock * this ) {	// releases the spinlock; no check that the caller holds it
+	__sync_lock_release_4( &this->lock );	// GCC builtin: writes 0 to the lock word with release semantics
+}
+
+void ?{}( signal_once * this ) {	// NOTE(review): lock and blocked are not explicitly constructed here — confirm they are default-constructed
+	this->condition = false;	// not signalled yet
+}
+void ^?{}( signal_once * this ) {	// destructor: nothing to release
+
+}
+
+void wait( signal_once * this ) {	// returns once signal() has been called on this object
+	lock( &this->lock );	// guards condition and blocked
+	if( !this->condition ) {	// not signalled yet: park the calling thread
+		append( &this->blocked, this_thread() );
+		ScheduleInternal( &this->lock );	// suspends; the recorded Release action unlocks this->lock in finishRunning()
+		lock( &this->lock );	// reacquire after resuming so the unlock below is balanced
+	}
+	unlock( &this->lock );
+}
+
+void signal( signal_once * this ) {	// sets the condition and schedules every thread parked in wait()
+	lock( &this->lock );	// guards condition and blocked
 	{
-		spin_lock( &lock );
-		append( &this->blocked, this_thread() );
-		spin_unlock( &lock );
-	}
-	suspend();
-}
-
-void unlock( simple_lock * this ) {
-	thread * it;
-	while( it = pop_head( &this->blocked) ) {
-		ScheduleThread( it );
-	}
+		this->condition = true;	// later wait() calls skip the blocking branch
+
+		thread * it;
+		while( it = pop_head( &this->blocked) ) {	// assignment intended: loop until pop_head yields a false value
+			ScheduleThread( it );
+		}
+	}
+	unlock( &this->lock );
 }
 
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision 75f3522eb95aed7401634be95556b2b6f1ae5d97)
+++ src/libcfa/concurrency/kernel_private.h	(revision db6f06a9b651d88b1993663fa278245d8bb77626)
@@ -24,6 +24,10 @@
 // Scheduler
 void ScheduleThread( thread * );
+thread * nextThread(cluster * this);
+
 void ScheduleInternal();
-thread * nextThread(cluster * this);
+void ScheduleInternal(spinlock * lock);
+void ScheduleInternal(thread * thrd);
+void ScheduleInternal(spinlock * lock, thread * thrd);
 
 //-----------------------------------------------------------------------------
@@ -37,6 +41,7 @@
 
 void main(processorCtx_t *);
+void start(processor * this);
 void runThread(processor * this, thread * dst);
-void finishRunning(processor * this, thread * thrd);
+void finishRunning(processor * this);
 void spin(processor * this, unsigned int * spin_count);
 
Index: src/libcfa/concurrency/threads.c
===================================================================
--- src/libcfa/concurrency/threads.c	(revision 75f3522eb95aed7401634be95556b2b6f1ae5d97)
+++ src/libcfa/concurrency/threads.c	(revision db6f06a9b651d88b1993663fa278245d8bb77626)
@@ -44,5 +44,5 @@
 	(&this->c){};
 	this->c.name = "Anonymous Coroutine";
-	(&this->lock){};
+	(&this->terminated){};
 	this->next = NULL;
 }
@@ -90,12 +90,9 @@
 forall( dtype T | is_thread(T) )
 void stop( T* this ) {
-	thread*  thrd = get_thread(this);
-	if( thrd->c.notHalted ) {
-		lock( &thrd->lock );
-	}
+	wait( & get_thread(this)->terminated );	
 }
 
 void yield( void ) {
-	ScheduleInternal();
+	ScheduleInternal( get_this_processor()->current_thread );
 }
 
@@ -124,6 +121,6 @@
 	void __thread_signal_termination( thread * this ) {
 		this->c.state = Halt;
-		this->c.notHalted = false;
-		unlock( &this->lock );
+		LIB_DEBUG_PRINTF("Thread end : %p\n", this);
+		signal( &this->terminated );	
 	}
 }
