Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 132fad41d05db7e0bdcbdc1ea3061093bc4cf351)
+++ src/libcfa/concurrency/kernel	(revision 75f3522eb95aed7401634be95556b2b6f1ae5d97)
@@ -9,5 +9,5 @@
 //
 // Author           : Thierry Delisle
-// Created On       : Tue Jan 17 12:27:26 2016
+// Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Thierry Delisle
 // Last Modified On : --
@@ -58,5 +58,4 @@
 void ^?{}(processor * this);
 
-
 //-----------------------------------------------------------------------------
 // Locks
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 132fad41d05db7e0bdcbdc1ea3061093bc4cf351)
+++ src/libcfa/concurrency/kernel.c	(revision 75f3522eb95aed7401634be95556b2b6f1ae5d97)
@@ -9,5 +9,5 @@
 //
 // Author           : Thierry Delisle
-// Created On       : Tue Jan 17 12:27:26 2016
+// Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Thierry Delisle
 // Last Modified On : --
@@ -20,5 +20,5 @@
 
 //Header
-#include "kernel"
+#include "kernel_private.h"
 
 //C Includes
@@ -31,5 +31,4 @@
 //CFA Includes
 #include "libhdr.h"
-#include "threads"
 
 //Private includes
@@ -51,11 +50,4 @@
 //-----------------------------------------------------------------------------
 // Kernel storage
-struct processorCtx_t {
-	processor * proc;
-	coroutine c;
-};
-
-DECL_COROUTINE(processorCtx_t);
-
 #define KERNEL_STORAGE(T,X) static char X##_storage[sizeof(T)]
 
@@ -194,12 +186,7 @@
 }
 
-//-----------------------------------------------------------------------------
-// Processor running routines
-void main(processorCtx_t *);
-thread * nextThread(cluster * this);
-void scheduleInternal(processor * this, thread * dst);
-void spin(processor * this, unsigned int * spin_count);
-void thread_schedule( thread * thrd );
-
+//=============================================================================================
+// Kernel Scheduling logic
+//=============================================================================================
 //Main of the processor contexts
 void main(processorCtx_t * runner) {
@@ -212,12 +199,19 @@
 
 	thread * readyThread = NULL;
-	for( unsigned int spin_count = 0; ! this->terminated; spin_count++ ) {
-		
+	for( unsigned int spin_count = 0; ! this->terminated; spin_count++ ) 
+	{
 		readyThread = nextThread( this->cltr );
 
-		if(readyThread) {
-			scheduleInternal(this, readyThread);
+		if(readyThread) 
+		{
+			runThread(this, readyThread);
+
+			//Some actions need to be taken from the kernel
+			finishRunning(this, readyThread);
+
 			spin_count = 0;
-		} else {
+		} 
+		else 
+		{
 			spin(this, &spin_count);
 		}		
@@ -229,38 +223,26 @@
 }
 
-//Declarations for scheduleInternal
-extern void ThreadCtxSwitch(coroutine * src, coroutine * dst);
-
-// scheduleInternal runs a thread by context switching 
+// runThread runs a thread by context switching 
 // from the processor coroutine to the target thread 
-void scheduleInternal(processor * this, thread * dst) {
+void runThread(processor * this, thread * dst) {
+	coroutine * proc_cor = get_coroutine(this->runner);
+	coroutine * thrd_cor = get_coroutine(dst);
+	
+	//Clear any action left over from the previously run thread
 	this->thread_action = NoAction;
 
-	// coroutine * proc_ctx = get_coroutine(this->ctx);
-	// coroutine * thrd_ctx = get_coroutine(dst);
-
-	// //Update global state
-	// this->current_thread = dst;
-
-	// // Context Switch to the thread
-	// ThreadCtxSwitch(proc_ctx, thrd_ctx);
-	// // when ThreadCtxSwitch returns we are back in the processor coroutine
-
-	coroutine * proc_ctx = get_coroutine(this->runner);
-	coroutine * thrd_ctx = get_coroutine(dst);
-      thrd_ctx->last = proc_ctx;
- 
-      // context switch to specified coroutine
-      // Which is now the current_coroutine
-      // LIB_DEBUG_PRINTF("Kernel : switching to ctx %p (from %p, current %p)\n", thrd_ctx, proc_ctx, this->current_coroutine);
-      this->current_thread = dst;
-      this->current_coroutine = thrd_ctx;
-      CtxSwitch( proc_ctx->stack.context, thrd_ctx->stack.context );
-      this->current_coroutine = proc_ctx;
-      // LIB_DEBUG_PRINTF("Kernel : returned from ctx %p (to %p, current %p)\n", thrd_ctx, proc_ctx, this->current_coroutine);
- 
-      // when CtxSwitch returns we are back in the processor coroutine
+	//Update global state
+	this->current_thread = dst;
+
+	// Context Switch to the thread
+	ThreadCtxSwitch(proc_cor, thrd_cor);
+	// when ThreadCtxSwitch returns we are back in the processor coroutine
+}
+
+// Once a thread has stopped running, the action it requested
+// (e.g. Reschedule) must be executed from the kernel
+void finishRunning(processor * this, thread * thrd) {
 	if(this->thread_action == Reschedule) {
-		thread_schedule( dst );
+		ScheduleThread( thrd );
 	}
 }
@@ -325,5 +307,5 @@
 //-----------------------------------------------------------------------------
 // Scheduler routines
-void thread_schedule( thread * thrd ) {
+void ScheduleThread( thread * thrd ) {
 	assertf( thrd->next == NULL, "Expected null got %p", thrd->next );
 	
@@ -331,4 +313,9 @@
 	append( &systemProcessor->cltr->ready_queue, thrd );
 	spin_unlock( &lock );
+}
+
+void ScheduleInternal() {
+	get_this_processor()->thread_action = Reschedule;
+	suspend();
 }
 
@@ -363,5 +350,5 @@
 	// Add the main thread to the ready queue 
 	// once resume is called on systemProcessor->ctx the mainThread needs to be scheduled like any normal thread
-	thread_schedule(mainThread);
+	ScheduleThread(mainThread);
 
 	//initialize the global state variables
@@ -426,5 +413,5 @@
 	thread * it;
 	while( it = pop_head( &this->blocked) ) {
-		thread_schedule( it );
+		ScheduleThread( it );
 	}
 }
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision 75f3522eb95aed7401634be95556b2b6f1ae5d97)
+++ src/libcfa/concurrency/kernel_private.h	(revision 75f3522eb95aed7401634be95556b2b6f1ae5d97)
@@ -0,0 +1,57 @@
+//                              -*- Mode: CFA -*-
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// kernel_private.h --
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Feb 13 12:27:26 2017
+// Last Modified By : Thierry Delisle
+// Last Modified On : --
+// Update Count     : 0
+//
+
+#ifndef KERNEL_PRIVATE_H
+#define KERNEL_PRIVATE_H
+
+#include "kernel"
+#include "threads"
+
+//-----------------------------------------------------------------------------
+// Scheduler
+void ScheduleThread( thread * );
+void ScheduleInternal();
+thread * nextThread(cluster * this);
+
+//-----------------------------------------------------------------------------
+// Processor
+struct processorCtx_t {
+	processor * proc;
+	coroutine c;
+};
+
+DECL_COROUTINE(processorCtx_t);
+
+void main(processorCtx_t *);
+void runThread(processor * this, thread * dst);
+void finishRunning(processor * this, thread * thrd);
+void spin(processor * this, unsigned int * spin_count);
+
+//-----------------------------------------------------------------------------
+// Threads
+extern "C" {
+      forall(dtype T | is_thread(T))
+      void CtxInvokeThread(T * this);
+}
+
+extern void ThreadCtxSwitch(coroutine * src, coroutine * dst);
+
+#endif //KERNEL_PRIVATE_H
+
+// Local Variables: //
+// mode: c //
+// tab-width: 4 //
+// End: //
Index: src/libcfa/concurrency/threads.c
===================================================================
--- src/libcfa/concurrency/threads.c	(revision 132fad41d05db7e0bdcbdc1ea3061093bc4cf351)
+++ src/libcfa/concurrency/threads.c	(revision 75f3522eb95aed7401634be95556b2b6f1ae5d97)
@@ -17,5 +17,5 @@
 #include "threads"
 
-#include "kernel"
+#include "kernel_private.h"
 #include "libhdr.h"
 
@@ -72,11 +72,4 @@
 //-----------------------------------------------------------------------------
 // Starting and stopping threads
-extern "C" {
-      forall(dtype T | is_thread(T))
-      void CtxInvokeThread(T * this);
-}
-
-extern void thread_schedule( thread * );
-
 forall( dtype T | is_thread(T) )
 void start( T* this ) {
@@ -92,10 +85,5 @@
 	CtxSwitch( thrd_c->last->stack.context, thrd_c->stack.context );
 
-	fenv_t envp;
-	fegetenv( &envp );
-	LIB_DEBUG_PRINTF("Thread : mxcsr %x\n", envp.__mxcsr);
-	LIB_DEBUG_PRINTF("Thread started : %p (t %p, c %p)\n", this, thrd_c, thrd_h);
-
-	thread_schedule(thrd_h);
+	ScheduleThread(thrd_h);
 }
 
@@ -109,22 +97,23 @@
 
 void yield( void ) {
-	get_this_processor()->thread_action = Reschedule;
-	suspend();
+	ScheduleInternal();
 }
 
 void ThreadCtxSwitch(coroutine* src, coroutine* dst) {
+	// mark the current coroutine inactive and the target coroutine active
+	src->state = Inactive;
+	dst->state = Active;
+
+	//update the last resumer
 	dst->last = src;
 
-	// set state of current coroutine to inactive
-	src->state = Inactive;
-
-	// set new coroutine that task is executing
+	// set new coroutine that the processor is executing
+	// and context switch to it
 	get_this_processor()->current_coroutine = dst;	
-
-	// context switch to specified coroutine
 	CtxSwitch( src->stack.context, dst->stack.context );
-	// when CtxSwitch returns we are back in the src coroutine
+	get_this_processor()->current_coroutine = src;	
 
 	// set state of new coroutine to active
+	dst->state = Inactive;
 	src->state = Active;
 }
