Index: src/libcfa/concurrency/CtxSwitch-i386.S
===================================================================
--- src/libcfa/concurrency/CtxSwitch-i386.S	(revision 53a6c2a24c63e4878fd4222e11fef110ba3ee85a)
+++ src/libcfa/concurrency/CtxSwitch-i386.S	(revision bf2438c144672ed3463924e84dfb66424384ec03)
@@ -98,13 +98,4 @@
 	ret
 
-.text
-	.align 2
-.globl	CtxGet
-CtxGet:
-	movl %esp,SP_OFFSET(%eax)
-	movl %ebp,FP_OFFSET(%eax)
-
-	ret
-
 // Local Variables: //
 // compile-command: "make install" //
Index: src/libcfa/concurrency/CtxSwitch-x86_64.S
===================================================================
--- src/libcfa/concurrency/CtxSwitch-x86_64.S	(revision 53a6c2a24c63e4878fd4222e11fef110ba3ee85a)
+++ src/libcfa/concurrency/CtxSwitch-x86_64.S	(revision bf2438c144672ed3463924e84dfb66424384ec03)
@@ -1,3 +1,3 @@
-//                               -*- Mode: Asm -*- 
+//                               -*- Mode: Asm -*-
 //
 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
@@ -18,13 +18,13 @@
 // Free Software  Foundation; either  version 2.1 of  the License, or  (at your
 // option) any later version.
-// 
+//
 // This library is distributed in the  hope that it will be useful, but WITHOUT
 // ANY  WARRANTY;  without even  the  implied  warranty  of MERCHANTABILITY  or
 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 // for more details.
-// 
+//
 // You should  have received a  copy of the  GNU Lesser General  Public License
 // along  with this library.
-// 
+//
 
 // This context switch routine depends on the fact that the stack of a new
@@ -93,15 +93,6 @@
 .globl	CtxInvokeStub
 CtxInvokeStub:
-	movq %rbx, %rdi 
+	movq %rbx, %rdi
 	jmp *%r12
-
-.text
-	.align 2
-.globl	CtxGet
-CtxGet:
-	movq %rsp,SP_OFFSET(%rdi)
-	movq %rbp,FP_OFFSET(%rdi)
-
-	ret
 
 // Local Variables: //
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision 53a6c2a24c63e4878fd4222e11fef110ba3ee85a)
+++ src/libcfa/concurrency/invoke.h	(revision bf2438c144672ed3463924e84dfb66424384ec03)
@@ -99,5 +99,5 @@
 #ifndef _INVOKE_PRIVATE_H_
 #define _INVOKE_PRIVATE_H_
-      
+
       struct machine_context_t {
             void *SP;
@@ -109,5 +109,22 @@
       extern void CtxInvokeStub( void );
       void CtxSwitch( void * from, void * to ) asm ("CtxSwitch");
-      void CtxGet( void * this ) asm ("CtxGet");
+
+      // CtxGet( ctx ) : store the current stack pointer into ctx.SP and the current
+      // frame pointer into ctx.FP. ctx must be a modifiable lvalue with SP and FP members.
+      // This is a macro, so the registers are read at the point of use rather than inside
+      // a callee. The asm is volatile so the compiler cannot delete, hoist or merge the reads.
+      #if   defined( __x86_64__ )
+      #define CtxGet( ctx ) __asm__ volatile ( \
+                  "movq %%rsp,%0\n"   \
+                  "movq %%rbp,%1\n"   \
+            : "=rm" ((ctx).SP), "=rm" ((ctx).FP) )
+      #elif defined( __i386__ )
+      #define CtxGet( ctx ) __asm__ volatile ( \
+                  "movl %%esp,%0\n"   \
+                  "movl %%ebp,%1\n"   \
+            : "=rm" ((ctx).SP), "=rm" ((ctx).FP) )
+      #else
+      #error CtxGet is only implemented for x86_64 and i386
+      #endif
 
 #endif //_INVOKE_PRIVATE_H_
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 53a6c2a24c63e4878fd4222e11fef110ba3ee85a)
+++ src/libcfa/concurrency/kernel.c	(revision bf2438c144672ed3463924e84dfb66424384ec03)
@@ -72,5 +72,5 @@
 // Main thread construction
 struct current_stack_info_t {
-	machine_context_t ctx;	
+	machine_context_t ctx;
 	unsigned int size;		// size of stack
 	void *base;				// base of stack
@@ -82,5 +82,5 @@
 
 void ?{}( current_stack_info_t * this ) {
-	CtxGet( &this->ctx );
+	CtxGet( this->ctx );
 	this->base = this->ctx.FP;
 	this->storage = this->ctx.SP;
@@ -106,5 +106,5 @@
 
 void ?{}( coroutine_desc * this, current_stack_info_t * info) {
-	(&this->stack){ info };	
+	(&this->stack){ info };
 	this->name = "Main Thread";
 	this->errno_ = 0;
@@ -184,5 +184,5 @@
 
 void ^?{}(cluster * this) {
-	
+
 }
 
@@ -203,5 +203,5 @@
 
 		thread_desc * readyThread = NULL;
-		for( unsigned int spin_count = 0; ! this->is_terminated; spin_count++ ) 
+		for( unsigned int spin_count = 0; ! this->is_terminated; spin_count++ )
 		{
 			readyThread = nextThread( this->cltr );
@@ -229,10 +229,10 @@
 }
 
-// runThread runs a thread by context switching 
-// from the processor coroutine to the target thread 
+// runThread runs a thread by context switching
+// from the processor coroutine to the target thread
 void runThread(processor * this, thread_desc * dst) {
 	coroutine_desc * proc_cor = get_coroutine(this->runner);
 	coroutine_desc * thrd_cor = get_coroutine(dst);
-	
+
 	//Reset the terminating actions here
 	this->finish.action_code = No_Action;
@@ -246,5 +246,5 @@
 }
 
-// Once a thread has finished running, some of 
+// Once a thread has finished running, some of
 // its final actions must be executed from the kernel
 void finishRunning(processor * this) {
@@ -256,5 +256,5 @@
 	}
 	else if( this->finish.action_code == Release_Schedule ) {
-		unlock( this->finish.lock );		
+		unlock( this->finish.lock );
 		ScheduleThread( this->finish.thrd );
 	}
@@ -291,5 +291,5 @@
 	// SKULLDUGGERY: We want to create a context for the processor coroutine
 	// which is needed for the 2-step context switch. However, there is no reason
-	// to waste the perfectly valid stack create by pthread. 
+	// to waste the perfectly valid stack created by pthread.
 	current_stack_info_t info;
 	machine_context_t ctx;
@@ -306,7 +306,7 @@
 	LIB_DEBUG_PRINT_SAFE("Kernel : core %p created (%p, %p)\n", proc, proc->runner, &ctx);
 
-	// SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't 
-	// resume it to start it like it normally would, it will just context switch 
-	// back to here. Instead directly call the main since we already are on the 
+	// SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
+	// resume it to start it like it normally would, it will just context switch
+	// back to here. Instead directly call the main since we already are on the
 	// appropriate stack.
 	proc_cor_storage.__cor.state = Active;
@@ -315,5 +315,5 @@
 
 	// Main routine of the core returned, the core is now fully terminated
-	LIB_DEBUG_PRINT_SAFE("Kernel : core %p main ended (%p)\n", proc, proc->runner);	
+	LIB_DEBUG_PRINT_SAFE("Kernel : core %p main ended (%p)\n", proc, proc->runner);
 
 	return NULL;
@@ -322,8 +322,8 @@
 void start(processor * this) {
 	LIB_DEBUG_PRINT_SAFE("Kernel : Starting core %p\n", this);
-	
+
 	pthread_create( &this->kernel_thread, NULL, CtxInvokeProcessor, (void*)this );
 
-	LIB_DEBUG_PRINT_SAFE("Kernel : core %p started\n", this);	
+	LIB_DEBUG_PRINT_SAFE("Kernel : core %p started\n", this);
 }
 
@@ -334,5 +334,5 @@
 
 	verifyf( thrd->next == NULL, "Expected null got %p", thrd->next );
-	
+
 	lock( &systemProcessor->proc.cltr->lock );
 	append( &systemProcessor->proc.cltr->ready_queue, thrd );
@@ -392,8 +392,8 @@
 // Kernel boot procedures
 void kernel_startup(void) {
-	LIB_DEBUG_PRINT_SAFE("Kernel : Starting\n");	
+	LIB_DEBUG_PRINT_SAFE("Kernel : Starting\n");
 
 	// Start by initializing the main thread
-	// SKULLDUGGERY: the mainThread steals the process main thread 
+	// SKULLDUGGERY: the mainThread steals the process main thread
 	// which will then be scheduled by the systemProcessor normally
 	mainThread = (thread_desc *)&mainThread_storage;
@@ -417,5 +417,5 @@
 	systemProcessor{ systemCluster, (processorCtx_t *)&systemProcessorCtx_storage };
 
-	// Add the main thread to the ready queue 
+	// Add the main thread to the ready queue
 	// once resume is called on systemProcessor->runner the mainThread needs to be scheduled like any normal thread
 	ScheduleThread(mainThread);
@@ -428,5 +428,5 @@
 	// SKULLDUGGERY: Force a context switch to the system processor to set the main thread's context to the current UNIX
 	// context. Hence, the main thread does not begin through CtxInvokeThread, like all other threads. The trick here is that
-	// mainThread is on the ready queue when this call is made. 
+	// mainThread is on the ready queue when this call is made.
 	resume( systemProcessor->proc.runner );
 
@@ -457,5 +457,5 @@
 	^(mainThread){};
 
-	LIB_DEBUG_PRINT_SAFE("Kernel : Shutdown complete\n");	
+	LIB_DEBUG_PRINT_SAFE("Kernel : Shutdown complete\n");
 }
 
@@ -473,8 +473,8 @@
 		kernel_abort_called = true;
 		unlock( &kernel_abort_lock );
-	} 
+	}
 	else {
 		unlock( &kernel_abort_lock );
-		
+
 		sigset_t mask;
 		sigemptyset( &mask );
@@ -482,5 +482,5 @@
 		sigaddset( &mask, SIGUSR1 );			// block SIGUSR1 signals
 		sigsuspend( &mask );				// block the processor to prevent further damage during abort
-		_exit( EXIT_FAILURE );				// if processor unblocks before it is killed, terminate it		
+		_exit( EXIT_FAILURE );				// if processor unblocks before it is killed, terminate it
 	}
 
@@ -497,5 +497,5 @@
 		len = snprintf( abort_text, abort_text_size, " in coroutine %.256s (%p).\n", this_coroutine()->name, this_coroutine() );
 		__lib_debug_write( STDERR_FILENO, abort_text, len );
-	} 
+	}
 	else {
 		__lib_debug_write( STDERR_FILENO, ".\n", 2 );
@@ -590,5 +590,5 @@
 		}
 		head->next = NULL;
-	}	
+	}
 	return head;
 }
@@ -609,5 +609,5 @@
 		this->top = top->next;
 		top->next = NULL;
-	}	
+	}
 	return top;
 }
