Index: src/libcfa/concurrency/alarm.c
===================================================================
--- src/libcfa/concurrency/alarm.c	(revision 2f6a7e9384831f69b216448a851da2b01007a715)
+++ src/libcfa/concurrency/alarm.c	(revision ea7d2b051267e571f113e8dabae0d886eda94432)
@@ -186,5 +186,5 @@
 
 	disable_interrupts();
-	lock( &event_kernel->lock DEBUG_CTX2 );
+	lock( event_kernel->lock DEBUG_CTX2 );
 	{
 		verify( validate( alarms ) );
@@ -196,5 +196,5 @@
 		}
 	}
-	unlock( &event_kernel->lock );
+	unlock( event_kernel->lock );
 	this->set = true;
 	enable_interrupts( DEBUG_CTX );
@@ -203,10 +203,10 @@
 void unregister_self( alarm_node_t * this ) {
 	disable_interrupts();
-	lock( &event_kernel->lock DEBUG_CTX2 );
+	lock( event_kernel->lock DEBUG_CTX2 );
 	{
 		verify( validate( &event_kernel->alarms ) );
 		remove( &event_kernel->alarms, this );
 	}
-	unlock( &event_kernel->lock );
+	unlock( event_kernel->lock );
 	enable_interrupts( DEBUG_CTX );
 	this->set = false;
Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision 2f6a7e9384831f69b216448a851da2b01007a715)
+++ src/libcfa/concurrency/invoke.h	(revision ea7d2b051267e571f113e8dabae0d886eda94432)
@@ -14,6 +14,6 @@
 //
 
-#include <stdbool.h>
-#include <stdint.h>
+#include "bits/defs.h"
+#include "bits/locks.h"
 
 #ifdef __CFORALL__
@@ -25,17 +25,6 @@
 #define _INVOKE_H_
 
-	#define unlikely(x)    __builtin_expect(!!(x), 0)
-	#define thread_local _Thread_local
-
 	typedef void (*fptr_t)();
 	typedef int_fast16_t __lock_size_t;
-
-	struct spinlock {
-		volatile int lock;
-		#ifdef __CFA_DEBUG__
-			const char * prev_name;
-			void* prev_thrd;
-		#endif
-	};
 
 	struct __thread_queue_t {
@@ -58,7 +47,4 @@
 		void push( struct __condition_stack_t &, struct __condition_criterion_t * );
 		struct __condition_criterion_t * pop( struct __condition_stack_t & );
-
-		void  ?{}(spinlock & this);
-		void ^?{}(spinlock & this);
 	}
 	#endif
@@ -122,5 +108,5 @@
 	struct monitor_desc {
 		// spinlock to protect internal data
-		struct spinlock lock;
+		struct __spinlock_t lock;
 
 		// current owner of the monitor
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 2f6a7e9384831f69b216448a851da2b01007a715)
+++ src/libcfa/concurrency/kernel	(revision ea7d2b051267e571f113e8dabae0d886eda94432)
@@ -26,18 +26,18 @@
 //-----------------------------------------------------------------------------
 // Locks
-// Lock the spinlock, spin if already acquired
-void lock      ( spinlock * DEBUG_CTX_PARAM2 );
+// // Lock the spinlock, spin if already acquired
+// void lock      ( spinlock * DEBUG_CTX_PARAM2 );
 
-// Lock the spinlock, yield repeatedly if already acquired
-void lock_yield( spinlock * DEBUG_CTX_PARAM2 );
+// // Lock the spinlock, yield repeatedly if already acquired
+// void lock_yield( spinlock * DEBUG_CTX_PARAM2 );
 
-// Lock the spinlock, return false if already acquired
-bool try_lock  ( spinlock * DEBUG_CTX_PARAM2 );
+// // Lock the spinlock, return false if already acquired
+// bool try_lock  ( spinlock * DEBUG_CTX_PARAM2 );
 
-// Unlock the spinlock
-void unlock    ( spinlock * );
+// // Unlock the spinlock
+// void unlock    ( spinlock * );
 
 struct semaphore {
-	spinlock lock;
+	__spinlock_t lock;
 	int count;
 	__thread_queue_t waiting;
@@ -54,5 +54,5 @@
 struct cluster {
 	// Ready queue locks
-	spinlock ready_queue_lock;
+	__spinlock_t ready_queue_lock;
 
 	// Ready queue for threads
@@ -74,6 +74,6 @@
 	FinishOpCode action_code;
 	thread_desc * thrd;
-	spinlock * lock;
-	spinlock ** locks;
+	__spinlock_t * lock;
+	__spinlock_t ** locks;
 	unsigned short lock_count;
 	thread_desc ** thrds;
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 2f6a7e9384831f69b216448a851da2b01007a715)
+++ src/libcfa/concurrency/kernel.c	(revision ea7d2b051267e571f113e8dabae0d886eda94432)
@@ -242,5 +242,5 @@
 void finishRunning(processor * this) {
 	if( this->finish.action_code == Release ) {
-		unlock( this->finish.lock );
+		unlock( *this->finish.lock );
 	}
 	else if( this->finish.action_code == Schedule ) {
@@ -248,15 +248,15 @@
 	}
 	else if( this->finish.action_code == Release_Schedule ) {
-		unlock( this->finish.lock );
+		unlock( *this->finish.lock );
 		ScheduleThread( this->finish.thrd );
 	}
 	else if( this->finish.action_code == Release_Multi ) {
 		for(int i = 0; i < this->finish.lock_count; i++) {
-			unlock( this->finish.locks[i] );
+			unlock( *this->finish.locks[i] );
 		}
 	}
 	else if( this->finish.action_code == Release_Multi_Schedule ) {
 		for(int i = 0; i < this->finish.lock_count; i++) {
-			unlock( this->finish.locks[i] );
+			unlock( *this->finish.locks[i] );
 		}
 		for(int i = 0; i < this->finish.thrd_count; i++) {
@@ -334,7 +334,7 @@
 	verifyf( thrd->next == NULL, "Expected null got %p", thrd->next );
 
-	lock(   &this_processor->cltr->ready_queue_lock DEBUG_CTX2 );
+	lock(   this_processor->cltr->ready_queue_lock DEBUG_CTX2 );
 	append( this_processor->cltr->ready_queue, thrd );
-	unlock( &this_processor->cltr->ready_queue_lock );
+	unlock( this_processor->cltr->ready_queue_lock );
 
 	verify( disable_preempt_count > 0 );
@@ -343,7 +343,7 @@
 thread_desc * nextThread(cluster * this) {
 	verify( disable_preempt_count > 0 );
-	lock( &this->ready_queue_lock DEBUG_CTX2 );
+	lock( this->ready_queue_lock DEBUG_CTX2 );
 	thread_desc * head = pop_head( this->ready_queue );
-	unlock( &this->ready_queue_lock );
+	unlock( this->ready_queue_lock );
 	verify( disable_preempt_count > 0 );
 	return head;
@@ -358,5 +358,5 @@
 }
 
-void BlockInternal( spinlock * lock ) {
+void BlockInternal( __spinlock_t * lock ) {
 	disable_interrupts();
 	this_processor->finish.action_code = Release;
@@ -384,5 +384,5 @@
 }
 
-void BlockInternal( spinlock * lock, thread_desc * thrd ) {
+void BlockInternal( __spinlock_t * lock, thread_desc * thrd ) {
 	assert(thrd);
 	disable_interrupts();
@@ -398,5 +398,5 @@
 }
 
-void BlockInternal(spinlock * locks [], unsigned short count) {
+void BlockInternal(__spinlock_t * locks [], unsigned short count) {
 	disable_interrupts();
 	this_processor->finish.action_code = Release_Multi;
@@ -411,5 +411,5 @@
 }
 
-void BlockInternal(spinlock * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
+void BlockInternal(__spinlock_t * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
 	disable_interrupts();
 	this_processor->finish.action_code = Release_Multi_Schedule;
@@ -426,5 +426,5 @@
 }
 
-void LeaveThread(spinlock * lock, thread_desc * thrd) {
+void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
 	verify( disable_preempt_count > 0 );
 	this_processor->finish.action_code = thrd ? Release_Schedule : Release;
@@ -516,6 +516,6 @@
 }
 
-static spinlock kernel_abort_lock;
-static spinlock kernel_debug_lock;
+static __spinlock_t kernel_abort_lock;
+static __spinlock_t kernel_debug_lock;
 static bool kernel_abort_called = false;
 
@@ -523,13 +523,13 @@
 	// abort cannot be recursively entered by the same or different processors because all signal handlers return when
 	// the globalAbort flag is true.
-	lock( &kernel_abort_lock DEBUG_CTX2 );
+	lock( kernel_abort_lock DEBUG_CTX2 );
 
 	// first task to abort ?
 	if ( !kernel_abort_called ) {			// not first task to abort ?
 		kernel_abort_called = true;
-		unlock( &kernel_abort_lock );
+		unlock( kernel_abort_lock );
 	}
 	else {
-		unlock( &kernel_abort_lock );
+		unlock( kernel_abort_lock );
 
 		sigset_t mask;
@@ -561,9 +561,9 @@
 extern "C" {
 	void __lib_debug_acquire() {
-		lock( &kernel_debug_lock DEBUG_CTX2 );
+		lock( kernel_debug_lock DEBUG_CTX2 );
 	}
 
 	void __lib_debug_release() {
-		unlock( &kernel_debug_lock );
+		unlock( kernel_debug_lock );
 	}
 }
@@ -574,41 +574,4 @@
 //-----------------------------------------------------------------------------
 // Locks
-void ?{}( spinlock & this ) {
-	this.lock = 0;
-}
-void ^?{}( spinlock & this ) {
-
-}
-
-bool try_lock( spinlock * this DEBUG_CTX_PARAM2 ) {
-	return this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0;
-}
-
-void lock( spinlock * this DEBUG_CTX_PARAM2 ) {
-	for ( unsigned int i = 1;; i += 1 ) {
-		if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }
-	}
-	LIB_DEBUG_DO(
-		this->prev_name = caller;
-		this->prev_thrd = this_thread;
-	)
-}
-
-void lock_yield( spinlock * this DEBUG_CTX_PARAM2 ) {
-	for ( unsigned int i = 1;; i += 1 ) {
-		if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }
-		yield();
-	}
-	LIB_DEBUG_DO(
-		this->prev_name = caller;
-		this->prev_thrd = this_thread;
-	)
-}
-
-
-void unlock( spinlock * this ) {
-	__sync_lock_release_4( &this->lock );
-}
-
 void  ?{}( semaphore & this, int count = 1 ) {
 	(this.lock){};
@@ -619,5 +582,5 @@
 
 void P(semaphore & this) {
-	lock( &this.lock DEBUG_CTX2 );
+	lock( this.lock DEBUG_CTX2 );
 	this.count -= 1;
 	if ( this.count < 0 ) {
@@ -629,5 +592,5 @@
 	}
 	else {
-	    unlock( &this.lock );
+	    unlock( this.lock );
 	}
 }
@@ -635,5 +598,5 @@
 void V(semaphore & this) {
 	thread_desc * thrd = NULL;
-	lock( &this.lock DEBUG_CTX2 );
+	lock( this.lock DEBUG_CTX2 );
 	this.count += 1;
 	if ( this.count <= 0 ) {
@@ -642,5 +605,5 @@
 	}
 
-	unlock( &this.lock );
+	unlock( this.lock );
 
 	// make new owner
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision 2f6a7e9384831f69b216448a851da2b01007a715)
+++ src/libcfa/concurrency/kernel_private.h	(revision ea7d2b051267e571f113e8dabae0d886eda94432)
@@ -45,10 +45,10 @@
 //Block current thread and release/wake-up the following resources
 void BlockInternal(void);
-void BlockInternal(spinlock * lock);
+void BlockInternal(__spinlock_t * lock);
 void BlockInternal(thread_desc * thrd);
-void BlockInternal(spinlock * lock, thread_desc * thrd);
-void BlockInternal(spinlock * locks [], unsigned short count);
-void BlockInternal(spinlock * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
-void LeaveThread(spinlock * lock, thread_desc * thrd);
+void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
+void BlockInternal(__spinlock_t * locks [], unsigned short count);
+void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
+void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
 
 //-----------------------------------------------------------------------------
@@ -66,5 +66,5 @@
 struct event_kernel_t {
 	alarm_list_t alarms;
-	spinlock lock;
+	__spinlock_t lock;
 };
 
Index: src/libcfa/concurrency/monitor.c
===================================================================
--- src/libcfa/concurrency/monitor.c	(revision 2f6a7e9384831f69b216448a851da2b01007a715)
+++ src/libcfa/concurrency/monitor.c	(revision ea7d2b051267e571f113e8dabae0d886eda94432)
@@ -34,11 +34,11 @@
 static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & monitors );
 
-static inline void lock_all  ( spinlock * locks [], __lock_size_t count );
-static inline void lock_all  ( monitor_desc * source [], spinlock * /*out*/ locks [], __lock_size_t count );
-static inline void unlock_all( spinlock * locks [], __lock_size_t count );
+static inline void lock_all  ( __spinlock_t * locks [], __lock_size_t count );
+static inline void lock_all  ( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count );
+static inline void unlock_all( __spinlock_t * locks [], __lock_size_t count );
 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count );
 
-static inline void save   ( monitor_desc * ctx [], __lock_size_t count, spinlock * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
-static inline void restore( monitor_desc * ctx [], __lock_size_t count, spinlock * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
+static inline void save   ( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
+static inline void restore( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
 
 static inline void init     ( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
@@ -71,5 +71,5 @@
 	unsigned int recursions[ count ];                         /* Save the current recursion levels to restore them later                             */ \
 	__waitfor_mask_t masks [ count ];                         /* Save the current waitfor masks to restore them later                                */ \
-	spinlock *   locks     [ count ];                         /* We need to pass-in an array of locks to BlockInternal                               */ \
+	__spinlock_t *   locks [ count ];                         /* We need to pass in an array of locks to BlockInternal                               */ \
 
 #define monitor_save    save   ( monitors, count, locks, recursions, masks )
@@ -85,5 +85,5 @@
 	static void __enter_monitor_desc( monitor_desc * this, const __monitor_group_t & group ) {
 		// Lock the monitor spinlock, lock_yield to reduce contention
-		lock_yield( &this->lock DEBUG_CTX2 );
+		lock_yield( this->lock DEBUG_CTX2 );
 		thread_desc * thrd = this_thread;
 
@@ -127,5 +127,5 @@
 
 		// Release the lock and leave
-		unlock( &this->lock );
+		unlock( this->lock );
 		return;
 	}
@@ -133,5 +133,5 @@
 	static void __enter_monitor_dtor( monitor_desc * this, fptr_t func ) {
 		// Lock the monitor spinlock, lock_yield to reduce contention
-		lock_yield( &this->lock DEBUG_CTX2 );
+		lock_yield( this->lock DEBUG_CTX2 );
 		thread_desc * thrd = this_thread;
 
@@ -145,5 +145,5 @@
 			set_owner( this, thrd );
 
-			unlock( &this->lock );
+			unlock( this->lock );
 			return;
 		}
@@ -197,5 +197,5 @@
 	void __leave_monitor_desc( monitor_desc * this ) {
 		// Lock the monitor spinlock, lock_yield to reduce contention
-		lock_yield( &this->lock DEBUG_CTX2 );
+		lock_yield( this->lock DEBUG_CTX2 );
 
 		LIB_DEBUG_PRINT_SAFE("Kernel : %10p Leaving mon %p (%p)\n", this_thread, this, this->owner);
@@ -210,5 +210,5 @@
 		if( this->recursion != 0) {
 			LIB_DEBUG_PRINT_SAFE("Kernel :  recursion still %d\n", this->recursion);
-			unlock( &this->lock );
+			unlock( this->lock );
 			return;
 		}
@@ -218,5 +218,5 @@
 
 		// We can now let other threads in safely
-		unlock( &this->lock );
+		unlock( this->lock );
 
 		//We need to wake-up the thread
@@ -243,5 +243,5 @@
 
 		// Lock the monitor now
-		lock_yield( &this->lock DEBUG_CTX2 );
+		lock_yield( this->lock DEBUG_CTX2 );
 
 		disable_interrupts();
@@ -730,21 +730,21 @@
 }
 
-static inline void lock_all( spinlock * locks [], __lock_size_t count ) {
+static inline void lock_all( __spinlock_t * locks [], __lock_size_t count ) {
 	for( __lock_size_t i = 0; i < count; i++ ) {
-		lock_yield( locks[i] DEBUG_CTX2 );
-	}
-}
-
-static inline void lock_all( monitor_desc * source [], spinlock * /*out*/ locks [], __lock_size_t count ) {
+		lock_yield( *locks[i] DEBUG_CTX2 );
+	}
+}
+
+static inline void lock_all( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count ) {
 	for( __lock_size_t i = 0; i < count; i++ ) {
-		spinlock * l = &source[i]->lock;
-		lock_yield( l DEBUG_CTX2 );
+		__spinlock_t * l = &source[i]->lock;
+		lock_yield( *l DEBUG_CTX2 );
 		if(locks) locks[i] = l;
 	}
 }
 
-static inline void unlock_all( spinlock * locks [], __lock_size_t count ) {
+static inline void unlock_all( __spinlock_t * locks [], __lock_size_t count ) {
 	for( __lock_size_t i = 0; i < count; i++ ) {
-		unlock( locks[i] );
+		unlock( *locks[i] );
 	}
 }
@@ -752,5 +752,5 @@
 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count ) {
 	for( __lock_size_t i = 0; i < count; i++ ) {
-		unlock( &locks[i]->lock );
+		unlock( locks[i]->lock );
 	}
 }
@@ -759,5 +759,5 @@
 	monitor_desc * ctx [],
 	__lock_size_t count,
-	__attribute((unused)) spinlock * locks [],
+	__attribute((unused)) __spinlock_t * locks [],
 	unsigned int /*out*/ recursions [],
 	__waitfor_mask_t /*out*/ masks []
@@ -772,5 +772,5 @@
 	monitor_desc * ctx [],
 	__lock_size_t count,
-	spinlock * locks [],
+	__spinlock_t * locks [],
 	unsigned int /*out*/ recursions [],
 	__waitfor_mask_t /*out*/ masks []
Index: src/libcfa/concurrency/preemption.c
===================================================================
--- src/libcfa/concurrency/preemption.c	(revision 2f6a7e9384831f69b216448a851da2b01007a715)
+++ src/libcfa/concurrency/preemption.c	(revision ea7d2b051267e571f113e8dabae0d886eda94432)
@@ -355,7 +355,7 @@
 		case SI_KERNEL:
 			// LIB_DEBUG_PRINT_SAFE("Kernel : Preemption thread tick\n");
-			lock( &event_kernel->lock DEBUG_CTX2 );
+			lock( event_kernel->lock DEBUG_CTX2 );
 			tick_preemption();
-			unlock( &event_kernel->lock );
+			unlock( event_kernel->lock );
 			break;
 		// Signal was not sent by the kernel but by an other thread
