Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 1f81d61c0a8d53662c51e645fbbb076881fd849c)
+++ src/libcfa/concurrency/kernel	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -43,15 +43,45 @@
 extern struct cluster * mainCluster;
 
-enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule, Release_Multi, Release_Multi_Schedule };
+enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule, Release_Multi, Release_Multi_Schedule, Callback };
+
+typedef void (*__finish_callback_fptr_t)(void);
 
 //TODO use union, many of these fields are mutually exclusive (i.e. MULTI vs NOMULTI)
 struct FinishAction {
 	FinishOpCode action_code;
+	/*
+	// Union of possible actions
+	union {
+		// Option 1 : locks and threads
+		struct {
+			// 1 thread or N thread
+			union {
+				thread_desc * thrd;
+				struct {
+					thread_desc ** thrds;
+					unsigned short thrd_count;
+				};
+			};
+			// 1 lock or N lock
+			union {
+				__spinlock_t * lock;
+				struct {
+					__spinlock_t ** locks;
+					unsigned short lock_count;
+				};
+			};
+		};
+		// Option 2 : action pointer
+		__finish_callback_fptr_t callback;
+	};
+	/*/
 	thread_desc * thrd;
+	thread_desc ** thrds;
+	unsigned short thrd_count;
 	__spinlock_t * lock;
 	__spinlock_t ** locks;
 	unsigned short lock_count;
-	thread_desc ** thrds;
-	unsigned short thrd_count;
+	__finish_callback_fptr_t callback;
+	//*/
 };
 static inline void ?{}(FinishAction & this) {
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 1f81d61c0a8d53662c51e645fbbb076881fd849c)
+++ src/libcfa/concurrency/kernel.c	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -49,5 +49,5 @@
 thread_desc * mainThread;
 
-struct { __dllist_t(cluster    ) list; __spinlock_t lock; } global_clusters;
+struct { __dllist_t(cluster) list; __spinlock_t lock; } global_clusters;
 
 //-----------------------------------------------------------------------------
@@ -257,23 +257,20 @@
 // its final actions must be executed from the kernel
 void finishRunning(processor * this) with( this->finish ) {
-	if( action_code == Release ) {
-		verify( ! kernelTLS.preemption_state.enabled );
+	verify( ! kernelTLS.preemption_state.enabled );
+	choose( action_code ) {
+	case No_Action:
+		break;
+	case Release:
 		unlock( *lock );
-	}
-	else if( action_code == Schedule ) {
+	case Schedule:
 		ScheduleThread( thrd );
-	}
-	else if( action_code == Release_Schedule ) {
-		verify( ! kernelTLS.preemption_state.enabled );
+	case Release_Schedule:
 		unlock( *lock );
 		ScheduleThread( thrd );
-	}
-	else if( action_code == Release_Multi ) {
-		verify( ! kernelTLS.preemption_state.enabled );
+	case Release_Multi:
 		for(int i = 0; i < lock_count; i++) {
 			unlock( *locks[i] );
 		}
-	}
-	else if( action_code == Release_Multi_Schedule ) {
+	case Release_Multi_Schedule:
 		for(int i = 0; i < lock_count; i++) {
 			unlock( *locks[i] );
@@ -282,7 +279,8 @@
 			ScheduleThread( thrds[i] );
 		}
-	}
-	else {
-		assert(action_code == No_Action);
+	case Callback:
+		callback();
+	default:
+		abort("KERNEL ERROR: Unexpected action to run after thread");
 	}
 }
@@ -497,4 +495,18 @@
 }
 
+void BlockInternal(__finish_callback_fptr_t callback) {
+	disable_interrupts();
+	with( *kernelTLS.this_processor ) {
+		finish.action_code = Callback;
+		finish.callback    = callback;
+	}
+
+	verify( ! kernelTLS.preemption_state.enabled );
+	returnToKernel();
+	verify( ! kernelTLS.preemption_state.enabled );
+
+	enable_interrupts( __cfaabi_dbg_ctx );
+}
+
 // KERNEL ONLY
 void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision 1f81d61c0a8d53662c51e645fbbb076881fd849c)
+++ src/libcfa/concurrency/kernel_private.h	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -48,4 +48,5 @@
 void BlockInternal(__spinlock_t * locks [], unsigned short count);
 void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
+void BlockInternal(__finish_callback_fptr_t callback);
 void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
 
Index: src/libcfa/concurrency/monitor.c
===================================================================
--- src/libcfa/concurrency/monitor.c	(revision 1f81d61c0a8d53662c51e645fbbb076881fd849c)
+++ src/libcfa/concurrency/monitor.c	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -297,5 +297,5 @@
 	this.count = count;
 
-	// Sort monitors based on address -> TODO use a sort specialized for small numbers
+	// Sort monitors based on address
 	__libcfa_small_sort(this.m, count);
 
Index: src/libcfa/concurrency/mutex
===================================================================
--- src/libcfa/concurrency/mutex	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
+++ src/libcfa/concurrency/mutex	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -0,0 +1,171 @@
+
+//                              -*- Mode: CFA -*-
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// mutex --
+//
+// Author           : Thierry Delisle
+// Created On       : Fri May 25 01:24:09 2018
+// Last Modified By : Thierry Delisle
+// Last Modified On : Fri May 25 01:24:12 2018
+// Update Count     : 0
+//
+
+#pragma once
+
+#include <stdbool.h>
+
+#include "bits/algorithms.h"
+#include "bits/locks.h"
+
+#include "invoke.h"
+#include "time_t.h"
+
+//-----------------------------------------------------------------------------
+// Locks
+
+// Exclusive lock - non-recursive
+// ---
+struct mutex_lock {
+	// Spin lock used for mutual exclusion
+	__spinlock_t lock;
+
+	// List of blocked threads
+	__queue_t(struct thread_desc) blocked_threads;
+
+	// Locked flag
+	bool is_locked;
+};
+
+void ?{}(mutex_lock & this);
+void ^?{}(mutex_lock & this);
+void lock(mutex_lock & this);
+bool try_lock(mutex_lock & this);
+void unlock(mutex_lock & this);
+
+// Exclusive lock - recursive
+// ---
+struct recursive_mutex_lock{
+	// Spin lock used for mutual exclusion
+	__spinlock_t lock;
+
+	// List of blocked threads
+	__queue_t(struct thread_desc) blocked_threads;
+
+	// Current thread owning the lock
+	struct thread_desc * owner;
+
+	// Number of recursion level
+	size_t recursion_count;
+};
+
+void ?{}(recursive_mutex_lock & this);
+void ^?{}(recursive_mutex_lock & this);
+void lock(recursive_mutex_lock & this);
+bool try_lock(recursive_mutex_lock & this);
+void unlock(recursive_mutex_lock & this);
+
+trait is_lock(dtype L | sized(L)) {
+	void lock  (L &);
+	void unlock(L &);
+};
+
+//-----------------------------------------------------------------------------
+// Condition variables
+
+struct condition_variable {
+	// Spin lock used for mutual exclusion
+	__spinlock_t lock;
+
+	// List of blocked threads
+	__queue_t(struct thread_desc) blocked_threads;
+};
+
+void ?{}(condition_variable & this);
+void ^?{}(condition_variable & this);
+
+void notify_one(condition_variable & this);
+void notify_all(condition_variable & this);
+
+void wait(condition_variable & this);
+
+forall(dtype L | is_lock(L))
+void wait(condition_variable & this, L & l);
+
+//-----------------------------------------------------------------------------
+// Scopes
+forall(dtype L | is_lock(L)) {
+	#if !defined( __TUPLE_ARRAYS_EXIST__ )
+	void lock  ( L * locks [], size_t count);
+	void unlock( L * locks [], size_t count);
+
+	struct lock_scope {
+		L **   locks;
+		size_t count;
+	};
+
+	static inline void ?{}(lock_scope(L) & this) {
+		this.locks = NULL;
+		this.count = 0;
+	}
+
+	static inline void ^?{}(lock_scope(L) & this) {
+		if(this.count > 0) {
+			unlock(this.locks, this.count);
+		}
+	}
+
+	static inline lock_scope(L) lock( L * locks [], size_t count, lock_scope(L) & scope) {
+		lock(locks, count);
+		scope.locks = locks;
+		scope.count = count;
+	}
+
+	static inline void unlock( lock_scope(L) & this ) {
+		unlock(this.locks, this.count);
+		this.count = 0;
+	}
+
+	static inline void release( lock_scope(L) & this ) {
+		this.count = 0;
+	}
+	#else
+	void lock( [L &...] locks );
+	void unlock( [L &...] locks );
+
+	forall(size_t N)
+	struct lock_scope {
+		bool released;
+		[L &... N] locks;
+	};
+
+	void ?{}(lock_scope(L) & this) = void;
+	void ?{}(lock_scope(L) & this, lock_scope(L) other) = void;
+	void ?move?(lock_scope(L) & this, lock_scope(L) & other) = default;
+
+	static inline void ^?{}(lock_scope(L) & this) {
+		if( !this.released ) {
+			unlock(this.locks);
+		}
+	}
+
+	forall(size_t N)
+	static inline lock_scope(L, N) lock( [L &...] locks ) {
+		lock(locks);
+		return @{false, locks};
+	}
+
+	static inline void unlock( lock_scope(L) & this ) {
+		unlock(this.locks);
+		this.released = true
+	}
+
+	static inline void release( lock_scope(L) & this ) {
+		this.released = true;
+	}
+	#endif
+}
Index: src/libcfa/concurrency/mutex.c
===================================================================
--- src/libcfa/concurrency/mutex.c	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
+++ src/libcfa/concurrency/mutex.c	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -0,0 +1,192 @@
+
+//                              -*- Mode: CFA -*-
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// mutex.c --
+//
+// Author           : Thierry Delisle
+// Created On       : Fri May 25 01:37:11 2018
+// Last Modified By : Thierry Delisle
+// Last Modified On : Fri May 25 01:37:51 2018
+// Update Count     : 0
+//
+
+#include "mutex"
+
+#include "kernel_private.h"
+
+//-----------------------------------------------------------------------------
+// Locks
+
+// Exclusive lock - non-recursive
+// ---
+void ?{}(mutex_lock & this) {
+	this.lock{};
+	this.blocked_threads{};
+}
+
+void ^?{}(mutex_lock & this) {
+	// default
+}
+
+void lock(mutex_lock & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	if( is_locked ) {
+		append( blocked_threads, kernelTLS.this_thread );
+		BlockInternal( &lock );
+	}
+	else {
+		is_locked = true;
+		unlock( lock );
+	}
+}
+
+bool try_lock(mutex_lock & this) with(this) {
+	bool ret = false;
+	lock( lock __cfaabi_dbg_ctx2 );
+	if( !is_locked ) {
+		ret = true;
+		is_locked = true;
+	}
+	unlock( lock );
+	return ret;
+}
+
+void unlock(mutex_lock & this) {
+	lock( this.lock __cfaabi_dbg_ctx2 );
+	this.is_locked = this.blocked_threads;
+	WakeThread(
+		pop_head( this.blocked_threads )
+	);
+	unlock( this.lock );
+}
+
+// Exclusive lock - non-recursive
+// ---
+void ?{}(recursive_mutex_lock & this) {
+	this.lock{};
+	this.blocked_threads{};
+	this.owner = NULL;
+	this.recursion_count = 0;
+}
+
+void ^?{}(recursive_mutex_lock & this) {
+	// default
+}
+
+void lock(recursive_mutex_lock & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	if( owner == NULL ) {
+		owner = kernelTLS.this_thread;
+		recursion_count = 1;
+		unlock( lock );
+	}
+	else if( owner == kernelTLS.this_thread ) {
+		recursion_count++;
+		unlock( lock );
+	}
+	else {
+		append( blocked_threads, kernelTLS.this_thread );
+		BlockInternal( &lock );
+	}
+}
+
+bool try_lock(recursive_mutex_lock & this) with(this) {
+	bool ret = false;
+	lock( lock __cfaabi_dbg_ctx2 );
+	if( owner == NULL ) {
+		owner = kernelTLS.this_thread;
+		recursion_count = 1;
+		ret = true;
+	}
+	else if( owner == kernelTLS.this_thread ) {
+		recursion_count++;
+		ret = true;
+	}
+	unlock( lock );
+	return ret;
+}
+
+void unlock(recursive_mutex_lock & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	recursion_count--;
+	if( recursion_count == 0 ) {
+		thread_desc * thrd = pop_head( blocked_threads );
+		owner = thrd;
+		recursion_count = (thrd ? 1 : 0);
+		WakeThread( thrd );
+	}
+	unlock( lock );
+}
+
+//-----------------------------------------------------------------------------
+// Conditions
+void ?{}(condition_variable & this) {
+	this.blocked_threads{};
+}
+
+void ^?{}(condition_variable & this) {
+	// default
+}
+
+void notify_one(condition_variable & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	WakeThread(
+		pop_head( this.blocked_threads )
+	);
+	unlock( lock );
+}
+
+void notify_all(condition_variable & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	while(this.blocked_threads) {
+		WakeThread(
+			pop_head( this.blocked_threads )
+		);
+	}
+	unlock( lock );
+}
+
+void wait(condition_variable & this) {
+	lock( this.lock __cfaabi_dbg_ctx2 );
+	append( this.blocked_threads, kernelTLS.this_thread );
+	BlockInternal( &this.lock );
+}
+
+forall(dtype L | is_lock(L))
+void wait(condition_variable & this, L & l) {
+	lock( this.lock __cfaabi_dbg_ctx2 );
+	append( this.blocked_threads, kernelTLS.this_thread );
+	void __unlock(void) {
+		unlock(l);
+		unlock(this.lock);
+	}
+	BlockInternal( __unlock );
+}
+
+//-----------------------------------------------------------------------------
+// Scopes
+forall(dtype L | is_lock(L))
+void lock_all  ( L * locks[], size_t count) {
+	// Sort locks based on addresses
+	__libcfa_small_sort(locks, count);
+
+	// Lock all
+	for(size_t i = 0; i < count; i++) {
+		L * l = locks[i];
+		lock( *l );
+	}
+}
+
+forall(dtype L | is_lock(L))
+void unlock_all( L * locks[], size_t count) {
+	// Lock all
+	for(size_t i = 0; i < count; i++) {
+		L * l = locks[i];
+		unlock( *l );
+	}
+}
