Index: src/libcfa/Makefile.am
===================================================================
--- src/libcfa/Makefile.am	(revision 8ebbfc4c887a4b7e35a28345e0a8a272f1abb11f)
+++ src/libcfa/Makefile.am	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -51,5 +51,5 @@
 # not all platforms support concurrency, add option do disable it
 if BUILD_CONCURRENCY
-headers += concurrency/coroutine concurrency/thread concurrency/kernel concurrency/monitor
+headers += concurrency/coroutine concurrency/thread concurrency/kernel concurrency/monitor concurrency/mutex
 endif
 
Index: src/libcfa/Makefile.in
===================================================================
--- src/libcfa/Makefile.in	(revision 8ebbfc4c887a4b7e35a28345e0a8a272f1abb11f)
+++ src/libcfa/Makefile.in	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -97,5 +97,5 @@
 
 # not all platforms support concurrency, add option do disable it
-@BUILD_CONCURRENCY_TRUE@am__append_3 = concurrency/coroutine concurrency/thread concurrency/kernel concurrency/monitor
+@BUILD_CONCURRENCY_TRUE@am__append_3 = concurrency/coroutine concurrency/thread concurrency/kernel concurrency/monitor concurrency/mutex
 
 # not all platforms support concurrency, add option do disable it
@@ -153,13 +153,14 @@
 	containers/pair.c containers/result.c containers/vector.c \
 	concurrency/coroutine.c concurrency/thread.c \
-	concurrency/kernel.c concurrency/monitor.c assert.c \
-	exception.c virtual.c concurrency/CtxSwitch-@MACHINE_TYPE@.S \
-	concurrency/alarm.c concurrency/invoke.c \
-	concurrency/preemption.c
+	concurrency/kernel.c concurrency/monitor.c concurrency/mutex.c \
+	assert.c exception.c virtual.c \
+	concurrency/CtxSwitch-@MACHINE_TYPE@.S concurrency/alarm.c \
+	concurrency/invoke.c concurrency/preemption.c
 am__dirstamp = $(am__leading_dot)dirstamp
 @BUILD_CONCURRENCY_TRUE@am__objects_1 = concurrency/libcfa_d_a-coroutine.$(OBJEXT) \
 @BUILD_CONCURRENCY_TRUE@	concurrency/libcfa_d_a-thread.$(OBJEXT) \
 @BUILD_CONCURRENCY_TRUE@	concurrency/libcfa_d_a-kernel.$(OBJEXT) \
-@BUILD_CONCURRENCY_TRUE@	concurrency/libcfa_d_a-monitor.$(OBJEXT)
+@BUILD_CONCURRENCY_TRUE@	concurrency/libcfa_d_a-monitor.$(OBJEXT) \
+@BUILD_CONCURRENCY_TRUE@	concurrency/libcfa_d_a-mutex.$(OBJEXT)
 am__objects_2 = libcfa_d_a-fstream.$(OBJEXT) \
 	libcfa_d_a-iostream.$(OBJEXT) libcfa_d_a-iterator.$(OBJEXT) \
@@ -188,12 +189,13 @@
 	containers/result.c containers/vector.c \
 	concurrency/coroutine.c concurrency/thread.c \
-	concurrency/kernel.c concurrency/monitor.c assert.c \
-	exception.c virtual.c concurrency/CtxSwitch-@MACHINE_TYPE@.S \
-	concurrency/alarm.c concurrency/invoke.c \
-	concurrency/preemption.c
+	concurrency/kernel.c concurrency/monitor.c concurrency/mutex.c \
+	assert.c exception.c virtual.c \
+	concurrency/CtxSwitch-@MACHINE_TYPE@.S concurrency/alarm.c \
+	concurrency/invoke.c concurrency/preemption.c
 @BUILD_CONCURRENCY_TRUE@am__objects_5 = concurrency/libcfa_a-coroutine.$(OBJEXT) \
 @BUILD_CONCURRENCY_TRUE@	concurrency/libcfa_a-thread.$(OBJEXT) \
 @BUILD_CONCURRENCY_TRUE@	concurrency/libcfa_a-kernel.$(OBJEXT) \
-@BUILD_CONCURRENCY_TRUE@	concurrency/libcfa_a-monitor.$(OBJEXT)
+@BUILD_CONCURRENCY_TRUE@	concurrency/libcfa_a-monitor.$(OBJEXT) \
+@BUILD_CONCURRENCY_TRUE@	concurrency/libcfa_a-mutex.$(OBJEXT)
 am__objects_6 = libcfa_a-fstream.$(OBJEXT) libcfa_a-iostream.$(OBJEXT) \
 	libcfa_a-iterator.$(OBJEXT) libcfa_a-limits.$(OBJEXT) \
@@ -264,7 +266,7 @@
 	containers/result containers/vector concurrency/coroutine \
 	concurrency/thread concurrency/kernel concurrency/monitor \
-	${shell find stdhdr -type f -printf "%p "} math gmp time_t.h \
-	clock bits/align.h bits/containers.h bits/defs.h bits/debug.h \
-	bits/locks.h concurrency/invoke.h
+	concurrency/mutex ${shell find stdhdr -type f -printf "%p "} \
+	math gmp time_t.h clock bits/align.h bits/containers.h \
+	bits/defs.h bits/debug.h bits/locks.h concurrency/invoke.h
 HEADERS = $(nobase_cfa_include_HEADERS)
 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
@@ -548,4 +550,6 @@
 concurrency/libcfa_d_a-monitor.$(OBJEXT): concurrency/$(am__dirstamp) \
 	concurrency/$(DEPDIR)/$(am__dirstamp)
+concurrency/libcfa_d_a-mutex.$(OBJEXT): concurrency/$(am__dirstamp) \
+	concurrency/$(DEPDIR)/$(am__dirstamp)
 concurrency/CtxSwitch-@MACHINE_TYPE@.$(OBJEXT):  \
 	concurrency/$(am__dirstamp) \
@@ -580,4 +584,6 @@
 	concurrency/$(DEPDIR)/$(am__dirstamp)
 concurrency/libcfa_a-monitor.$(OBJEXT): concurrency/$(am__dirstamp) \
+	concurrency/$(DEPDIR)/$(am__dirstamp)
+concurrency/libcfa_a-mutex.$(OBJEXT): concurrency/$(am__dirstamp) \
 	concurrency/$(DEPDIR)/$(am__dirstamp)
 concurrency/libcfa_a-alarm.$(OBJEXT): concurrency/$(am__dirstamp) \
@@ -635,4 +641,5 @@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_a-kernel.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_a-monitor.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_a-mutex.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_a-preemption.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_a-thread.Po@am__quote@
@@ -642,4 +649,5 @@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_d_a-kernel.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_d_a-monitor.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_d_a-mutex.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_d_a-preemption.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@concurrency/$(DEPDIR)/libcfa_d_a-thread.Po@am__quote@
@@ -930,4 +938,18 @@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_d_a-monitor.obj `if test -f 'concurrency/monitor.c'; then $(CYGPATH_W) 'concurrency/monitor.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/monitor.c'; fi`
 
+concurrency/libcfa_d_a-mutex.o: concurrency/mutex.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_d_a-mutex.o -MD -MP -MF concurrency/$(DEPDIR)/libcfa_d_a-mutex.Tpo -c -o concurrency/libcfa_d_a-mutex.o `test -f 'concurrency/mutex.c' || echo '$(srcdir)/'`concurrency/mutex.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_d_a-mutex.Tpo concurrency/$(DEPDIR)/libcfa_d_a-mutex.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/mutex.c' object='concurrency/libcfa_d_a-mutex.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_d_a-mutex.o `test -f 'concurrency/mutex.c' || echo '$(srcdir)/'`concurrency/mutex.c
+
+concurrency/libcfa_d_a-mutex.obj: concurrency/mutex.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_d_a-mutex.obj -MD -MP -MF concurrency/$(DEPDIR)/libcfa_d_a-mutex.Tpo -c -o concurrency/libcfa_d_a-mutex.obj `if test -f 'concurrency/mutex.c'; then $(CYGPATH_W) 'concurrency/mutex.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/mutex.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_d_a-mutex.Tpo concurrency/$(DEPDIR)/libcfa_d_a-mutex.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/mutex.c' object='concurrency/libcfa_d_a-mutex.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_d_a-mutex.obj `if test -f 'concurrency/mutex.c'; then $(CYGPATH_W) 'concurrency/mutex.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/mutex.c'; fi`
+
 libcfa_d_a-assert.o: assert.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_d_a_CFLAGS) $(CFLAGS) -MT libcfa_d_a-assert.o -MD -MP -MF $(DEPDIR)/libcfa_d_a-assert.Tpo -c -o libcfa_d_a-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c
@@ -1237,4 +1259,18 @@
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_a-monitor.obj `if test -f 'concurrency/monitor.c'; then $(CYGPATH_W) 'concurrency/monitor.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/monitor.c'; fi`
+
+concurrency/libcfa_a-mutex.o: concurrency/mutex.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_a-mutex.o -MD -MP -MF concurrency/$(DEPDIR)/libcfa_a-mutex.Tpo -c -o concurrency/libcfa_a-mutex.o `test -f 'concurrency/mutex.c' || echo '$(srcdir)/'`concurrency/mutex.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_a-mutex.Tpo concurrency/$(DEPDIR)/libcfa_a-mutex.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/mutex.c' object='concurrency/libcfa_a-mutex.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_a-mutex.o `test -f 'concurrency/mutex.c' || echo '$(srcdir)/'`concurrency/mutex.c
+
+concurrency/libcfa_a-mutex.obj: concurrency/mutex.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -MT concurrency/libcfa_a-mutex.obj -MD -MP -MF concurrency/$(DEPDIR)/libcfa_a-mutex.Tpo -c -o concurrency/libcfa_a-mutex.obj `if test -f 'concurrency/mutex.c'; then $(CYGPATH_W) 'concurrency/mutex.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/mutex.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) concurrency/$(DEPDIR)/libcfa_a-mutex.Tpo concurrency/$(DEPDIR)/libcfa_a-mutex.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='concurrency/mutex.c' object='concurrency/libcfa_a-mutex.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcfa_a_CFLAGS) $(CFLAGS) -c -o concurrency/libcfa_a-mutex.obj `if test -f 'concurrency/mutex.c'; then $(CYGPATH_W) 'concurrency/mutex.c'; else $(CYGPATH_W) '$(srcdir)/concurrency/mutex.c'; fi`
 
 libcfa_a-assert.o: assert.c
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision 8ebbfc4c887a4b7e35a28345e0a8a272f1abb11f)
+++ src/libcfa/concurrency/kernel	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -43,15 +43,45 @@
 extern struct cluster * mainCluster;
 
-enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule, Release_Multi, Release_Multi_Schedule };
+enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule, Release_Multi, Release_Multi_Schedule, Callback };
+
+typedef void (*__finish_callback_fptr_t)(void);
 
 //TODO use union, many of these fields are mutually exclusive (i.e. MULTI vs NOMULTI)
 struct FinishAction {
 	FinishOpCode action_code;
+	/* Disabled union-based layout; adding a second '/' in front of this opener swaps it in for the flat fields below
+	// Union of possible actions
+	union {
+		// Option 1 : locks and threads
+		struct {
+			// 1 thread or N thread
+			union {
+				thread_desc * thrd;
+				struct {
+					thread_desc ** thrds;
+					unsigned short thrd_count;
+				};
+			};
+			// 1 lock or N lock
+			union {
+				__spinlock_t * lock;
+				struct {
+					__spinlock_t ** locks;
+					unsigned short lock_count;
+				};
+			};
+		};
+		// Option 2 : action pointer
+		__finish_callback_fptr_t callback;
+	};
+	/*/
 	thread_desc * thrd;
+	thread_desc ** thrds;
+	unsigned short thrd_count;
 	__spinlock_t * lock;
 	__spinlock_t ** locks;
 	unsigned short lock_count;
-	thread_desc ** thrds;
-	unsigned short thrd_count;
+	__finish_callback_fptr_t callback; // invoked by finishRunning when action_code is Callback
+	//*/
 };
 static inline void ?{}(FinishAction & this) {
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision 8ebbfc4c887a4b7e35a28345e0a8a272f1abb11f)
+++ src/libcfa/concurrency/kernel.c	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -49,5 +49,5 @@
 thread_desc * mainThread;
 
-struct { __dllist_t(cluster    ) list; __spinlock_t lock; } global_clusters;
+struct { __dllist_t(cluster) list; __spinlock_t lock; } global_clusters;
 
 //-----------------------------------------------------------------------------
@@ -257,23 +257,20 @@
 // its final actions must be executed from the kernel
 void finishRunning(processor * this) with( this->finish ) {
-	if( action_code == Release ) {
-		verify( ! kernelTLS.preemption_state.enabled );
+	verify( ! kernelTLS.preemption_state.enabled );
+	choose( action_code ) {
+	case No_Action:
+		break;
+	case Release:
 		unlock( *lock );
-	}
-	else if( action_code == Schedule ) {
+	case Schedule:
 		ScheduleThread( thrd );
-	}
-	else if( action_code == Release_Schedule ) {
-		verify( ! kernelTLS.preemption_state.enabled );
+	case Release_Schedule:
 		unlock( *lock );
 		ScheduleThread( thrd );
-	}
-	else if( action_code == Release_Multi ) {
-		verify( ! kernelTLS.preemption_state.enabled );
+	case Release_Multi:
 		for(int i = 0; i < lock_count; i++) {
 			unlock( *locks[i] );
 		}
-	}
-	else if( action_code == Release_Multi_Schedule ) {
+	case Release_Multi_Schedule:
 		for(int i = 0; i < lock_count; i++) {
 			unlock( *locks[i] );
@@ -282,7 +279,8 @@
 			ScheduleThread( thrds[i] );
 		}
-	}
-	else {
-		assert(action_code == No_Action);
+	case Callback:
+		callback();
+	default:
+		abort("KERNEL ERROR: Unexpected action to run after thread");
 	}
 }
@@ -497,4 +495,18 @@
 }
 
+void BlockInternal(__finish_callback_fptr_t callback) { // record `callback` as this processor's finish action, then return to the kernel
+	disable_interrupts();
+	with( *kernelTLS.this_processor ) {
+		finish.action_code = Callback;
+		finish.callback    = callback;
+	}
+	// The recorded action is consumed by finishRunning(), which calls callback() for the Callback code
+	verify( ! kernelTLS.preemption_state.enabled );
+	returnToKernel();
+	verify( ! kernelTLS.preemption_state.enabled );
+	// Interrupts were disabled on entry and are only re-enabled here, after returnToKernel() has come back
+	enable_interrupts( __cfaabi_dbg_ctx );
+}
+
 // KERNEL ONLY
 void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision 8ebbfc4c887a4b7e35a28345e0a8a272f1abb11f)
+++ src/libcfa/concurrency/kernel_private.h	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -48,4 +48,5 @@
 void BlockInternal(__spinlock_t * locks [], unsigned short count);
 void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
+void BlockInternal(__finish_callback_fptr_t callback);
 void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
 
Index: src/libcfa/concurrency/monitor.c
===================================================================
--- src/libcfa/concurrency/monitor.c	(revision 8ebbfc4c887a4b7e35a28345e0a8a272f1abb11f)
+++ src/libcfa/concurrency/monitor.c	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -297,5 +297,5 @@
 	this.count = count;
 
-	// Sort monitors based on address -> TODO use a sort specialized for small numbers
+	// Sort monitors based on address
 	__libcfa_small_sort(this.m, count);
 
Index: src/libcfa/concurrency/mutex
===================================================================
--- src/libcfa/concurrency/mutex	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
+++ src/libcfa/concurrency/mutex	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -0,0 +1,171 @@
+
+//                              -*- Mode: CFA -*-
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// mutex --
+//
+// Author           : Thierry Delisle
+// Created On       : Fri May 25 01:24:09 2018
+// Last Modified By : Thierry Delisle
+// Last Modified On : Fri May 25 01:24:12 2018
+// Update Count     : 0
+//
+
+#pragma once
+
+#include <stdbool.h>
+
+#include "bits/algorithms.h"
+#include "bits/locks.h"
+
+#include "invoke.h"
+#include "time_t.h"
+
+//-----------------------------------------------------------------------------
+// Locks
+
+// Exclusive lock - non-recursive
+// ---
+struct mutex_lock {
+	// Spin lock guarding is_locked and blocked_threads
+	__spinlock_t lock;
+
+	// Queue of threads blocked in lock(), woken from the head by unlock()
+	__queue_t(struct thread_desc) blocked_threads;
+
+	// True while the mutex is held
+	bool is_locked;
+};
+
+void ?{}(mutex_lock & this);
+void ^?{}(mutex_lock & this);
+void lock(mutex_lock & this);      // acquire; queues the caller and blocks if already held
+bool try_lock(mutex_lock & this);  // acquire without blocking; returns true on success
+void unlock(mutex_lock & this);    // release, waking the thread at the head of the queue
+
+// Exclusive lock - recursive
+// ---
+struct recursive_mutex_lock{
+	// Spin lock guarding owner, recursion_count and blocked_threads
+	__spinlock_t lock;
+
+	// Queue of threads blocked in lock(), woken from the head by unlock()
+	__queue_t(struct thread_desc) blocked_threads;
+
+	// Thread currently owning the lock, NULL when the lock is free
+	struct thread_desc * owner;
+
+	// Nesting depth: lock()/try_lock() calls by the owner not yet matched by unlock()
+	size_t recursion_count;
+};
+
+void ?{}(recursive_mutex_lock & this);
+void ^?{}(recursive_mutex_lock & this);
+void lock(recursive_mutex_lock & this);      // acquire or nest; blocks only when another thread owns the lock
+bool try_lock(recursive_mutex_lock & this);  // acquire or nest without blocking; returns true on success
+void unlock(recursive_mutex_lock & this);    // undo one level; the outermost unlock passes the lock to the head waiter
+// Minimal lock interface used by the generic helpers in this header: any sized type with lock() and unlock()
+trait is_lock(dtype L | sized(L)) {
+	void lock  (L &);
+	void unlock(L &);
+};
+
+//-----------------------------------------------------------------------------
+// Condition variables
+
+struct condition_variable {
+	// Spin lock guarding blocked_threads
+	__spinlock_t lock;
+
+	// Queue of threads blocked in wait(), woken from the head by notify_one()/notify_all()
+	__queue_t(struct thread_desc) blocked_threads;
+};
+
+void ?{}(condition_variable & this);
+void ^?{}(condition_variable & this);
+// notify_one wakes the thread at the head of the queue; notify_all keeps waking until the queue is empty
+void notify_one(condition_variable & this);
+void notify_all(condition_variable & this);
+// Queue the calling thread on the condition and block it via BlockInternal
+void wait(condition_variable & this);
+// Same, additionally unlocking `l` as part of blocking
+forall(dtype L | is_lock(L))
+void wait(condition_variable & this, L & l);
+
+//-----------------------------------------------------------------------------
+// Scopes
+forall(dtype L | is_lock(L)) {
+	#if !defined( __TUPLE_ARRAYS_EXIST__ )
+	void lock_all  ( L * locks [], size_t count); // defined in mutex.c: sorts the array, then locks every entry
+	void unlock_all( L * locks [], size_t count); // defined in mutex.c: unlocks every entry
+	// Scope guard: remembers a set of locks and unlocks them on destruction unless unlocked or released first
+	struct lock_scope {
+		L **   locks;
+		size_t count;
+	};
+	// A fresh scope guards nothing
+	static inline void ?{}(lock_scope(L) & this) {
+		this.locks = NULL;
+		this.count = 0;
+	}
+	// Unlock whatever the scope still guards (count == 0 means nothing is held)
+	static inline void ^?{}(lock_scope(L) & this) {
+		if(this.count > 0) {
+			unlock_all(this.locks, this.count);
+		}
+	}
+	// Lock all `count` locks and arm `scope`; returns void because the result is delivered through `scope`
+	static inline void lock( L * locks [], size_t count, lock_scope(L) & scope) {
+		lock_all(locks, count);
+		scope.locks = locks;
+		scope.count = count;
+	}
+	// Unlock now and disarm the destructor
+	static inline void unlock( lock_scope(L) & this ) {
+		unlock_all(this.locks, this.count);
+		this.count = 0;
+	}
+	// Disarm the destructor without unlocking: the caller keeps the locks
+	static inline void release( lock_scope(L) & this ) {
+		this.count = 0;
+	}
+	#else
+	void lock( [L &...] locks );
+	void unlock( [L &...] locks );
+	// Scope guard over a tuple of N lock references; `released` records that the destructor must not unlock
+	forall(size_t N)
+	struct lock_scope {
+		bool released;
+		[L &... N] locks;
+	};
+	// NOTE(review): default and copy construction look intentionally disabled (= void), leaving only the move — confirm
+	void ?{}(lock_scope(L) & this) = void;
+	void ?{}(lock_scope(L) & this, lock_scope(L) other) = void;
+	void ?move?(lock_scope(L) & this, lock_scope(L) & other) = default;
+	// Unlock everything on scope exit unless the scope was already unlocked or released
+	static inline void ^?{}(lock_scope(L) & this) {
+		if( !this.released ) {
+			unlock(this.locks);
+		}
+	}
+	// Lock all arguments and return a scope that is still armed (released == false)
+	forall(size_t N)
+	static inline lock_scope(L, N) lock( [L &...] locks ) {
+		lock(locks);
+		return @{false, locks};
+	}
+
+	static inline void unlock( lock_scope(L) & this ) { // unlock now and disarm the destructor
+		unlock(this.locks);
+		this.released = true; // the terminating ';' was missing, a syntax error once this branch is compiled
+	}
+	// Disarm the scope without unlocking; the destructor then leaves the locks alone
+	static inline void release( lock_scope(L) & this ) {
+		this.released = true;
+	}
+	#endif
+}
Index: src/libcfa/concurrency/mutex.c
===================================================================
--- src/libcfa/concurrency/mutex.c	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
+++ src/libcfa/concurrency/mutex.c	(revision 09800e9b5b749cee9e6635061d104d0b3b63c527)
@@ -0,0 +1,192 @@
+
+//                              -*- Mode: CFA -*-
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// mutex.c --
+//
+// Author           : Thierry Delisle
+// Created On       : Fri May 25 01:37:11 2018
+// Last Modified By : Thierry Delisle
+// Last Modified On : Fri May 25 01:37:51 2018
+// Update Count     : 0
+//
+
+#include "mutex"
+
+#include "kernel_private.h"
+
+//-----------------------------------------------------------------------------
+// Locks
+
+// Exclusive lock - non-recursive
+// ---
+void ?{}(mutex_lock & this) {
+	this.lock{};
+	this.blocked_threads{};
+	// Start out unlocked: lock() and try_lock() read is_locked, which was previously never set here
+	this.is_locked = false;
+}
+
+void ^?{}(mutex_lock & this) {} // default
+
+void lock(mutex_lock & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	if( !is_locked ) {
+		is_locked = true; // uncontended: take ownership
+		unlock( lock );
+		return;
+	}
+	// Contended: join the wait queue; the spin lock is passed to BlockInternal instead of being unlocked here
+	append( blocked_threads, kernelTLS.this_thread );
+	BlockInternal( &lock );
+}
+
+bool try_lock(mutex_lock & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	// Acquired only if nobody held the mutex on entry
+	bool acquired = !is_locked;
+	if( acquired ) {
+		is_locked = true;
+	}
+	unlock( lock );
+	return acquired;
+}
+
+void unlock(mutex_lock & this) {
+	lock( this.lock __cfaabi_dbg_ctx2 );
+	// Stay locked when a waiter exists: ownership passes directly to the thread woken below
+	this.is_locked = this.blocked_threads;
+	thread_desc * next = pop_head( this.blocked_threads );
+	WakeThread( next );
+	unlock( this.lock );
+}
+
+// Exclusive lock - recursive
+// ---
+void ?{}(recursive_mutex_lock & this) {
+	this.lock{};
+	this.blocked_threads{};
+	// Free lock: no owner, zero nesting depth
+	this.owner = NULL;
+	this.recursion_count = 0;
+}
+
+// default
+void ^?{}(recursive_mutex_lock & this) {}
+
+void lock(recursive_mutex_lock & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	if( owner != NULL && owner != kernelTLS.this_thread ) {
+		// Held by another thread: join the wait queue; the spin lock is passed to BlockInternal
+		append( blocked_threads, kernelTLS.this_thread );
+		BlockInternal( &lock );
+		return;
+	}
+	if( owner == NULL ) {
+		// Free: the caller becomes the owner at depth 1
+		owner = kernelTLS.this_thread;
+		recursion_count = 1;
+	}
+	else { recursion_count++; } // re-entry by the owner: one level deeper
+	unlock( lock );
+}
+
+bool try_lock(recursive_mutex_lock & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	// Succeeds when the lock is free or already owned by the caller
+	bool acquired = ( owner == NULL || owner == kernelTLS.this_thread );
+	if( owner == NULL ) {
+		// First acquisition: record the owner at depth 1
+		owner = kernelTLS.this_thread;
+		recursion_count = 1;
+	}
+	else if( acquired ) {
+		recursion_count++;
+	}
+	unlock( lock );
+	return acquired;
+}
+
+void unlock(recursive_mutex_lock & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	if( --recursion_count == 0 ) {
+		// Outermost unlock: the popped waiter (possibly none) becomes the owner at depth 1, else depth 0
+		thread_desc * next = pop_head( blocked_threads );
+		owner = next;
+		recursion_count = next ? 1 : 0;
+		WakeThread( next );
+	}
+	unlock( lock );
+}
+
+//-----------------------------------------------------------------------------
+// Conditions
+void ?{}(condition_variable & this) {
+	this.lock{}; // constructed explicitly, matching the mutex_lock and recursive_mutex_lock constructors
+	this.blocked_threads{};
+}
+
+// default
+void ^?{}(condition_variable & this) {}
+
+void notify_one(condition_variable & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	// Pop the head of the wait queue and pass it, unchecked, to WakeThread
+	thread_desc * next = pop_head( blocked_threads );
+	WakeThread( next );
+	unlock( lock );
+}
+
+void notify_all(condition_variable & this) with(this) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	// Drain the wait queue, waking each thread in queue order
+	while( blocked_threads ) {
+		thread_desc * next = pop_head( blocked_threads );
+		WakeThread( next );
+	}
+	unlock( lock );
+}
+
+void wait(condition_variable & this) { // queue the calling thread on the condition, then block it
+	lock( this.lock __cfaabi_dbg_ctx2 );
+	append( this.blocked_threads, kernelTLS.this_thread );
+	BlockInternal( &this.lock ); // spin lock is passed along rather than unlocked here; presumably released by the kernel — confirm
+}
+
+forall(dtype L | is_lock(L))
+void wait(condition_variable & this, L & l) {
+	lock( this.lock __cfaabi_dbg_ctx2 );
+	append( this.blocked_threads, kernelTLS.this_thread );
+	// Finish action for BlockInternal: release the caller's lock and the internal spin lock only after this thread is queued
+	void __unlock(void) { unlock(l); unlock(this.lock); }
+	BlockInternal( __unlock );
+	// Woken by notify_one/notify_all: re-acquire the caller's lock so it is held on return, as it was on entry
+	lock(l);
+}
+
+//-----------------------------------------------------------------------------
+// Scopes
+forall(dtype L | is_lock(L))
+void lock_all  ( L * locks[], size_t count) {
+	// Order the array in place by address (via __libcfa_small_sort) so every caller
+	// acquires a given set of locks in the same sequence
+	__libcfa_small_sort(locks, count);
+
+	// Acquire each lock in sorted order
+	for(size_t idx = 0; idx < count; idx += 1) {
+		lock( *locks[idx] );
+	}
+}
+
+forall(dtype L | is_lock(L))
+void unlock_all( L * locks[], size_t count) {
+	// Unlock all, walking the array front to back
+	// (the array is used as given; no sorting happens here)
+	for(size_t idx = 0; idx < count; idx += 1) {
+		unlock( *locks[idx] );
+	}
+}
