Index: libcfa/src/concurrency/locks.hfa
===================================================================
--- libcfa/src/concurrency/locks.hfa	(revision d3314aea19484559c4a184b7c8839df26f2eddc6)
+++ libcfa/src/concurrency/locks.hfa	(revision f4ec5e45adb2e571c2ed71c2f859c899cfe51d5c)
@@ -20,7 +20,122 @@
 
 #include "bits/weakso_locks.hfa"
+#include "containers/queueLockFree.hfa"
+
+#include "thread.hfa"
 
 #include "time_t.hfa"
 #include "time.hfa"
+
+//-----------------------------------------------------------------------------
+// Semaphores
+
+// '0-nary' semaphore
+// Similar to a counting semaphore except the value of one is never reached
+// as a consequence, a V() that would bring the value to 1 *spins* until
+// a P consumes it
+struct Semaphore0nary {
+	__spinlock_t lock; // needed to protect
+	mpsc_queue($thread) queue;
+};
+
+static inline bool P(Semaphore0nary & this, $thread * thrd) __attribute__((artificial));
+static inline bool P(Semaphore0nary & this, $thread * thrd) {
+	/* paranoid */ verify(!(thrd->seqable.next));
+	/* paranoid */ verify(!(thrd`next));
+
+	push(this.queue, thrd);
+	return true;
+}
+
+static inline bool P(Semaphore0nary & this) __attribute__((artificial));
+static inline bool P(Semaphore0nary & this) {
+    $thread * thrd = active_thread();
+    P(this, thrd);
+    park();
+    return true;
+}
+
+static inline $thread * V(Semaphore0nary & this, const bool doUnpark = true) __attribute__((artificial));
+static inline $thread * V(Semaphore0nary & this, const bool doUnpark = true) {
+	$thread * next;
+	lock(this.lock __cfaabi_dbg_ctx2);
+		for (;;) {
+			next = pop(this.queue);
+			if (next) break;
+			Pause();
+		}
+	unlock(this.lock);
+
+	if (doUnpark) unpark(next);
+	return next;
+}
+
+// Wrapper used on top of any sempahore to avoid potential locking
+struct BinaryBenaphore {
+	volatile ssize_t counter;
+};
+
+static inline {
+	void ?{}(BinaryBenaphore & this) { this.counter = 0; }
+	void ?{}(BinaryBenaphore & this, zero_t) { this.counter = 0; }
+	void ?{}(BinaryBenaphore & this, one_t ) { this.counter = 1; }
+
+	// returns true if no blocking needed
+	bool P(BinaryBenaphore & this) { return __atomic_fetch_sub(&this.counter, 1, __ATOMIC_SEQ_CST) > 0; }
+	bool tryP(BinaryBenaphore & this) {
+		ssize_t c = this.counter;
+		return (c >= 1) && __atomic_compare_exchange_n(&this.counter, &c, c-1, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+	}
+
+	// returns true if notify needed
+	bool V(BinaryBenaphore & this) {
+		ssize_t c = 0;
+		for () {
+			if (__atomic_compare_exchange_n(&this.counter, &c, c+1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
+				if (c == 0) return true;
+				/* paranoid */ verify(c < 0);
+				return false;
+			} else {
+				if (c == 1) return true;
+				/* paranoid */ verify(c < 1);
+				Pause();
+			}
+		}
+	}
+}
+
+// Binary Semaphore based on the BinaryBenaphore on top of the 0-nary Semaphore
+struct ThreadBenaphore {
+	BinaryBenaphore ben;
+	Semaphore0nary  sem;
+};
+
+static inline void ?{}(ThreadBenaphore & this) {}
+static inline void ?{}(ThreadBenaphore & this, zero_t) { (this.ben){ 0 }; }
+static inline void ?{}(ThreadBenaphore & this, one_t ) { (this.ben){ 1 }; }
+
+static inline bool P(ThreadBenaphore & this)              { return /* P(this.ben) ? false : */ P(this.sem); }
+static inline bool P(ThreadBenaphore & this, $thread * t) { return /* P(this.ben) ? false : */ P(this.sem, t ); }
+static inline bool tryP(ThreadBenaphore & this)           { return tryP(this.ben); }
+static inline bool P(ThreadBenaphore & this, bool wait)   { return wait ? P(this) : tryP(this); }
+
+static inline $thread * V(ThreadBenaphore & this, const bool doUnpark = true) {
+	// if (V(this.ben)) return 0p;
+	return V(this.sem, doUnpark);
+}
+
+//-----------------------------------------------------------------------------
+// Semaphore
+struct semaphore {
+	__spinlock_t lock;
+	int count;
+	__queue_t($thread) waiting;
+};
+
+void  ?{}(semaphore & this, int count = 1);
+void ^?{}(semaphore & this);
+bool   P (semaphore & this);
+bool   V (semaphore & this);
+bool   V (semaphore & this, unsigned count);
 
 //----------
@@ -54,4 +169,88 @@
 static inline size_t get_recursion_count( owner_lock & this ) { return get_recursion_count( (blocking_lock &)this ); }
 
+struct fast_lock {
+	$thread * volatile owner;
+	ThreadBenaphore sem;
+};
+
+static inline bool $try_lock(fast_lock & this, $thread * thrd) {
+    $thread * exp = 0p;
+    return __atomic_compare_exchange_n(&this.owner, &exp, thrd, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+}
+
+static inline void $lock(fast_lock & this, $thread * thrd) {
+	/* paranoid */verify(thrd != this.owner);
+
+	for (;;) {
+		if ($try_lock(this, thrd)) return;
+		P(this.sem, thrd);
+	}
+}
+
+static inline void lock( fast_lock & this ) {
+	$thread * thrd = active_thread();
+	/* paranoid */verify(thrd != this.owner);
+
+	for (;;) {
+		if ($try_lock(this, thrd)) return;
+		P(this.sem);
+	}
+}
+
+static inline void try_lock ( fast_lock & this ) {
+	$thread * thrd = active_thread();
+	/* paranoid */ verify(thrd != this.owner);
+	return $try_lock(this, thrd);
+}
+
+static inline void unlock( fast_lock & this ) {
+	$thread * thrd = active_thread();
+	/* paranoid */ verify(thrd == this.owner);
+	$thread * next = V(this.sem, false); // implicit fence
+	// open 'owner' only after fence
+	this.owner = 0p;
+
+	// Unpark the next person (can be 0p, unpark handles it)
+	unpark(next);
+}
+
+static inline void on_wait( fast_lock & this ) {
+	unlock(this);
+	#warning this is broken
+}
+
+static inline void on_notify( fast_lock & this, struct $thread * t ) {
+	$lock(this, t);
+	#warning this is broken
+}
+
+static inline void   set_recursion_count( fast_lock & this, size_t recursion ) {}
+static inline size_t get_recursion_count( fast_lock & this ) { return 0; }
+
+struct mcs_node {
+	mcs_node * volatile next;
+	single_sem sem;
+};
+
+void ?{}(mcs_node & this) { this.next = 0p; }
+
+static inline mcs_node * volatile & ?`next ( mcs_node * node ) {
+	return node->next;
+}
+
+struct mcs_lock {
+	mcs_queue(mcs_node) queue;
+};
+
+static inline void lock(mcs_lock & l, mcs_node & n) {
+	if(push(l.queue, &n))
+		wait(n.sem);
+}
+
+static inline void unlock(mcs_lock & l, mcs_node & n) {
+	mcs_node * next = advance(l.queue, &n);
+	if(next) post(next->sem);
+}
+
 //-----------------------------------------------------------------------------
 // is_blocking_lock
@@ -121,16 +320,2 @@
 	bool wait( condition_variable(L) & this, L & l, uintptr_t info, Time time );
 }
-
-//-----------------------------------------------------------------------------
-// Semaphore
-struct semaphore {
-	__spinlock_t lock;
-	int count;
-	__queue_t($thread) waiting;
-};
-
-void  ?{}(semaphore & this, int count = 1);
-void ^?{}(semaphore & this);
-bool   P (semaphore & this);
-bool   V (semaphore & this);
-bool   V (semaphore & this, unsigned count);
