Index: libcfa/src/concurrency/channel.hfa
===================================================================
--- libcfa/src/concurrency/channel.hfa	(revision bd72c28427f616ee95dc08f6845fe25cfead577d)
+++ libcfa/src/concurrency/channel.hfa	(revision e20c0afc4c329948ce830db02af0461cdf7eda9f)
@@ -4,37 +4,21 @@
 #include <list.hfa>
 #include <mutex_stmt.hfa>
-
-// link field used for threads waiting on channel
-struct wait_link {
-    // used to put wait_link on a dl queue
-    inline dlink(wait_link);
-
-    // waiting thread
-    struct thread$ * t;
-
-    // shadow field
-    void * elem;
-};
-P9_EMBEDDED( wait_link, dlink(wait_link) )
-
-static inline void ?{}( wait_link & this, thread$ * t, void * elem ) {
-    this.t = t;
-    this.elem = elem;
-}
-
-// wake one thread from the list
-static inline void wake_one( dlist( wait_link ) & queue ) {
-    wait_link & popped = try_pop_front( queue );
-    unpark( popped.t );
-}
+#include "select.hfa"
 
 // returns true if woken due to shutdown
 // blocks thread on list and releases passed lock
-static inline bool block( dlist( wait_link ) & queue, void * elem_ptr, go_mutex & lock ) {
-    wait_link w{ active_thread(), elem_ptr };
-    insert_last( queue, w );
+static inline bool block( dlist( select_node ) & queue, void * elem_ptr, go_mutex & lock ) {
+    select_node sn{ active_thread(), elem_ptr };
+    insert_last( queue, sn );
     unlock( lock );
     park();
-    return w.elem == 0p;
+    return sn.extra == 0p;
+}
+
+// Waituntil support (un)register_select helper routine
+// Sets select node avail if not special OR case and then unlocks
+static inline void __set_avail_then_unlock( select_node & node, go_mutex & mutex_lock ) {
+    if ( node.park_counter ) __make_select_node_available( node );
+    unlock( mutex_lock );
 }
 
@@ -59,5 +43,5 @@
     size_t size, front, back, count;
     T * buffer;
-    dlist( wait_link ) prods, cons; // lists of blocked threads
+    dlist( select_node ) prods, cons; // lists of blocked threads
     go_mutex mutex_lock;            // MX lock
     bool closed;                    // indicates channel close/open
@@ -70,5 +54,5 @@
     size = _size;
     front = back = count = 0;
-    buffer = aalloc( size );
+    if ( size != 0 ) buffer = aalloc( size );
     prods{};
     cons{};
@@ -87,5 +71,5 @@
     #endif
     verifyf( cons`isEmpty && prods`isEmpty, "Attempted to delete channel with waiting threads (Deadlock).\n" );
-    delete( buffer );
+    if ( size != 0 ) delete( buffer );
 }
 static inline size_t get_count( channel(T) & chan ) with(chan) { return count; }
@@ -102,9 +86,13 @@
     // flush waiting consumers and producers
     while ( has_waiting_consumers( chan ) ) {
-        cons`first.elem = 0p;
+        if( !__handle_waituntil_OR( cons ) ) // ensure we only signal special OR case threads when they win the race
+            break;  // if __handle_waituntil_OR returns false cons is empty so break
+        cons`first.extra = 0p;
         wake_one( cons );
     }
     while ( has_waiting_producers( chan ) ) {
-        prods`first.elem = 0p;
+        if( !__handle_waituntil_OR( prods ) ) // ensure we only signal special OR case threads when they win the race
+            break;  // if __handle_waituntil_OR returns false prods is empty so break
+        prods`first.extra = 0p;
         wake_one( prods );
     }
@@ -114,9 +102,20 @@
 static inline void is_closed( channel(T) & chan ) with(chan) { return closed; }
 
+// used to hand an element to a blocked consumer and signal it
+static inline void __cons_handoff( channel(T) & chan, T & elem ) with(chan) {
+    memcpy( cons`first.extra, (void *)&elem, sizeof(T) ); // do waiting consumer work
+    wake_one( cons );
+}
+
+// used to hand an element to a blocked producer and signal it
+static inline void __prods_handoff( channel(T) & chan, T & retval ) with(chan) {
+    memcpy( (void *)&retval, prods`first.extra, sizeof(T) );
+    wake_one( prods );
+}
+
 static inline void flush( channel(T) & chan, T elem ) with(chan) {
     lock( mutex_lock );
     while ( count == 0 && !cons`isEmpty ) {
-        memcpy(cons`first.elem, (void *)&elem, sizeof(T)); // do waiting consumer work
-        wake_one( cons );
+        __cons_handoff( chan, elem );
     }
     unlock( mutex_lock );
@@ -125,5 +124,5 @@
 // handles buffer insert
 static inline void __buf_insert( channel(T) & chan, T & elem ) with(chan) {
-    memcpy((void *)&buffer[back], (void *)&elem, sizeof(T));
+    memcpy( (void *)&buffer[back], (void *)&elem, sizeof(T) );
     count += 1;
     back++;
@@ -131,12 +130,4 @@
 }
 
-// does the buffer insert or hands elem directly to consumer if one is waiting
-static inline void __do_insert( channel(T) & chan, T & elem ) with(chan) {
-    if ( count == 0 && !cons`isEmpty ) {
-        memcpy(cons`first.elem, (void *)&elem, sizeof(T)); // do waiting consumer work
-        wake_one( cons );
-    } else __buf_insert( chan, elem );
-}
-
 // needed to avoid an extra copy in closed case
 static inline bool __internal_try_insert( channel(T) & chan, T & elem ) with(chan) {
@@ -145,6 +136,15 @@
     operations++;
     #endif
+
+    ConsEmpty: if ( !cons`isEmpty ) {
+        if ( !__handle_waituntil_OR( cons ) ) break ConsEmpty;
+        __cons_handoff( chan, elem );
+        unlock( mutex_lock );
+        return true;
+    }
+
     if ( count == size ) { unlock( mutex_lock ); return false; }
-    __do_insert( chan, elem );
+
+    __buf_insert( chan, elem );
     unlock( mutex_lock );
     return true;
@@ -157,5 +157,5 @@
 // handles closed case of insert routine
 static inline void __closed_insert( channel(T) & chan, T & elem ) with(chan) {
-    channel_closed except{&channel_closed_vt, &elem, &chan };
+    channel_closed except{ &channel_closed_vt, &elem, &chan };
     throwResume except; // throw closed resumption
     if ( !__internal_try_insert( chan, elem ) ) throw except; // if try to insert fails (would block), throw termination
@@ -182,10 +182,10 @@
     }
 
-    // have to check for the zero size channel case
-    if ( size == 0 && !cons`isEmpty ) {
-        memcpy(cons`first.elem, (void *)&elem, sizeof(T));
-        wake_one( cons );
-        unlock( mutex_lock );
-        return true;
+    // buffer count must be zero if cons are blocked (also handles zero-size case)
+    ConsEmpty: if ( !cons`isEmpty ) {
+        if ( !__handle_waituntil_OR( cons ) ) break ConsEmpty;
+        __cons_handoff( chan, elem );
+        unlock( mutex_lock );
+        return;
     }
 
@@ -202,25 +202,16 @@
     } // if
 
-    if ( count == 0 && !cons`isEmpty ) {
-        memcpy(cons`first.elem, (void *)&elem, sizeof(T)); // do waiting consumer work
-        wake_one( cons );
-    } else __buf_insert( chan, elem );
-    
-    unlock( mutex_lock );
-    return;
-}
-
-// handles buffer remove
-static inline void __buf_remove( channel(T) & chan, T & retval ) with(chan) {
-    memcpy((void *)&retval, (void *)&buffer[front], sizeof(T));
+    __buf_insert( chan, elem );
+    unlock( mutex_lock );
+}
+
+// does the buffer remove and potentially does waiting producer work
+static inline void __do_remove( channel(T) & chan, T & retval ) with(chan) {
+    memcpy( (void *)&retval, (void *)&buffer[front], sizeof(T) );
     count -= 1;
     front = (front + 1) % size;
-}
-
-// does the buffer remove and potentially does waiting producer work
-static inline void __do_remove( channel(T) & chan, T & retval ) with(chan) {
-    __buf_remove( chan, retval );
     if (count == size - 1 && !prods`isEmpty ) {
-        __buf_insert( chan, *(T *)prods`first.elem );  // do waiting producer work
+        if ( !__handle_waituntil_OR( prods ) ) return;
+        __buf_insert( chan, *(T *)prods`first.extra );  // do waiting producer work
         wake_one( prods );
     }
@@ -233,5 +224,14 @@
     operations++;
     #endif
+
+    ZeroSize: if ( size == 0 && !prods`isEmpty ) {
+        if ( !__handle_waituntil_OR( prods ) ) break ZeroSize;
+        __prods_handoff( chan, retval );
+        unlock( mutex_lock );
+        return true;
+    }
+
     if ( count == 0 ) { unlock( mutex_lock ); return false; }
+
     __do_remove( chan, retval );
     unlock( mutex_lock );
@@ -244,8 +244,9 @@
 static inline [T, bool] try_remove( channel(T) & chan ) {
     T retval;
-    return [ retval, __internal_try_remove( chan, retval ) ];
-}
-
-static inline T try_remove( channel(T) & chan, T elem ) {
+    bool success = __internal_try_remove( chan, retval );
+    return [ retval, success ];
+}
+
+static inline T try_remove( channel(T) & chan ) {
     T retval;
     __internal_try_remove( chan, retval );
@@ -255,5 +256,5 @@
 // handles closed case of insert routine
 static inline void __closed_remove( channel(T) & chan, T & retval ) with(chan) {
-    channel_closed except{&channel_closed_vt, 0p, &chan };
+    channel_closed except{ &channel_closed_vt, 0p, &chan };
     throwResume except; // throw resumption
     if ( !__internal_try_remove( chan, retval ) ) throw except; // if try to remove fails (would block), throw termination
@@ -279,7 +280,7 @@
 
     // have to check for the zero size channel case
-    if ( size == 0 && !prods`isEmpty ) {
-        memcpy((void *)&retval, (void *)prods`first.elem, sizeof(T));
-        wake_one( prods );
+    ZeroSize: if ( size == 0 && !prods`isEmpty ) {
+        if ( !__handle_waituntil_OR( prods ) ) break ZeroSize;
+        __prods_handoff( chan, retval );
         unlock( mutex_lock );
         return retval;
@@ -287,5 +288,5 @@
 
     // wait if buffer is empty, work will be completed by someone else
-    if (count == 0) {
+    if ( count == 0 ) {
         #ifdef CHAN_STATS
         blocks++;
@@ -299,7 +300,184 @@
     // Remove from buffer
     __do_remove( chan, retval );
-
     unlock( mutex_lock );
     return retval;
 }
+
+///////////////////////////////////////////////////////////////////////////////////////////
+// The following is support for waituntil (select) statements
+///////////////////////////////////////////////////////////////////////////////////////////
+static inline bool unregister_chan( channel(T) & chan, select_node & node ) with(chan) {
+    if ( !node`isListed && !node.park_counter ) return false; // handle special OR case
+    lock( mutex_lock );
+    if ( node`isListed ) { // op wasn't performed
+        #ifdef CHAN_STATS
+        operations--;
+        #endif
+        remove( node );
+        unlock( mutex_lock );
+        return false;
+    }
+    unlock( mutex_lock );
+
+    // only return true when not special OR case, not exceptional case and status is SAT
+    return ( node.extra == 0p || !node.park_counter ) ? false : *node.clause_status == __SELECT_SAT;
+}
+
+// type used by select statement to capture a chan read as the selected operation
+struct chan_read {
+    channel(T) & chan;
+    T & ret;
+};
+
+static inline void ?{}( chan_read(T) & cr, channel(T) & chan, T & ret ) {
+    &cr.chan = &chan;
+    &cr.ret = &ret;
+}
+static inline chan_read(T) ?<<?( T & ret, channel(T) & chan ) { chan_read(T) cr{ chan, ret }; return cr; }
+
+static inline void __handle_select_closed_read( chan_read(T) & this, select_node & node ) with(this.chan, this) {
+    __closed_remove( chan, ret );
+    // if we get here then the remove succeeded
+    __make_select_node_available( node );
+}
+
+static inline bool register_select( chan_read(T) & this, select_node & node ) with(this.chan, this) {
+    // mutex(sout) sout | "register_read";
+    lock( mutex_lock );
+    node.extra = &ret; // set .extra so that if it == 0p later in on_selected it is due to channel close
+
+    #ifdef CHAN_STATS
+    if ( !closed ) operations++;
+    #endif
+
+    // check if we can complete operation. If so race to establish winner in special OR case
+    if ( !node.park_counter && ( count != 0 || !prods`isEmpty || unlikely(closed) ) ) {
+        if ( !__make_select_node_available( node ) ) { // we didn't win the race so give up on registering
+           unlock( mutex_lock );
+           return false;
+        }
+    }
+
+    if ( unlikely(closed) ) {
+        unlock( mutex_lock );
+        __handle_select_closed_read( this, node );
+        return true;
+    }
+
+    // have to check for the zero size channel case
+    ZeroSize: if ( size == 0 && !prods`isEmpty ) {
+        if ( !__handle_waituntil_OR( prods ) ) break ZeroSize;
+        __prods_handoff( chan, ret );
+        __set_avail_then_unlock( node, mutex_lock );
+        return true;
+    }
+
+    // wait if buffer is empty, work will be completed by someone else
+    if ( count == 0 ) {
+        #ifdef CHAN_STATS
+        blocks++;
+        #endif
+        
+        insert_last( cons, node );
+        unlock( mutex_lock );
+        return false;
+    }
+
+    // Remove from buffer
+    __do_remove( chan, ret );
+    __set_avail_then_unlock( node, mutex_lock );
+    return true;
+}
+static inline bool unregister_select( chan_read(T) & this, select_node & node ) { return unregister_chan( this.chan, node ); }
+static inline bool on_selected( chan_read(T) & this, select_node & node ) with(this) {
+    if ( node.extra == 0p ) // check if woken up due to closed channel
+        __closed_remove( chan, ret );
+    // This is only reachable if not closed or closed exception was handled
+    return true;
+}
+
+// type used by select statement to capture a chan write as the selected operation
+struct chan_write {
+    channel(T) & chan;
+    T elem;
+};
+
+static inline void ?{}( chan_write(T) & cw, channel(T) & chan, T elem ) {
+    &cw.chan = &chan;
+    memcpy( (void *)&cw.elem, (void *)&elem, sizeof(T) );
+}
+static inline chan_write(T) ?>>?( T elem, channel(T) & chan ) { chan_write(T) cw{ chan, elem }; return cw; }
+
+static inline void __handle_select_closed_write( chan_write(T) & this, select_node & node ) with(this.chan, this) {
+    __closed_insert( chan, elem );
+    // if we get here then the insert succeeded
+    __make_select_node_available( node );
+}
+
+static inline bool register_select( chan_write(T) & this, select_node & node ) with(this.chan, this) {
+    // mutex(sout) sout | "register_write";
+    lock( mutex_lock );
+    node.extra = &elem; // set .extra so that if it == 0p later in on_selected it is due to channel close
+
+    #ifdef CHAN_STATS
+    if ( !closed ) operations++;
+    #endif
+
+    // check if we can complete operation. If so race to establish winner in special OR case
+    if ( !node.park_counter && ( count != size || !cons`isEmpty || unlikely(closed) ) ) {
+        if ( !__make_select_node_available( node ) ) { // we didn't win the race so give up on registering
+           unlock( mutex_lock );
+           return false;
+        }
+    }
+
+    // if closed handle
+    if ( unlikely(closed) ) {
+        unlock( mutex_lock );
+        __handle_select_closed_write( this, node );
+        return true;
+    }
+
+    // handle blocked consumer case via handoff (buffer is implicitly empty)
+    ConsEmpty: if ( !cons`isEmpty ) {
+        if ( !__handle_waituntil_OR( cons ) ) {
+            // mutex(sout) sout | "empty";
+            break ConsEmpty;
+        }
+        // mutex(sout) sout | "signal";
+        __cons_handoff( chan, elem );
+        __set_avail_then_unlock( node, mutex_lock );
+        return true;
+    }
+
+    // insert node in list if buffer is full, work will be completed by someone else
+    if ( count == size ) {
+        #ifdef CHAN_STATS
+        blocks++;
+        #endif
+
+        insert_last( prods, node );
+        unlock( mutex_lock );
+        return false;
+    } // if
+
+    // otherwise carry out the write via a normal buffer insert
+    __buf_insert( chan, elem );
+    __set_avail_then_unlock( node, mutex_lock );
+    return true;
+}
+static inline bool unregister_select( chan_write(T) & this, select_node & node ) { return unregister_chan( this.chan, node ); }
+
+static inline bool on_selected( chan_write(T) & this, select_node & node ) with(this) { 
+    if ( node.extra == 0p ) // check if woken up due to closed channel
+        __closed_insert( chan, elem );
+
+    // This is only reachable if not closed or closed exception was handled
+    return true;
+}
+
+
 } // forall( T )
+
+
+
Index: libcfa/src/concurrency/future.hfa
===================================================================
--- libcfa/src/concurrency/future.hfa	(revision bd72c28427f616ee95dc08f6845fe25cfead577d)
+++ libcfa/src/concurrency/future.hfa	(revision e20c0afc4c329948ce830db02af0461cdf7eda9f)
@@ -19,4 +19,5 @@
 #include "monitor.hfa"
 #include "select.hfa"
+#include "locks.hfa"
 
 //----------------------------------------------------------------------------
@@ -26,5 +27,5 @@
 //  future_t is lockfree and uses atomics which aren't needed given we use locks here
 forall( T ) {
-    // enum(int) { FUTURE_EMPTY = 0, FUTURE_FULFILLED = 1 }; // Enums seem to be broken so feel free to add this back afterwards
+    // enum { FUTURE_EMPTY = 0, FUTURE_FULFILLED = 1 }; // Enums seem to be broken so feel free to add this back afterwards
 
     // temporary enum replacement
@@ -44,6 +45,4 @@
     };
 
-    // C_TODO: perhaps allow exceptions to be inserted like uC++?
-
 	static inline {
 
@@ -82,11 +81,12 @@
         void _internal_flush( future(T) & this ) with(this) {
             while( ! waiters`isEmpty ) {
+                if ( !__handle_waituntil_OR( waiters ) ) // handle special waituntil OR case
+                    break; // if handle_OR returns false then waiters is empty so break
                 select_node &s = try_pop_front( waiters );
 
-                if ( s.race_flag == 0p )
+                if ( s.clause_status == 0p )
                     // poke in result so that woken threads do not need to reacquire any locks
-                    // *(((future_node(T) &)s).my_result) = result;
                     copy_T( result, *(((future_node(T) &)s).my_result) );
-                else if ( !install_select_winner( s, &this ) ) continue;
+                else if ( !__make_select_node_available( s ) ) continue;
                 
                 // only unpark if future is not selected
@@ -97,5 +97,5 @@
 
 		// Fulfil the future, returns whether or not someone was unblocked
-		bool fulfil( future(T) & this, T & val ) with(this) {
+		bool fulfil( future(T) & this, T val ) with(this) {
             lock( lock );
             if( state != FUTURE_EMPTY )
@@ -153,31 +153,36 @@
         }
 
-        void * register_select( future(T) & this, select_node & s ) with(this) {
-            lock( lock );
-
-            // future not ready -> insert select node and return 0p
+        bool register_select( future(T) & this, select_node & s ) with(this) {
+            lock( lock );
+
+            // check if we can complete operation. If so race to establish winner in special OR case
+            if ( !s.park_counter && state != FUTURE_EMPTY ) { 
+                if ( !__make_select_node_available( s ) ) { // we didn't win the race so give up on registering
+                    unlock( lock );
+                    return false;
+                }
+            }
+
+            // future not ready -> insert select node and return
             if( state == FUTURE_EMPTY ) {
                 insert_last( waiters, s );
                 unlock( lock );
-                return 0p;
-            }
-
-            // future ready and we won race to install it as the select winner return 1p
-            if ( install_select_winner( s, &this ) ) {
-                unlock( lock );
-                return 1p;
-            }
-
-            unlock( lock );
-            // future ready and we lost race to install it as the select winner
-            return 2p;
-        }
-
-        void unregister_select( future(T) & this, select_node & s ) with(this) {
+                return false;
+            }
+
+            __make_select_node_available( s );
+            unlock( lock );
+            return true;
+        }
+
+        bool unregister_select( future(T) & this, select_node & s ) with(this) {
+            if ( ! s`isListed ) return false;
             lock( lock );
             if ( s`isListed ) remove( s );
             unlock( lock );
+            return false;
         }
 		
+        bool on_selected( future(T) & this, select_node & node ) { return true; }
 	}
 }
@@ -186,5 +191,5 @@
 // These futures below do not support select statements so they may not be as useful as 'future'
 //  however the 'single_future' is cheap and cheerful and is most likely more performant than 'future'
-//  since it uses raw atomics and no locks afaik
+//  since it uses raw atomics and no locks
 //
 // As far as 'multi_future' goes I can't see many use cases as it will be less performant than 'future'
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision bd72c28427f616ee95dc08f6845fe25cfead577d)
+++ libcfa/src/concurrency/invoke.h	(revision e20c0afc4c329948ce830db02af0461cdf7eda9f)
@@ -217,8 +217,9 @@
 		struct __thread_user_link cltr_link;
 
-		// used to point to this thd's current clh node
-		volatile bool * clh_node;
-
 		struct processor * last_proc;
+
+        // ptr used during handover between blocking lists to allow for stack allocation of intrusive nodes
+        // main use case is wait-morphing to allow a different node to be used to block on condvar vs lock
+        void * link_node;
 
 		PRNG_STATE_T random_state;						// fast random numbers
Index: libcfa/src/concurrency/locks.cfa
===================================================================
--- libcfa/src/concurrency/locks.cfa	(revision bd72c28427f616ee95dc08f6845fe25cfead577d)
+++ libcfa/src/concurrency/locks.cfa	(revision e20c0afc4c329948ce830db02af0461cdf7eda9f)
@@ -79,20 +79,17 @@
 	// lock is held by some other thread
 	if ( owner != 0p && owner != thrd ) {
-		insert_last( blocked_threads, *thrd );
+        select_node node;
+		insert_last( blocked_threads, node );
 		wait_count++;
 		unlock( lock );
 		park( );
-	}
-	// multi acquisition lock is held by current thread
-	else if ( owner == thrd && multi_acquisition ) {
+        return;
+	} else if ( owner == thrd && multi_acquisition ) { // multi acquisition lock is held by current thread
 		recursion_count++;
-		unlock( lock );
-	}
-	// lock isn't held
-	else {
+	} else {  // lock isn't held
 		owner = thrd;
 		recursion_count = 1;
-		unlock( lock );
-	}
+	}
+    unlock( lock );
 }
 
@@ -117,10 +114,25 @@
 }
 
-static void pop_and_set_new_owner( blocking_lock & this ) with( this ) {
-	thread$ * t = &try_pop_front( blocked_threads );
-	owner = t;
-	recursion_count = ( t ? 1 : 0 );
-	if ( t ) wait_count--;
-	unpark( t );
+// static void pop_and_set_new_owner( blocking_lock & this ) with( this ) {
+// 	thread$ * t = &try_pop_front( blocked_threads );
+// 	owner = t;
+// 	recursion_count = ( t ? 1 : 0 );
+// 	if ( t ) wait_count--;
+// 	unpark( t );
+// }
+
+static inline void pop_node( blocking_lock & this ) with( this ) {
+    __handle_waituntil_OR( blocked_threads );
+    select_node * node = &try_pop_front( blocked_threads );
+    if ( node ) {
+        wait_count--;
+        owner = node->blocked_thread;
+        recursion_count = 1;
+        // if ( !node->clause_status || __make_select_node_available( *node ) ) unpark( node->blocked_thread );
+        wake_one( blocked_threads, *node );
+    } else {
+        owner = 0p;
+        recursion_count = 0;
+    }
 }
 
@@ -134,5 +146,5 @@
 	recursion_count--;
 	if ( recursion_count == 0 ) {
-		pop_and_set_new_owner( this );
+		pop_node( this );
 	}
 	unlock( lock );
@@ -147,7 +159,6 @@
 	// lock held
 	if ( owner != 0p ) {
-		insert_last( blocked_threads, *t );
+		insert_last( blocked_threads, *(select_node *)t->link_node );
 		wait_count++;
-		unlock( lock );
 	}
 	// lock not held
@@ -156,6 +167,6 @@
 		recursion_count = 1;
 		unpark( t );
-		unlock( lock );
-	}
+	}
+    unlock( lock );
 }
 
@@ -167,6 +178,12 @@
 	size_t ret = recursion_count;
 
-	pop_and_set_new_owner( this );
+	pop_node( this );
+
+    select_node node;
+    active_thread()->link_node = (void *)&node;
 	unlock( lock );
+
+    park();
+
 	return ret;
 }
@@ -175,4 +192,60 @@
 	recursion_count = recursion;
 }
+
+// waituntil() support
+bool register_select( blocking_lock & this, select_node & node ) with(this) {
+    lock( lock __cfaabi_dbg_ctx2 );
+	thread$ * thrd = active_thread();
+
+	// single acquisition lock is held by current thread
+	/* paranoid */ verifyf( owner != thrd || multi_acquisition, "Single acquisition lock holder (%p) attempted to reacquire the lock %p resulting in a deadlock.", owner, &this );
+
+    if ( !node.park_counter && ( (owner == thrd && multi_acquisition) || owner == 0p ) ) { // OR special case
+        if ( !__make_select_node_available( node ) ) { // we didn't win the race so give up on registering
+           unlock( lock );
+           return false;
+        }
+    }
+
+	// lock is held by some other thread
+	if ( owner != 0p && owner != thrd ) {
+		insert_last( blocked_threads, node );
+		wait_count++;
+		unlock( lock );
+        return false;
+	} else if ( owner == thrd && multi_acquisition ) { // multi acquisition lock is held by current thread
+		recursion_count++;
+	} else {  // lock isn't held
+		owner = thrd;
+		recursion_count = 1;
+	}
+
+    if ( node.park_counter ) __make_select_node_available( node );
+    unlock( lock );
+    return true;
+}
+
+bool unregister_select( blocking_lock & this, select_node & node ) with(this) {
+    lock( lock __cfaabi_dbg_ctx2 );
+    if ( node`isListed ) {
+        remove( node );
+        wait_count--;
+        unlock( lock );
+        return false;
+    }
+    
+    if ( owner == active_thread() ) {
+        /* paranoid */ verifyf( recursion_count == 1 || multi_acquisition, "Thread %p attempted to unlock owner lock %p in waituntil unregister, which is not recursive but has a recursive count of %zu", active_thread(), &this, recursion_count );
+        // if recursion count is zero release lock and set new owner if one is waiting
+        recursion_count--;
+        if ( recursion_count == 0 ) {
+            pop_node( this );
+        }
+    }
+	unlock( lock );
+    return false;
+}
+
+bool on_selected( blocking_lock & this, select_node & node ) { return true; }
 
 //-----------------------------------------------------------------------------
@@ -311,27 +384,38 @@
 	int counter( condition_variable(L) & this ) with(this) { return count; }
 
-	static size_t queue_and_get_recursion( condition_variable(L) & this, info_thread(L) * i ) with(this) {
+	static void enqueue_thread( condition_variable(L) & this, info_thread(L) * i ) with(this) {
 		// add info_thread to waiting queue
 		insert_last( blocked_threads, *i );
 		count++;
-		size_t recursion_count = 0;
-		if (i->lock) {
+		// size_t recursion_count = 0;
+		// if (i->lock) {
+		// 	// if lock was passed get recursion count to reset to after waking thread
+		// 	recursion_count = on_wait( *i->lock );
+		// }
+		// return recursion_count;
+	}
+
+    static size_t block_and_get_recursion( info_thread(L) & i ) {
+        size_t recursion_count = 0;
+		if ( i.lock ) {
 			// if lock was passed get recursion count to reset to after waking thread
-			recursion_count = on_wait( *i->lock );
-		}
-		return recursion_count;
-	}
+			recursion_count = on_wait( *i.lock ); // this call blocks
+		} else park( );
+        return recursion_count;
+    }
 
 	// helper for wait()'s' with no timeout
 	static void queue_info_thread( condition_variable(L) & this, info_thread(L) & i ) with(this) {
 		lock( lock __cfaabi_dbg_ctx2 );
-		size_t recursion_count = queue_and_get_recursion(this, &i);
+        enqueue_thread( this, &i );
+		// size_t recursion_count = queue_and_get_recursion( this, &i );
 		unlock( lock );
 
 		// blocks here
-		park( );
+        size_t recursion_count = block_and_get_recursion( i );
+		// park( );
 
 		// resets recursion count here after waking
-		if (i.lock) on_wakeup(*i.lock, recursion_count);
+		if ( i.lock ) on_wakeup( *i.lock, recursion_count );
 	}
 
@@ -343,5 +427,6 @@
 	static void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Duration t, Alarm_Callback callback ) with(this) {
 		lock( lock __cfaabi_dbg_ctx2 );
-		size_t recursion_count = queue_and_get_recursion(this, &info);
+        enqueue_thread( this, &info );
+		// size_t recursion_count = queue_and_get_recursion( this, &info );
 		alarm_node_wrap(L) node_wrap = { t, 0`s, callback, &this, &info };
 		unlock( lock );
@@ -351,5 +436,6 @@
 
 		// blocks here
-		park();
+        size_t recursion_count = block_and_get_recursion( info );
+		// park();
 
 		// unregisters alarm so it doesn't go off if this happens first
@@ -357,5 +443,5 @@
 
 		// resets recursion count here after waking
-		if (info.lock) on_wakeup(*info.lock, recursion_count);
+		if ( info.lock ) on_wakeup( *info.lock, recursion_count );
 	}
 
@@ -417,6 +503,6 @@
 		info_thread( L ) i = { active_thread(), info, &l };
 		insert_last( blocked_threads, i );
-		size_t recursion_count = on_wait( *i.lock );
-		park( );
+		size_t recursion_count = on_wait( *i.lock ); // blocks here
+		// park( );
 		on_wakeup(*i.lock, recursion_count);
 	}
@@ -459,15 +545,17 @@
 	bool empty ( pthread_cond_var(L) & this ) with(this) { return blocked_threads`isEmpty; }
 
-	static size_t queue_and_get_recursion( pthread_cond_var(L) & this, info_thread(L) * i ) with(this) {
-		// add info_thread to waiting queue
-		insert_last( blocked_threads, *i );
-		size_t recursion_count = 0;
-		recursion_count = on_wait( *i->lock );
-		return recursion_count;
-	}
+	// static size_t queue_and_get_recursion( pthread_cond_var(L) & this, info_thread(L) * i ) with(this) {
+	// 	// add info_thread to waiting queue
+	// 	insert_last( blocked_threads, *i );
+	// 	size_t recursion_count = 0;
+	// 	recursion_count = on_wait( *i->lock );
+	// 	return recursion_count;
+	// }
+
 	
 	static void queue_info_thread_timeout( pthread_cond_var(L) & this, info_thread(L) & info, Duration t, Alarm_Callback callback ) with(this) {
 		lock( lock __cfaabi_dbg_ctx2 );
-		size_t recursion_count = queue_and_get_recursion(this, &info);
+		// size_t recursion_count = queue_and_get_recursion(this, &info);
+        insert_last( blocked_threads, info );
 		pthread_alarm_node_wrap(L) node_wrap = { t, 0`s, callback, &this, &info };
 		unlock( lock );
@@ -477,5 +565,6 @@
 
 		// blocks here
-		park();
+        size_t recursion_count = block_and_get_recursion( info );
+		// park();
 
 		// unregisters alarm so it doesn't go off if this happens first
@@ -483,5 +572,5 @@
 
 		// resets recursion count here after waking
-		if (info.lock) on_wakeup(*info.lock, recursion_count);
+		if ( info.lock ) on_wakeup( *info.lock, recursion_count );
 	}
 
@@ -493,8 +582,12 @@
 		lock( lock __cfaabi_dbg_ctx2 );
 		info_thread( L ) i = { active_thread(), info, &l };
-		size_t recursion_count = queue_and_get_recursion(this, &i);
-		unlock( lock );
-		park( );
-		on_wakeup(*i.lock, recursion_count);
+        insert_last( blocked_threads, i );
+		// size_t recursion_count = queue_and_get_recursion( this, &i );
+		unlock( lock );
+
+        // blocks here
+		size_t recursion_count = block_and_get_recursion( i );
+		// park();
+		on_wakeup( *i.lock, recursion_count );
 	}
 
Index: libcfa/src/concurrency/locks.hfa
===================================================================
--- libcfa/src/concurrency/locks.hfa	(revision bd72c28427f616ee95dc08f6845fe25cfead577d)
+++ libcfa/src/concurrency/locks.hfa	(revision e20c0afc4c329948ce830db02af0461cdf7eda9f)
@@ -30,4 +30,6 @@
 #include "time.hfa"
 
+#include "select.hfa"
+
 #include <fstream.hfa>
 
@@ -70,4 +72,7 @@
 static inline void   on_wakeup( single_acquisition_lock & this, size_t v ) { on_wakeup ( (blocking_lock &)this, v ); }
 static inline void   on_notify( single_acquisition_lock & this, struct thread$ * t ) { on_notify( (blocking_lock &)this, t ); }
+static inline bool   register_select( single_acquisition_lock & this, select_node & node ) { return register_select( (blocking_lock &)this, node ); }
+static inline bool   unregister_select( single_acquisition_lock & this, select_node & node ) { return unregister_select( (blocking_lock &)this, node ); }
+static inline bool   on_selected( single_acquisition_lock & this, select_node & node ) { return on_selected( (blocking_lock &)this, node ); }
 
 //----------
@@ -84,4 +89,7 @@
 static inline void   on_wakeup( owner_lock & this, size_t v ) { on_wakeup ( (blocking_lock &)this, v ); }
 static inline void   on_notify( owner_lock & this, struct thread$ * t ) { on_notify( (blocking_lock &)this, t ); }
+static inline bool   register_select( owner_lock & this, select_node & node ) { return register_select( (blocking_lock &)this, node ); }
+static inline bool   unregister_select( owner_lock & this, select_node & node ) { return unregister_select( (blocking_lock &)this, node ); }
+static inline bool   on_selected( owner_lock & this, select_node & node ) { return on_selected( (blocking_lock &)this, node ); }
 
 //-----------------------------------------------------------------------------
@@ -180,5 +188,5 @@
 
 // if this is called recursively IT WILL DEADLOCK!!!!!
-static inline void lock(futex_mutex & this) with(this) {
+static inline void lock( futex_mutex & this ) with(this) {
 	int state;
 
@@ -190,7 +198,4 @@
 		for (int i = 0; i < spin; i++) Pause();
 	}
-
-	// // no contention try to acquire
-	// if (internal_try_lock(this, state)) return;
 	
 	// if not in contended state, set to be in contended state
@@ -213,5 +218,5 @@
 
 static inline void on_notify( futex_mutex & f, thread$ * t){ unpark(t); }
-static inline size_t on_wait( futex_mutex & f ) {unlock(f); return 0;}
+static inline size_t on_wait( futex_mutex & f ) { unlock(f); park(); return 0; }
 
 // to set recursion count after getting signalled;
@@ -244,5 +249,5 @@
 
 // if this is called recursively IT WILL DEADLOCK!!!!!
-static inline void lock(go_mutex & this) with(this) {
+static inline void lock( go_mutex & this ) with( this ) {
 	int state, init_state;
 
@@ -255,5 +260,5 @@
             while( !val ) { // lock unlocked
                 state = 0;
-                if (internal_try_lock(this, state, init_state)) return;
+                if ( internal_try_lock( this, state, init_state ) ) return;
             }
             for (int i = 0; i < 30; i++) Pause();
@@ -262,13 +267,13 @@
         while( !val ) { // lock unlocked
             state = 0;
-            if (internal_try_lock(this, state, init_state)) return;
+            if ( internal_try_lock( this, state, init_state ) ) return;
         }
         sched_yield();
         
         // if not in contended state, set to be in contended state
-        state = internal_exchange(this, 2);
+        state = internal_exchange( this, 2 );
         if ( !state ) return; // state == 0
         init_state = 2;
-        futex((int*)&val, FUTEX_WAIT, 2); // if val is not 2 this returns with EWOULDBLOCK
+        futex( (int*)&val, FUTEX_WAIT, 2 ); // if val is not 2 this returns with EWOULDBLOCK
     }
 }
@@ -276,43 +281,13 @@
 static inline void unlock( go_mutex & this ) with(this) {
 	// if uncontended do atomic unlock and then return
-    if (__atomic_exchange_n(&val, 0, __ATOMIC_RELEASE) == 1) return;
+    if ( __atomic_exchange_n(&val, 0, __ATOMIC_RELEASE) == 1 ) return;
 	
 	// otherwise threads are blocked so we must wake one
-	futex((int *)&val, FUTEX_WAKE, 1);
-}
-
-static inline void on_notify( go_mutex & f, thread$ * t){ unpark(t); }
-static inline size_t on_wait( go_mutex & f ) {unlock(f); return 0;}
+	futex( (int *)&val, FUTEX_WAKE, 1 );
+}
+
+static inline void on_notify( go_mutex & f, thread$ * t){ unpark( t ); }
+static inline size_t on_wait( go_mutex & f ) { unlock( f ); park(); return 0; }
 static inline void on_wakeup( go_mutex & f, size_t recursion ) {}
-
-//-----------------------------------------------------------------------------
-// CLH Spinlock
-// - No recursive acquisition
-// - Needs to be released by owner
-
-struct clh_lock {
-	volatile bool * volatile tail;
-    volatile bool * volatile head;
-};
-
-static inline void  ?{}( clh_lock & this ) { this.tail = malloc(); *this.tail = true; }
-static inline void ^?{}( clh_lock & this ) { free(this.tail); }
-
-static inline void lock(clh_lock & l) {
-	thread$ * curr_thd = active_thread();
-	*(curr_thd->clh_node) = false;
-	volatile bool * prev = __atomic_exchange_n((bool **)(&l.tail), (bool *)(curr_thd->clh_node), __ATOMIC_SEQ_CST);
-	while(!__atomic_load_n(prev, __ATOMIC_SEQ_CST)) Pause();
-    __atomic_store_n((bool **)(&l.head), (bool *)curr_thd->clh_node, __ATOMIC_SEQ_CST);
-    curr_thd->clh_node = prev;
-}
-
-static inline void unlock(clh_lock & l) {
-	__atomic_store_n((bool *)(l.head), true, __ATOMIC_SEQ_CST);
-}
-
-static inline void on_notify(clh_lock & this, struct thread$ * t ) { unpark(t); }
-static inline size_t on_wait(clh_lock & this) { unlock(this); return 0; }
-static inline void on_wakeup(clh_lock & this, size_t recursion ) { lock(this); }
 
 //-----------------------------------------------------------------------------
@@ -337,15 +312,15 @@
 static inline void  ^?{}( exp_backoff_then_block_lock & this ){}
 
-static inline bool internal_try_lock(exp_backoff_then_block_lock & this, size_t & compare_val) with(this) {
+static inline bool internal_try_lock( exp_backoff_then_block_lock & this, size_t & compare_val ) with(this) {
 	return __atomic_compare_exchange_n(&lock_value, &compare_val, 1, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
 }
 
-static inline bool try_lock(exp_backoff_then_block_lock & this) { size_t compare_val = 0; return internal_try_lock(this, compare_val); }
-
-static inline bool try_lock_contention(exp_backoff_then_block_lock & this) with(this) {
-	return !__atomic_exchange_n(&lock_value, 2, __ATOMIC_ACQUIRE);
-}
-
-static inline bool block(exp_backoff_then_block_lock & this) with(this) {
+static inline bool try_lock( exp_backoff_then_block_lock & this ) { size_t compare_val = 0; return internal_try_lock( this, compare_val ); }
+
+static inline bool try_lock_contention( exp_backoff_then_block_lock & this ) with(this) {
+	return !__atomic_exchange_n( &lock_value, 2, __ATOMIC_ACQUIRE );
+}
+
+static inline bool block( exp_backoff_then_block_lock & this ) with(this) {
     lock( spinlock __cfaabi_dbg_ctx2 );
     if (__atomic_load_n( &lock_value, __ATOMIC_SEQ_CST) != 2) {
@@ -359,5 +334,5 @@
 }
 
-static inline void lock(exp_backoff_then_block_lock & this) with(this) {
+static inline void lock( exp_backoff_then_block_lock & this ) with(this) {
 	size_t compare_val = 0;
 	int spin = 4;
@@ -378,5 +353,5 @@
 }
 
-static inline void unlock(exp_backoff_then_block_lock & this) with(this) {
+static inline void unlock( exp_backoff_then_block_lock & this ) with(this) {
     if (__atomic_exchange_n(&lock_value, 0, __ATOMIC_RELEASE) == 1) return;
     lock( spinlock __cfaabi_dbg_ctx2 );
@@ -386,7 +361,7 @@
 }
 
-static inline void on_notify(exp_backoff_then_block_lock & this, struct thread$ * t ) { unpark(t); }
-static inline size_t on_wait(exp_backoff_then_block_lock & this) { unlock(this); return 0; }
-static inline void on_wakeup(exp_backoff_then_block_lock & this, size_t recursion ) { lock(this); }
+static inline void on_notify( exp_backoff_then_block_lock & this, struct thread$ * t ) { unpark( t ); }
+static inline size_t on_wait( exp_backoff_then_block_lock & this ) { unlock( this ); park(); return 0; }
+static inline void on_wakeup( exp_backoff_then_block_lock & this, size_t recursion ) { lock( this ); }
 
 //-----------------------------------------------------------------------------
@@ -418,5 +393,5 @@
 
 // if this is called recursively IT WILL DEADLOCK!!!!!
-static inline void lock(fast_block_lock & this) with(this) {
+static inline void lock( fast_block_lock & this ) with(this) {
 	lock( lock __cfaabi_dbg_ctx2 );
 	if ( held ) {
@@ -430,5 +405,5 @@
 }
 
-static inline void unlock(fast_block_lock & this) with(this) {
+static inline void unlock( fast_block_lock & this ) with(this) {
 	lock( lock __cfaabi_dbg_ctx2 );
 	/* paranoid */ verifyf( held != false, "Attempt to release lock %p that isn't held", &this );
@@ -439,11 +414,11 @@
 }
 
-static inline void on_notify(fast_block_lock & this, struct thread$ * t ) with(this) {
+static inline void on_notify( fast_block_lock & this, struct thread$ * t ) with(this) {
     lock( lock __cfaabi_dbg_ctx2 );
     insert_last( blocked_threads, *t );
     unlock( lock );
 }
-static inline size_t on_wait(fast_block_lock & this) { unlock(this); return 0; }
-static inline void on_wakeup(fast_block_lock & this, size_t recursion ) { }
+static inline size_t on_wait( fast_block_lock & this) { unlock(this); park(); return 0; }
+static inline void on_wakeup( fast_block_lock & this, size_t recursion ) { }
 
 //-----------------------------------------------------------------------------
@@ -456,5 +431,5 @@
 struct simple_owner_lock {
 	// List of blocked threads
-	dlist( thread$ ) blocked_threads;
+	dlist( select_node ) blocked_threads;
 
 	// Spin lock used for mutual exclusion
@@ -477,6 +452,6 @@
 static inline void ?=?( simple_owner_lock & this, simple_owner_lock this2 ) = void;
 
-static inline void lock(simple_owner_lock & this) with(this) {
-	if (owner == active_thread()) {
+static inline void lock( simple_owner_lock & this ) with(this) {
+	if ( owner == active_thread() ) {
 		recursion_count++;
 		return;
@@ -484,6 +459,7 @@
 	lock( lock __cfaabi_dbg_ctx2 );
 
-	if (owner != 0p) {
-		insert_last( blocked_threads, *active_thread() );
+	if ( owner != 0p ) {
+        select_node node;
+		insert_last( blocked_threads, node );
 		unlock( lock );
 		park( );
@@ -495,13 +471,19 @@
 }
 
-// TODO: fix duplicate def issue and bring this back
-// void pop_and_set_new_owner( simple_owner_lock & this ) with( this ) {
-	// thread$ * t = &try_pop_front( blocked_threads );
-	// owner = t;
-	// recursion_count = ( t ? 1 : 0 );
-	// unpark( t );
-// }
-
-static inline void unlock(simple_owner_lock & this) with(this) {
+static inline void pop_node( simple_owner_lock & this ) with(this) {
+    __handle_waituntil_OR( blocked_threads );
+    select_node * node = &try_pop_front( blocked_threads );
+    if ( node ) {
+        owner = node->blocked_thread;
+        recursion_count = 1;
+        // if ( !node->clause_status || __make_select_node_available( *node ) ) unpark( node->blocked_thread );
+        wake_one( blocked_threads, *node );
+    } else {
+        owner = 0p;
+        recursion_count = 0;
+    }
+}
+
+static inline void unlock( simple_owner_lock & this ) with(this) {
 	lock( lock __cfaabi_dbg_ctx2 );
 	/* paranoid */ verifyf( owner != 0p, "Attempt to release lock %p that isn't held", &this );
@@ -510,18 +492,14 @@
 	recursion_count--;
 	if ( recursion_count == 0 ) {
-		// pop_and_set_new_owner( this );
-		thread$ * t = &try_pop_front( blocked_threads );
-		owner = t;
-		recursion_count = ( t ? 1 : 0 );
-		unpark( t );
+		pop_node( this );
 	}
 	unlock( lock );
 }
 
-static inline void on_notify(simple_owner_lock & this, struct thread$ * t ) with(this) {
+static inline void on_notify(simple_owner_lock & this, thread$ * t ) with(this) {
 	lock( lock __cfaabi_dbg_ctx2 );
 	// lock held
 	if ( owner != 0p ) {
-		insert_last( blocked_threads, *t );
+		insert_last( blocked_threads, *(select_node *)t->link_node );
 	}
 	// lock not held
@@ -534,5 +512,5 @@
 }
 
-static inline size_t on_wait(simple_owner_lock & this) with(this) {
+static inline size_t on_wait( simple_owner_lock & this ) with(this) {
 	lock( lock __cfaabi_dbg_ctx2 );
 	/* paranoid */ verifyf( owner != 0p, "Attempt to release lock %p that isn't held", &this );
@@ -541,16 +519,69 @@
 	size_t ret = recursion_count;
 
-	// pop_and_set_new_owner( this );
-
-	thread$ * t = &try_pop_front( blocked_threads );
-	owner = t;
-	recursion_count = ( t ? 1 : 0 );
-	unpark( t );
-
+	pop_node( this );
+
+    select_node node;
+    active_thread()->link_node = (void *)&node;
 	unlock( lock );
+    park();
+
 	return ret;
 }
 
-static inline void on_wakeup(simple_owner_lock & this, size_t recursion ) with(this) { recursion_count = recursion; }
+static inline void on_wakeup( simple_owner_lock & this, size_t recursion ) with(this) { recursion_count = recursion; }
+
+// waituntil() support
+static inline bool register_select( simple_owner_lock & this, select_node & node ) with(this) {
+    lock( lock __cfaabi_dbg_ctx2 );
+
+    // check if we can complete operation. If so race to establish winner in special OR case
+    if ( !node.park_counter && ( owner == active_thread() || owner == 0p ) ) {
+        if ( !__make_select_node_available( node ) ) { // we didn't win the race so give up on registering
+           unlock( lock );
+           return false;
+        }
+    }
+
+    if ( owner == active_thread() ) {
+		recursion_count++;
+        if ( node.park_counter ) __make_select_node_available( node );
+        unlock( lock );
+		return true;
+	}
+
+    if ( owner != 0p ) {
+		insert_last( blocked_threads, node );
+		unlock( lock );
+		return false;
+	}
+    
+	owner = active_thread();
+	recursion_count = 1;
+
+    if ( node.park_counter ) __make_select_node_available( node );
+    unlock( lock );
+    return true;
+}
+
+static inline bool unregister_select( simple_owner_lock & this, select_node & node ) with(this) {
+    lock( lock __cfaabi_dbg_ctx2 );
+    if ( node`isListed ) {
+        remove( node );
+        unlock( lock );
+        return false;
+    }
+
+    if ( owner == active_thread() ) {
+        recursion_count--;
+        if ( recursion_count == 0 ) {
+            pop_node( this );
+        }
+    }
+    unlock( lock );
+    return false;
+}
+
+static inline bool on_selected( simple_owner_lock & this, select_node & node ) { return true; }
+
 
 //-----------------------------------------------------------------------------
@@ -578,5 +609,5 @@
 
 // if this is called recursively IT WILL DEADLOCK!
-static inline void lock(spin_queue_lock & this) with(this) {
+static inline void lock( spin_queue_lock & this ) with(this) {
 	mcs_spin_node node;
 	lock( lock, node );
@@ -586,13 +617,13 @@
 }
 
-static inline void unlock(spin_queue_lock & this) with(this) {
+static inline void unlock( spin_queue_lock & this ) with(this) {
 	__atomic_store_n(&held, false, __ATOMIC_RELEASE);
 }
 
-static inline void on_notify(spin_queue_lock & this, struct thread$ * t ) {
+static inline void on_notify( spin_queue_lock & this, struct thread$ * t ) {
 	unpark(t);
 }
-static inline size_t on_wait(spin_queue_lock & this) { unlock(this); return 0; }
-static inline void on_wakeup(spin_queue_lock & this, size_t recursion ) { lock(this); }
+static inline size_t on_wait( spin_queue_lock & this ) { unlock( this ); park(); return 0; }
+static inline void on_wakeup( spin_queue_lock & this, size_t recursion ) { lock( this ); }
 
 
@@ -621,5 +652,5 @@
 
 // if this is called recursively IT WILL DEADLOCK!!!!!
-static inline void lock(mcs_block_spin_lock & this) with(this) {
+static inline void lock( mcs_block_spin_lock & this ) with(this) {
 	mcs_node node;
 	lock( lock, node );
@@ -633,7 +664,7 @@
 }
 
-static inline void on_notify(mcs_block_spin_lock & this, struct thread$ * t ) { unpark(t); }
-static inline size_t on_wait(mcs_block_spin_lock & this) { unlock(this); return 0; }
-static inline void on_wakeup(mcs_block_spin_lock & this, size_t recursion ) {lock(this); }
+static inline void on_notify( mcs_block_spin_lock & this, struct thread$ * t ) { unpark( t ); }
+static inline size_t on_wait( mcs_block_spin_lock & this) { unlock( this ); park(); return 0; }
+static inline void on_wakeup( mcs_block_spin_lock & this, size_t recursion ) {lock( this ); }
 
 //-----------------------------------------------------------------------------
@@ -661,5 +692,5 @@
 
 // if this is called recursively IT WILL DEADLOCK!!!!!
-static inline void lock(block_spin_lock & this) with(this) {
+static inline void lock( block_spin_lock & this ) with(this) {
 	lock( lock );
 	while(__atomic_load_n(&held, __ATOMIC_SEQ_CST)) Pause();
@@ -668,9 +699,9 @@
 }
 
-static inline void unlock(block_spin_lock & this) with(this) {
+static inline void unlock( block_spin_lock & this ) with(this) {
 	__atomic_store_n(&held, false, __ATOMIC_RELEASE);
 }
 
-static inline void on_notify(block_spin_lock & this, struct thread$ * t ) with(this.lock) {
+static inline void on_notify( block_spin_lock & this, struct thread$ * t ) with(this.lock) {
 	// first we acquire internal fast_block_lock
 	lock( lock __cfaabi_dbg_ctx2 );
@@ -686,6 +717,6 @@
 	unpark(t);
 }
-static inline size_t on_wait(block_spin_lock & this) { unlock(this); return 0; }
-static inline void on_wakeup(block_spin_lock & this, size_t recursion ) with(this) {
+static inline size_t on_wait( block_spin_lock & this ) { unlock( this ); park(); return 0; }
+static inline void on_wakeup( block_spin_lock & this, size_t recursion ) with(this) {
 	// now we acquire the entire block_spin_lock upon waking up
 	while(__atomic_load_n(&held, __ATOMIC_SEQ_CST)) Pause();
@@ -714,8 +745,4 @@
 forall(L & | is_blocking_lock(L)) {
 	struct info_thread;
-
-	// // for use by sequence
-	// info_thread(L) *& Back( info_thread(L) * this );
-	// info_thread(L) *& Next( info_thread(L) * this );
 }
 
Index: libcfa/src/concurrency/mutex_stmt.hfa
===================================================================
--- libcfa/src/concurrency/mutex_stmt.hfa	(revision bd72c28427f616ee95dc08f6845fe25cfead577d)
+++ libcfa/src/concurrency/mutex_stmt.hfa	(revision e20c0afc4c329948ce830db02af0461cdf7eda9f)
@@ -15,5 +15,4 @@
 };
 
-
 struct __mutex_stmt_lock_guard {
     void ** lockarr;
@@ -30,24 +29,6 @@
 
 forall(L & | is_lock(L)) {
-
-    struct scoped_lock {
-        L * internal_lock;
-    };
-
-    static inline void ?{}( scoped_lock(L) & this, L & internal_lock ) {
-        this.internal_lock = &internal_lock;
-        lock(internal_lock);
-    }
-    
-    static inline void ^?{}( scoped_lock(L) & this ) with(this) {
-        unlock(*internal_lock);
-    }
-
-    static inline void * __get_mutexstmt_lock_ptr( L & this ) {
-        return &this;
-    }
-
-    static inline L __get_mutexstmt_lock_type( L & this );
-
-    static inline L __get_mutexstmt_lock_type( L * this );
+    static inline void * __get_mutexstmt_lock_ptr( L & this ) { return &this; }
+    static inline L __get_mutexstmt_lock_type( L & this ) {}
+    static inline L __get_mutexstmt_lock_type( L * this ) {}
 }
Index: libcfa/src/concurrency/select.hfa
===================================================================
--- libcfa/src/concurrency/select.hfa	(revision bd72c28427f616ee95dc08f6845fe25cfead577d)
+++ libcfa/src/concurrency/select.hfa	(revision e20c0afc4c329948ce830db02af0461cdf7eda9f)
@@ -2,62 +2,128 @@
 
 #include "containers/list.hfa"
-#include <stdint.h>
-#include <kernel.hfa>
-#include <locks.hfa>
+#include "stdint.h"
+#include "kernel.hfa"
 
+struct select_node;
+
+// node status
+static const unsigned long int __SELECT_UNSAT = 0;
+static const unsigned long int __SELECT_SAT = 1;
+static const unsigned long int __SELECT_RUN = 2;
+
+static inline bool __CFA_has_clause_run( unsigned long int status ) { return status == __SELECT_RUN; }
+static inline void __CFA_maybe_park( int * park_counter ) {
+    if ( __atomic_sub_fetch( park_counter, 1, __ATOMIC_SEQ_CST) < 0 )
+        park();
+}
+
+// node used for coordinating waituntil synchronization
 struct select_node {
+    int * park_counter;                 // If this is 0p then the node is in a special OR case waituntil
+    unsigned long int * clause_status;  // needs to point at ptr sized location, if this is 0p then node is not part of a waituntil
+
+    void * extra;                       // used to store arbitrary data needed by some primitives
+
     thread$ * blocked_thread;
-    void ** race_flag;
     inline dlink(select_node);
 };
 P9_EMBEDDED( select_node, dlink(select_node) )
 
-void ?{}( select_node & this ) {
-    this.blocked_thread = 0p;
-    this.race_flag = 0p;
+static inline void ?{}( select_node & this ) {
+    this.blocked_thread = active_thread();
+    this.clause_status = 0p;
+    this.park_counter = 0p;
+    this.extra = 0p;
 }
 
-void ?{}( select_node & this, thread$ * blocked_thread ) {
+static inline void ?{}( select_node & this, thread$ * blocked_thread ) {
     this.blocked_thread = blocked_thread;
-    this.race_flag = 0p;
+    this.clause_status = 0p;
+    this.park_counter = 0p;
+    this.extra = 0p;
 }
 
-void ?{}( select_node & this, thread$ * blocked_thread, void ** race_flag ) {
+static inline void ?{}( select_node & this, thread$ * blocked_thread, void * extra ) {
     this.blocked_thread = blocked_thread;
-    this.race_flag = race_flag;
+    this.clause_status = 0p;
+    this.park_counter = 0p;
+    this.extra = extra;
 }
 
-void ^?{}( select_node & this ) {}
+static inline void ^?{}( select_node & this ) {}
 
+static inline unsigned long int * __get_clause_status( select_node & s ) { return s.clause_status; }
 
 //-----------------------------------------------------------------------------
 // is_selectable
-trait is_selectable(T & | sized(T)) {
-    // For registering a select on a selectable concurrency primitive
-    // return 0p if primitive not accessible yet
-    // return 1p if primitive gets acquired
-    // return 2p if primitive is accessible but some other primitive won the race
-    // C_TODO: add enum for return values
-    void * register_select( T &, select_node & );
+forall(T & | sized(T))
+trait is_selectable {
+    // For registering a select stmt on a selectable concurrency primitive
+    // Returns bool that indicates if operation is already SAT
+    bool register_select( T &, select_node & );
 
-    void unregister_select( T &, select_node &  );
+    // For unregistering a select stmt on a selectable concurrency primitive
+    // If true is returned then the corresponding code block is run (only in non-special OR case and only if node status is not RUN)
+    bool unregister_select( T &, select_node &  );
+
+    // This routine is run on the selecting thread prior to executing the statement corresponding to the select_node
+    //    passed as an arg to this routine
+    // If on_selected returns false, the statement is not run, if it returns true it is run.
+    bool on_selected( T &, select_node & );
 };
 
-static inline bool install_select_winner( select_node & this, void * primitive_ptr ) with(this) {
-    // temporary needed for atomic instruction
-    void * cmp_flag = 0p;
-    
-    // if we dont win the selector race we need to potentially 
-    //   ignore this node and move to the next one so we return accordingly
-    if ( *race_flag != 0p || 
-        !__atomic_compare_exchange_n(
-            race_flag, 
-            &cmp_flag, 
-            primitive_ptr, 
-            false,
-            __ATOMIC_SEQ_CST,
-            __ATOMIC_SEQ_CST
-        )
-    ) return false; // lost race and some other node triggered select
-    return true; // won race so this node is what the select proceeds with
+// this is used inside the compiler to attempt to establish an else clause as a winner in the OR special case race
+static inline bool __select_node_else_race( select_node & this ) with( this ) {
+    unsigned long int cmp_status = __SELECT_UNSAT;
+    return *clause_status == 0 
+            && __atomic_compare_exchange_n( clause_status, &cmp_status, 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST );
 }
+
+// when a primitive becomes available it calls the following routine on it's node to update the select state:
+// return true if we want to unpark the thd
+static inline bool __make_select_node_available( select_node & this ) with( this ) {
+    unsigned long int cmp_status = __SELECT_UNSAT;
+
+    if( !park_counter ) 
+        return *clause_status == 0 
+            && __atomic_compare_exchange_n( clause_status, &cmp_status, (unsigned long int)&this, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ); // OR specific case where race was won
+
+    return *clause_status == 0
+        && __atomic_compare_exchange_n( clause_status, &cmp_status, __SELECT_SAT, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) // can maybe just use atomic write
+        && !__atomic_add_fetch( park_counter, 1, __ATOMIC_SEQ_CST);
+}
+
+// Handles the special OR case of the waituntil statement
+// Since only one select node can win in the OR case, we need to race to set the node available BEFORE
+//    performing the operation since if we lose the race the operation should not be performed as it will be lost
+// Returns true if execution can continue normally and false if the queue has now been drained
+static inline bool __handle_waituntil_OR( dlist( select_node ) & queue ) {
+    if ( queue`isEmpty ) return false;
+    if ( queue`first.clause_status && !queue`first.park_counter ) {
+        while ( !queue`isEmpty ) {
+            // if node not a special OR case or if we win the special OR case race break
+            if ( !queue`first.clause_status || queue`first.park_counter || __make_select_node_available( queue`first ) ) { return true; }
+            // otherwise we lost the special OR race so discard node
+            try_pop_front( queue );
+        }
+        return false;
+    }
+    return true;
+}
+
+// wake one thread from the list
+static inline void wake_one( dlist( select_node ) & queue, select_node & popped ) {
+    if ( !popped.clause_status                              // normal case, node is not a select node
+        || ( popped.clause_status && !popped.park_counter ) // If popped link is special case OR selecting unpark but don't call __make_select_node_available
+        || __make_select_node_available( popped ) )         // check if popped link belongs to a selecting thread
+        unpark( popped.blocked_thread );
+}
+
+static inline void wake_one( dlist( select_node ) & queue ) { wake_one( queue, try_pop_front( queue ) ); }
+
+static inline void setup_clause( select_node & this, unsigned long int * clause_status, int * park_counter ) {
+    this.blocked_thread = active_thread();
+    this.clause_status = clause_status;
+    this.park_counter = park_counter;
+}
+
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision bd72c28427f616ee95dc08f6845fe25cfead577d)
+++ libcfa/src/concurrency/thread.cfa	(revision e20c0afc4c329948ce830db02af0461cdf7eda9f)
@@ -53,4 +53,5 @@
 	preferred = ready_queue_new_preferred();
 	last_proc = 0p;
+    link_node = 0p;
 	PRNG_SET_SEED( random_state, __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl() );
 	#if defined( __CFA_WITH_VERIFY__ )
@@ -59,7 +60,4 @@
 	#endif
 
-	clh_node = malloc( );
-	*clh_node = false;
-
 	doregister(curr_cluster, this);
 	monitors{ &self_mon_p, 1, (fptr_t)0 };
@@ -70,5 +68,4 @@
 		canary = 0xDEADDEADDEADDEADp;
 	#endif
-	free(clh_node);
 	unregister(curr_cluster, this);
 	^self_cor{};
