Index: doc/working/unified_semaphores/semaphore.cfa
===================================================================
--- doc/working/unified_semaphores/semaphore.cfa	(revision ee56a4fc2af48e1f01d53b09c81b696e0c1aec20)
+++ doc/working/unified_semaphores/semaphore.cfa	(revision ee56a4fc2af48e1f01d53b09c81b696e0c1aec20)
@@ -0,0 +1,403 @@
+#include "semaphore.hfa"
+#include "kernel_private.hfa"
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <kernel.hfa>
+#include <stdlib.hfa>
+#include <thread.hfa>
+
+forall(dtype L | is_blocking_lock(L)) {
+
+	void ?{}(base_semaphore(L) & this, int count, bool is_binary) {
+		this.count = count;				// starting value of the counter
+		(this.lock){};					// default-construct the spin lock member
+		(this.blocked_threads){};		// default-construct the wait queue member
+		this.is_binary = is_binary;		// consulted by the P/V paths to pick binary or counting behaviour
+	}
+
+	void ^?{}(base_semaphore(L) & this) {
+		// intentionally empty: no explicit cleanup is performed for the base semaphore
+	}
+
+	void ?{}(binary_semaphore(L) & this) {
+		((base_semaphore(L) &)this){ 1, true };		// default binary semaphore: count = 1, is_binary = true
+	}
+
+	void ?{}(binary_semaphore(L) & this, int count) {
+		((base_semaphore(L) &)this){ count, true };	// caller-supplied count (not range-checked here), is_binary = true
+	}
+
+	void ^?{}(binary_semaphore(L) & this) {
+		// intentionally empty: no explicit cleanup is performed for the binary semaphore
+	}
+
+	void ?{}(counting_semaphore(L) & this) {
+		((base_semaphore(L) &)this){ 1, false };	// default counting semaphore: count = 1, is_binary = false
+	}
+
+	void ?{}(counting_semaphore(L) & this, int count) {
+		((base_semaphore(L) &)this){ count, false };	// caller-supplied count, is_binary = false
+	}
+
+	void ^?{}(counting_semaphore(L) & this) {
+		// intentionally empty: no explicit cleanup is performed for the counting semaphore
+	}
+
+	void ?{}( alarm_node_semaphore(L) & this, $thread * thrd, Time alarm, Duration period, Alarm_Callback callback ) {
+		this.alarm_node{ thrd, alarm, period, callback };	// only the embedded alarm node is constructed; sem and i are not set here
+	}
+
+	void ^?{}( alarm_node_semaphore(L) & this ) {
+		// intentionally empty: no explicit cleanup is performed for the wrapper
+	}
+
+	void add_( base_semaphore(L) & this, struct $thread * t ) with( this ) {	// wakes thread t under the semaphore's spin lock
+		lock( lock __cfaabi_dbg_ctx2 );		// same spin lock the P/V paths take
+		#if !defined( __CFA_NO_STATISTICS__ )
+			kernelTLS.this_stats = t->curr_cluster->stats;	// statistics builds only: record t's cluster stats in kernelTLS before unparking
+		#endif
+		unpark( t );						// count is not modified on this path
+		unlock( lock );
+	}
+
+	void remove_( base_semaphore(L) & this ) with( this ) {
+		V(this);	// releasing the semaphore through remove_ is a plain single V
+	}
+
+	////////////////////////////////////////////////////////////////////////////////
+	// These extras are needed since the inheritance is broken with traits
+	////////////////////////////////////////////////////////////////////////////////
+
+	void add_( binary_semaphore(L) & this, struct $thread * t ) with( this ) {
+		add_( (base_semaphore(L) &)this, t );	// forward to the base_semaphore overload
+	}
+
+	void remove_( binary_semaphore(L) & this ) with( this ) {
+		remove_( (base_semaphore(L) &)this );	// forward to the base_semaphore overload
+	}
+
+	void add_( counting_semaphore(L) & this, struct $thread * t ) with( this ) {
+		add_( (base_semaphore(L) &)this, t );	// forward to the base_semaphore overload
+	}
+
+	void remove_( counting_semaphore(L) & this ) with( this ) {
+		remove_( (base_semaphore(L) &)this );	// forward to the base_semaphore overload
+	}
+
+	////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////
+
+	// Alarm callback body: if the timed-out waiter is still queued, dequeue it and wake it.
+	void timeout_handler ( alarm_node_semaphore(L) & this ) with( this ) {
+		// Called from the kernel, and therefore, cannot block, but it can spin.
+		lock( sem->lock __cfaabi_dbg_ctx2 );
+		if ( (*i)->listed ) {			// is thread on queue
+			info_thread(L) * copy = *i;
+			remove( sem->blocked_threads, i );		 //remove this thread O(1)
+			if ( !sem->is_binary ) sem->count++;	// undo the decrement made when queueing (counting semaphores only)
+			if( !copy->lock ) {
+				#if !defined( __CFA_NO_STATISTICS__ )
+					kernelTLS.this_stats = copy->t->curr_cluster->stats;
+				#endif
+				unpark( copy->t );		// woken while still holding sem->lock, as V does
+			} else {
+				add_(*copy->lock, copy->t);			// call lock's add_
+			}
+		}
+		unlock( sem->lock );			// the only release: every path unlocks exactly once
+	}
+
+	void alarm_node_sem_cast( alarm_node_t & a ) {
+		timeout_handler( (alarm_node_semaphore(L) &)a );	// view the node as its enclosing wrapper (alarm_node is the wrapper's first member)
+	}
+
+	void handle_P(base_semaphore(L) & this, info_thread( L ) & i ) with( this ) {	// common P path: queue and block, or take a unit
+		lock( lock __cfaabi_dbg_ctx2 );
+		if ( count <= 0) {
+			append( blocked_threads, &i );
+			// only counting semaphores go negative to record a waiter
+			if (!is_binary) count--;
+			// mark the node as queued; timeout_handler tests this flag and V clears it
+			i.listed = true;
+			// if a lock was passed in, remember its recursion count and release it before blocking
+			size_t recursion_count;
+			if (i.lock) {
+				recursion_count = get_recursion_count( *i.lock );
+				remove_( *i.lock );
+			}
+			// drop the spin lock before parking
+			unlock( lock );
+			park( );
+			// woken: restore the saved recursion count on the caller's lock
+			if (i.lock) set_recursion_count( *i.lock, recursion_count );
+		} else {
+			if (i.lock) {	// unit available: the lock is released and immediately re-added for this thread
+				size_t recursion_count = get_recursion_count( *i.lock );
+				remove_( *i.lock );
+				add_( *i.lock, i.t );
+				set_recursion_count( *i.lock, recursion_count );
+			}
+			count--;		// consume one unit without blocking
+			unlock( lock );
+		}
+	}
+
+	void handle_P_timeout(base_semaphore(L) & this, info_thread( L ) & i, Time time) with( this ) {	// P path with an alarm set for `time`
+		lock( lock __cfaabi_dbg_ctx2 );
+		if ( count <= 0) {
+			append( blocked_threads, &i );
+			// only counting semaphores go negative to record a waiter
+			if (!is_binary) count--;
+			// queue_ptr gives the alarm wrapper an addressable pointer to this thread's queue node
+			info_thread(L) * queue_ptr = &i;
+			i.listed = true;
+			// alarm with period 0`s whose callback is alarm_node_sem_cast; the wrapper lives on this stack frame
+			alarm_node_semaphore(L) node_wrap = { i.t, time, 0`s, alarm_node_sem_cast };
+			node_wrap.sem = &this;
+			node_wrap.i = &queue_ptr;
+			// registered while the spin lock is still held
+			register_self( &node_wrap.alarm_node );
+			// if a lock was passed in, remember its recursion count and release it before blocking
+			size_t recursion_count;
+			if (i.lock) {
+				recursion_count = get_recursion_count( *i.lock );
+				remove_( *i.lock );
+			}
+			// drop the spin lock, then block until V or timeout_handler wakes this thread
+			unlock( lock );
+			park( );
+			// woken: restore the saved recursion count on the caller's lock
+			if (i.lock) set_recursion_count( *i.lock, recursion_count );
+		} else {	// NOTE(review): unlike handle_P, i.lock is left untouched on this path - confirm intended
+			count--;
+			unlock( lock );
+		}
+	}
+
+	void P(base_semaphore(L) & this) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread };	// queue node for kernelTLS.this_thread
+		handle_P(this, waiter);								// untimed P, no lock to release
+	}
+
+	void P(base_semaphore(L) & this, uintptr_t info) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread, info };	// queue node carrying the caller's info value
+		handle_P(this, waiter);										// untimed P, no lock to release
+	}
+
+	void P(base_semaphore(L) & this, Duration duration) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread };			// queue node for kernelTLS.this_thread
+		handle_P_timeout(this, waiter, __kernel_get_time() + duration);	// relative timeout turned into an absolute Time
+	}
+
+	void P(base_semaphore(L) & this, uintptr_t info, Duration duration) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread, info };		// queue node carrying the caller's info value
+		handle_P_timeout(this, waiter, __kernel_get_time() + duration);	// relative timeout turned into an absolute Time
+	}
+
+	void P(base_semaphore(L) & this, Time time) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread };	// queue node for kernelTLS.this_thread
+		handle_P_timeout(this, waiter, time);				// time is passed through as the alarm time
+	}
+
+	void P(base_semaphore(L) & this, uintptr_t info, Time time) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread, info };	// queue node carrying the caller's info value
+		handle_P_timeout(this, waiter, time);						// time is passed through as the alarm time
+	}
+
+	void P(base_semaphore(L) & this, L & l ) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread };	// queue node for kernelTLS.this_thread
+		waiter.lock = &l;									// lock that handle_P releases on the caller's behalf
+		handle_P(this, waiter);
+	}
+
+	void P(base_semaphore(L) & this, L & l, uintptr_t info) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread, info };	// queue node carrying the caller's info value
+		waiter.lock = &l;											// lock that handle_P releases on the caller's behalf
+		handle_P(this, waiter);
+	}
+
+	void P(base_semaphore(L) & this, L & l, Duration duration ) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread };			// queue node for kernelTLS.this_thread
+		waiter.lock = &l;											// lock released while the caller is blocked
+		handle_P_timeout(this, waiter, __kernel_get_time() + duration);	// relative timeout turned into an absolute Time
+	}
+
+	void P(base_semaphore(L) & this, L & l, uintptr_t info, Duration duration) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread, info };		// queue node carrying the caller's info value
+		waiter.lock = &l;											// lock released while the caller is blocked
+		handle_P_timeout(this, waiter, __kernel_get_time() + duration);	// relative timeout turned into an absolute Time
+	}
+
+	void P(base_semaphore(L) & this, L & l, Time time) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread };	// queue node for kernelTLS.this_thread
+		waiter.lock = &l;									// lock released while the caller is blocked
+		handle_P_timeout(this, waiter, time);				// time is passed through as the alarm time
+	}
+
+	void P(base_semaphore(L) & this, L & l, uintptr_t info, Time time) with( this ) {
+		info_thread( L ) waiter = { kernelTLS.this_thread, info };	// queue node carrying the caller's info value
+		waiter.lock = &l;											// lock released while the caller is blocked
+		handle_P_timeout(this, waiter, time);						// time is passed through as the alarm time
+	}
+
+	// Non-blocking P: takes one unit if one is available.
+	// Returns true when a unit was taken, false when count <= 0 (the caller is never queued).
+	bool tryP(base_semaphore(L) & this) with( this ) {
+		lock( lock __cfaabi_dbg_ctx2 );
+		bool acquired = count > 0;
+		if ( acquired ) count--;
+		unlock( lock );
+		// single exit: both outcomes return an explicit value
+		return acquired;
+	}
+
+	// Release one unit. If waiters are recorded (count < 0) the head of the queue is woken,
+	// otherwise the count is raised; a binary semaphore that is already above 0 only reports an error.
+	void V(base_semaphore(L) & this) with( this ) {
+		lock( lock __cfaabi_dbg_ctx2 );
+		if( count < 0) {
+			info_thread(L) * i = pop_head( blocked_threads );
+			count++;
+			if ( i != 0p ) {	// touch the node only after the null check
+				i->listed = false;
+				if (i->lock) add_(*i->lock, i->t);	// hand the thread to the lock it passed to P
+				else unpark(i->t);
+			}
+		} else if (!is_binary || count == 0) {
+			count++;
+		} else {
+			fprintf( stderr, "A binary semaphore was V'd when it was already at 1" );
+		}
+		// NOTE(review): handle_P never decrements count for binary semaphores, so their queued waiters are not woken here - confirm
+		unlock( lock );
+	}
+
+
+	// TODO: Should we be able to V a binary semaphore multiple times to wake up multiple threads?
+	// Right now we can't.
+	// Release `times` units: wake up to `times` recorded waiters, then credit what is left to the count.
+	// A binary semaphore accepts only a single leftover V while at 0; anything else is reported.
+	void V(base_semaphore(L) & this, int times) with( this ) {
+		assert( times > 0 );
+		lock( lock __cfaabi_dbg_ctx2 );
+		while ( count < 0 && times > 0 ) {
+			info_thread(L) * i = pop_head( blocked_threads );
+			count++;
+			if ( i != 0p ) {	// touch the node only after the null check
+				i->listed = false;
+				if (i->lock) add_(*i->lock, i->t);	// hand the thread to the lock it passed to P
+				else unpark(i->t);
+			}
+			times--;
+		}
+		if(	!is_binary ) {
+			count += times;
+		} else if (count == 0 && times == 1) {
+			count++;			// 0 -> 1 is the only binary transition allowed, matching V(this)
+		} else if (times > 0) {
+			// already above 0, or more than one V requested: multi-V on a binary semaphore is not supported
+			fprintf( stderr, "A binary semaphore was V'd when it was already at 1" );
+		}
+		unlock( lock );
+	}
+
+	// void ?++(base_semaphore(L) & this) with( this ) {
+	// 	V(this);
+	// }
+
+	// void ?--(base_semaphore(L) & this) with( this ) {
+	// 	P(this);
+	// }
+
+	// void ?`V(base_semaphore(L) & this) with( this ) {
+	// 	V(this);
+	// }
+
+	// void ?`P(base_semaphore(L) & this) with( this ) {
+	// 	P(this);
+	// }
+
+	uintptr_t front(base_semaphore(L) & this) with( this ) {
+		info_thread(L) * head = peek(blocked_threads);	// node returned by peek on the wait queue
+		if ( blocked_threads ) return head->info;		// queue non-empty: report that node's info value
+		return 0;										// queue empty
+	}
+
+	bool empty(base_semaphore(L) & this) with( this ) {
+		return !blocked_threads;	// true exactly when the wait queue tests as false
+	}
+
+	int counter(base_semaphore(L) & this) with( this ) {
+		return this.count;	// read without taking the spin lock
+	}
+
+	// these are just to allow the semaphore to be a part of the is_blocking_lock trait
+	// they do nothing
+	void set_recursion_count( base_semaphore(L) & this, size_t recursion ) with( this ) {
+		// intentionally a no-op: the argument is ignored
+	}
+
+	size_t get_recursion_count( base_semaphore(L) & this ) with( this ) {
+		return 0;	// constant: the semaphore keeps no recursion count
+	}
+
+	////////////////////////////////////////////////////////////////////////////////
+	// These extras are needed since the inheritance is broken with traits
+	// normally I'd cast to a semaphore & to call the parent but there's no need
+	// since these routines are so simple
+	////////////////////////////////////////////////////////////////////////////////
+
+	void set_recursion_count( binary_semaphore(L) & this, size_t recursion ) with( this ) {
+		// intentionally a no-op: the argument is ignored
+	}
+
+	size_t get_recursion_count( binary_semaphore(L) & this ) with( this ) {
+		return 0;	// constant: the semaphore keeps no recursion count
+	}
+
+	void set_recursion_count( counting_semaphore(L) & this, size_t recursion ) with( this ) {
+		// intentionally a no-op: the argument is ignored
+	}
+
+	size_t get_recursion_count( counting_semaphore(L) & this ) with( this ) {
+		return 0;	// constant: the semaphore keeps no recursion count
+	}
+
+	////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////
+}
+
+thread T1 {};	// test thread type; its main is currently commented out below
+thread T2 {};	// test thread type; its main is currently commented out below
+
+// counting_semaphore( ) s0, s1;
+
+// void main( T1 & this ) {
+// 	printf("T1 start\n");
+// 	V(s1);
+// 	P(s0);
+// 	P(s0);
+// 	V(s1, 2);
+	
+// 	printf("T1 done\n");
+// }
+
+// void main( T2 & this ) {
+// 	printf("T2 start\n");
+// 	V(s0);
+// 	P(s1);
+// 	P(s1, s0);
+// 	P(s1);
+// 	printf("T2 done\n");
+// }
+
+int main() {
+	fputs( "start\n", stdout );		// banner before the (disabled) two-thread test
+	// processor p[2];
+	// {
+	// 	T1 t1;
+	// 	T2 t2;
+	// }
+	fputs( "done\n", stdout );		// banner after the (disabled) two-thread test
+}
Index: doc/working/unified_semaphores/semaphore.hfa
===================================================================
--- doc/working/unified_semaphores/semaphore.hfa	(revision ee56a4fc2af48e1f01d53b09c81b696e0c1aec20)
+++ doc/working/unified_semaphores/semaphore.hfa	(revision ee56a4fc2af48e1f01d53b09c81b696e0c1aec20)
@@ -0,0 +1,107 @@
+#pragma once
+
+#include "locks.hfa"
+#include <stdbool.h>
+
+#include "bits/algorithm.hfa"
+#include "bits/locks.hfa"
+#include "bits/containers.hfa"
+
+#include "invoke.h"
+
+#include "time_t.hfa"
+#include "time.hfa"
+#include <sys/time.h>
+#include "alarm.hfa"
+
+///////////////////////////////////////////////////////////////////
+//// Semaphores
+///////////////////////////////////////////////////////////////////
+forall(dtype L | is_blocking_lock(L)) {
+
+	struct base_semaphore {	// state embedded by both binary_semaphore and counting_semaphore
+		// internal counter for the semaphore
+		int count;
+
+		// Spin lock used for mutual exclusion
+		__spinlock_t lock;
+
+		// List of blocked threads
+		__queue_t(info_thread(L)) blocked_threads;
+
+		// Flag set when the semaphore is binary
+		bool is_binary;
+	};
+
+	// A semaphore that is binary
+	// If the current owner P's this semaphore it will not block (NOTE(review): no owner field is declared in this header - confirm)
+	struct binary_semaphore {
+		inline base_semaphore(L);	// embeds the base semaphore state
+	};
+
+	// A semaphore that maintains a counter.
+	// If a thread P's this semaphore it always decreases the counter
+	struct counting_semaphore {
+		inline base_semaphore(L);	// embeds the base semaphore state
+	};
+
+	void ?{}(base_semaphore(L) & this, int count, bool is_binary);	// initial count and binary/counting flag
+	void ^?{}(base_semaphore(L) & this);
+
+	void ?{}(binary_semaphore(L) & this);				// count defaults to 1
+	void ?{}(binary_semaphore(L) & this, int count);
+	void ^?{}(binary_semaphore(L) & this);
+
+	void ?{}(counting_semaphore(L) & this);				// count defaults to 1
+	void ?{}(counting_semaphore(L) & this, int count);
+	void ^?{}(counting_semaphore(L) & this);
+
+	struct alarm_node_semaphore {
+		alarm_node_t alarm_node;	// declared first: alarm_node_sem_cast casts an alarm_node_t & back to this wrapper
+		// semaphore whose queue and count the timeout handler updates
+		base_semaphore(L) * sem;
+		// address of a pointer to the waiter's queue node (assigned in handle_P_timeout)
+		info_thread(L) ** i;
+	};
+
+	void ?{}( alarm_node_semaphore(L) & this, $thread * thrd, Time alarm, Duration period, Alarm_Callback callback );	// sets up alarm_node only
+	void ^?{}( alarm_node_semaphore(L) & this );
+
+	void add_( base_semaphore(L) & this, struct $thread * t );	// unparks t under the semaphore's spin lock
+	void remove_( base_semaphore(L) & this );					// performs a single V
+
+	// TODO: look into changing timeout routines to return bool showing if signalled or woken by kernel
+	void P(base_semaphore(L) & this);								// block until a unit is available
+	void P(base_semaphore(L) & this, uintptr_t info);				// info is stored with the queued waiter (see front)
+	void P(base_semaphore(L) & this, Duration duration);			// relative timeout
+	void P(base_semaphore(L) & this, uintptr_t info, Duration duration);
+	void P(base_semaphore(L) & this, Time time);					// absolute timeout
+	void P(base_semaphore(L) & this, uintptr_t info, Time time);
+	void P(base_semaphore(L) & this, L & l);						// the next six take lock l, matching the definitions in semaphore.cfa
+	void P(base_semaphore(L) & this, L & l, uintptr_t info);
+	void P(base_semaphore(L) & this, L & l, Duration duration );
+	void P(base_semaphore(L) & this, L & l, uintptr_t info, Duration duration);
+	void P(base_semaphore(L) & this, L & l, Time time);
+	void P(base_semaphore(L) & this, L & l, uintptr_t info, Time time);
+
+	bool tryP(base_semaphore(L) & this);			// non-blocking attempt to take one unit
+
+	void V(base_semaphore(L) & this);				// release one unit
+	void V(base_semaphore(L) & this, int times);	// release several units; times must be > 0 (asserted)
+
+	// void ?`++(base_semaphore(L) & this);
+	// void ?`--(base_semaphore(L) & this);
+
+	// void ?`V(base_semaphore(L) & this);
+	// void ?`P(base_semaphore(L) & this);
+
+	uintptr_t front(base_semaphore(L) & this);	// info value of the first queued waiter, or 0 when none
+	bool empty(base_semaphore(L) & this);		// true when no thread is queued
+	int counter(base_semaphore(L) & this);		// current value of count
+
+
+	// these are to satisfy the is_blocking_lock trait so that
+	// semaphores can be released by condition variables and vice versa
+	void set_recursion_count( base_semaphore(L) & this, size_t recursion );	// no-op for semaphores
+	size_t get_recursion_count( base_semaphore(L) & this );					// always 0 for semaphores
+}
