Index: src/libcfa/concurrency/invoke.h
===================================================================
--- src/libcfa/concurrency/invoke.h	(revision e464759b49c8d2ebccb89821c40321561033cefd)
+++ src/libcfa/concurrency/invoke.h	(revision 0c78741a61dd850124b501f52f3c3551c949c60d)
@@ -38,6 +38,6 @@
       };
 
-      struct __thread_stack_t {
-            struct thread_desc * top;
+      struct __condition_stack_t {                    // LIFO of criteria, linked through each criterion's next field
+            struct __condition_criterion_t * top;     // most recently pushed criterion; NULL when the stack is empty
       };
 
@@ -48,7 +48,7 @@
             struct thread_desc * pop_head( struct __thread_queue_t * );
 
-            void ?{}( struct __thread_stack_t * );
-            void push( struct __thread_stack_t *, struct thread_desc * );            
-            struct thread_desc * pop( struct __thread_stack_t * );
+            void ?{}( struct __condition_stack_t * );
+            void push( struct __condition_stack_t *, struct __condition_criterion_t * );
+            struct __condition_criterion_t * pop( struct __condition_stack_t * );
 
             void ?{}(spinlock * this);
@@ -82,5 +82,5 @@
             struct thread_desc * owner;               // current owner of the monitor
             struct __thread_queue_t entry_queue;      // queue of threads that are blocked waiting for the monitor
-            struct __thread_stack_t signal_stack;     // stack of threads to run next once we exit the monitor
+            struct __condition_stack_t signal_stack;  // stack of conditions to run next once we exit the monitor
             struct monitor_desc * stack_owner;        // if bulk acquiring was used we need to synchronize signals with an other monitor
             unsigned int recursion;                   // monitor routines can be called recursively, we need to keep track of that
Index: src/libcfa/concurrency/kernel
===================================================================
--- src/libcfa/concurrency/kernel	(revision e464759b49c8d2ebccb89821c40321561033cefd)
+++ src/libcfa/concurrency/kernel	(revision 0c78741a61dd850124b501f52f3c3551c949c60d)
@@ -55,9 +55,15 @@
 //-----------------------------------------------------------------------------
 // Processor
-enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule };
+enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule, Release_Multi, Release_Multi_Schedule };	// the *_Multi codes act on the locks/thrds arrays of FinishAction
+
+//TODO use union, many of these fields are mutually exclusive (i.e. MULTI vs NOMULTI)
 struct FinishAction {
 	FinishOpCode action_code;
 	thread_desc * thrd;
 	spinlock * lock;
+	spinlock ** locks;			// array of locks unlocked by the Release_Multi and Release_Multi_Schedule actions
+	unsigned short lock_count;	// number of entries in locks
+	thread_desc ** thrds;		// array of threads scheduled by the Release_Multi_Schedule action
+	unsigned short thrd_count;	// number of entries in thrds
 };
 static inline void ?{}(FinishAction * this) { 
Index: src/libcfa/concurrency/kernel.c
===================================================================
--- src/libcfa/concurrency/kernel.c	(revision e464759b49c8d2ebccb89821c40321561033cefd)
+++ src/libcfa/concurrency/kernel.c	(revision 0c78741a61dd850124b501f52f3c3551c949c60d)
@@ -235,4 +235,17 @@
 		ScheduleThread( this->finish.thrd );
 	}
+	else if( this->finish.action_code == Release_Multi ) {
+		for(int i = 0; i < this->finish.lock_count; i++) {
+			unlock( this->finish.locks[i] );
+		}
+	}
+	else if( this->finish.action_code == Release_Multi_Schedule ) {
+		for(int i = 0; i < this->finish.lock_count; i++) {
+			unlock( this->finish.locks[i] );
+		}
+		for(int i = 0; i < this->finish.thrd_count; i++) {
+			ScheduleThread( this->finish.thrds[i] );
+		}
+	}
 	else {
 		assert(this->finish.action_code == No_Action);
@@ -335,4 +348,20 @@
 	this_processor->finish.lock = lock;
 	this_processor->finish.thrd = thrd;
+	suspend();
+}
+
+void ScheduleInternal(spinlock ** locks, unsigned short count) {	// suspend the caller; the Release_Multi finish action unlocks locks[0..count)
+	this_processor->finish.action_code = Release_Multi;
+	this_processor->finish.locks = locks;		// the pointer is stored, not copied: the array must stay valid until the finish action has run
+	this_processor->finish.lock_count = count;
+	suspend();
+}
+
+void ScheduleInternal(spinlock ** locks, unsigned short lock_count, thread_desc ** thrds, unsigned short thrd_count) {	// suspend the caller; the Release_Multi_Schedule finish action unlocks locks[0..lock_count) and then schedules thrds[0..thrd_count)
+	this_processor->finish.action_code = Release_Multi_Schedule;
+	this_processor->finish.locks = locks;		// both arrays are stored by pointer and read later by the finish action
+	this_processor->finish.lock_count = lock_count;
+	this_processor->finish.thrds = thrds;
+	this_processor->finish.thrd_count = thrd_count;
 	suspend();
 }
@@ -529,16 +558,16 @@
 }
 
-void ?{}( __thread_stack_t * this ) {
+void ?{}( __condition_stack_t * this ) {
 	this->top = NULL;
 }
 
-void push( __thread_stack_t * this, thread_desc * t ) {
-	assert(t->next != NULL);
+void push( __condition_stack_t * this, __condition_criterion_t * t ) {	// make t the new top of the stack
+	assert( !t->next );		// t->next must be clear before t is linked in
 	t->next = this->top;
 	this->top = t;
 }
 
-thread_desc * pop( __thread_stack_t * this ) {
-	thread_desc * top = this->top;
+__condition_criterion_t * pop( __condition_stack_t * this ) {
+	__condition_criterion_t * top = this->top;
 	if( top ) {
 		this->top = top->next;
Index: src/libcfa/concurrency/kernel_private.h
===================================================================
--- src/libcfa/concurrency/kernel_private.h	(revision e464759b49c8d2ebccb89821c40321561033cefd)
+++ src/libcfa/concurrency/kernel_private.h	(revision 0c78741a61dd850124b501f52f3c3551c949c60d)
@@ -26,8 +26,10 @@
 thread_desc * nextThread(cluster * this);
 
-void ScheduleInternal();
+void ScheduleInternal(void);
 void ScheduleInternal(spinlock * lock);
 void ScheduleInternal(thread_desc * thrd);
 void ScheduleInternal(spinlock * lock, thread_desc * thrd);
+void ScheduleInternal(spinlock ** locks, unsigned short count);
+void ScheduleInternal(spinlock ** locks, unsigned short count, thread_desc ** thrds, unsigned short thrd_count);
 
 //-----------------------------------------------------------------------------
Index: src/libcfa/concurrency/monitor
===================================================================
--- src/libcfa/concurrency/monitor	(revision e464759b49c8d2ebccb89821c40321561033cefd)
+++ src/libcfa/concurrency/monitor	(revision 0c78741a61dd850124b501f52f3c3551c949c60d)
@@ -46,8 +46,32 @@
 //-----------------------------------------------------------------------------
 // Internal scheduling
+
+struct __condition_criterion_t {			//One entry per monitor that a waiter is blocked on
+	bool ready;						//Whether or not the criterion is met (true if met); set by check_condition
+	monitor_desc * target;				//The monitor this criterion concerns
+	struct __condition_node_t * owner;		//The parent node to which this criterion belongs
+	__condition_criterion_t * next;		//Intrusive link used while the criterion sits on its target's signal_stack
+};
+
+struct __condition_node_t {				//One blocked waiter on a condition
+	thread_desc * waiting_thread;			//Thread that needs to be woken when all criteria are met
+	__condition_criterion_t * criteria; 	//Array of criteria (the criteria are contiguous in memory)
+	unsigned short count;				//Number of entries in the criteria array
+	__condition_node_t * next;			//Intrusive link used while the node sits in a condition's blocked queue
+};
+
+struct __condition_blocked_queue_t {		//FIFO of blocked waiters, linked through __condition_node_t.next
+	__condition_node_t * head;			//First waiter; NULL when the queue is empty
+	__condition_node_t ** tail;			//Slot written by the next append: &head when empty, otherwise the last node's next field
+};
+
+void ?{}( __condition_blocked_queue_t * );
+void append( __condition_blocked_queue_t *, __condition_node_t * );
+__condition_node_t * pop_head( __condition_blocked_queue_t * );
+
 struct condition {
-	__thread_queue_t blocked;
-	monitor_desc ** monitors;
-	unsigned short monitor_count;
+	__condition_blocked_queue_t blocked;	//Linked list of the blocked threads together with the information needed to unblock them
+	monitor_desc ** monitors;			//Array of monitor pointers (the monitors themselves are NOT contiguous in memory)
+	unsigned short monitor_count;			//Number of monitors in the array
 };
 
Index: src/libcfa/concurrency/monitor.c
===================================================================
--- src/libcfa/concurrency/monitor.c	(revision e464759b49c8d2ebccb89821c40321561033cefd)
+++ src/libcfa/concurrency/monitor.c	(revision 0c78741a61dd850124b501f52f3c3551c949c60d)
@@ -20,21 +20,31 @@
 #include "libhdr.h"
 
-void set_owner( monitor_desc * this, thread_desc * owner ) {
-	//Pass the monitor appropriately
-	this->owner = owner;
-
-	//We are passing the monitor to someone else, which means recursion level is not 0
-	this->recursion = owner ? 1 : 0;
-}
+//-----------------------------------------------------------------------------
+// Forward declarations
+static inline void set_owner( monitor_desc * this, thread_desc * owner );
+static inline thread_desc * next_thread( monitor_desc * this );
+
+static inline void lock_all( spinlock ** locks, unsigned short count );
+static inline void lock_all( monitor_desc ** source, spinlock ** /*out*/ locks, unsigned short count );
+static inline void unlock_all( spinlock ** locks, unsigned short count );
+static inline void unlock_all( monitor_desc ** locks, unsigned short count );
+
+static inline void save_recursion   ( monitor_desc ** ctx, unsigned int * /*out*/ recursions, unsigned short count );
+static inline void restore_recursion( monitor_desc ** ctx, unsigned int * /*in */ recursions, unsigned short count );
+
+static inline thread_desc * check_condition( __condition_criterion_t * );
+static inline void brand_condition( condition * );
+static inline unsigned short insert_unique( thread_desc ** thrds, unsigned short end, thread_desc * val );
+
+//-----------------------------------------------------------------------------
+// Enter/Leave routines
+
 
 extern "C" {
-	void __enter_monitor_desc(monitor_desc * this, monitor_desc * leader) {
+	void __enter_monitor_desc(monitor_desc * this) {
 		lock( &this->lock );
 		thread_desc * thrd = this_thread();
 
-		// //Update the stack owner
-		// this->stack_owner = leader;
-
-		LIB_DEBUG_PRINT_SAFE("Entering %p (o: %p, r: %i)\n", this, this->owner, this->recursion);
+		LIB_DEBUG_PRINT_SAFE("%p Entering %p (o: %p, r: %i)\n", thrd, this, this->owner, this->recursion);
 
 		if( !this->owner ) {
@@ -61,25 +71,12 @@
 
 	// leave pseudo code :
-	// 	decrement level
-	// 	leve == 0 ?
-	// 		no : done
-	// 		yes :
-	// 			signal stack empty ?
-	//				has leader :
-	//					bulk acquiring means we don't own the signal stack
-	//					ignore it but don't release the monitor
-	// 				yes :
-	// 					next in entry queue is new owner
-	// 				no :
-	// 					top of the signal stack is the owner
-	//					context switch to him right away
-	//
-	void __leave_monitor_desc(monitor_desc * this, monitor_desc * leader) {
+	//	TODO
+	void __leave_monitor_desc(monitor_desc * this) {
 		lock( &this->lock );
 
-		LIB_DEBUG_PRINT_SAFE("Leaving %p (o: %p, r: %i)\n", this, this->owner, this->recursion);
-
 		thread_desc * thrd = this_thread();
-		assertf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i)", this->owner, thrd, this->recursion );
+
+		LIB_DEBUG_PRINT_SAFE("%p Leaving %p (o: %p, r: %i)\n", thrd, this, this->owner, this->recursion);
+		assertf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i)", thrd, this->owner, this->recursion );
 
 		//Leaving a recursion level, decrement the counter
@@ -89,38 +86,9 @@
 		//it means we don't need to do anything
 		if( this->recursion != 0) {
-			// this->stack_owner = leader;
 			unlock( &this->lock );
 			return;
 		}
-			
-		// //If we don't own the signal stack then just leave it to the owner
-		// if( this->stack_owner ) {
-		// 	this->stack_owner = leader;
-		// 	unlock( &this->lock );
-		// 	return;
-		// }
-
-		//We are the stack owner and have left the last recursion level.
-		//We are in charge of passing the monitor
-		thread_desc * new_owner = 0;
-
-		//Check the signaller stack
-		new_owner = pop( &this->signal_stack );
-		if( new_owner ) {
-			//The signaller stack is not empty,
-			//transfer control immediately
-			set_owner( this, new_owner );
-			// this->stack_owner = leader;
-			ScheduleInternal( &this->lock, new_owner );
-			return;
-		}
-		
-		// No signaller thread
-		// Get the next thread in the entry_queue
-		new_owner = pop_head( &this->entry_queue );
-		set_owner( this, new_owner );
-
-		// //Update the stack owner
-		// this->stack_owner = leader;
+
+		thread_desc * new_owner = next_thread( this );
 
 		//We can now let other threads in safely
@@ -133,14 +101,12 @@
 
 static inline void enter(monitor_desc ** monitors, int count) {
-	__enter_monitor_desc( monitors[0], NULL );
-	for(int i = 1; i < count; i++) {
-		__enter_monitor_desc( monitors[i], monitors[0] );
+	for(int i = 0; i < count; i++) {		// enter the monitors one by one, in array order
+		__enter_monitor_desc( monitors[i] );
 	}
 }
 
 static inline void leave(monitor_desc ** monitors, int count) {
-	__leave_monitor_desc( monitors[0], NULL );
-	for(int i = count - 1; i >= 1; i--) {
-		__leave_monitor_desc( monitors[i], monitors[0] );
+	for(int i = count - 1; i >= 0; i--) {	// leave in the reverse of the order used by enter
+		__leave_monitor_desc( monitors[i] );
 	}
 }
@@ -169,53 +135,56 @@
 // Internal scheduling
 void wait( condition * this ) {
-	assertf(false, "NO SUPPORTED");
-	// LIB_DEBUG_FPRINTF("Waiting\n");
-	thread_desc * this_thrd = this_thread();
-
-	if( !this->monitors ) {
-		this->monitors = this_thrd->current_monitors;
-		this->monitor_count = this_thrd->current_monitor_count;
-	}
+	LIB_DEBUG_PRINT_SAFE("Waiting\n");
+
+	brand_condition( this );
+
+	//Check that everything is as expected
+	assertf( this->monitors != NULL, "Waiting with no monitors (%p)", this->monitors );
+	assertf( this->monitor_count != 0, "Waiting with 0 monitors (%i)", this->monitor_count );
 
 	unsigned short count = this->monitor_count;
-
-	//Check that everything is as expected
-	assert( this->monitors != NULL );
-	assert( this->monitor_count != 0 );
-
-	unsigned int recursions[ count ];		//Save the current recursion levels to restore them later
-	spinlock *   locks     [ count ];		//We need to pass-in an array of locks to ScheduleInternal
-
-	// LIB_DEBUG_FPRINTF("Getting ready to wait\n");
-
-	//Loop on all the monitors and release the owner
-	for( unsigned int i = 0; i < count; i++ ) {
-		monitor_desc * cur = this->monitors[i];
-
-		assert( cur );
-
-		// LIB_DEBUG_FPRINTF("cur %p lock %p\n", cur, &cur->lock);
-
-		//Store the locks for later
-		locks[i] = &cur->lock;
-
-		//Protect the monitors
-		lock( locks[i] );
-		{		
-			//Save the recursion levels
-			recursions[i] = cur->recursion;
-
-			//Release the owner
-			cur->recursion = 0;
-			cur->owner = NULL;
-		}
-		//Release the monitor
-		unlock( locks[i] );
-	}
-
-	// LIB_DEBUG_FPRINTF("Waiting now\n");
+	unsigned int blarg_recursions[ count ];		//Save the current recursion levels to restore them later
+	spinlock *   blarg_locks     [ count ];		//We need to pass-in an array of locks to ScheduleInternal
+
+	LIB_DEBUG_PRINT_SAFE("count %i\n", count);
+
+	__condition_node_t waiter;
+	waiter.waiting_thread = this_thread();
+	waiter.count = count;
+	waiter.next = NULL;
+
+	__condition_criterion_t criteria[count];
+	for(int i = 0; i < count; i++) {
+		criteria[i].ready  = false;
+		criteria[i].target = this->monitors[i];
+		criteria[i].owner  = &waiter;
+		criteria[i].next   = NULL;
+		LIB_DEBUG_PRINT_SAFE( "Criterion %p\n", &criteria[i] );
+	}
+
+	waiter.criteria = criteria;
+	append( &this->blocked, &waiter );
+
+	lock_all( this->monitors, blarg_locks, count );
+	save_recursion( this->monitors, blarg_recursions, count );
+	//DON'T unlock, ask the kernel to do it
+
+	//Find the next thread(s) to run: build a duplicate-free set, starting empty and growing only through insert_unique
+	unsigned short thread_count = 0;
+	thread_desc * threads[ count ];		//worst case is one distinct thread per monitor
+
+	for( int i = 0; i < count; i++) {
+		thread_desc * new_owner = next_thread( this->monitors[i] );
+		if( new_owner ) thread_count = insert_unique( threads, thread_count, new_owner );	//append at the live end of the set (not at the monitor index); NULL means nobody to wake for this monitor
+	}
+
+	LIB_DEBUG_PRINT_SAFE("Will unblock: ");
+	for(int i = 0; i < thread_count; i++) {
+		LIB_DEBUG_PRINT_SAFE("%p ", threads[i]);
+	}
+	LIB_DEBUG_PRINT_SAFE("\n");
 
 	//Everything is ready to go to sleep
-	ScheduleInternal( locks, count );
+	ScheduleInternal( blarg_locks, count, threads, thread_count );
 
 
@@ -224,43 +193,184 @@
 
 	//We are back, restore the owners and recursions
-	for( unsigned int i = 0; i < count; i++ ) {
-		monitor_desc * cur = this->monitors[i];
-
-		//Protect the monitors
-		lock( locks[i] );
-		{
-			//Release the owner
-			cur->owner = this_thrd;
-			cur->recursion = recursions[i];
-		}
-		//Release the monitor
-		unlock( locks[i] );
-	}
-}
-
-static void __signal_internal( condition * this ) {
-	assertf(false, "NO SUPPORTED");
-	if( !this->blocked.head ) return;
+	lock_all( blarg_locks, count );
+	restore_recursion( this->monitors, blarg_recursions, count );
+	unlock_all( blarg_locks, count );
+}
+
+void signal( condition * this ) {
+	if( !this->blocked.head ) {
+		LIB_DEBUG_PRINT_SAFE("Nothing to signal\n");
+		return;
+	}
 
 	//Check that everything is as expected
 	assert( this->monitors );
 	assert( this->monitor_count != 0 );
+
+	unsigned short count = this->monitor_count;
 	
 	LIB_DEBUG_DO(
-		if ( this->monitors != this_thread()->current_monitors ) {
-			abortf( "Signal on condition %p made outside of the correct monitor(s)", this );
+		thread_desc * this_thrd = this_thread();
+		if ( this->monitor_count != this_thrd->current_monitor_count ) {
+			abortf( "Signal on condition %p made with different number of monitor(s), expected %i got %i", this, this->monitor_count, this_thrd->current_monitor_count );
 		} // if
+
+		for(int i = 0; i < this->monitor_count; i++) {	// debug check: the caller's current monitors must match the condition's monitors, entry by entry
+			if ( this->monitors[i] != this_thrd->current_monitors[i] ) {
+				abortf( "Signal on condition %p made with different monitor, expected %p got %p", this, this->monitors[i], this_thrd->current_monitors[i] );
+			} // if
+		}
 	);
 
-	monitor_desc * owner = this->monitors[0];
-	lock( &owner->lock );
-	{
-		thread_desc * unblock = pop_head( &this->blocked );
-		push( &owner->signal_stack, unblock );
-	}
-	unlock( &owner->lock );
-}
-
-void signal( condition * this ) {
-	__signal_internal( this );
-}
+	lock_all( this->monitors, NULL, count );
+	LIB_DEBUG_PRINT_SAFE("Signalling");
+
+	__condition_node_t * node = pop_head( &this->blocked );
+	for(int i = 0; i < count; i++) {
+		__condition_criterion_t * crit = &node->criteria[i];
+		LIB_DEBUG_PRINT_SAFE(" %p", crit->target);
+		assert( !crit->ready );
+		push( &crit->target->signal_stack, crit );
+	}
+
+	LIB_DEBUG_PRINT_SAFE("\n");
+
+	unlock_all( this->monitors, count );
+}
+
+//-----------------------------------------------------------------------------
+// Utilities
+
+static inline void set_owner( monitor_desc * this, thread_desc * owner ) {
+	//Hand the monitor over; a NULL owner leaves the monitor without an owner
+	this->owner = owner;
+
+	//An owned monitor starts at recursion depth 1, an unowned one sits at depth 0
+	if( owner ) { this->recursion = 1; } else { this->recursion = 0; }
+}
+
+static inline thread_desc * next_thread( monitor_desc * this ) {	//Choose the monitor's next owner; returns the thread to schedule, or NULL when no thread should be scheduled yet
+	//Signalled waiters are served before the entry queue
+	__condition_criterion_t * urgent = pop( &this->signal_stack );
+	if( urgent ) {
+		//The signaller stack is not empty,
+		//regardless of if we are ready to baton pass,
+		//we need to set the monitor as in use
+		set_owner( this,  urgent->owner->waiting_thread );
+
+		return check_condition( urgent );	//NULL while some other criterion of the same waiter is not ready yet
+	}
+
+	// No signaller thread
+	// Get the next thread in the entry_queue
+	thread_desc * new_owner = pop_head( &this->entry_queue );
+	set_owner( this, new_owner );		//if pop_head yields NULL (presumably an empty queue) the monitor is left without an owner
+
+	return new_owner;
+}
+
+static inline void lock_all( spinlock ** locks, unsigned short count ) {	//acquire locks[0..count) in array order
+	for( unsigned short left = count; left > 0; left--, locks++ ) {
+		lock( *locks );
+	}
+}
+
+static inline void lock_all( monitor_desc ** source, spinlock ** /*out*/ locks, unsigned short count ) {	//lock every monitor of source in array order
+	for( int idx = 0; idx < count; idx += 1 ) {
+		monitor_desc * mon = source[idx];
+		lock( &mon->lock );
+		if( locks != NULL ) locks[idx] = &mon->lock;	//the out array is optional (signal passes NULL)
+	}
+}
+
+static inline void unlock_all( spinlock ** locks, unsigned short count ) {	//release locks[0..count) in array order
+	for( unsigned short idx = 0; idx != count; idx += 1 ) {
+		unlock( *(locks + idx) );
+	}
+}
+
+static inline void unlock_all( monitor_desc ** locks, unsigned short count ) {	//release the lock embedded in each of the count monitors, in array order
+	for( unsigned short left = count; left > 0; left--, locks++ ) {
+		unlock( &(*locks)->lock );
+	}
+}
+
+
+static inline void save_recursion   ( monitor_desc ** ctx, unsigned int * /*out*/ recursions, unsigned short count ) {	//copy each monitor's recursion depth into recursions
+	for( unsigned short left = count; left > 0; left--, ctx++, recursions++ ) {
+		*recursions = (*ctx)->recursion;
+	}
+}
+
+static inline void restore_recursion( monitor_desc ** ctx, unsigned int * /*in */ recursions, unsigned short count ) {	//write the saved recursion depths back into the monitors
+	for( unsigned short idx = 0; idx != count; idx += 1 ) {
+		(*(ctx + idx))->recursion = *(recursions + idx);
+	}
+}
+
+// Marks the criterion target as ready, then reports whether its waiter can run:
+//   - returns the waiting thread when every criterion of the owning node is ready
+//   - returns NULL while at least one criterion of that node is still pending
+// target must be one of the entries of target->owner->criteria
+static inline thread_desc * check_condition( __condition_criterion_t * target ) {
+	__condition_node_t * node = target->owner;
+	unsigned short count = node->count;
+	__condition_criterion_t * criteria = node->criteria;
+
+	bool ready2run = true;
+
+	for(	int i = 0; i < count; i++ ) {
+		LIB_DEBUG_PRINT_SAFE( "Checking %p for %p\n", &criteria[i], target );
+		if( &criteria[i] == target ) {
+			criteria[i].ready = true;		// only the criterion we were called for is marked
+			LIB_DEBUG_PRINT_SAFE( "True\n" );
+		}
+
+		ready2run = criteria[i].ready && ready2run;	// stays true only while no pending criterion has been seen
+	}
+
+	LIB_DEBUG_PRINT_SAFE( "Runing %i\n", ready2run );
+	return ready2run ? node->waiting_thread : NULL;
+}
+
+static inline void brand_condition( condition * this ) {	//Bind a not-yet-branded condition to the calling thread's current monitors
+	thread_desc * thrd = this_thread();
+	if( !this->monitors ) {					//a condition that already has monitors is left untouched
+		LIB_DEBUG_PRINT_SAFE("Branding\n");
+		assertf( thrd->current_monitors != NULL, "No current monitor to brand condition %p", this );
+		this->monitors = thrd->current_monitors;
+		this->monitor_count = thrd->current_monitor_count;
+	}
+}
+
+static inline unsigned short insert_unique( thread_desc ** thrds, unsigned short end, thread_desc * val ) {	//Add val to the set thrds[0..end) unless it is NULL or already present; returns the new element count
+	if( !val ) return end;					//no thread to record: the set is left untouched
+	for(int i = 0; i < end; i++) {
+		if( thrds[i] == val ) return end;	//already present, the count does not change
+	}
+	thrds[end] = val;						//the caller must guarantee room for one more entry
+	return end + 1;
+}
+
+void ?{}( __condition_blocked_queue_t * this ) {	//construct an empty queue
+	this->tail = &this->head;				//with no nodes, the tail designates the head slot itself
+	this->head = NULL;
+}
+
+void append( __condition_blocked_queue_t * this, __condition_node_t * c ) {	//Add c at the back of the queue
+	assert(this->tail != NULL);
+	*this->tail = c;				//fills head when the queue is empty, otherwise the previous last node's next field
+	this->tail = &c->next;		//c->next is not reset here; wait() clears it before appending its node
+}
+
+__condition_node_t * pop_head( __condition_blocked_queue_t * this ) {	//detach and return the first node, or NULL when the queue is empty
+	__condition_node_t * first = this->head;
+	if( first == NULL ) return NULL;
+	__condition_node_t * rest = first->next;
+	this->head = rest;
+	if( rest == NULL ) {
+		this->tail = &this->head;			//the queue is empty again: the tail goes back to the head slot
+	}
+	first->next = NULL;						//fully unlink the node that is handed back
+	return first;
+}
