Index: doc/LaTeXmacros/lstlang.sty
===================================================================
--- doc/LaTeXmacros/lstlang.sty	(revision 3364962e92096cfc5d05c93607d92838baa1fe51)
+++ doc/LaTeXmacros/lstlang.sty	(revision fb31cb899c2ed0144f33897963468ed96bf3edf3)
@@ -2,7 +2,7 @@
 %%
 %% Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
-%% 
-%% lstlang.sty -- 
-%% 
+%%
+%% lstlang.sty --
+%%
 %% Author           : Peter A. Buhr
 %% Created On       : Sat May 13 16:34:42 2017
@@ -110,10 +110,10 @@
 		__attribute__, auto, _Bool, catch, catchResume, choose, _Complex, __complex, __complex__,
 		__const, __const__, disable, dtype, enable, __extension__, fallthrough, fallthru,
-		finally, forall, ftype, _Generic, _Imaginary, inline, __label__, lvalue, _Noreturn, one_t, 
-		otype, restrict, _Static_assert, throw, throwResume, trait, try, ttype, typeof, __typeof, 
-		__typeof__, virtual, waitfor, when, with, zero_t},
+		finally, forall, ftype, _Generic, _Imaginary, inline, __label__, lvalue, _Noreturn, one_t,
+		otype, restrict, _Static_assert, throw, throwResume, trait, try, ttype, typeof, __typeof,
+		__typeof__, virtual, with, zero_t},
 	morekeywords=[2]{
-		_Atomic, coroutine, is_coroutine, is_monitor, is_thread, monitor, mutex, nomutex, 
-		resume, suspend, thread, _Thread_local, yield},
+		_Atomic, coroutine, is_coroutine, is_monitor, is_thread, monitor, mutex, nomutex, or,
+		resume, suspend, thread, _Thread_local, waitfor, when, yield},
 	moredirectives={defined,include_next}%
 }
Index: doc/proposals/concurrency/text/basics.tex
===================================================================
--- doc/proposals/concurrency/text/basics.tex	(revision 3364962e92096cfc5d05c93607d92838baa1fe51)
+++ doc/proposals/concurrency/text/basics.tex	(revision fb31cb899c2ed0144f33897963468ed96bf3edf3)
@@ -328,5 +328,5 @@
 
 \begin{cfacode}
-	thread foo {};
+thread foo {};
 \end{cfacode}
 
@@ -343,27 +343,27 @@
 Obviously, for this thread implementation to be usefull it must run some user code. Several other threading interfaces use a function-pointer representation as the interface of threads (for example \Csharp~\cite{Csharp} and Scala~\cite{Scala}). However, this proposal considers that statically tying a \code{main} routine to a thread superseeds this approach. Since the \code{main} routine is already a special routine in \CFA (where the program begins), it is a natural extension of the semantics using overloading to declare mains for different threads (the normal main being the main of the initial thread). As such the \code{main} routine of a thread can be defined as
 \begin{cfacode}
-	thread foo {};
-
-	void main(foo & this) {
-		sout | "Hello World!" | endl;
-	}
+thread foo {};
+
+void main(foo & this) {
+	sout | "Hello World!" | endl;
+}
 \end{cfacode}
 
 In this example, threads of type \code{foo} start execution in the \code{void main(foo &)} routine, which prints \code{"Hello World!"}. While this thesis encourages this approach to enforce strongly-typed programming, users may prefer to use the routine-based thread semantics for the sake of simplicity. With these semantics it is trivial to write a thread type that takes a function pointer as a parameter and executes it on its stack asynchronously
 \begin{cfacode}
-	typedef void (*voidFunc)(int);
-
-	thread FuncRunner {
-		voidFunc func;
-		int arg;
-	};
-
-	void ?{}(FuncRunner & this, voidFunc inFunc, int arg) {
-		this.func = inFunc;
-	}
-
-	void main(FuncRunner & this) {
-		this.func( this.arg );
-	}
+typedef void (*voidFunc)(int);
+
+thread FuncRunner {
+	voidFunc func;
+	int arg;
+};
+
+void ?{}(FuncRunner & this, voidFunc inFunc, int arg) {
+	this.func = inFunc;
+}
+
+void main(FuncRunner & this) {
+	this.func( this.arg );
+}
 \end{cfacode}
 
Index: doc/proposals/concurrency/text/concurrency.tex
===================================================================
--- doc/proposals/concurrency/text/concurrency.tex	(revision 3364962e92096cfc5d05c93607d92838baa1fe51)
+++ doc/proposals/concurrency/text/concurrency.tex	(revision fb31cb899c2ed0144f33897963468ed96bf3edf3)
@@ -28,11 +28,11 @@
 A monitor is a set of routines that ensure mutual exclusion when accessing shared state. This concept is generally associated with Object-Oriented Languages like Java~\cite{Java} or \uC~\cite{uC++book} but does not strictly require OO semantics. The only requirements is the ability to declare a handle to a shared object and a set of routines that act on it :
 \begin{cfacode}
-	typedef /*some monitor type*/ monitor;
-	int f(monitor & m);
-
-	int main() {
-		monitor m;  //Handle m
-		f(m);       //Routine using handle
-	}
+typedef /*some monitor type*/ monitor;
+int f(monitor & m);
+
+int main() {
+	monitor m;  //Handle m
+	f(m);       //Routine using handle
+}
 \end{cfacode}
 
@@ -47,11 +47,11 @@
 
 \begin{cfacode}
-	monitor counter_t { /*...see section $\ref{data}$...*/ };
-
-	void ?{}(counter_t & nomutex this); //constructor
-	size_t ++?(counter_t & mutex this); //increment
-
-	//need for mutex is platform dependent
-	void ?{}(size_t * this, counter_t & mutex cnt); //conversion
+monitor counter_t { /*...see section $\ref{data}$...*/ };
+
+void ?{}(counter_t & nomutex this); //constructor
+size_t ++?(counter_t & mutex this); //increment
+
+//need for mutex is platform dependent
+void ?{}(size_t * this, counter_t & mutex cnt); //conversion
 \end{cfacode}
 This counter is used as follows:
@@ -125,15 +125,15 @@
 The capacity to acquire multiple locks before entering a critical section is called \emph{\gls{bulk-acq}}. In practice, writing multi-locking routines that do not lead to deadlocks is tricky. Having language support for such a feature is therefore a significant asset for \CFA. In the case presented above, \CFA guarantees that the order of aquisition is consistent across calls to routines using the same monitors as arguments. However, since \CFA monitors use \gls{multi-acq} locks, users can effectively force the acquiring order. For example, notice which routines use \code{mutex}/\code{nomutex} and how this affects aquiring order:
 \begin{cfacode}
-	void foo(A & mutex a, B & mutex b) { //acquire a & b
-		...
-	}
-
-	void bar(A & mutex a, B & /*nomutex*/ b) { //acquire a
-		... foo(a, b); ... //acquire b
-	}
-
-	void baz(A & /*nomutex*/ a, B & mutex b) { //acquire b
-		... foo(a, b); ... //acquire a
-	}
+void foo(A & mutex a, B & mutex b) { //acquire a & b
+	...
+}
+
+void bar(A & mutex a, B & /*nomutex*/ b) { //acquire a
+	... foo(a, b); ... //acquire b
+}
+
+void baz(A & /*nomutex*/ a, B & mutex b) { //acquire b
+	... foo(a, b); ... //acquire a
+}
 \end{cfacode}
 The \gls{multi-acq} monitor lock allows a monitor lock to be acquired by both \code{bar} or \code{baz} and acquired again in \code{foo}. In the calls to \code{bar} and \code{baz} the monitors are acquired in opposite order.
@@ -159,4 +159,38 @@
 This example shows a trivial solution to the bank account transfer problem\cit. Without \gls{multi-acq} and \gls{bulk-acq}, the solution to this problem is much more involved and requires carefull engineering.
 
+\subsubsection{\code{mutex} statement} \label{mutex-stmt}
+
+The call semantics discussed above have one software engineering issue: only a named routine can acquire the mutual-exclusion of a set of monitors. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem\cit. Listing \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitors is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
+
+\begin{figure}
+\begin{center}
+\begin{tabular}{|c|c|}
+function call & \code{mutex} statement \\
+\hline
+\begin{cfacode}[tabsize=3]
+monitor M {};
+void foo( M & mutex m ) {
+	//critical section
+}
+
+void bar( M & m ) {
+	foo( m );
+}
+\end{cfacode}&\begin{cfacode}[tabsize=3]
+monitor M {};
+void bar( M & m ) {
+	mutex(m) {
+		//critical section
+	}
+}
+
+
+\end{cfacode}
+\end{tabular}
+\end{center}
+\caption{Regular call semantics vs. \code{mutex} statement}
+\label{lst:mutex-stmt}
+\end{figure}
+
 % ======================================================================
 % ======================================================================
@@ -195,21 +229,21 @@
 
 \begin{cfacode}
-	monitor A {
-		condition e;
-	}
-
-	void foo(A & mutex a) {
-		...
-		//Wait for cooperation from bar()
-		wait(a.e);
-		...
-	}
-
-	void bar(A & mutex a) {
-		//Provide cooperation for foo()
-		...
-		//Unblock foo
-		signal(a.e);
-	}
+monitor A {
+	condition e;
+}
+
+void foo(A & mutex a) {
+	...
+	//Wait for cooperation from bar()
+	wait(a.e);
+	...
+}
+
+void bar(A & mutex a) {
+	//Provide cooperation for foo()
+	...
+	//Unblock foo
+	signal(a.e);
+}
 \end{cfacode}
 
@@ -223,5 +257,5 @@
 % ======================================================================
 % ======================================================================
-It is easier to understand the problem of multi-monitor scheduling using a series of pseudo-code. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling is done using an implicit condition variable, like Java built-in monitors.
+It is easier to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling are done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use a single condition as parameter and wait on the monitors associated with the condition.
 
 \begin{multicols}{2}
@@ -305,6 +339,7 @@
 \end{multicols}
 
-The next example is where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics.
-
+Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics. Listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code which implements the pseudo-code in listing \ref{lst:int-bulk-pseudo}. Note that listing \ref{lst:int-bulk-cfa} uses a non-\code{mutex} parameter to introduce monitor \code{b} into context. However, for the purpose of translating the given pseudo-code into \CFA-code, any method of introducing new monitors into context, other than a \code{mutex} parameter, is acceptable, e.g., global variables, pointer parameters or using locals with the \code{mutex}-statement.
+
+\begin{figure}[!b]
 \begin{multicols}{2}
 Waiting thread
@@ -336,7 +371,47 @@
 \end{pseudo}
 \end{multicols}
-\begin{center}
-Listing 1
-\end{center}
+\caption{Internal scheduling with \gls{bulk-acq}}
+\label{lst:int-bulk-pseudo}
+\end{figure}
+
+\begin{figure}[!b]
+\begin{multicols}{2}
+Waiting thread
+\begin{cfacode}
+monitor A;
+monitor B;
+extern condition c;
+void foo(A & mutex a, B & b) {
+	//Code Section 1
+	mutex(a, b) {
+		//Code Section 2
+		wait(c);
+		//Code Section 3
+	}
+	//Code Section 4
+}
+\end{cfacode}
+
+\columnbreak
+
+Signalling thread
+\begin{cfacode}
+monitor A;
+monitor B;
+extern condition c;
+void foo(A & mutex a, B & b) {
+	//Code Section 5
+	mutex(a, b) {
+		//Code Section 6
+		signal(c);
+		//Code Section 7
+	}
+	//Code Section 8
+}
+\end{cfacode}
+\end{multicols}
+\caption{Equivalent \CFA code for listing \ref{lst:int-bulk-pseudo}}
+\label{lst:int-bulk-cfa}
+\end{figure}
 
 It is particularly important to pay attention to code sections 4 and 8, which are where the existing semantics of internal scheduling need to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired and is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should "release A \& B" (line 16), it must actually transfer ownership of monitor B to the waiting thread. This ownership trasnfer is required in order to prevent barging. Since the signalling thread still needs monitor A, simply waking up the waiting thread is not an option because it would violate mutual exclusion. There are three options.
@@ -388,5 +463,5 @@
 
 Thread 3
-\begin{pseudo}[numbers=left, firstnumber=10]
+\begin{pseudo}[numbers=left, firstnumber=9]
 acquire A
 	acquire A & B
@@ -480,8 +555,8 @@
 \end{center}
 \label{fig:dependency}
-\caption{Dependency graph of the statments in listing \ref{lst:dependency}}
+\caption{Dependency graph of the statements in listing \ref{lst:dependency}}
 \end{figure}
 
-Listing \ref{lst:dependency} is the three thread example rewritten for dependency graphs as well as the corresponding dependency graph. Figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statment of one of the three threads, and the arrows the dependency of that statment. The extra challenge is that this dependency graph is effectively post-mortem, but the run time system needs to be able to build and solve these graphs as the dependency unfolds. Resolving dependency graph being a complex and expensive endeavour, this solution is not the preffered one.
+Listing \ref{lst:dependency} is the three thread example rewritten for dependency graphs as well as the corresponding dependency graph. Figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows show the dependencies of that statement. The extra challenge is that this dependency graph is effectively post-mortem, but the run time system needs to be able to build and solve these graphs as the dependency unfolds. Resolving dependency graphs being a complex and expensive endeavour, this solution is not the preferred one.
 
 \subsubsection{Partial signalling} \label{partial-sig}
@@ -675,15 +750,15 @@
 
 \begin{cfacode}
-	monitor A {};
-
-	void f(A & mutex a);
-	void g(A & mutex a) {
-		waitfor(f); //Obvious which f() to wait for
-	}
-
-	void f(A & mutex a, int); // New different F added in scope
-	void h(A & mutex a) {
-		waitfor(f); //Less obvious which f() to wait for
-	}
+monitor A {};
+
+void f(A & mutex a);
+void g(A & mutex a) {
+	waitfor(f); //Obvious which f() to wait for
+}
+
+void f(A & mutex a, int); //New different F added in scope
+void h(A & mutex a) {
+	waitfor(f); //Less obvious which f() to wait for
+}
 \end{cfacode}
 
@@ -732,11 +807,11 @@
 External scheduling, like internal scheduling, becomes significantly more complex when introducing multi-monitor syntax. Even in the simplest possible case, some new semantics need to be established:
 \begin{cfacode}
-	monitor M {};
-
-	void f(M & mutex a);
-
-	void g(M & mutex a, M & mutex b) {
-		waitfor(f); //ambiguous, keep a pass b or other way around?
-	}
+monitor M {};
+
+void f(M & mutex a);
+
+void g(M & mutex a, M & mutex b) {
+	waitfor(f); //ambiguous, keep a pass b or other way around?
+}
 \end{cfacode}
 
@@ -744,23 +819,23 @@
 
 \begin{cfacode}
-	monitor M {};
-
-	void f(M & mutex a);
-
-	void g(M & mutex a, M & mutex b) {
-		waitfor( f, b );
-	}
-\end{cfacode}
-
-This syntax is unambiguous. Both locks are acquired and kept. When routine \code{f} is called, the lock for monitor \code{b} is temporarily transferred from \code{g} to \code{f} (while \code{g} still holds lock \code{a}). This behavior can be extended to multi-monitor waitfor statment as follows.
-
-\begin{cfacode}
-	monitor M {};
-
-	void f(M & mutex a, M & mutex b);
-
-	void g(M & mutex a, M & mutex b) {
-		waitfor( f, a, b);
-	}
+monitor M {};
+
+void f(M & mutex a);
+
+void g(M & mutex a, M & mutex b) {
+	waitfor( f, b );
+}
+\end{cfacode}
+
+This syntax is unambiguous. Both locks are acquired and kept. When routine \code{f} is called, the lock for monitor \code{b} is temporarily transferred from \code{g} to \code{f} (while \code{g} still holds lock \code{a}). This behavior can be extended to a multi-monitor \code{waitfor} statement as follows.
+
+\begin{cfacode}
+monitor M {};
+
+void f(M & mutex a, M & mutex b);
+
+void g(M & mutex a, M & mutex b) {
+	waitfor( f, a, b);
+}
 \end{cfacode}
 
@@ -770,22 +845,22 @@
 
 \begin{cfacode}
-	mutex struct A {};
-
-	mutex struct B {};
-
-	void g(A & mutex a, B & mutex b) {
-		waitfor(f, a, b);
-	}
-
-	A a1, a2;
-	B b;
-
-	void foo() {
-		g(a1, b); //block on accept
-	}
-
-	void bar() {
-		f(a2, b); //fufill cooperation
-	}
+mutex struct A {};
+
+mutex struct B {};
+
+void g(A & mutex a, B & mutex b) {
+	waitfor(f, a, b);
+}
+
+A a1, a2;
+B b;
+
+void foo() {
+	g(a1, b); //block on accept
+}
+
+void bar() {
+	f(a2, b); //fufill cooperation
+}
 \end{cfacode}
 
@@ -794,5 +869,106 @@
 % ======================================================================
 % ======================================================================
-\subsection{Waitfor semantics}
-% ======================================================================
-% ======================================================================
+\subsection{\code{waitfor} semantics}
+% ======================================================================
+% ======================================================================
+
+Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted. This is because the compiler validates at compile time the validity of the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the \code{waitfor} statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading.
+\begin{figure}
+\begin{cfacode}
+monitor A{};
+monitor B{};
+
+void f1( A & mutex );
+void f2( A & mutex, B & mutex );
+void f3( A & mutex, int );
+void f4( A & mutex, int );
+void f4( A & mutex, double );
+
+void foo( A & mutex a1, A & mutex a2, B & mutex b1, B & b2 ) {
+	A * ap = & a1;
+	void (*fp)( A & mutex ) = f1;
+
+	waitfor(f1, a1);     //Correct : 1 monitor case
+	waitfor(f2, a1, b1); //Correct : 2 monitor case
+	waitfor(f3, a1);     //Correct : non-mutex arguments are ignored
+	waitfor(f1, *ap);    //Correct : expression as argument
+
+	waitfor(f1, a1, b1); //Incorrect : Too many mutex arguments
+	waitfor(f2, a1);     //Incorrect : Too few mutex arguments
+	waitfor(f2, a1, a2); //Incorrect : Mutex arguments don't match
+	waitfor(f1, 1);      //Incorrect : 1 not a mutex argument
+	waitfor(f9, a1);     //Incorrect : f9 not a function
+	waitfor(*fp, a1 );   //Incorrect : fp not an identifier
+	waitfor(f4, a1);     //Incorrect : f4 ambiguous
+
+	waitfor(f2, a1, b2); //Undefined Behaviour : b2 may not be acquired
+}
+\end{cfacode}
+\caption{Various correct and incorrect uses of the waitfor statement}
+\label{lst:waitfor}
+\end{figure}
+
+Finally, for added flexibility, \CFA supports constructing complex \code{waitfor} masks using the \code{or}, \code{timeout} and \code{else} clauses. Indeed, multiple \code{waitfor} can be chained together using \code{or}; this chain will form a single statement which will baton-pass to any one function that fits one of the function+monitor sets which were passed in. To enable users to tell which was the accepted function, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement. When multiple \code{waitfor} are chained together, only the statement corresponding to the accepted function is executed. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, that is, only check if a matching function call has already arrived and return immediately otherwise. Any and all of these clauses can be preceded by a \code{when} condition to dynamically construct the mask based on some current state. Listing \ref{lst:waitfor2} demonstrates several complex masks and some incorrect ones.
+
+\begin{figure}
+\begin{cfacode}
+monitor A{};
+
+void f1( A & mutex );
+void f2( A & mutex );
+
+void foo( A & mutex a, bool b, int t ) {
+	//Correct : blocking case
+	waitfor(f1, a);
+
+	//Correct : block with statement
+	waitfor(f1, a) {
+		sout | "f1" | endl;
+	}
+
+	//Correct : block waiting for f1 or f2
+	waitfor(f1, a) {
+		sout | "f1" | endl;
+	} or waitfor(f2, a) {
+		sout | "f2" | endl;
+	}
+
+	//Correct : non-blocking case
+	waitfor(f1, a); or else;
+
+	//Correct : non-blocking case
+	waitfor(f1, a) {
+		sout | "blocked" | endl;
+	} or else {
+		sout | "didn't block" | endl;
+	}
+
+	//Correct : block at most 10 seconds
+	waitfor(f1, a) {
+		sout | "blocked" | endl;
+	} or timeout( 10`s) {
+		sout | "didn't block" | endl;
+	}
+
+	//Correct : block only if b == true
+	//if b == false, don't even make the call
+	when(b) waitfor(f1, a);
+
+	//Correct : block only if b == true
+	//if b == false, make non-blocking call
+	waitfor(f1, a); or when(!b) else;
+
+	//Correct : block only if t > 1
+	waitfor(f1, a); or when(t > 1) timeout(t); or else;
+
+	//Incorrect : timeout clause is dead code
+	waitfor(f1, a); or timeout(t); or else;
+
+	//Incorrect : order must be
+	//waitfor [or waitfor... [or timeout] [or else]]
+	timeout(t); or waitfor(f1, a); or else;
+}
+\end{cfacode}
+\caption{Various correct and incorrect uses of the or, else, and timeout clauses around a waitfor statement}
+\label{lst:waitfor2}
+\end{figure}
Index: doc/proposals/concurrency/thesis.tex
===================================================================
--- doc/proposals/concurrency/thesis.tex	(revision 3364962e92096cfc5d05c93607d92838baa1fe51)
+++ doc/proposals/concurrency/thesis.tex	(revision fb31cb899c2ed0144f33897963468ed96bf3edf3)
@@ -70,6 +70,6 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-\setcounter{secnumdepth}{3}                           % number subsubsections
-\setcounter{tocdepth}{3}                              % subsubsections in table of contents
+\setcounter{secnumdepth}{2}                           % number down to subsections
+\setcounter{tocdepth}{2}                              % down to subsections in table of contents
 % \linenumbers                                       	% comment out to turn off line numbering
 \makeindex
Index: doc/proposals/concurrency/version
===================================================================
--- doc/proposals/concurrency/version	(revision 3364962e92096cfc5d05c93607d92838baa1fe51)
+++ doc/proposals/concurrency/version	(revision fb31cb899c2ed0144f33897963468ed96bf3edf3)
@@ -1,1 +1,1 @@
-0.10.181
+0.10.212
