Index: doc/papers/concurrency/Paper.tex
===================================================================
--- doc/papers/concurrency/Paper.tex	(revision 73de1754c58bad6c335ecf46f1c340743b6d5d84)
+++ doc/papers/concurrency/Paper.tex	(revision 4c3ee8d83b18fa8870e0a924b814efd4ec1730fa)
@@ -3,7 +3,7 @@
 \articletype{RESEARCH ARTICLE}%
 
-\received{26 April 2016}
-\revised{6 June 2016}
-\accepted{6 June 2016}
+\received{XXXXX}
+\revised{XXXXX}
+\accepted{XXXXX}
 
 \raggedbottom
@@ -948,5 +948,5 @@
 \subsection{Coroutine Implementation}
 
-A significant implementation challenge for coroutines (and threads, see section \ref{threads}) is adding extra fields and executing code after/before the coroutine constructor/destructor and coroutine main to create/initialize/de-initialize/destroy extra fields and the stack.
+A significant implementation challenge for coroutines (and threads, see Section~\ref{threads}) is adding extra fields and executing code after/before the coroutine constructor/destructor and coroutine main to create/initialize/de-initialize/destroy extra fields and the stack.
 There are several solutions to this problem and the chosen option forced the \CFA coroutine design.
 
@@ -1301,15 +1301,11 @@
 
 For maximum usability, monitors have \newterm{multi-acquire} semantics allowing a thread to acquire it multiple times without deadlock.
-For example, atomically printing the contents of a binary tree:
-\begin{cfa}
-monitor Tree {
-	Tree * left, * right;
-	// value
-};
-void print( Tree & mutex tree ) {			$\C{// prefix traversal}$
-	// write value
-	print( *tree->left );					$\C{// multiply acquire monitor lock for tree on each recursion}$
-	print( *tree->right );
-}
+\begin{cfa}
+monitor M { ... } m;
+void foo( M & mutex m ) { ... }				$\C{// acquire mutual exclusion}$
+void bar( M & mutex m ) {					$\C{// acquire mutual exclusion}$
+	... `foo( m );` ...						$\C{// reacquire mutual exclusion}$
+}
+`bar( m );`									$\C{// nested monitor call}$
 \end{cfa}
 
@@ -1406,5 +1402,5 @@
 \begin{tabular}{@{}l@{\hspace{3\parindentlnth}}l@{}}
 \begin{cfa}
-monitor M {};
+monitor M { ... };
 void foo( M & mutex m1, M & mutex m2 ) {
 	// critical section
@@ -1652,8 +1648,7 @@
 @waitfor@ statically verifies the released monitors are the same as the acquired mutex-parameters of the given routine or routine pointer.
 To statically verify the released monitors match with the accepted routine's mutex parameters, the routine (pointer) prototype must be accessible.
-
 % When an overloaded routine appears in an @waitfor@ statement, calls to any routine with that name are accepted.
 % The rationale is that members with the same name should perform a similar function, and therefore, all should be eligible to accept a call.
-As always, overloaded routines can be disambiguated using a cast:
+Overloaded routines can be disambiguated using a cast:
 \begin{cfa}
 void rtn( M & mutex m );
@@ -1762,6 +1757,6 @@
 Signalled threads are moved to the urgent queue and the waiter at the front defines the set of monitors necessary for it to unblock.
 Partial signalling transfers ownership of monitors to the front waiter.
-When the signaller thread exits or waits in the monitor the front waiter is unblocked if all its monitors are released.
-The benefit of this solution is encapsulating complexity into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met.
+When the signaller thread exits or waits in the monitor, the front waiter is unblocked if all its monitors are released.
+The benefit is encapsulating complexity into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met.
 
 
@@ -1773,5 +1768,5 @@
 Similarly, monitor routines can be added at any time in \CFA, making it less clear for programmers and more difficult to implement.
 \begin{cfa}
-monitor M {};
+monitor M { ... };
 void `f`( M & mutex m );
 void g( M & mutex m ) { waitfor( `f` ); }	$\C{// clear which f}$
@@ -1819,6 +1814,7 @@
 External scheduling, like internal scheduling, becomes significantly more complex for multi-monitor semantics.
 Even in the simplest case, new semantics needs to be established.
-\begin{cfa}
-monitor M {};
+\newpage
+\begin{cfa}
+monitor M { ... };
 void f( M & mutex m1 );
 void g( M & mutex m1, M & mutex m2 ) {
@@ -1833,5 +1829,5 @@
 This behaviour can be extended to the multi-monitor @waitfor@ statement.
 \begin{cfa}
-monitor M {};
+monitor M { ... };
 void f( M & mutex m1, M & mutex m2 );
 void g( M & mutex m1, M & mutex m2 ) {
@@ -2244,5 +2240,5 @@
 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
 \begin{cfa}
-monitor M {} m1/*, m2, m3, m4*/;
+monitor M { ... } m1/*, m2, m3, m4*/;
 void __attribute__((noinline)) do_call( M & mutex m/*, m2, m3, m4*/ ) {}
 int main() {
@@ -2298,5 +2294,5 @@
 volatile int go = 0;
 condition c;
-monitor M {} m;
+monitor M { ... } m;
 void __attribute__((noinline)) do_call( M & mutex a1 ) { signal( c ); }
 thread T {};
@@ -2349,5 +2345,5 @@
 \begin{cfa}
 volatile int go = 0;
-monitor M {} m;
+monitor M { ... } m;
 thread T {};
 void __attribute__((noinline)) do_call( M & mutex ) {}