Index: doc/bibliography/pl.bib
===================================================================
--- doc/bibliography/pl.bib	(revision 65a7050c7b6c234df3b0c1a23e0d49d07e827c23)
+++ doc/bibliography/pl.bib	(revision 45af7e1145e9631b244b43c9e04278d852a2e414)
@@ -832,5 +832,4 @@
     howpublished= {\href{http://www.boost.org/doc/libs/1_61_0/libs/coroutine/doc/html/index.html}
 		  {http://www.boost.org/\-doc/\-libs/1\_61\_0/\-libs/\-coroutine/\-doc/\-html/\-index.html}},
-    optnote	= {Accessed: 2016-09},
 }
 
@@ -843,5 +842,4 @@
     howpublished= {\href{https://www.boost.org/doc/libs/1_61_0/doc/html/thread.html}
 		  {https://\-www.boost.org/\-doc/\-libs/\-1\_61\_0/\-doc/\-html/\-thread.html}},
-    optnote	= {Accessed: 2016-09},
 }
 
@@ -950,5 +948,4 @@
     author	= {{\textsf{C}{$\mathbf{\forall}$} Features}},
     howpublished= {\href{https://plg.uwaterloo.ca/~cforall/features}{https://\-plg.uwaterloo.ca/\-$\sim$cforall/\-features}},
-    optnote	= {Accessed: 2018-01-01},
 }
 
@@ -970,5 +967,4 @@
     year	= 2018,
     howpublished= {\href{https://cforall.uwaterloo.ca/CFAStackEvaluation.zip}{https://cforall.uwaterloo.ca/\-CFAStackEvaluation.zip}},
-    optnote	= {[Accessed May 2018]},
 }
 
@@ -1470,5 +1466,4 @@
     title	= {concurrent-locking},
     howpublished= {\href{https://github.com/pabuhr/concurrent-locking}{https://\-github.com/\-pabuhr/\-concurrent-locking}},
-    optnote	= {[Accessed April 2017]},
 }
 
@@ -1758,5 +1753,4 @@
     howpublished= {\href{https://www.airs.com/blog/archives/428}
 		  {https://www.airs.com/\-blog/\-archives/\-428}},
-    optnote	= {Accessed: 2018-05},
 }
 
@@ -2917,5 +2911,4 @@
     year	= 2014,
     howpublished= {\href{https://gcc.gnu.org/onlinedocs/gcc-4.7.2/gcc/C-Extensions.html}{https://\-gcc.gnu.org/\-onlinedocs/\-gcc-4.7.2/\-gcc/\-C\-Extensions.html}},
-    optnote	= {Accessed: 2017-04-02},
 }
 
@@ -3348,5 +3341,4 @@
     year	= 2014,
     howpublished= {https://developer.gnome.org/gobject/stable/},
-    optnote	= {Accessed: 2017-04},
 }
 
@@ -5026,5 +5018,4 @@
     year	= 2014,
     howpublished= {\href{https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/ProgrammingWithObjectiveC}{https://\-developer.apple.com/\-library/archive/\-documentation/\-Cocoa/\-Conceptual/\-ProgrammingWithObjectiveC}},
-    optnote	= {Accessed: 2018-03}
 }
 
@@ -5036,5 +5027,4 @@
     year	= 2015,
     howpublished= {\href{https://developer.apple.com/library/content/documentation/Xcode/Conceptual/RN-Xcode-Archive/Chapters/xc7_release_notes.html}{https://\-developer.apple.com/\-library/\-content/\-documentation/\-Xcode/\-Conceptual/\-RN-Xcode-Archive/\-Chapters/\-xc7\_release\_notes.html}},
-    optnote	= {Accessed: 2017-04}
 }
 
@@ -5549,5 +5539,4 @@
     year	= 2012,
     howpublished= {\href{http://cs.brown.edu/research/pubs/theses/masters/2012/verch.pdf}{http://cs.brown.edu/\-research/\-pubs/\-theses/\-masters/\-2012/\-verch.pdf}},
-    optnote	= {Accessed: 2013-10-4}
 }
 
@@ -6037,5 +6026,6 @@
     institution	= {Carnegie Mellon University},
     year	= 1991,
-    month	= feb, number = "CMU-CS-91-106",
+    month	= feb,
+    number	= {CMU-CS-91-106},
     annote	= {
         Discusses a typed lambda calculus with
@@ -6094,7 +6084,9 @@
     journal	= sigplan,
     year	= 1988,
-    month	= jul, volume = 23, number = 7, pages = {260-267},
-    note	= {Proceedings of the SIGPLAN '88 Conference on Programming Language
-	 Design and Implementation},
+    month	= jul,
+    volume	= 23,
+    number	= 7,
+    pages	= {260-267},
+    note	= {Proceedings of the SIGPLAN '88 Conference on Programming Language Design and Implementation},
     abstract	= {
         This paper deals with the integration of an efficient asynchronous
@@ -6144,4 +6136,15 @@
     month	= jun,
     year	= 1987,
+}
+
+@misc{Pthreads,
+    keywords	= {pthreads, C concurrency},
+    contributer	= {pabuhr@plg},
+    key		= {pthreads},
+    title	= {{Pthread}.h, Specifications Issue 7, {IEEE} Std 1003.1-2017},
+    author	= {IEEE and {The Open Group}},
+    year	= 2018,
+    howpublished= {\href{http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/pthread.h.html}
+		  {http://\-pubs.opengroup.org/\-onlinepubs/\-9699919799/\-basedefs/\-pthread.h.html}},
 }
 
@@ -6327,4 +6330,15 @@
     number	= 10,
     pages	= {27-32},
+}
+
+@article{Hesselink06,
+    author	= {Wim H. Hesselink},
+    title	= {Refinement Verification of the Lazy Caching Algorithm},
+    journal	= acta,
+    year	= 2006,
+    month	= oct,
+    volume	= 43,
+    number	= 3,
+    pages	= {195--222},
 }
 
@@ -7265,5 +7279,12 @@
     author	= {{TIOBE Index}},
     howpublished= {\href{http://www.tiobe.com/tiobe_index}{http://\-www.tiobe.com/\-tiobe\_index}},
-    optnote	= {Accessed: 2018-09},
+}
+
+@misc{ThreadModel,
+    contributer	= {pabuhr@plg},
+    key		= {ThreadModel},
+    title	= {Thread (computing)},
+    author	= {{Threading Model}},
+    howpublished= {\href{https://en.wikipedia.org/wiki/Thread_(computing)}{https://\-en.wikipedia.org/\-wiki/\-Thread\_(computing)}},
 }
 
@@ -7597,5 +7618,4 @@
     year	= 2017,
     howpublished= {\url{https://wiki.gnome.org/Projects/Vala/Manual}},
-    optnote	= {Accessed: 2017-04}
 }
 
Index: doc/papers/concurrency/Paper.tex
===================================================================
--- doc/papers/concurrency/Paper.tex	(revision 65a7050c7b6c234df3b0c1a23e0d49d07e827c23)
+++ doc/papers/concurrency/Paper.tex	(revision 45af7e1145e9631b244b43c9e04278d852a2e414)
@@ -241,9 +241,8 @@
 
 \abstract[Summary]{
-\CFA is a modern, polymorphic, \emph{non-object-oriented} extension of the C programming language.
-This paper discusses the design of the concurrency and parallelism features in \CFA, and its concurrent runtime-system.
-These features are created from scratch as ISO C lacks concurrency, relying largely on the pthreads library for concurrency.
-Coroutines and lightweight (user) threads are introduced into \CFA;
-as well, monitors are added as a high-level mechanism for mutual exclusion and synchronization.
+\CFA is a modern, polymorphic, \emph{non-object-oriented}, backwards-compatible extension of the C programming language.
+This paper discusses the concurrency and parallelism features in \CFA, and its runtime system.
+These features are created from scratch as ISO C's concurrency is low-level and unimplemented, so C programmers continue to rely on the C pthreads library.
+\CFA provides high-level control-flow mechanisms, like coroutines and lightweight (user) threads, and monitors for mutual exclusion and synchronization.
 A unique contribution of this work is allowing multiple monitors to be safely acquired \emph{simultaneously}.
 All features respect the expectations of C programmers, while being fully integrate with the \CFA polymorphic type-system and other language features.
@@ -251,5 +250,5 @@
 }%
 
-\keywords{concurrency, parallelism, coroutines, threads, monitors, runtime, C, Cforall}
+\keywords{concurrency, parallelism, coroutines, threads, monitors, runtime, C, \CFA (Cforall)}
 
 
@@ -262,4 +261,36 @@
 \section{Introduction}
 
+This paper discusses the design of the high-level concurrency and parallelism features in \CFA, and its runtime.
+\CFA is a modern, polymorphic, \emph{non-object-oriented}, backwards-compatible extension of the C programming language~\cite{Moss18}.
+Within the \CFA framework, new concurrency features were created from scratch.
+While ISO \Celeven defines concurrency~\cite[\S~7.26]{C11}, it is largely wrappers for a subset of the pthreads library~\cite{Butenhof97,Pthreads}.
+Furthermore, \Celeven and pthreads concurrency is simple: create/join threads in a function and a few locks, which is low-level and error prone;
+no high-level language concurrency features exist.
+Interestingly, 8 years after publication of the \Celeven standard, neither gcc-8 nor clang-8 (most recent versions) supports \Celeven @threads.h@, indicating little interest in the C concurrency approach.
+Finally, while the \Celeven standard does not state a concurrent threading-model, the strong association with pthreads suggests the threading model is kernel-level threading (1:1)~\cite{ThreadModel}.
+
+There has been a renewed interest during the past decade in user-level (M:N, green) threading in new and old programming languages, and providing high-level constructs like coroutines, monitors, tasks, and actors for presenting advanced control-flow.
+As multi-core hardware became available in the 1980/90s, both user and kernel threading were examined.
+Kernel threading was chosen, largely because of its simplicity and fit with the simpler operating systems and hardware architectures at the time, which gave it a performance advantage~\cite{Drepper03}.
+Libraries like pthreads were developed for C, and the Solaris operating-system switched from user (JDK 1.1~\cite{JDK1.1}) to kernel threads.
+As a result, languages like Java, Scala~\cite{Scala}, Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, and C\#~\cite{Csharp} adopted the 1:1 kernel-threading model, with a variety of presentation mechanisms.
+From 2000 onwards, languages like Go~\cite{Go}, Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, D~\cite{D}, and \uC~\cite{uC++,uC++book} have championed the M:N user-threading model, and many user-threading libraries have appeared~\cite{Qthreads,MPC,BoostThreads}, including putting green threads back into Java~\cite{Quasar}.
+Because advanced control-flow (including exception handling) is pervasive in a programming language and its runtime, these features must be understood by the language (i.e., not added via a library) to prevent invalidation by sequential optimizations~\cite{Buhr95a,Boehm05}.
+
+The main argument for user-level threading is matching the concurrency model with the programming-language style, versus adapting language concurrency to one general approach.
+For example, it is possible to provide coroutines, monitors, and tasks as specialized types in an object-oriented language, integrating these constructs to allow leveraging the type-system (static type-checking) and all other object-oriented capabilities~\cite{uC++}.
+The user-threading approach facilitates a simpler concurrency construction using thread objects and leveraging sequential patterns versus call-backs and events~\cite{vonBehren03}.
+As well, user-level threads are lighter weight than kernel threads, so there is less restriction on programming styles that encourage large numbers of threads performing smaller work-units to facilitate load balancing by the runtime~\cite{Verch12}.
+User threading is also able to layer multiple concurrency models into a single language (locks, monitors, tasks, actors, futures), so programmers can choose the model that best fits an application.
+Finally, it is possible to discreetly fold locking and non-blocking I/O multiplexing into the language's I/O libraries, so threading implicitly dovetails with the I/O subsystem.
+Performant user-threading implementations (both time and space) are appearing that are competitive with direct kernel-threading implementations, while achieving the programming advantages of high concurrency levels and safety.
+
+Adding advanced control-flow to \CFA is similar to current and future extensions in \CCeleven through to \CCtwenty.
+However, we contend the \CFA extensions are demonstrably better than those proposed for \CC.
+For example, a unique contribution of this work is allowing multiple monitors to be safely acquired \emph{simultaneously} (deadlock free), while integrating this capability with all monitor synchronization mechanisms.
+As well, all control-flow features respect the expectations of C programmers, with statically type-safe interfaces that integrate with the \CFA polymorphic type-system and other language features.
+Experimental results show comparable performance of the new features with similar mechanisms in other concurrent programming-languages.
+
+\begin{comment}
 This paper provides a minimal concurrency \newterm{Application Program Interface} (API) that is simple, efficient and can be used to build other concurrency features.
 While the simplest concurrency system is a thread and a lock, this low-level approach is hard to master.
@@ -281,6 +312,8 @@
 The proposed concurrency API is implemented in a dialect of C, called \CFA (pronounced C-for-all).
 The paper discusses how the language features are added to the \CFA translator with respect to parsing, semantics, and type checking, and the corresponding high-performance runtime-library to implement the concurrent features.
-
-
+\end{comment}
+
+
+\begin{comment}
 \section{\CFA Overview}
 
@@ -551,4 +584,5 @@
 \end{cfa}
 where the return type supplies the type/size of the allocation, which is impossible in most type systems.
+\end{comment}
 
 
