Changes in / [aba20d2:8b34df0]


Ignore:
Files:
3 added
4 deleted
17 edited

Legend:

Unmodified
Added
Removed
  • doc/papers/concurrency/Paper.tex

    raba20d2 r8b34df0  
    158158                _Thread_local, throw, throwResume, timeout, trait, try, ttype, typeof, __typeof, __typeof__,
    159159                virtual, __volatile, __volatile__, waitfor, when, with, zero_t},
    160         moredirectives={defined,include_next},
    161         % replace/adjust listing characters that look bad in sanserif
    162         literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1
    163                 {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
    164                 {<}{\textrm{\textless}}1 {>}{\textrm{\textgreater}}1
    165                 {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{\makebox[1ex][c]{\raisebox{0.5ex}{\rule{0.8ex}{0.075ex}}}\kern-0.2ex{\textrm{\textgreater}}}2,
     160        moredirectives={defined,include_next}%
    166161}
    167162
     
    180175aboveskip=4pt,                                                                                  % spacing above/below code block
    181176belowskip=3pt,
     177% replace/adjust listing characters that look bad in sanserif
     178literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1
     179        {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
     180        {<}{\textrm{\textless}}1 {>}{\textrm{\textgreater}}1
     181        {<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{\makebox[1ex][c]{\raisebox{0.5ex}{\rule{0.8ex}{0.075ex}}}\kern-0.2ex{\textrm{\textgreater}}}2,
    182182moredelim=**[is][\color{red}]{`}{`},
    183183}% lstset
     
    205205}
    206206
    207 % Go programming language: https://github.com/julienc91/listings-golang/blob/master/listings-golang.sty
    208 \lstdefinelanguage{Golang}{
    209         morekeywords=[1]{package,import,func,type,struct,return,defer,panic,recover,select,var,const,iota,},
    210         morekeywords=[2]{string,uint,uint8,uint16,uint32,uint64,int,int8,int16,int32,int64,
    211                 bool,float32,float64,complex64,complex128,byte,rune,uintptr, error,interface},
    212         morekeywords=[3]{map,slice,make,new,nil,len,cap,copy,close,true,false,delete,append,real,imag,complex,chan,},
    213         morekeywords=[4]{for,break,continue,range,goto,switch,case,fallthrough,if,else,default,},
    214         morekeywords=[5]{Println,Printf,Error,},
    215         sensitive=true,
    216         morecomment=[l]{//},
    217         morecomment=[s]{/*}{*/},
    218         morestring=[b]',
    219         morestring=[b]",
    220         morestring=[s]{`}{`},
    221         % replace/adjust listing characters that look bad in sanserif
    222         literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1
    223                 {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
    224                 {<}{\textrm{\textless}}1 {>}{\textrm{\textgreater}}1
    225                 {<-}{\makebox[2ex][c]{\textrm{\textless}\raisebox{0.5ex}{\rule{0.8ex}{0.075ex}}}}2,
    226 }
    227 
    228207\lstnewenvironment{cfa}[1][]
    229208{\lstset{#1}}
     
    236215{}
    237216\lstnewenvironment{Go}[1][]
    238 {\lstset{language=Golang,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
     217{\lstset{language=go,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
    239218{}
    240219\lstnewenvironment{python}[1][]
     
    274253These features are created from scratch as ISO C has only low-level and/or unimplemented concurrency, so C programmers continue to rely on library features like C pthreads.
    275254\CFA introduces modern language-level control-flow mechanisms, like generators, coroutines, user-level threading, and monitors for mutual exclusion and synchronization.
    276 % Library extension for executors, futures, and actors are built on these basic mechanisms.
     255Library extension for executors, futures, and actors are built on these basic mechanisms.
    277256The runtime provides significant programmer simplification and safety by eliminating spurious wakeup and optional monitor barging.
    278257The runtime also ensures multiple monitors can be safely acquired \emph{simultaneously} (deadlock free), and this feature is fully integrated with all monitor synchronization mechanisms.
     
    297276However, functions \emph{cannot} be nested in structures, so there is no lexical binding between a structure and set of functions (member/method) implemented by an implicit \lstinline@this@ (receiver) parameter.},
    298277backwards-compatible extension of the C programming language.
    299 In many ways, \CFA is to C as Scala~\cite{Scala} is to Java, providing a \emph{research vehicle} for new typing and control flow capabilities on top of a highly popular programming language allowing immediate dissemination.
    300278Within the \CFA framework, new control-flow features are created from scratch.
    301279ISO \Celeven defines only a subset of the \CFA extensions, where the overlapping features are concurrency~\cite[\S~7.26]{C11}.
     
    310288Kernel threading was chosen, largely because of its simplicity and fit with the simpler operating systems and hardware architectures at the time, which gave it a performance advantage~\cite{Drepper03}.
    311289Libraries like pthreads were developed for C, and the Solaris operating-system switched from user (JDK 1.1~\cite{JDK1.1}) to kernel threads.
    312 As a result, languages like Java, Scala, Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, and C\#~\cite{Csharp} adopt the 1:1 kernel-threading model, with a variety of presentation mechanisms.
     290As a result, languages like Java, Scala~\cite{Scala}, Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, and C\#~\cite{Csharp} adopt the 1:1 kernel-threading model, with a variety of presentation mechanisms.
    313291From 2000 onwards, languages like Go~\cite{Go}, Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, D~\cite{D}, and \uC~\cite{uC++,uC++book} have championed the M:N user-threading model, and many user-threading libraries have appeared~\cite{Qthreads,MPC,BoostThreads}, including putting green threads back into Java~\cite{Quasar}.
    314292The main argument for user-level threading is that they are lighter weight than kernel threads (locking and context switching do not cross the kernel boundary), so there is less restriction on programming styles that encourage large numbers of threads performing medium work-units to facilitate load balancing by the runtime~\cite{Verch12}.
     
    364342\item
    365343providing statically type-safe interfaces that integrate with the \CFA polymorphic type-system and other language features.
    366 % \item
    367 % library extensions for executors, futures, and actors built on the basic mechanisms.
     344\item
     345library extensions for executors, futures, and actors built on the basic mechanisms.
    368346\item
    369347a runtime system with no spurious wakeup.
    370348\item
    371349a dynamic partitioning mechanism to segregate the execution environment for specialized requirements.
    372 % \item
    373 % a non-blocking I/O library
     350\item
     351a non-blocking I/O library
    374352\item
    375353experimental results showing comparable performance of the new features with similar mechanisms in other programming languages.
     
    392370Therefore, selecting between stackless and stackful semantics is a tradeoff between programming requirements and performance, where stackless is faster and stackful is more general.
    393371Note, creation cost is amortized across usage, so activation cost is usually the dominant factor.
     372
    394373
    395374\begin{figure}
     
    476455\hspace{3pt}
    477456\subfloat[C generator implementation]{\label{f:CFibonacciSim}\usebox\myboxC}
    478 \caption{Fibonacci (output) asymmetric generator}
     457\caption{Fibonacci (output) Asymmetric Generator}
    479458\label{f:FibonacciAsymmetricGenerator}
    480459
     
    551530\subfloat[C generator simulation]{\label{f:CFormatSim}\usebox\myboxB}
    552531\hspace{3pt}
    553 \caption{Formatter (input) asymmetric generator}
     532\caption{Formatter (input) Asymmetric Generator}
    554533\label{f:FormatterAsymmetricGenerator}
    555534\end{figure}
    556535
    557 For generators, coroutines, and threads, many designs are based on function objects or pointers~\cite{Butenhof97, C++14, MS:VisualC++, BoostCoroutines15}.
    558 For example, Python presents generators as a function object:
    559 \begin{python}
    560 def Gen():
    561         ... `yield val` ...
    562 gen = Gen()
    563 for i in range( 10 ):
    564         print( next( gen ) )
    565 \end{python}
    566 Boost presents coroutines in terms of four functor object-types:
    567 \begin{cfa}
    568 asymmetric_coroutine<>::pull_type
    569 asymmetric_coroutine<>::push_type
    570 symmetric_coroutine<>::call_type
    571 symmetric_coroutine<>::yield_type
    572 \end{cfa}
    573 and many languages present threading using function pointers, @pthreads@~\cite{Butenhof97}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}, \eg pthreads:
    574 \begin{cfa}
    575 void * rtn( void * arg ) { ... }
    576 int i = 3, rc;
    577 pthread_t t; $\C{// thread id}$
    578 `rc = pthread_create( &t, rtn, (void *)i );` $\C{// create and initialized task, type-unsafe input parameter}$
    579 \end{cfa}
    580 % void mycor( pthread_t cid, void * arg ) {
    581 %       int * value = (int *)arg;                               $\C{// type unsafe, pointer-size only}$
    582 %       // thread body
    583 % }
    584 % int main() {
    585 %       int input = 0, output;
    586 %       coroutine_t cid = coroutine_create( &mycor, (void *)&input ); $\C{// type unsafe, pointer-size only}$
    587 %       coroutine_resume( cid, (void *)input, (void **)&output ); $\C{// type unsafe, pointer-size only}$
    588 % }
    589 \CFA's preferred presentation model for generators/coroutines/threads is a hybrid of objects and functions, with an object-oriented flavour.
    590 Essentially, the generator/coroutine/thread function is semantically coupled with a generator/coroutine/thread custom type.
    591 The custom type solves several issues, while accessing the underlying mechanisms used by the custom types is still allowed.
    592 
    593536
    594537\subsection{Generator}
     
    596539Stackless generators have the potential to be very small and fast, \ie as small and fast as function call/return for both creation and execution.
    597540The \CFA goal is to achieve this performance target, possibly at the cost of some semantic complexity.
    598 A series of different kinds of generators and their implementation demonstrate how this goal is accomplished.
     541How this goal is accomplished is done through a series of different kinds of generators and their implementation.
    599542
    600543Figure~\ref{f:FibonacciAsymmetricGenerator} shows an unbounded asymmetric generator for an infinite sequence of Fibonacci numbers written in C and \CFA, with a simple C implementation for the \CFA version.
     
    605548Figure~\ref{f:CFibonacci} shows the C approach of manually creating the closure in structure @Fib@, and multiple instances of this closure provide multiple Fibonacci generators.
    606549The C version only has the middle execution state because the top execution state becomes declaration initialization.
    607 Figure~\ref{f:CFAFibonacciGen} shows the \CFA approach, which also has a manual closure, but replaces the structure with a custom \CFA @generator@ type.
    608 This generator type is then connected to a function that \emph{must be named \lstinline|main|},\footnote{
    609 The name \lstinline|main| has special meaning in C, specifically the function where a program starts execution.
    610 Hence, overloading this name for other starting points (generator/coroutine/thread) is a logical extension.}
    611 which takes as its only parameter a reference to the generator type, called a \emph{generator main}.
     550Figure~\ref{f:CFAFibonacciGen} shows the \CFA approach, which also has a manual closure, but replaces the structure with a special \CFA @generator@ type.
     551This generator type is then connected to a function named @main@ that takes as its only parameter a reference to the generator type, called a \emph{generator main}.
    612552The generator main contains @suspend@ statements that suspend execution without ending the generator versus @return@.
    613553For the Fibonacci generator-main,\footnote{
     
    634574For contrast, Figure~\ref{f:PythonFibonacci} shows the equivalent Python Fibonacci generator, which does not use a generator type, and hence only has a single interface, but an implicit closure.
    635575
    636 Having to manually create the generator closure by moving local-state variables into the generator type is an additional programmer burden.
     576Having to manually create the generator closure by moving local-state variables into the generator type is an additional programmer burden and possible point of error.
    637577(This restriction is removed by the coroutine, see Section~\ref{s:Coroutine}.)
    638 This requirement follows from the generality of variable-size local-state, \eg local state with a variable-length array requires dynamic allocation because the array size is unknown at compile time.
    639 However, dynamic allocation significantly increases the cost of generator creation/destruction and is a show-stopper for embedded real-time programming.
     578Our concern is that static analysis to discriminate local state from temporary variables is complex and beyond the current scope of the \CFA project.
     579As well, supporting variable-size local-state, like variable-length arrays, requires dynamic allocation of the local state, which significantly increases the cost of generator creation/destruction, and is a show-stopper for embedded programming.
    640580But more importantly, the size of the generator type is tied to the local state in the generator main, which precludes separate compilation of the generator main, \ie a generator must be inlined or local state must be dynamically allocated.
    641 With respect to safety, we believe static analysis can discriminate local state from temporary variables in a generator, \ie variable usage spanning @suspend@, and generate a compile-time error.
    642 Finally, our current experience is that most generator problems have simple data state, including local state, but complex execution state, so the burden of creating the generator type is small.
     581Our current experience is that most generator problems have simple data state, including local state, but complex execution state.
    643582As well, C programmers are not afraid with this kind of semantic programming requirement, if it results in very small, fast generators.
    644583
     
    690629\begin{python}[aboveskip=0pt,belowskip=0pt]
    691630def Fib():
    692         fn1, fn = 0, 1
    693         while True:
    694                 `yield fn1`
    695                 fn1, fn = fn, fn1 + fn
     631    fn1, fn = 0, 1
     632    while True:
     633        `yield fn1`
     634        fn1, fn = fn, fn1 + fn
    696635f1 = Fib()
    697636f2 = Fib()
     
    732671\hspace{3pt}
    733672\subfloat[Formatter]{\label{f:PythonFormatter}\usebox\myboxB}
    734 \caption{Python generator}
     673\caption{Python Generator}
    735674\label{f:PythonGenerator}
    736675
     
    820759`generator PingPong` {
    821760        const char * name;
    822         int N;
    823         int i;                          // local state
    824761        PingPong & partner; // rebindable reference
     762        int N, i;
    825763};
    826 
     764void ?{}(PingPong & pp, char * nm, int N) with(pp) {
     765        name = nm;  partner = 0p;  pp.N = N;  i = 0; }
    827766void `main( PingPong & pp )` with(pp) {
    828767        for ( ; i < N; i += 1 ) {
     
    833772int main() {
    834773        enum { N = 5 };
    835         PingPong ping = {"ping",N,0}, pong = {"pong",N,0};
     774        PingPong ping = {"ping", N}, pong = {"pong", N};
    836775        &ping.partner = &pong;  &pong.partner = &ping;
    837776        `resume( ping );`
     
    844783typedef struct PingPong {
    845784        const char * name;
     785        struct PingPong * partner;
    846786        int N, i;
    847         struct PingPong * partner;
    848787        void * next;
    849788} PingPong;
    850 #define PPCtor(name, N) {name,N,0,NULL,NULL}
     789#define PPCtor(name, N) { name, NULL, N, 0, NULL }
    851790void comain( PingPong * pp ) {
    852791        if ( pp->next ) goto *pp->next;
     
    870809\subfloat[C generator simulation]{\label{f:CPingPongSim}\usebox\myboxB}
    871810\hspace{3pt}
    872 \caption{Ping-Pong symmetric generator}
     811\caption{Ping-Pong Symmetric Generator}
    873812\label{f:PingPongSymmetricGenerator}
    874813\end{figure}
     
    892831A coroutine is specified by replacing @generator@ with @coroutine@ for the type.
    893832This generality results in higher cost for creation, due to dynamic stack allocation, execution, due to context switching among stacks, and terminating, due to possible stack unwinding and dynamic stack deallocation.
    894 A series of different kinds of coroutines and their implementation demonstrate how coroutines extend generators.
     833How coroutines differ from generators is done through a series of examples.
    895834
    896835First, the previous generator examples are converted to their coroutine counterparts, allowing local-state variables to be moved from the generator type into the coroutine main.
     
    923862\end{cfa}
    924863\end{description}
    925 It is also possible to refactor code containing local-state and @suspend@ statements into a helper function, like the computation of the CRC for the device driver.
     864It is also possible to refactor code containing local-state and @suspend@ statements into a helper routine, like the computation of the CRC for the device driver.
    926865\begin{cfa}
    927866unsigned int Crc() {
     
    938877
    939878\begin{comment}
    940 Figure~\ref{f:Coroutine3States} creates a @coroutine@ type, @`coroutine` Fib { int fn; }@, which provides communication, @fn@, for the \newterm{coroutine main}, @main@, which runs on the coroutine stack, and possibly multiple interface functions, \eg @next@.
     879Figure~\ref{f:Coroutine3States} creates a @coroutine@ type, @`coroutine` Fib { int fn; }@, which provides communication, @fn@, for the \newterm{coroutine main}, @main@, which runs on the coroutine stack, and possibly multiple interface routines, \eg @next@.
    941880Like the structure in Figure~\ref{f:ExternalState}, the coroutine type allows multiple instances, where instances of this type are passed to the (overloaded) coroutine main.
    942881The coroutine main's stack holds the state for the next generation, @f1@ and @f2@, and the code represents the three states in the Fibonacci formula via the three suspend points, to context switch back to the caller's @resume@.
    943 The interface function @next@, takes a Fibonacci instance and context switches to it using @resume@;
     882The interface routine @next@, takes a Fibonacci instance and context switches to it using @resume@;
    944883on restart, the Fibonacci field, @fn@, contains the next value in the sequence, which is returned.
    945884The first @resume@ is special because it allocates the coroutine stack and cocalls its coroutine main on that stack;
     
    1027966def Fib():
    1028967
    1029         fn1, fn = 0, 1
    1030         while True:
    1031                 `yield fn1`
    1032                 fn1, fn = fn, fn1 + fn
     968    fn1, fn = 0, 1
     969    while True:
     970        `yield fn1`
     971        fn1, fn = fn, fn1 + fn
    1033972
    1034973
     
    1055994\hspace{3pt}
    1056995\subfloat[Python]{\label{f:ExternalState}\usebox\myboxC}
    1057 \caption{Fibonacci generator}
     996\caption{Fibonacci Generator}
    1058997\label{f:C-fibonacci}
    1059998\end{figure}
     
    11791118\caption{Producer / consumer: resume-resume cycle, bi-directional communication}
    11801119\label{f:ProdCons}
     1120
     1121\medskip
     1122
     1123\begin{center}
     1124\input{FullProdConsStack.pstex_t}
     1125\end{center}
     1126\vspace*{-10pt}
     1127\caption{Producer / consumer runtime stacks}
     1128\label{f:ProdConsRuntimeStacks}
     1129
     1130\medskip
     1131
     1132\begin{center}
     1133\input{FullCoroutinePhases.pstex_t}
     1134\end{center}
     1135\vspace*{-10pt}
     1136\caption{Ping / Pong coroutine steps}
     1137\label{f:PingPongFullCoroutineSteps}
    11811138\end{figure}
    11821139
     
    11991156The loop then repeats calling @payment@, where each call resumes the producer coroutine.
    12001157Figure~\ref{f:ProdConsRuntimeStacks} shows the runtime stacks of the program main, and the coroutine mains for @prod@ and @cons@ during the cycling.
    1201 
    1202 \begin{figure}
    1203 \begin{center}
    1204 \input{FullProdConsStack.pstex_t}
    1205 \end{center}
    1206 \vspace*{-10pt}
    1207 \caption{Producer / consumer runtime stacks}
    1208 \label{f:ProdConsRuntimeStacks}
    1209 
    1210 \medskip
    1211 
    1212 \begin{center}
    1213 \input{FullCoroutinePhases.pstex_t}
    1214 \end{center}
    1215 \vspace*{-10pt}
    1216 \caption{Ping / Pong coroutine steps}
    1217 \label{f:PingPongFullCoroutineSteps}
    1218 \end{figure}
    12191158
    12201159Terminating a coroutine cycle is more complex than a generator cycle, because it requires context switching to the program main's \emph{stack} to shutdown the program, whereas generators started by the program main run on its stack.
     
    12581197
    12591198
    1260 \subsection{(Generator) Coroutine Implementation}
    1261 
    1262 A significant implementation challenge for generators/coroutines (and threads, see Section~\ref{s:threads}) is adding extra fields to the custom types and related functions, \eg inserting code after/before the coroutine constructor/destructor and @main@ to create/initialize/de-initialize/destroy any extra fields, \eg stack.
    1263 There are several solutions to these problem, which follow from the object-oriented-flavoured choice to adopt custom types.
    1264 
    1265 For object-oriented languages, inheritance is used to provide extra fields and code via explicit inheritance:
     1199\subsection{Coroutine Implementation}
     1200
     1201A significant implementation challenge for coroutines (and threads, see Section~\ref{threads}) is adding extra fields and executing code after/before the coroutine constructor/destructor and coroutine main to create/initialize/de-initialize/destroy extra fields and the stack.
     1202There are several solutions to this problem and the chosen option directed the \CFA coroutine design.
     1203
     1204For object-oriented languages, inheritance can be used to provide extra fields and code, but it requires explicitly writing the inheritance:
    12661205\begin{cfa}[morekeywords={class,inherits}]
    1267 class myCoroutine inherits baseCoroutine { ... }
    1268 \end{cfa}
    1269 The problem is that the programming language and its tool chain, \eg debugger, @valgrind@, need to understand @baseCoroutine@ because it infers special property, so type @baseCoroutine@ becomes a de facto keyword and all types inheriting from it are implicitly custom types.
    1270 As well, some special properties are not handled by existing language semantics, \eg the execution of constructors/destructors is in the wrong order to implicitly start threads because the thread must start \emph{after} all constructors as it relies on a completely initialized object, but the inherited constructor runs \emph{before} the derived.
    1271 Alternatives, such as explicitly starting threads as in Java, are repetitive and forgetting to call start is a common source of errors.
     1206class mycoroutine inherits baseCoroutine { ... }
     1207\end{cfa}
     1208In addition, the programming language and possibly its tool set, \eg debugger, @valgrind@ need to understand @baseCoroutine@ because of special stack property of coroutines.
     1209Furthermore, the execution of constructors/destructors is in the wrong order for certain operations.
     1210For example, for threads if the thread is implicitly started, it must start \emph{after} all constructors, because the thread relies on a completely initialized object, but the inherited constructor runs \emph{before} the derived.
    12721211
    12731212An alternative is composition:
    12741213\begin{cfa}
    1275 struct myCoroutine {
    1276         ... // declaration/communication variables
     1214struct mycoroutine {
     1215        ... // declarations
    12771216        baseCoroutine dummy; // composition, last declaration
    12781217}
    12791218\end{cfa}
    12801219which also requires an explicit declaration that must be last to ensure correct initialization order.
    1281 However, there is nothing preventing wrong placement or multiple declarations.
    1282 
    1283 \CFA custom types make any special properties explicit to the language and its tool chain, \eg the language code-generator knows where to inject code and when it is unsafe to perform certain optimizations, and IDEs using simple parsing can find and manipulate types with special properties.
    1284 The downside of this approach is that it makes custom types a special case in the language.
    1285 Users wanting to extend custom types or build their own can only do so in ways offered by the language.
    1286 Furthermore, implementing custom types without language supports may display the power of a programming language.
    1287 \CFA blends the two approaches, providing custom type for idiomatic \CFA code, extending and building new custom types is still possible, similar to Java approach with builtin and library concurrency.
    1288 
    1289 Part of the mechanism to generalize custom types is the \CFA trait, \eg the definition for custom-type @coroutine@ is anything satisfying the trait @is_coroutine@, and this trait both enforces and restricts the coroutine-interface functions.
    1290 \begin{cfa}
    1291 trait is_coroutine( `dtype` T ) {
    1292         void main( T & );
    1293         coroutine_desc * get_coroutine( T & );
    1294 };
    1295 forall( `dtype` T | is_coroutine(T) ) void $suspend$( T & );
    1296 forall( `dtype` T | is_coroutine(T) ) void resume( T & );
    1297 \end{cfa}
    1298 Note, copying generators/coroutines/threads is not meaningful.
    1299 For example, a coroutine retains its last resumer and suspends back to it;
    1300 having a copy also suspend back to the same resumer is undefined semantics.
    1301 Furthermore, two coroutines cannot logically execute on the same stack.
    1302 A deep coroutine copy, which copies the stack, is also meaningless in an unmanaged language (no garbage collection), like C, because the stack may contain pointers to object within it that require updating for the copy.
    1303 Now, the @dtype@ property provides no \emph{implicit} copying operations and the @is_coroutine@ trait provides no \emph{explicit} copying operations, so all coroutines must be passed by reference (pointer).
    1304 The function definitions ensures there is a statically-typed @main@ function that is the starting point (first stack frame) of a coroutine, and a mechanism to get (read) the currently executing coroutine handle.
    1305 The @main@ function has no return value or additional parameters because the coroutine type allows an arbitrary number of interface functions with corresponding arbitrary typed input/output values versus fixed ones.
    1306 The advantage of this approach is that users can easily create different types of coroutines, \eg changing the memory layout of a coroutine is trivial when implementing the @get_coroutine@ function, and possibly redefining @suspend@ and @resume@.
    1307 
    1308 The \CFA custom-type @coroutine@ implicitly implements the getter and forward declarations required for implementing the coroutine main:
    1309 \begin{cquote}
    1310 \begin{tabular}{@{}ccc@{}}
    1311 \begin{cfa}
    1312 coroutine MyCor {
    1313         int value;
    1314 
    1315 };
    1316 \end{cfa}
    1317 &
    1318 {\Large $\Rightarrow$}
    1319 &
    1320 \begin{tabular}{@{}ccc@{}}
    1321 \begin{cfa}
    1322 struct MyCor {
    1323         int value;
    1324         coroutine_desc cor;
    1325 };
    1326 \end{cfa}
    1327 &
    1328 \begin{cfa}
    1329 static inline coroutine_desc *
    1330 get_coroutine( MyCor & this ) {
    1331         return &this.cor;
    1332 }
    1333 \end{cfa}
    1334 &
    1335 \begin{cfa}
    1336 void main( MyCor * this );
    1337 
    1338 
    1339 
    1340 \end{cfa}
    1341 \end{tabular}
    1342 \end{tabular}
    1343 \end{cquote}
    1344 The combination of custom types and fundamental @trait@ description of these types allows an concise specification for programmers and tools, while more advanced programmers can have tighter control over memory layout and initialization.
     1220However, there is nothing preventing wrong placement or multiple declarations of @baseCoroutine@.
     1221
     1222For coroutines, as for threads, many implementations are based on function pointers or function objects~\cite{Butenhof97, C++14, MS:VisualC++, BoostCoroutines15}.
     1223For example, Boost implements coroutines in terms of four functor object-types:
     1224\begin{cfa}
     1225asymmetric_coroutine<>::pull_type
     1226asymmetric_coroutine<>::push_type
     1227symmetric_coroutine<>::call_type
     1228symmetric_coroutine<>::yield_type
     1229\end{cfa}
    13451230
    13461231Figure~\ref{f:CoroutineMemoryLayout} shows different memory-layout options for a coroutine (where a task is similar).
    1347 The coroutine handle is the @coroutine@ instance containing programmer specified global/communication variables across interface functions.
    1348 The coroutine descriptor contains all implicit declarations needed by the runtime, \eg @suspend@/@resume@, and can be part of the coroutine handle or separate.
    1349 The coroutine stack can appear in a number of locations and fixed or variable sized.
    1350 Hence, the coroutine's stack could be a VLS\footnote{
     1232The coroutine handle is the @coroutine@ instance containing programmer specified global/communication variables.
     1233The coroutine descriptor contains all implicit declarations needed by the runtime, \eg @suspend@/@resume@, and can be part of the coroutine handle or separate.\footnote{
    13511234We are examining variable-sized structures (VLS), where fields can be variable-sized structures or arrays.
    13521235Once allocated, a VLS is fixed sized.}
    1353 on the allocating stack, provided the allocating stack is large enough.
    1354 For a VLS stack allocation, allocation/deallocation is an inexpensive adjustment of the stack point, modulo any stack constructor costs (\eg initial frame setup).
    1355 For heap stack allocation, allocation/deallocation is an expensive heap allocation (where the heap can be a shared resource), modulo any stack constructor costs.
    1356 With heap stack allocation, it is also possible to use a split (segmented) stack calling-convention, available with gcc and clang, so the stack is variable sized.
    1357 Currently, \CFA supports stack/heap allocated descriptors but only fixed-sized heap allocated stacks;
    1358 In \CFA debug-mode, the fixed-sized stack is terminated with a write-only page, which catches most stack overflows.
    1359 Experience teaching concurrency with \uC~\cite{CS343} shows fixed-sized stacks are rarely an issue for students.
    1360 Split-stack allocation is under development but requires recompilation of existing code, which may be impossible.
     1236The coroutine stack can appear in a number of locations and forms, fixed or variable sized.
     1237Hence, the coroutine's stack could be a VLS on the allocating stack, provided the allocating stack is large enough.
     1238For stack allocation, allocation/deallocation only requires a few arithmetic operations to compute the size and adjust the stack pointer, modulo any constructor costs.
     1239For heap allocation, allocation/deallocation is an expensive heap operation (where the heap can be a shared resource), modulo any constructor costs.
     1240For heap stack allocation, it is also possible to use a split (segmented) stack calling-convention, available with gcc and clang, so the stack is variable sized.
     1241Currently, \CFA supports stack/heap allocated descriptors but only heap allocated stacks;
     1242split-stack allocation is under development.
     1243In \CFA debug-mode, a fixed-sized stack is terminated with a write-only page, which catches most stack overflows.
    13611244
    13621245\begin{figure}
     
    13671250\end{figure}
    13681251
     1252Similarly, the canonical threading paradigm is often based on function pointers, \eg @pthreads@~\cite{Butenhof97}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}.
     1253However, the generic thread-handle (identifier) is limited (few operations), unless it is wrapped in a custom type, as in the pthreads approach.
     1254\begin{cfa}
     1255void mycor( coroutine_t cid, void * arg ) {
     1256        int * value = (int *)arg;                               $\C{// type unsafe, pointer-size only}$
     1257        // Coroutine body
     1258}
     1259int main() {
     1260        int input = 0, output;
     1261        coroutine_t cid = coroutine_create( &mycor, (void *)&input ); $\C{// type unsafe, pointer-size only}$
     1262        coroutine_resume( cid, (void *)input, (void **)&output ); $\C{// type unsafe, pointer-size only}$
     1263}
     1264\end{cfa}
     1265Since the custom type is simple to write in \CFA and solves several issues, adding support for function/lambda-based coroutines adds very little.
     1266
     1267Note, the type @coroutine_t@ must be an abstract handle to the coroutine, because the coroutine descriptor and its stack are non-copyable.
     1268Copying the coroutine descriptor results in copies being out of date with the current state of the stack.
     1269Correspondingly, copying the stack results in copies being out of date with the coroutine descriptor, and pointers in the stack being out of date to data on the stack.
     1270(There is no mechanism in C to find all stack-specific pointers and update them as part of a copy.)
     1271
     1272The selected approach is to use language support by introducing a new kind of aggregate (structure):
     1273\begin{cfa}
     1274coroutine Fibonacci {
     1275        int fn; // communication variables
     1276};
     1277\end{cfa}
     1278The @coroutine@ keyword means the compiler (and tool set) can find and inject code where needed.
     1279The downside of this approach is that it makes coroutine a special case in the language.
     1280Users wanting to extend coroutines or build their own for various reasons can only do so in ways offered by the language.
     1281Furthermore, implementing coroutines without language support also displays the power of a programming language.
     1282While this is ultimately the option used for idiomatic \CFA code, coroutines and threads can still be constructed without language support.
     1283The reserved keyword simply eases use for the common case.
     1284
     1285Part of the mechanism to generalize coroutines is using a \CFA trait, which defines a coroutine as anything satisfying the trait @is_coroutine@, and this trait restricts the available set of coroutine-manipulation routines:
     1286\begin{cfa}
     1287trait is_coroutine( `dtype` T ) {
     1288        void main( T & );
     1289        coroutine_desc * get_coroutine( T & );
     1290};
     1291forall( `dtype` T | is_coroutine(T) ) void suspend( T & );
     1292forall( `dtype` T | is_coroutine(T) ) void resume( T & );
     1293\end{cfa}
     1294The @dtype@ property provides no implicit copying operations and the @is_coroutine@ trait provides no explicit copying operations, so all coroutines must be passed by reference (pointer).
     1295The routine definitions ensure there is a statically-typed @main@ routine that is the starting point (first stack frame) of a coroutine, and a mechanism to get (read) the currently executing coroutine handle.
     1296The @main@ routine has no return value or additional parameters because the coroutine type allows an arbitrary number of interface routines with corresponding arbitrary typed input/output values versus fixed ones.
     1297The advantage of this approach is that users can easily create different types of coroutines, \eg changing the memory layout of a coroutine is trivial when implementing the @get_coroutine@ routine, and possibly redefining @suspend@ and @resume@.
     1298The \CFA keyword @coroutine@ implicitly implements the getter and forward declarations required for implementing the coroutine main:
     1299\begin{cquote}
     1300\begin{tabular}{@{}ccc@{}}
     1301\begin{cfa}
     1302coroutine MyCor {
     1303        int value;
     1304
     1305};
     1306\end{cfa}
     1307&
     1308{\Large $\Rightarrow$}
     1309&
     1310\begin{tabular}{@{}ccc@{}}
     1311\begin{cfa}
     1312struct MyCor {
     1313        int value;
     1314        coroutine_desc cor;
     1315};
     1316\end{cfa}
     1317&
     1318\begin{cfa}
     1319static inline coroutine_desc *
     1320get_coroutine( MyCor & this ) {
     1321        return &this.cor;
     1322}
     1323\end{cfa}
     1324&
     1325\begin{cfa}
     1326void main( MyCor * this );
     1327
     1328
     1329
     1330\end{cfa}
     1331\end{tabular}
     1332\end{tabular}
     1333\end{cquote}
     1334The combination of these two approaches allows an easy and concise specification to coroutining (and concurrency) for normal users, while more advanced users have tighter control on memory layout and initialization.
     1335
    13691336
    13701337\section{Concurrency}
    13711338\label{s:Concurrency}
    13721339
    1373 Concurrency is nondeterministic scheduling of independent sequential execution-paths (threads), where each thread has its own stack.
    1374 A single thread with multiple call stacks, \newterm{coroutining}~\cite{Conway63,Marlin80}, does \emph{not} imply concurrency~\cite[\S~2]{Buhr05a}.
    1375 In coroutining, coroutines self-schedule the thread across stacks so execution is deterministic.
    1376 (It is \emph{impossible} to generate a concurrency error when coroutining.)
    1377 However, coroutines are a stepping stone towards concurrency.
    1378 
    1379 The transition to concurrency, even for a single thread with multiple stacks, occurs when coroutines context switch to a \newterm{scheduling coroutine}, introducing non-determinism from the coroutine perspective~\cite[\S~3]{Buhr05a}.
    1380 Therefore, a minimal concurrency system requires coroutines \emph{in conjunction with a nondeterministic scheduler}.
     1340At its core, concurrency is based on multiple call-stacks and scheduling threads executing on these stacks.
     1341Multiple call stacks (or contexts) and a single thread of execution, called \newterm{coroutining}~\cite{Conway63,Marlin80}, does \emph{not} imply concurrency~\cite[\S~2]{Buhr05a}.
     1342In coroutining, the single thread is self-scheduling across the stacks, so execution is deterministic, \ie the execution path from input to output is fixed and predictable.
     1343A \newterm{stackless} coroutine executes on the caller's stack~\cite{Python} but this approach is restrictive, \eg preventing modularization and supporting only iterator/generator-style programming;
     1344a \newterm{stackful} coroutine executes on its own stack, allowing full generality.
     1345Only stackful coroutines are a stepping stone to concurrency.
     1346
     1347The transition to concurrency, even for execution with a single thread and multiple stacks, occurs when coroutines also context switch to a \newterm{scheduling oracle}, introducing non-determinism from the coroutine perspective~\cite[\S~3]{Buhr05a}.
     1348Therefore, a minimal concurrency system is possible using coroutines (see Section \ref{coroutine}) in conjunction with a scheduler to decide where to context switch next.
    13811349The resulting execution system now follows a cooperative threading-model, called \newterm{non-preemptive scheduling}.
    1382 Adding \newterm{preemption} introduces non-cooperative scheduling, where context switching occurs randomly between any two instructions often based on a timer interrupt, called \newterm{preemptive scheduling}.
    1383 While a scheduler introduces uncertain execution among explicit context switches, preemption introduces uncertainty by introducing implicit context switches.
    1384 Uncertainty gives the illusion of parallelism on a single processor and provides a mechanism to access and increase performance on multiple processors.
    1385 The reason is that the scheduler/runtime have complete knowledge about resources and how to best utilize them.
    1386 However, the introduction of unrestricted nondeterminism results in the need for \newterm{mutual exclusion} and \newterm{synchronization}, which restrict nondeterminism for correctness;
    1387 otherwise, it is impossible to write meaningful concurrent programs.
    1388 Optimal concurrent performance is often obtained by having as much nondeterminism as mutual exclusion and synchronization correctness allow.
    1389 
    1390 A scheduler can either be a stackless or stackful.
     1350
     1351Because the scheduler is special, it can either be a stackless or stackful coroutine.
    13911352For stackless, the scheduler performs scheduling on the stack of the current coroutine and switches directly to the next coroutine, so there is one context switch.
    13921353For stackful, the current coroutine switches to the scheduler, which performs scheduling, and it then switches to the next coroutine, so there are two context switches.
    13931354A stackful scheduler is often used for simplicity and security.
    13941355
    1395 
    1396 \subsection{Thread}
    1397 \label{s:threads}
    1398 
    1399 Threading needs the ability to start a thread and wait for its completion.
    1400 A common API for this ability is @fork@ and @join@.
     1356Regardless of the approach used, a subset of concurrency related challenges start to appear.
     1357For the complete set of concurrency challenges to occur, the missing feature is \newterm{preemption}, where context switching occurs randomly between any two instructions, often based on a timer interrupt, called \newterm{preemptive scheduling}.
     1358While a scheduler introduces uncertainty in the order of execution, preemption introduces uncertainty about where context switches occur.
     1359Interestingly, uncertainty is necessary for the runtime (operating) system to give the illusion of parallelism on a single processor and increase performance on multiple processors.
     1360The reason is that only the runtime has complete knowledge about resources and how to best utilize them.
     1361However, the introduction of unrestricted non-determinism results in the need for \newterm{mutual exclusion} and \newterm{synchronization} to restrict non-determinism for correctness;
     1362otherwise, it is impossible to write meaningful programs.
     1363Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows.
     1364
     1365An important missing feature in C is threading\footnote{While the C11 standard defines a \protect\lstinline@threads.h@ header, it is minimal and defined as optional.
     1366As such, library support for threading is far from widespread.
     1367At the time of writing the paper, neither \protect\lstinline@gcc@ nor \protect\lstinline@clang@ support \protect\lstinline@threads.h@ in their standard libraries.}.
     1368In modern programming languages, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore existing and new programming languages must have tools for writing efficient concurrent programs to take advantage of parallelism.
     1369As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages.
     1370Furthermore, because C is a system-level language, programmers expect to choose precisely which features they need and which cost they are willing to pay.
     1371Hence, concurrent programs should be written using high-level mechanisms, and only step down to lower-level mechanisms when performance bottlenecks are encountered.
     1372
     1373
     1374\subsection{Thread Interface}
     1375\label{threads}
     1376
     1377Both user and kernel threads are supported, where user threads provide concurrency and kernel threads provide parallelism.
     1378Like coroutines and for the same design reasons, the selected approach for user threads is to use language support by introducing a new kind of aggregate (structure) and a \CFA trait:
    14011379\begin{cquote}
    1402 \begin{tabular}{@{}lll@{}}
    1403 \multicolumn{1}{c}{\textbf{Java}} & \multicolumn{1}{c}{\textbf{\Celeven}} & \multicolumn{1}{c}{\textbf{pthreads}} \\
    1404 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    1405 class MyTask extends Thread {...}
    1406 mytask t = new MyTask(...);
    1407 `t.start();` // start
    1408 // concurrency
    1409 `t.join();` // wait
     1380\begin{tabular}{@{}c@{\hspace{3\parindentlnth}}c@{}}
     1381\begin{cfa}
     1382thread myThread {
     1383        // communication variables
     1384};
     1385
     1386
    14101387\end{cfa}
    14111388&
    1412 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    1413 class MyTask { ... } // functor
    1414 MyTask mytask;
    1415 `thread t( mytask, ... );` // start
    1416 // concurrency
    1417 `t.join();` // wait
    1418 \end{cfa}
    1419 &
    1420 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    1421 void * rtn( void * arg ) {...}
    1422 pthread_t t;  int i = 3;
    1423 `pthread_create( &t, rtn, (void *)i );` // start
    1424 // concurrency
    1425 `pthread_join( t, NULL );` // wait
     1389\begin{cfa}
     1390trait is_thread( `dtype` T ) {
     1391      void main( T & );
     1392      thread_desc * get_thread( T & );
     1393      void ^?{}( T & `mutex` );
     1394};
    14261395\end{cfa}
    14271396\end{tabular}
    14281397\end{cquote}
    1429 \CFA has a simpler approach using a custom @thread@ type and leveraging declaration semantics (allocation/deallocation), where threads implicitly @fork@ after construction and @join@ before destruction.
    1430 \begin{cfa}
    1431 thread MyTask {};
    1432 void main( MyTask & this ) { ... }
     1398(The qualifier @mutex@ for the destructor parameter is discussed in Section~\ref{s:Monitor}.)
     1399Like a coroutine, the statically-typed @main@ routine is the starting point (first stack frame) of a user thread.
     1400The difference is that a coroutine borrows a thread from its caller, so the first thread resuming a coroutine creates an instance of @main@;
     1401whereas, a user thread receives its own thread from the runtime system, which starts in @main@ at some point after the thread constructor is run.\footnote{
     1402The \lstinline@main@ routine is already a special routine in C, \ie where the program's initial thread begins, so it is a natural extension of this semantics to use overloading to declare \lstinline@main@s for user coroutines and threads.}
     1403No return value or additional parameters are necessary for this routine because the task type allows an arbitrary number of interface routines with corresponding arbitrary typed input/output values.
     1404
     1405\begin{comment} % put in appendix with coroutine version ???
     1406As such the @main@ routine of a thread can be defined as
     1407\begin{cfa}
     1408thread foo {};
     1409
     1410void main(foo & this) {
     1411        sout | "Hello World!";
     1412}
     1413\end{cfa}
     1414
     1415In this example, threads of type @foo@ start execution in the @void main(foo &)@ routine, which prints @"Hello World!".@ While this paper encourages this approach to enforce strongly typed programming, users may prefer to use the routine-based thread semantics for the sake of simplicity.
     1416With the static semantics it is trivial to write a thread type that takes a routine pointer as a parameter and executes it on its stack asynchronously.
     1417\begin{cfa}
     1418typedef void (*voidRtn)(int);
     1419
     1420thread RtnRunner {
     1421        voidRtn func;
     1422        int arg;
     1423};
     1424
     1425void ?{}(RtnRunner & this, voidRtn inRtn, int arg) {
     1426        this.func = inRtn;
     1427        this.arg  = arg;
     1428}
     1429
     1430void main(RtnRunner & this) {
     1431        // thread starts here and runs the routine
     1432        this.func( this.arg );
     1433}
     1434
     1435void hello(/*unused*/ int) {
     1436        sout | "Hello World!";
     1437}
     1438
    14331439int main() {
    1434         MyTask team`[10]`; $\C[2.5in]{// allocate stack-based threads, implicit start after construction}$
    1435         // concurrency
    1436 } $\C{// deallocate stack-based threads, implicit joins before destruction}$
    1437 \end{cfa}
    1438 This semantics ensures a thread is started and stopped exactly once, eliminating some programming errors, and scales to multiple threads for basic (termination) synchronization.
    1439 For block allocation to arbitrary depth, including recursion, threads are created/destroyed in a lattice structure (tree with top and bottom).
    1440 Arbitrary topologies are possible using dynamic allocation, allowing threads to outlive their declaration scope, identical to normal dynamically allocating.
    1441 \begin{cfa}
    1442 MyTask * factory( int N ) { ... return `anew( N )`; } $\C{// allocate heap-based threads, implicit start after construction}$
     1440        RtnRunner f = {hello, 42};
     1441        return 0?
     1442}
     1443\end{cfa}
     1444A consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \textbf{API}.
     1445\end{comment}
     1446
     1447For user threads to be useful, it must be possible to start and stop the underlying thread, and wait for it to complete execution.
     1448While using an API such as @fork@ and @join@ is relatively common, such an interface is awkward and unnecessary.
     1449A simple approach is to use allocation/deallocation principles, and have threads implicitly @fork@ after construction and @join@ before destruction.
     1450\begin{cfa}
     1451thread World {};
     1452void main( World & this ) {
     1453        sout | "World!";
     1454}
    14431455int main() {
    1444         MyTask * team = factory( 10 );
    1445         // concurrency
    1446         `delete( team );` $\C{// deallocate heap-based threads, implicit joins before destruction}\CRT$
    1447 }
    1448 \end{cfa}
     1456        World w`[10]`;                                                  $\C{// implicit forks after creation}$
     1457        sout | "Hello ";                                        $\C{// "Hello " and 10 "World!" printed concurrently}$
     1458}                                                                                       $\C{// implicit joins before destruction}$
     1459\end{cfa}
     1460This semantics ensures a thread is started and stopped exactly once, eliminating some programming error, and scales to multiple threads for basic (termination) synchronization.
     1461This tree-structured (lattice) create/delete pattern arising from C block-structure is generalized by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created.
     1462\begin{cfa}
     1463int main() {
     1464        MyThread * heapLive;
     1465        {
     1466                MyThread blockLive;                                     $\C{// fork block-based thread}$
     1467                heapLive = `new`( MyThread );           $\C{// fork heap-based thread}$
     1468                ...
     1469        }                                                                               $\C{// join block-based thread}$
     1470        ...
     1471        `delete`( heapLive );                                   $\C{// join heap-based thread}$
     1472}
     1473\end{cfa}
     1474The heap-based approach allows arbitrary thread-creation topologies, with respect to fork/join-style concurrency.
    14491475
    14501476Figure~\ref{s:ConcurrentMatrixSummation} shows concurrently adding the rows of a matrix and then totalling the subtotals sequentially, after all the row threads have terminated.
     
    14531479The allocation/deallocation pattern appears unusual because allocated objects are immediately deallocated without any intervening code.
    14541480However, for threads, the deletion provides implicit synchronization, which is the intervening code.
    1455 % While the subtotals are added in linear order rather than completion order, which slightly inhibits concurrency, the computation is restricted by the critical-path thread (\ie the thread that takes the longest), and so any inhibited concurrency is very small as totalling the subtotals is trivial.
     1481While the subtotals are added in linear order rather than completion order, which slightly inhibits concurrency, the computation is restricted by the critical-path thread (\ie the thread that takes the longest), and so any inhibited concurrency is very small as totalling the subtotals is trivial.
    14561482
    14571483\begin{figure}
     
    14591485`thread` Adder { int * row, cols, & subtotal; } $\C{// communication variables}$
    14601486void ?{}( Adder & adder, int row[], int cols, int & subtotal ) {
    1461         adder.[ row, cols, &subtotal ] = [ row, cols, &subtotal ];
     1487    adder.[ row, cols, &subtotal ] = [ row, cols, &subtotal ];
    14621488}
    14631489void main( Adder & adder ) with( adder ) {
    1464         subtotal = 0;
    1465         for ( c; cols ) { subtotal += row[c]; }
     1490    subtotal = 0;
     1491    for ( int c = 0; c < cols; c += 1 ) { subtotal += row[c]; }
    14661492}
    14671493int main() {
    1468         const int rows = 10, cols = 1000;
    1469         int matrix[rows][cols], subtotals[rows], total = 0;
    1470         // read matrix
    1471         Adder * adders[rows];
    1472         for ( r; rows; ) { $\C{// start threads to sum rows}$
     1494    const int rows = 10, cols = 1000;
     1495    int matrix[rows][cols], subtotals[rows], total = 0;
     1496    // read matrix
     1497    Adder * adders[rows];
     1498    for ( int r = 0; r < rows; r += 1 ) {       $\C{// start threads to sum rows}$
    14731499                adders[r] = `new( matrix[r], cols, &subtotals[r] );`
    1474         }
    1475         for ( r; rows ) { $\C{// wait for threads to finish}$
    1476                 `delete( adders[r] );` $\C{// termination join}$
    1477                 total += subtotals[r]; $\C{// total subtotal}$
    1478         }
    1479         sout | total;
    1480 }
    1481 \end{cfa}
    1482 \caption{Concurrent matrix summation}
     1500    }
     1501    for ( int r = 0; r < rows; r += 1 ) {       $\C{// wait for threads to finish}$
     1502                `delete( adders[r] );`                          $\C{// termination join}$
     1503                total += subtotals[r];                          $\C{// total subtotal}$
     1504    }
     1505    sout | total;
     1506}
     1507\end{cfa}
     1508\caption{Concurrent Matrix Summation}
    14831509\label{s:ConcurrentMatrixSummation}
    14841510\end{figure}
    14851511
    14861512
    1487 \subsection{Thread Implementation}
    1488 
    1489 Threads in \CFA are user level run by runtime kernel threads (see Section~\ref{s:Parallelism}), where user threads provide concurrency and kernel threads provide parallelism.
    1490 Like coroutines, and for the same design reasons, \CFA provides a custom @thread@ type and a @trait@ to enforce and restrict the task-interface functions.
    1491 \begin{cquote}
    1492 \begin{tabular}{@{}c@{\hspace{3\parindentlnth}}c@{}}
    1493 \begin{cfa}
    1494 thread myThread {
    1495         ... // declaration/communication variables
    1496 };
    1497 
    1498 
    1499 \end{cfa}
    1500 &
    1501 \begin{cfa}
    1502 trait is_thread( `dtype` T ) {
    1503         void main( T & );
    1504         thread_desc * get_thread( T & );
    1505         void ^?{}( T & `mutex` );
    1506 };
    1507 \end{cfa}
    1508 \end{tabular}
    1509 \end{cquote}
    1510 Like coroutines, the @dtype@ property prevents \emph{implicit} copy operations and the @is_thread@ trait provides no \emph{explicit} copy operations, so threads must be passed by reference (pointer).
    1511 Similarly, the function definitions ensure there is a statically-typed @main@ function that is the thread starting point (first stack frame), a mechanism to get (read) the currently executing thread handle, and a special destructor to prevent deallocation while the thread is executing.
    1512 (The qualifier @mutex@ for the destructor parameter is discussed in Section~\ref{s:Monitor}.)
    1513 The difference between the coroutine and thread is that a coroutine borrows a thread from its caller, so the first thread resuming a coroutine creates the coroutine's stack and starts running the coroutine main on the stack;
    1514 whereas, a thread is scheduled for execution in @main@ immediately after its constructor is run.
    1515 No return value or additional parameters are necessary for this function because the @thread@ type allows an arbitrary number of interface functions with corresponding arbitrary typed input/output values.
    1516 
    1517 \begin{comment} % put in appendix with coroutine version ???
    1518 As such the @main@ function of a thread can be defined as
    1519 \begin{cfa}
    1520 thread foo {};
    1521 
    1522 void main(foo & this) {
    1523         sout | "Hello World!";
    1524 }
    1525 \end{cfa}
    1526 
    1527 In this example, threads of type @foo@ start execution in the @void main(foo &)@ function, which prints @"Hello World!".@ While this paper encourages this approach to enforce strongly typed programming, users may prefer to use the function-based thread semantics for the sake of simplicity.
    1528 With the static semantics it is trivial to write a thread type that takes a function pointer as a parameter and executes it on its stack asynchronously.
    1529 \begin{cfa}
    1530 typedef void (*voidRtn)(int);
    1531 
    1532 thread RtnRunner {
    1533         voidRtn func;
    1534         int arg;
    1535 };
    1536 
    1537 void ?{}(RtnRunner & this, voidRtn inRtn, int arg) {
    1538         this.func = inRtn;
    1539         this.arg  = arg;
    1540 }
    1541 
    1542 void main(RtnRunner & this) {
    1543         // thread starts here and runs the function
    1544         this.func( this.arg );
    1545 }
    1546 
    1547 void hello(/*unused*/ int) {
    1548         sout | "Hello World!";
    1549 }
    1550 
    1551 int main() {
    1552         RtnRunner f = {hello, 42};
    1553         return 0;
    1554 }
    1555 \end{cfa}
    1556 A consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \textbf{API}.
    1557 \end{comment}
    1558 
    1559 
    15601513\section{Mutual Exclusion / Synchronization}
    15611514
    1562 Uncontrolled nondeterministic execution produces meaningless results.
    1563 To produce meaningful execution requires clawing back some determinism using mutual exclusion and synchronization, where mutual exclusion provides access-control for threads using shared data, and synchronization is a timing relationship among threads~\cite[\S~4]{Buhr05a}.
    1564 Some concurrent systems eliminate mutable shared-state by switching to stateless communication like message passing~\cite{Thoth,Harmony,V-Kernel,MPI} (Erlang, MPI), channels~\cite{CSP} (CSP,Go), actors~\cite{Akka} (Akka, Scala), or functional techniques (Haskell).
    1565 However, in call/return-based languages, these approaches force a clear distinction, \ie introduce a new programming paradigm between regular and concurrent computation, \eg function call versus message passing.
    1566 Hence, a programmer must learn and manipulate two sets of design/programming patterns.
     1515Uncontrolled non-deterministic execution is meaningless.
     1516To reestablish meaningful execution requires mechanisms to reintroduce determinism, \ie restrict non-determinism, called mutual exclusion and synchronization, where mutual exclusion is an access-control mechanism on data shared by threads, and synchronization is a timing relationship among threads~\cite[\S~4]{Buhr05a}.
     1517Since many deterministic challenges appear with the use of mutable shared state, some languages/libraries disallow it, \eg Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka~\cite{Akka} (Scala).
     1518In these paradigms, interaction among concurrent objects is performed by stateless message-passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely related to networking concepts, \eg channels~\cite{CSP,Go}.
     1519However, in call/return-based languages, these approaches force a clear distinction, \ie introduce a new programming paradigm between regular and concurrent computation, \eg routine call versus message passing.
     1520Hence, a programmer must learn and manipulate two sets of design patterns.
    15671521While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
    15681522In contrast, approaches based on stateful models more closely resemble the standard call/return programming-model, resulting in a single programming paradigm.
     
    15831537A group of instructions manipulating a specific instance of shared data that must be performed atomically is called an (individual) \newterm{critical-section}~\cite{Dijkstra65}.
    15841538The generalization is called a \newterm{group critical-section}~\cite{Joung00}, where multiple tasks with the same session may use the resource simultaneously, but different sessions may not use the resource simultaneously.
    1585 The readers/writer problem~\cite{Courtois71} is an instance of a group critical-section, where readers share a session but writers have a unique session.
    1586 \newterm{Mutual exclusion} enforces that the correct kind and number of threads use a critical section.
     1539The readers/writer problem~\cite{Courtois71} is an instance of a group critical-section, where readers have the same session and all writers have a unique session.
     1540\newterm{Mutual exclusion} enforces that the correct kind and number of threads are using a critical section.
    15871541
    15881542However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use.
     
    15981552Synchronization enforces relative ordering of execution, and synchronization tools provide numerous mechanisms to establish these timing relationships.
    15991553Low-level synchronization primitives offer good performance and flexibility at the cost of ease of use;
    1600 higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, \eg receive-specific versus receive-any thread in message passing or offering specialized solutions, \eg barrier lock.
    1601 Often synchronization is used to order access to a critical section, \eg ensuring a waiting writer thread enters the critical section before a calling reader thread.
    1602 If the calling reader is scheduled before the waiting writer, the reader has \newterm{barged}.
     1554higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, \eg message passing, or offering a simpler solution to otherwise involved challenges, \eg barrier lock.
     1555Often synchronization is used to order access to a critical section, \eg ensuring a reader thread is the next kind of thread to enter a critical section.
     1556If a writer thread is scheduled for next access, but another reader thread acquires the critical section first, that reader \newterm{barged}.
    16031557Barging can result in staleness/freshness problems, where a reader barges ahead of a writer and reads temporally stale data, or a writer barges ahead of another writer overwriting data with a fresh value preventing the previous value from ever being read (lost computation).
    1604 Preventing or detecting barging is an involved challenge with low-level locks, which is made easier through higher-level constructs.
    1605 This challenge is often split into two different approaches: barging avoidance and prevention.
     1606 Algorithms that unconditionally release a lock for competing threads to acquire use barging avoidance during synchronization to force a barging thread to wait;
     1607 algorithms that conditionally hold locks during synchronization, \eg baton-passing~\cite{Andrews89}, prevent barging completely.
     1558Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs.
     1559This challenge is often split into two different approaches: barging avoidance and barging prevention.
     1560Algorithms that allow a barger, but divert it until later using current synchronization state (flags), are avoiding the barger;
     1561algorithms that preclude a barger from entering during synchronization in the critical section prevent barging completely.
     1562Techniques like baton-passing locks~\cite{Andrews89} between threads instead of unconditionally releasing locks is an example of barging prevention.
    16081563
    16091564
     
    16111566\label{s:Monitor}
    16121567
    1613 A \textbf{monitor} is a set of functions that ensure mutual exclusion when accessing shared state.
    1614 More precisely, a monitor is a programming technique that binds mutual exclusion to function scope, as opposed to locks, where mutual-exclusion is defined by acquire/release calls, independent of lexical context (analogous to block and heap storage allocation).
    1615 The strong association with the call/return paradigm eases programming, comprehension, and maintenance, at a slight cost in flexibility and efficiency.
    1616 \CFA uses a custom @monitor@ type and leverages declaration semantics (deallocation) to protect active or waiting threads in a monitor.
    1617 
    1618 The following is a \CFA monitor implementation of an atomic counter.
    1619 \newpage
    1620 \begin{cfa}[morekeywords=nomutex]
    1621 `monitor` Aint { int cnt; }; $\C[4.25in]{// atomic integer counter}$
    1622 int ++?( Aint & `mutex`$\(_{opt}\)$ this ) with( this ) { return ++cnt; } $\C{// increment}$
    1623 int ?=?( Aint & `mutex`$\(_{opt}\)$ lhs, int rhs ) with( lhs ) { cnt = rhs; } $\C{// conversions}\CRT$
    1624 int ?=?( int & lhs, Aint & `mutex`$\(_{opt}\)$ rhs ) with( rhs ) { lhs = cnt; }
    1625 \end{cfa}
    1626 % The @Aint@ constructor, @?{}@, uses the \lstinline[morekeywords=nomutex]@nomutex@ qualifier indicating mutual exclusion is unnecessary during construction because an object is inaccessible (private) until after it is initialized.
    1627 % (While a constructor may publish its address into a global variable, doing so generates a race-condition.)
    1628 The prefix increment operation, @++?@, is normally @mutex@, indicating mutual exclusion is necessary during function execution, to protect the incrementing from race conditions, unless there is an atomic increment instruction for the implementation type.
    1629 The assignment operators provide bi-directional conversion between an atomic and normal integer without accessing field @cnt@;
    1630 these operations only need @mutex@, if reading/writing the implementation type is not atomic.
    1631 The atomic counter is used without any explicit mutual-exclusion and provides thread-safe semantics, which is similar to the \CC template @std::atomic@.
    1632 \begin{cfa}
    1633 int i = 0, j = 0, k = 5;
    1634 Aint x = { 0 }, y = { 0 }, z = { 5 }; $\C{// no mutex required}$
    1635 ++x; ++y; ++z; $\C{// safe increment by multiple threads}$
    1636 x = 2; y = i; z = k; $\C{// conversions}$
    1637 i = x; j = y; k = z;
    1638 \end{cfa}
    1639 
    1640 \CFA monitors have \newterm{multi-acquire} semantics so the thread in the monitor may acquire it multiple times without deadlock, allowing recursion and calling other interface functions.
    1641 \begin{cfa}
    1642 monitor M { ... } m;
    1643 void foo( M & mutex m ) { ... } $\C{// acquire mutual exclusion}$
    1644 void bar( M & mutex m ) { $\C{// acquire mutual exclusion}$
    1645         ... `bar( m );` ... `foo( m );` ... $\C{// reacquire mutual exclusion}$
    1646 }
    1647 \end{cfa}
    1648 \CFA monitors also ensure the monitor lock is always released regardless of how an acquiring function ends (normal or exceptional), and returning a shared variable is safe via copying before the lock is released.
    1649 Similar safety is offered by explicit mechanisms like \CC RAII;
    1650 monitor implicit safety ensures no programmer error.
     1651 Furthermore, RAII mechanisms cannot handle complex synchronization within a monitor, where the monitor lock may not be released on function exit but passed to an unblocking thread.
    1652 RAII is purely a mutual-exclusion mechanism (see Section~\ref{s:Scheduling}).
    1653 
    1654 
    1655 \subsection{Monitor Implementation}
    1656 
    1657 For the same design reasons, \CFA provides a custom @monitor@ type and a @trait@ to enforce and restrict the monitor-interface functions.
    1658 \begin{cquote}
    1659 \begin{tabular}{@{}c@{\hspace{3\parindentlnth}}c@{}}
    1660 \begin{cfa}
    1661 monitor M {
    1662         ... // shared data
    1663 };
    1664 
    1665 \end{cfa}
    1666 &
     1568A \textbf{monitor} is a set of routines that ensure mutual exclusion when accessing shared state.
     1569More precisely, a monitor is a programming technique that binds mutual exclusion to routine scope, as opposed to locks, where mutual-exclusion is defined by acquire/release calls, independent of lexical context (analogous to block and heap storage allocation).
     1570The strong association with the call/return paradigm eases programmability, readability and maintainability, at a slight cost in flexibility and efficiency.
     1571
     1572Note, like coroutines/threads, both locks and monitors require an abstract handle to reference them, because at their core, both mechanisms are manipulating non-copyable shared-state.
     1573Copying a lock is insecure because it is possible to copy an open lock and then use the open copy when the original lock is closed to simultaneously access the shared data.
     1574Copying a monitor is secure because both the lock and shared data are copies, but copying the shared data is meaningless because it no longer represents a unique entity.
     1575As for coroutines/tasks, the @dtype@ property provides no implicit copying operations and the @is_monitor@ trait provides no explicit copying operations, so all locks/monitors must be passed by reference (pointer).
    16671576\begin{cfa}
    16681577trait is_monitor( `dtype` T ) {
     
    16711580};
    16721581\end{cfa}
    1673 \end{tabular}
    1674 \end{cquote}
    1675 Like before, the @dtype@ property prevents \emph{implicit} copy operations and the @is_monitor@ trait provides no \emph{explicit} copy operations, so monitors must be passed by reference (pointer).
    1676 % Copying a lock is insecure because it is possible to copy an open lock and then use the open copy when the original lock is closed to simultaneously access the shared data.
    1677 % Copying a monitor is secure because both the lock and shared data are copies, but copying the shared data is meaningless because it no longer represents a unique entity.
     1678 Similarly, the function definitions ensure there is a mechanism to get (read) the currently executing monitor handle, and a special destructor to prevent deallocation if a thread is using the shared data.
     1679 The custom monitor type also inserts any locking vehicles needed to implement the monitor locking semantics.
    16801582
    16811583
     
    16831585\label{s:MutexAcquisition}
    16841586
    1685 While the monitor lock provides mutual exclusion for shared data, there are implementation options for when and where the locking/unlocking occurs.
     1587While correctness implies a monitor's mutual exclusion is acquired and released, there are implementation options about when and where the locking/unlocking occurs.
    16861588(Much of this discussion also applies to basic locks.)
    1687 For example, a monitor may be passed through multiple helper functions before it is necessary to acquire the monitor's mutual exclusion.
     1589For example, a monitor may need to be passed through multiple helper routines before it becomes necessary to acquire the monitor mutual-exclusion.
     1590\begin{cfa}[morekeywords=nomutex]
     1591monitor Aint { int cnt; };                                      $\C{// atomic integer counter}$
     1592void ?{}( Aint & `nomutex` this ) with( this ) { cnt = 0; } $\C{// constructor}$
     1593int ?=?( Aint & `mutex`$\(_{opt}\)$ lhs, int rhs ) with( lhs ) { cnt = rhs; } $\C{// conversions}$
     1594void ?{}( int & this, Aint & `mutex`$\(_{opt}\)$ v ) { this = v.cnt; }
     1595int ?=?( int & lhs, Aint & `mutex`$\(_{opt}\)$ rhs ) with( rhs ) { lhs = cnt; }
     1596int ++?( Aint & `mutex`$\(_{opt}\)$ this ) with( this ) { return ++cnt; } $\C{// increment}$
     1597\end{cfa}
     1598The @Aint@ constructor, @?{}@, uses the \lstinline[morekeywords=nomutex]@nomutex@ qualifier indicating mutual exclusion is unnecessary during construction because an object is inaccessible (private) until after it is initialized.
     1599(While a constructor may publish its address into a global variable, doing so generates a race-condition.)
     1600The conversion operators for initializing and assigning with a normal integer only need @mutex@, if reading/writing the implementation type is not atomic.
      1601Finally, the prefix increment operator, @++?@, is normally @mutex@ to protect the incrementing from race conditions, unless there is an atomic increment instruction for the implementation type.
     1602
     1603The atomic counter is used without any explicit mutual-exclusion and provides thread-safe semantics, which is similar to the \CC template @std::atomic@.
     1604\begin{cfa}
     1605Aint x, y, z;
     1606++x; ++y; ++z;                                                          $\C{// safe increment by multiple threads}$
     1607x = 2; y = 2; z = 2;                                            $\C{// conversions}$
     1608int i = x, j = y, k = z;
     1609i = x; j = y; k = z;
     1610\end{cfa}
     1611
     1612For maximum usability, monitors have \newterm{multi-acquire} semantics allowing a thread to acquire it multiple times without deadlock.
     1613\begin{cfa}
     1614monitor M { ... } m;
     1615void foo( M & mutex m ) { ... }                         $\C{// acquire mutual exclusion}$
     1616void bar( M & mutex m ) {                                       $\C{// acquire mutual exclusion}$
     1617        ... `foo( m );` ...                                             $\C{// reacquire mutual exclusion}$
     1618}
     1619`bar( m );`                                                                     $\C{// nested monitor call}$
     1620\end{cfa}
    16881621
    16891622The benefit of mandatory monitor qualifiers is self-documentation, but requiring both @mutex@ and \lstinline[morekeywords=nomutex]@nomutex@ for all monitor parameters is redundant.
     
    16961629
    16971630The next semantic decision is establishing which parameter \emph{types} may be qualified with @mutex@.
    1698 The following has monitor parameter types that are composed of multiple objects.
     1631Given:
    16991632\begin{cfa}
    17001633monitor M { ... }
    1701 int f1( M & mutex m ); $\C{// single parameter object}$
    1702 int f2( M * mutex m ); $\C{// single or multiple parameter object}$
    1703 int f3( M * mutex m[$\,$] ); $\C{// multiple parameter object}$
    1704 int f4( stack( M * ) & mutex m ); $\C{// multiple parameters object}$
    1705 \end{cfa}
    1706 Function @f1@ has a single parameter object, while @f2@'s indirection could be a single or multi-element array, where static array size is often unknown in C.
    1707 Function @f3@ has a multiple object matrix, and @f4@ a multiple object data structure.
    1708 While shown shortly, multiple object acquisition is possible, but the number of objects must be statically known.
    1709 Therefore, \CFA only acquires one monitor per parameter with at most one level of indirection, excluding pointers as it is impossible to statically determine the size.
    1710 
    1711 For object-oriented monitors, \eg Java, calling a mutex member \emph{implicitly} acquires mutual exclusion of the receiver object, @`rec`.foo(...)@.
    1712 \CFA has no receiver, and hence, the explicit @mutex@ qualifier is used to specify which objects acquire mutual exclusion.
    1713 A positive consequence of this design decision is the ability to support multi-monitor functions,\footnote{
    1714 While object-oriented monitors can be extended with a mutex qualifier for multiple-monitor members, no prior example of this feature could be found.}
    1715 called \newterm{bulk acquire} (see Section~\ref{s:Implementation} for implementation details).
    1716 \CFA guarantees acquisition order is consistent across calls to @mutex@ functions using the same monitors as arguments, so acquiring multiple monitors is safe from deadlock.
    1717 Figure~\ref{f:BankTransfer} shows a trivial solution to the bank-account transfer problem using \CFA monitors with implicit locking and \CC with explicit locking.
    1718 A \CFA programmer only has to manage when to acquire mutual exclusion;
    1719 a \CC programmer must select the correct lock and acquisition mechanism from a panoply of locking options.
    1720 Making good choices for common cases in \CFA simplifies the programming experience and enhances safety.
    1721 
    1722 \begin{figure}
    1723 \centering
    1724 \begin{lrbox}{\myboxA}
    1725 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    1726 monitor BankAccount {
    1727 
    1728         int balance;
    1729 } b1 = { 0 }, b2 = { 0 };
    1730 void deposit( BankAccount & `mutex` b,
    1731                         int deposit ) with(b) {
    1732         balance += deposit;
    1733 }
    1734 void transfer( BankAccount & `mutex` my,
    1735         BankAccount & `mutex` your, int me2you ) {
    1736 
    1737         deposit( my, -me2you ); // debit
    1738         deposit( your, me2you ); // credit
    1739 }
    1740 `thread` Person { BankAccount & b1, & b2; };
    1741 void main( Person & person ) with(person) {
    1742         for ( 10_000_000 ) {
    1743                 if ( random() % 3 ) deposit( b1, 3 );
    1744                 if ( random() % 3 ) transfer( b1, b2, 7 );
    1745         }
    1746 }   
    1747 int main() {
    1748         `Person p1 = { b1, b2 }, p2 = { b2, b1 };`
    1749 
    1750 } // wait for threads to complete
    1751 \end{cfa}
    1752 \end{lrbox}
    1753 
    1754 \begin{lrbox}{\myboxB}
    1755 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    1756 struct BankAccount {
    1757         `recursive_mutex m;`
    1758         int balance = 0;
    1759 } b1, b2;
    1760 void deposit( BankAccount & b, int deposit ) {
    1761         `scoped_lock lock( b.m );`
    1762         b.balance += deposit;
    1763 }
    1764 void transfer( BankAccount & my,
    1765                         BankAccount & your, int me2you ) {
    1766         `scoped_lock lock( my.m, your.m );`
    1767         deposit( my, -me2you ); // debit
    1768         deposit( your, me2you ); // credit
    1769 }
    1770 
    1771 void person( BankAccount & b1, BankAccount & b2 ) {
    1772         for ( int i = 0; i < 10$'$000$'$000; i += 1 ) {
    1773                 if ( random() % 3 ) deposit( b1, 3 );
    1774                 if ( random() % 3 ) transfer( b1, b2, 7 );
    1775         }
    1776 }   
    1777 int main() {
    1778         `thread p1(person, ref(b1), ref(b2)), p2(person, ref(b2), ref(b1));`
    1779         `p1.join(); p2.join();`
    1780 }
    1781 \end{cfa}
    1782 \end{lrbox}
    1783 
    1784 \subfloat[\CFA]{\label{f:CFABank}\usebox\myboxA}
    1785 \hspace{3pt}
    1786 \vrule
    1787 \hspace{3pt}
    1788 \subfloat[\CC]{\label{f:C++Bank}\usebox\myboxB}
    1789 \hspace{3pt}
    1790 \caption{Bank transfer problem}
    1791 \label{f:BankTransfer}
    1792 \end{figure}
    1793 
    1794 Users can force the acquiring order, by using @mutex@/\lstinline[morekeywords=nomutex]@nomutex@.
    1795 \begin{cfa}
    1796 void foo( M & mutex m1, M & mutex m2 ); $\C{// acquire m1 and m2}$
     1634int f1( M & mutex m );
     1635int f2( M * mutex m );
     1636int f3( M * mutex m[] );
     1637int f4( stack( M * ) & mutex m );
     1638\end{cfa}
     1639the issue is that some of these parameter types are composed of multiple objects.
     1640For @f1@, there is only a single parameter object.
     1641Adding indirection in @f2@ still identifies a single object.
     1642However, the matrix in @f3@ introduces multiple objects.
     1643While shown shortly, multiple acquisition is possible;
     1644however array lengths are often unknown in C.
     1645This issue is exacerbated in @f4@, where the data structure must be safely traversed to acquire all of its elements.
     1646
     1647To make the issue tractable, \CFA only acquires one monitor per parameter with at most one level of indirection.
      1648However, there is an ambiguity in the C type-system with respect to arrays.
     1649Is the argument for @f2@ a single object or an array of objects?
     1650If it is an array, only the first element of the array is acquired, which seems unsafe;
     1651hence, @mutex@ is disallowed for array parameters.
     1652\begin{cfa}
     1653int f1( M & mutex m );                                          $\C{// allowed: recommended case}$
     1654int f2( M * mutex m );                                          $\C{// disallowed: could be an array}$
     1655int f3( M mutex m[$\,$] );                                      $\C{// disallowed: array length unknown}$
     1656int f4( M ** mutex m );                                         $\C{// disallowed: could be an array}$
     1657int f5( M * mutex m[$\,$] );                            $\C{// disallowed: array length unknown}$
     1658\end{cfa}
     1659% Note, not all array routines have distinct types: @f2@ and @f3@ have the same type, as do @f4@ and @f5@.
     1660% However, even if the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic.
     1661
     1662For object-oriented monitors, calling a mutex member \emph{implicitly} acquires mutual exclusion of the receiver object, @`rec`.foo(...)@.
     1663\CFA has no receiver, and hence, must use an explicit mechanism to specify which object acquires mutual exclusion.
     1664A positive consequence of this design decision is the ability to support multi-monitor routines.
     1665\begin{cfa}
     1666int f( M & mutex x, M & mutex y );              $\C{// multiple monitor parameter of any type}$
     1667M m1, m2;
     1668f( m1, m2 );
     1669\end{cfa}
     1670(While object-oriented monitors can be extended with a mutex qualifier for multiple-monitor members, no prior example of this feature could be found.)
     1671In practice, writing multi-locking routines that do not deadlock is tricky.
     1672Having language support for such a feature is therefore a significant asset for \CFA.
     1673
     1674The capability to acquire multiple locks before entering a critical section is called \newterm{bulk acquire} (see Section~\ref{s:Implementation} for implementation details).
     1675In the previous example, \CFA guarantees the order of acquisition is consistent across calls to different routines using the same monitors as arguments.
     1676This consistent ordering means acquiring multiple monitors is safe from deadlock.
     1677However, users can force the acquiring order.
     1678For example, notice the use of @mutex@/\lstinline[morekeywords=nomutex]@nomutex@ and how this affects the acquiring order:
     1679\begin{cfa}
     1680void foo( M & mutex m1, M & mutex m2 );         $\C{// acquire m1 and m2}$
    17971681void bar( M & mutex m1, M & /* nomutex */ m2 ) { $\C{// acquire m1}$
    1798         ... foo( m1, m2 ); ... $\C{// acquire m2}$
     1682        ... foo( m1, m2 ); ...                                  $\C{// acquire m2}$
    17991683}
    18001684void baz( M & /* nomutex */ m1, M & mutex m2 ) { $\C{// acquire m2}$
    1801         ... foo( m1, m2 ); ... $\C{// acquire m1}$
    1802 }
    1803 \end{cfa}
    1804 The bulk-acquire semantics allow @bar@ or @baz@ to acquire a monitor lock and reacquire it in @foo@.
    1805 In the calls to @bar@ and @baz@, the monitors are acquired in opposite order, resulting in deadlock.
    1806 However, this case is the simplest instance of the \emph{nested-monitor problem}~\cite{Lister77}, where monitors are acquired in sequence versus bulk.
    1807 Detecting the nested-monitor problem requires dynamic tracking of monitor calls, and dealing with it requires rollback semantics~\cite{Dice10}.
    1808 \CFA does not deal with this fundamental problem.
     1685        ... foo( m1, m2 ); ...                                  $\C{// acquire m1}$
     1686}
     1687\end{cfa}
     1688The multi-acquire semantics allows @bar@ or @baz@ to acquire a monitor lock and reacquire it in @foo@.
     1689In the calls to @bar@ and @baz@, the monitors are acquired in opposite order.
     1690
     1691However, such use leads to lock acquiring order problems resulting in deadlock~\cite{Lister77}, where detecting it requires dynamic tracking of monitor calls, and dealing with it requires rollback semantics~\cite{Dice10}.
     1692In \CFA, a safety aid is provided by using bulk acquire of all monitors to shared objects, whereas other monitor systems provide no aid.
     1693While \CFA provides only a partial solution, it handles many useful cases, \eg:
     1694\begin{cfa}
     1695monitor BankAccount { ... };
     1696void deposit( BankAccount & `mutex` b, int deposit );
     1697void transfer( BankAccount & `mutex` my, BankAccount & `mutex` your, int me2you ) {
     1698        deposit( my, `-`me2you );                               $\C{// debit}$
     1699        deposit( your, me2you );                                $\C{// credit}$
     1700}
     1701\end{cfa}
     1702This example shows a trivial solution to the bank-account transfer problem.
     1703Without multi- and bulk acquire, the solution to this problem requires careful engineering.
    18091704
    18101705
     
    18121707\label{mutex-stmt}
    18131708
    1814 The monitor call-semantics associate all locking semantics with functions.
     1709The monitor call-semantics associate all locking semantics to routines.
    18151710Like Java, \CFA offers an alternative @mutex@ statement to reduce refactoring and naming.
    18161711\begin{cquote}
    18171712\begin{tabular}{@{}l@{\hspace{3\parindentlnth}}l@{}}
    1818 \multicolumn{1}{c}{\textbf{\lstinline@mutex@ call}} & \multicolumn{1}{c}{\lstinline@mutex@ \textbf{statement}} \\
    18191713\begin{cfa}
    18201714monitor M { ... };
     
    18361730
    18371731\end{cfa}
     1732\\
     1733\multicolumn{1}{c}{\textbf{routine call}} & \multicolumn{1}{c}{\lstinline@mutex@ \textbf{statement}}
    18381734\end{tabular}
    18391735\end{cquote}
    18401736
    18411737
    1842 \subsection{Scheduling}
     1738\section{Scheduling}
    18431739\label{s:Scheduling}
    18441740
    1845 While monitor mutual-exclusion provides safe access to shared data, the monitor data may indicate that a thread accessing it cannot proceed, \eg a bounded buffer may be full/empty so produce/consumer threads must block.
     1741While monitor mutual-exclusion provides safe access to shared data, the monitor data may indicate that a thread accessing it cannot proceed.
     1742For example, Figure~\ref{f:GenericBoundedBuffer} shows a bounded buffer that may be full/empty so produce/consumer threads must block.
    18461743Leaving the monitor and trying again (busy waiting) is impractical for high-level programming.
    18471744Monitors eliminate busy waiting by providing synchronization to schedule threads needing access to the shared data, where threads block versus spinning.
    18481745Synchronization is generally achieved with internal~\cite{Hoare74} or external~\cite[\S~2.9.2]{uC++} scheduling, where \newterm{scheduling} defines which thread acquires the critical section next.
    18491746\newterm{Internal scheduling} is characterized by each thread entering the monitor and making an individual decision about proceeding or blocking, while \newterm{external scheduling} is characterized by an entering thread making a decision about proceeding for itself and on behalf of other threads attempting entry.
    1850 Finally, \CFA monitors do not allow calling threads to barge ahead of signalled threads, which simplifies synchronization among threads in the monitor and increases correctness.
    1851 If barging is allowed, synchronization between a signaller and signallee is difficult, often requiring additional flags and multiple unblock/block cycles.
    1852 In fact, signals-as-hints is completely opposite from that proposed by Hoare in the seminal paper on monitors:
    1853 \begin{cquote}
    1854 However, we decree that a signal operation be followed immediately by resumption of a waiting program, without possibility of an intervening procedure call from yet a third program.
    1855 It is only in this way that a waiting program has an absolute guarantee that it can acquire the resource just released by the signalling program without any danger that a third program will interpose a monitor entry and seize the resource instead.~\cite[p.~550]{Hoare74}
    1856 \end{cquote}
    1857 Furthermore, \CFA concurrency has no spurious wakeup~\cite[\S~9]{Buhr05a}, which eliminates an implicit form of barging.
    1858 Hence, a \CFA @wait@ statement is not enclosed in a @while@ loop that can cause thread starvation.
    18591747
    18601748Figure~\ref{f:BBInt} shows a \CFA generic bounded-buffer with internal scheduling, where producers/consumers enter the monitor, see the buffer is full/empty, and block on an appropriate condition lock, @full@/@empty@.
    1861 The @wait@ function atomically blocks the calling thread and implicitly releases the monitor lock(s) for all monitors in the function's parameter list.
     1749The @wait@ routine atomically blocks the calling thread and implicitly releases the monitor lock(s) for all monitors in the routine's parameter list.
    18621750The appropriate condition lock is signalled to unblock an opposite kind of thread after an element is inserted/removed from the buffer.
    18631751Signalling is unconditional, because signalling an empty condition lock does nothing.
     
    19371825\end{lrbox}
    19381826
    1939 \subfloat[Internal scheduling]{\label{f:BBInt}\usebox\myboxA}
     1827\subfloat[Internal Scheduling]{\label{f:BBInt}\usebox\myboxA}
    19401828%\qquad
    1941 \subfloat[External scheduling]{\label{f:BBExt}\usebox\myboxB}
    1942 \caption{Generic bounded-buffer}
     1829\subfloat[External Scheduling]{\label{f:BBExt}\usebox\myboxB}
     1830\caption{Generic Bounded-Buffer}
    19431831\label{f:GenericBoundedBuffer}
    19441832\end{figure}
    19451833
    1946 Figure~\ref{f:BBExt} shows a \CFA generic bounded-buffer with external scheduling, where producers/consumers detecting a full/\-empty buffer block and prevent more producers/consumers from entering the monitor until there is a free/empty slot in the buffer.
    1947 External scheduling is controlled by the @waitfor@ statement, which atomically blocks the calling thread, releases the monitor lock, and restricts the function calls that can next acquire mutual exclusion.
     1834Figure~\ref{f:BBExt} shows a \CFA generic bounded-buffer with external scheduling, where producers/consumers detecting a full/empty buffer block and prevent more producers/consumers from entering the monitor until there is a free/empty slot in the buffer.
     1835External scheduling is controlled by the @waitfor@ statement, which atomically blocks the calling thread, releases the monitor lock, and restricts the routine calls that can next acquire mutual exclusion.
    19481836If the buffer is full, only calls to @remove@ can acquire the buffer, and if the buffer is empty, only calls to @insert@ can acquire the buffer.
    1949 Threads making calls to functions that are currently excluded, block outside of (external to) the monitor on a calling queue, versus blocking on condition queues inside of (internal to) the monitor.
     1837Threads making calls to routines that are currently excluded, block outside of (external to) the monitor on a calling queue, versus blocking on condition queues inside of (internal to) the monitor.
    19501838External scheduling allows users to wait for events from other threads without concern of unrelated events occurring.
     1951 The mechanism can be done in terms of control flow, \eg Ada @accept@ or \uC @_Accept@, or in terms of data, \eg Go @select@ on channels.
      1839The mechanism can be done in terms of control flow, \eg Ada @accept@ or \uC @_Accept@, or in terms of data, \eg Go channels.
    19521840While both mechanisms have strengths and weaknesses, this project uses a control-flow mechanism to stay consistent with other language semantics.
    1953 Two challenges specific to \CFA for external scheduling are loose object-definitions (see Section~\ref{s:LooseObjectDefinitions}) and multiple-monitor functions (see Section~\ref{s:Multi-MonitorScheduling}).
     1841Two challenges specific to \CFA for external scheduling are loose object-definitions (see Section~\ref{s:LooseObjectDefinitions}) and multiple-monitor routines (see Section~\ref{s:Multi-MonitorScheduling}).
    19541842
    19551843For internal scheduling, non-blocking signalling (as in the producer/consumer example) is used when the signaller is providing the cooperation for a waiting thread;
     
    19661854For signal scheduling, the @exchange@ condition is necessary to block the thread finding the match, while the matcher unblocks to take the opposite number, post its phone number, and unblock the partner.
19671855For signal-block scheduling, the implicit urgent-queue replaces the explicit @exchange@-condition and @signal_block@ puts the finding thread on the urgent condition and unblocks the matcher.
     1856
    19681857The dating service is an example of a monitor that cannot be written using external scheduling because it requires knowledge of calling parameters to make scheduling decisions, and parameters of waiting threads are unavailable;
    19691858as well, an arriving thread may not find a partner and must wait, which requires a condition variable, and condition variables imply internal scheduling.
    1970 Furthermore, barging corrupts the dating service during an exchange because a barger may also match and change the phone numbers, invalidating the previous exchange phone number.
    1971 Putting loops around the @wait@s does not correct the problem;
    1972 the solution must be restructured to account for barging.
    19731859
    19741860\begin{figure}
     
    20311917\qquad
    20321918\subfloat[\lstinline@signal_block@]{\label{f:DatingSignalBlock}\usebox\myboxB}
    2033 \caption{Dating service}
     1919\caption{Dating service. }
    20341920\label{f:DatingService}
    20351921\end{figure}
     
    20601946To override the implicit multi-monitor wait, specific mutex parameter(s) can be specified, \eg @wait( e, m1 )@.
    20611947Wait statically verifies the released monitors are the acquired mutex-parameters so unconditional release is safe.
    2062 While \CC supports bulk locking, @wait@ only accepts a single lock for a condition variable, so bulk locking with condition variables is asymmetric.
    20631948Finally, a signaller,
    20641949\begin{cfa}
     
    20711956Similarly, for @waitfor( rtn )@, the default semantics is to atomically block the acceptor and release all acquired mutex types in the parameter list, \ie @waitfor( rtn, m1, m2 )@.
    20721957To override the implicit multi-monitor wait, specific mutex parameter(s) can be specified, \eg @waitfor( rtn, m1 )@.
    2073 @waitfor@ statically verifies the released monitors are the same as the acquired mutex-parameters of the given function or function pointer.
    2074 To statically verify the released monitors match with the accepted function's mutex parameters, the function (pointer) prototype must be accessible.
    2075 % When an overloaded function appears in an @waitfor@ statement, calls to any function with that name are accepted.
     1958@waitfor@ statically verifies the released monitors are the same as the acquired mutex-parameters of the given routine or routine pointer.
     1959To statically verify the released monitors match with the accepted routine's mutex parameters, the routine (pointer) prototype must be accessible.
     1960% When an overloaded routine appears in an @waitfor@ statement, calls to any routine with that name are accepted.
    20761961% The rationale is that members with the same name should perform a similar function, and therefore, all should be eligible to accept a call.
    2077 Overloaded functions can be disambiguated using a cast
     1962Overloaded routines can be disambiguated using a cast:
    20781963\begin{cfa}
    20791964void rtn( M & mutex m );
     
    20891974        ... signal( `e` ); ...
    20901975\end{cfa}
    2091 The @wait@ only releases @m1@ so the signalling thread cannot acquire @m1@ and @m2@ to enter @bar@ and @signal@ the condition.
    2092 While deadlock can occur with multiple/nesting acquisition, this is a consequence of locks, and by extension monitors, not being perfectly composable.
    2093 
    2094 % Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency.
 1976The @wait@ only releases @m1@ so the signalling thread cannot acquire both @m1@ and @m2@ to enter @bar@ to get to the @signal@.
     1977While deadlock issues can occur with multiple/nesting acquisition, this issue results from the fact that locks, and by extension monitors, are not perfectly composable.
     1978
     1979Finally, an important aspect of monitor implementation is barging, \ie can calling threads barge ahead of signalled threads?
     1980If barging is allowed, synchronization between a signaller and signallee is difficult, often requiring multiple unblock/block cycles (looping around a wait rechecking if a condition is met).
     1981In fact, signals-as-hints is completely opposite from that proposed by Hoare in the seminal paper on monitors:
     1982\begin{quote}
     1983However, we decree that a signal operation be followed immediately by resumption of a waiting program, without possibility of an intervening procedure call from yet a third program.
     1984It is only in this way that a waiting program has an absolute guarantee that it can acquire the resource just released by the signalling program without any danger that a third program will interpose a monitor entry and seize the resource instead.~\cite[p.~550]{Hoare74}
     1985\end{quote}
     1986\CFA scheduling \emph{precludes} barging, which simplifies synchronization among threads in the monitor and increases correctness.
 1987Furthermore, \CFA concurrency has no spurious wakeup~\cite[\S~9]{Buhr05a}, which eliminates an implicit form of barging.
     1988For example, there are no loops in either bounded buffer solution in Figure~\ref{f:GenericBoundedBuffer}.
     1989Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency.
    20951990
    20961991
     
    21542049\begin{cquote}
    21552050\subfloat[Signalling Thread]{\label{f:SignallingThread}\usebox\myboxA}
    2156 \hspace{3\parindentlnth}
     2051\hspace{2\parindentlnth}
    21572052\subfloat[Waiting Thread (W1)]{\label{f:WaitingThread}\usebox\myboxB}
    21582053\hspace{2\parindentlnth}
     
    21812076In an object-oriented programming-language, a class includes an exhaustive list of operations.
    21822077However, new members can be added via static inheritance or dynamic members, \eg JavaScript~\cite{JavaScript}.
    2183 Similarly, monitor functions can be added at any time in \CFA, making it less clear for programmers and more difficult to implement.
     2078Similarly, monitor routines can be added at any time in \CFA, making it less clear for programmers and more difficult to implement.
    21842079\begin{cfa}
    21852080monitor M { ... };
    21862081void `f`( M & mutex m );
    2187 void g( M & mutex m ) { waitfor( `f` ); } $\C{// clear which f}$
    2188 void `f`( M & mutex m, int ); $\C{// different f}$
    2189 void h( M & mutex m ) { waitfor( `f` ); } $\C{// unclear which f}$
    2190 \end{cfa}
    2191 Hence, the \CFA code for entering a monitor looks like:
    2192 \begin{cfa}
    2193 if ( $\textrm{\textit{monitor is free}}$ ) $\C{// enter}$
    2194 else if ( $\textrm{\textit{already own monitor}}$ ) $\C{// continue}$
    2195 else if ( $\textrm{\textit{monitor accepts me}}$ ) $\C{// enter}$
    2196 else $\C{// block}$
     2082void g( M & mutex m ) { waitfor( `f` ); }       $\C{// clear which f}$
     2083void `f`( M & mutex m, int );                           $\C{// different f}$
     2084void h( M & mutex m ) { waitfor( `f` ); }       $\C{// unclear which f}$
     2085\end{cfa}
     2086Hence, the cfa-code for entering a monitor looks like:
     2087\begin{cfa}
     2088if ( $\textrm{\textit{monitor is free}}$ ) $\LstCommentStyle{// \color{red}enter}$
     2089else if ( $\textrm{\textit{already own monitor}}$ ) $\LstCommentStyle{// \color{red}continue}$
     2090else if ( $\textrm{\textit{monitor accepts me}}$ ) $\LstCommentStyle{// \color{red}enter}$
     2091else $\LstCommentStyle{// \color{red}block}$
    21972092\end{cfa}
    21982093For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions.
     
    22112106{\resizebox{0.45\textwidth}{!}{\input{ext_monitor.pstex_t}}}
    22122107}% subfloat
    2213 \caption{Monitor implementation}
     2108\caption{Monitor Implementation}
    22142109\label{f:MonitorImplementation}
    22152110\end{figure}
    22162111
    2217 For a fixed (small) number of mutex functions (\eg 128), the accept check reduces to a bitmask of allowed callers, which can be checked with a single instruction.
    2218 This approach requires a unique dense ordering of functions with a small upper-bound and the ordering must be consistent across translation units.
2219 For object-oriented languages these constraints are common, but \CFA mutex functions can be added in any scope and are only visible in certain translation units, precluding program-wide dense-ordering among mutex functions.
     2112For a fixed (small) number of mutex routines (\eg 128), the accept check reduces to a bitmask of allowed callers, which can be checked with a single instruction.
     2113This approach requires a unique dense ordering of routines with a small upper-bound and the ordering must be consistent across translation units.
 2114For object-oriented languages these constraints are common, but \CFA mutex routines can be added in any scope and are only visible in certain translation units, precluding program-wide dense-ordering among mutex routines.
    22202115
    22212116Figure~\ref{fig:BulkMonitor} shows the \CFA monitor implementation.
    2222 The mutex function called is associated with each thread on the entry queue, while a list of acceptable functions is kept separately.
    2223 The accepted list is a variable-sized array of accepted function pointers, so the single instruction bitmask comparison is replaced by dereferencing a pointer followed by a (usually short) linear search.
     2117The mutex routine called is associated with each thread on the entry queue, while a list of acceptable routines is kept separately.
     2118The accepted list is a variable-sized array of accepted routine pointers, so the single instruction bitmask comparison is replaced by dereferencing a pointer followed by a (usually short) linear search.
    22242119
    22252120
     
    22292124External scheduling, like internal scheduling, becomes significantly more complex for multi-monitor semantics.
    22302125Even in the simplest case, new semantics needs to be established.
     2126\newpage
    22312127\begin{cfa}
    22322128monitor M { ... };
    22332129void f( M & mutex m1 );
    2234 void g( M & mutex m1, M & mutex m2 ) { `waitfor( f );` } $\C{// pass m1 or m2 to f?}$
     2130void g( M & mutex m1, M & mutex m2 ) {
     2131        waitfor( f );                                                   $\C{\color{red}// pass m1 or m2 to f?}$
     2132}
    22352133\end{cfa}
    22362134The solution is for the programmer to disambiguate:
    22372135\begin{cfa}
    2238 waitfor( f, `m2` ); $\C{// wait for call to f with argument m2}$
    2239 \end{cfa}
    2240 Both locks are acquired by function @g@, so when function @f@ is called, the lock for monitor @m2@ is passed from @g@ to @f@, while @g@ still holds lock @m1@.
     2136        waitfor( f, m2 );                                               $\C{\color{red}// wait for call to f with argument m2}$
     2137\end{cfa}
     2138Both locks are acquired by routine @g@, so when routine @f@ is called, the lock for monitor @m2@ is passed from @g@ to @f@, while @g@ still holds lock @m1@.
    22412139This behaviour can be extended to the multi-monitor @waitfor@ statement.
    22422140\begin{cfa}
    22432141monitor M { ... };
    22442142void f( M & mutex m1, M & mutex m2 );
    2245 void g( M & mutex m1, M & mutex m2 ) { waitfor( f, `m1, m2` ); $\C{// wait for call to f with arguments m1 and m2}$
    2246 \end{cfa}
    2247 Again, the set of monitors passed to the @waitfor@ statement must be entirely contained in the set of monitors already acquired by the accepting function.
     2143void g( M & mutex m1, M & mutex m2 ) {
     2144        waitfor( f, m1, m2 );                                   $\C{\color{red}// wait for call to f with arguments m1 and m2}$
     2145}
     2146\end{cfa}
     2147Again, the set of monitors passed to the @waitfor@ statement must be entirely contained in the set of monitors already acquired by the accepting routine.
    22482148Also, the order of the monitors in a @waitfor@ statement is unimportant.
    22492149
     
    22522152
    22532153\begin{figure}
    2254 \centering
    2255 \begin{lrbox}{\myboxA}
    2256 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
     2154\lstDeleteShortInline@%
     2155\begin{tabular}{@{}l@{\hspace{\parindentlnth}}|@{\hspace{\parindentlnth}}l@{}}
     2156\begin{cfa}
    22572157monitor M1 {} m11, m12;
    22582158monitor M2 {} m2;
     
    22672167f( `m12`, m2 ); // cannot fulfil
    22682168\end{cfa}
    2269 \end{lrbox}
    2270 
    2271 \begin{lrbox}{\myboxB}
    2272 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
     2169&
     2170\begin{cfa}
    22732171monitor M1 {} m11, m12;
    22742172monitor M2 {} m2;
     
    22832181f( `m12`, m2 ); // cannot fulfil
    22842182\end{cfa}
    2285 \end{lrbox}
    2286 \subfloat[Internal scheduling]{\label{f:InternalScheduling}\usebox\myboxA}
    2287 \hspace{3pt}
    2288 \vrule
    2289 \hspace{3pt}
    2290 \subfloat[External scheduling]{\label{f:ExternalScheduling}\usebox\myboxB}
     2183\end{tabular}
     2184\lstMakeShortInline@%
    22912185\caption{Unmatched \protect\lstinline@mutex@ sets}
    22922186\label{f:UnmatchedMutexSets}
     
    22962190\subsection{Extended \protect\lstinline@waitfor@}
    22972191
2298 Figure~\ref{f:ExtendedWaitfor} shows the extended form of the @waitfor@ statement to conditionally accept one of a group of mutex functions, with a specific action to be performed \emph{after} the mutex function finishes.
 2192Figure~\ref{f:ExtendedWaitfor} shows the extended form of the @waitfor@ statement to conditionally accept one of a group of mutex routines, with a specific action to be performed \emph{after} the mutex routine finishes.
    22992193For a @waitfor@ clause to be executed, its @when@ must be true and an outstanding call to its corresponding member(s) must exist.
    2300 The \emph{conditional-expression} of a @when@ may call a function, but the function must not block or context switch.
    2301 If there are multiple acceptable mutex calls, selection occurs top-to-bottom (prioritized) in the @waitfor@ clauses, whereas some programming languages with similar mechanisms accept nondeterministically for this case, \eg Go \lstinline[morekeywords=select]@select@.
     2194The \emph{conditional-expression} of a @when@ may call a routine, but the routine must not block or context switch.
     2195If there are multiple acceptable mutex calls, selection occurs top-to-bottom (prioritized) in the @waitfor@ clauses, whereas some programming languages with similar mechanisms accept non-deterministically for this case, \eg Go \lstinline[morekeywords=select]@select@.
    23022196If some accept guards are true and there are no outstanding calls to these members, the acceptor is accept-blocked until a call to one of these members is made.
    23032197If all the accept guards are false, the statement does nothing, unless there is a terminating @else@ clause with a true guard, which is executed instead.
     
    23082202
    23092203\begin{figure}
    2310 \centering
    23112204\begin{cfa}
    23122205`when` ( $\emph{conditional-expression}$ )      $\C{// optional guard}$
     
    23332226else if ( C2 ) waitfor( mem2 );         or when ( C2 ) waitfor( mem2 );
    23342227\end{cfa}
    2335 The left example only accepts @mem1@ if @C1@ is true or only @mem2@ if @C2@ is true.
     2228The left example accepts only @mem1@ if @C1@ is true or only @mem2@ if @C2@ is true.
    23362229The right example accepts either @mem1@ or @mem2@ if @C1@ and @C2@ are true.
    23372230
     
    23532246\subsection{\protect\lstinline@mutex@ Threads}
    23542247
    2355 Threads in \CFA can also be monitors to allow \emph{direct communication} among threads, \ie threads can have mutex functions that are called by other threads.
     2248Threads in \CFA are monitors to allow direct communication among threads, \ie threads can have mutex routines that are called by other threads.
    23562249Hence, all monitor features are available when using threads.
    2357 Figure~\ref{f:DirectCommunication} shows a comparison of direct call communication in \CFA with direct channel communication in Go.
    2358 (Ada provides a similar mechanism to the \CFA direct communication.)
    2359 The program main in both programs communicates directly with the other thread versus indirect communication where two threads interact through a passive monitor.
2360 Both direct and indirect thread communication are valuable tools in structuring concurrent programs.
    2361 
    2362 \begin{figure}
    2363 \centering
    2364 \begin{lrbox}{\myboxA}
    2365 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    2366 
    2367 struct Msg { int i, j; };
    2368 thread Gortn { int i;  float f;  Msg m; };
    2369 void mem1( Gortn & mutex gortn, int i ) { gortn.i = i; }
    2370 void mem2( Gortn & mutex gortn, float f ) { gortn.f = f; }
    2371 void mem3( Gortn & mutex gortn, Msg m ) { gortn.m = m; }
    2372 void ^?{}( Gortn & mutex ) {}
    2373 
    2374 void main( Gortn & gortn ) with( gortn ) {  // thread starts
    2375 
    2376         for () {
    2377 
    2378                 `waitfor( mem1, gortn )` sout | i;  // wait for calls
    2379                 or `waitfor( mem2, gortn )` sout | f;
    2380                 or `waitfor( mem3, gortn )` sout | m.i | m.j;
    2381                 or `waitfor( ^?{}, gortn )` break;
    2382 
    2383         }
    2384 
    2385 }
    2386 int main() {
    2387         Gortn gortn; $\C[2.0in]{// start thread}$
    2388         `mem1( gortn, 0 );` $\C{// different calls}\CRT$
    2389         `mem2( gortn, 2.5 );`
    2390         `mem3( gortn, (Msg){1, 2} );`
    2391 
    2392 
    2393 } // wait for completion
    2394 \end{cfa}
    2395 \end{lrbox}
    2396 
    2397 \begin{lrbox}{\myboxB}
    2398 \begin{Go}[aboveskip=0pt,belowskip=0pt]
    2399 func main() {
    2400         type Msg struct{ i, j int }
    2401 
    2402         ch1 := make( chan int )
    2403         ch2 := make( chan float32 )
    2404         ch3 := make( chan Msg )
    2405         hand := make( chan string )
    2406         shake := make( chan string )
    2407         gortn := func() { $\C[1.5in]{// thread starts}$
    2408                 var i int;  var f float32;  var m Msg
    2409                 L: for {
    2410                         select { $\C{// wait for messages}$
    2411                           case `i = <- ch1`: fmt.Println( i )
    2412                           case `f = <- ch2`: fmt.Println( f )
    2413                           case `m = <- ch3`: fmt.Println( m )
    2414                           case `<- hand`: break L $\C{// sentinel}$
    2415                         }
    2416                 }
    2417                 `shake <- "SHAKE"` $\C{// completion}$
    2418         }
    2419 
    2420         go gortn() $\C{// start thread}$
    2421         `ch1 <- 0` $\C{// different messages}$
    2422         `ch2 <- 2.5`
    2423         `ch3 <- Msg{1, 2}`
    2424         `hand <- "HAND"` $\C{// sentinel value}$
    2425         `<- shake` $\C{// wait for completion}\CRT$
    2426 }
    2427 \end{Go}
    2428 \end{lrbox}
    2429 
    2430 \subfloat[\CFA]{\label{f:CFAwaitfor}\usebox\myboxA}
    2431 \hspace{3pt}
    2432 \vrule
    2433 \hspace{3pt}
    2434 \subfloat[Go]{\label{f:Gochannel}\usebox\myboxB}
    2435 \caption{Direct communication}
    2436 \label{f:DirectCommunication}
    2437 \end{figure}
    2438 
    2439 \begin{comment}
    24402250The following shows an example of two threads directly calling each other and accepting calls from each other in a cycle.
    24412251\begin{cfa}
     2252thread Ping {} pi;
     2253thread Pong {} po;
     2254void ping( Ping & mutex ) {}
     2255void pong( Pong & mutex ) {}
     2256int main() {}
    24422257\end{cfa}
    24432258\vspace{-0.8\baselineskip}
     
    24452260\begin{tabular}{@{}l@{\hspace{3\parindentlnth}}l@{}}
    24462261\begin{cfa}
    2447 thread Ping {} pi;
    2448 void ping( Ping & mutex ) {}
    24492262void main( Ping & pi ) {
    2450         for ( 10 ) {
     2263        for ( int i = 0; i < 10; i += 1 ) {
    24512264                `waitfor( ping, pi );`
    24522265                `pong( po );`
    24532266        }
    24542267}
    2455 int main() {}
    24562268\end{cfa}
    24572269&
    24582270\begin{cfa}
    2459 thread Pong {} po;
    2460 void pong( Pong & mutex ) {}
    24612271void main( Pong & po ) {
    2462         for ( 10 ) {
     2272        for ( int i = 0; i < 10; i += 1 ) {
    24632273                `ping( pi );`
    24642274                `waitfor( pong, po );`
    24652275        }
    24662276}
    2467 
    24682277\end{cfa}
    24692278\end{tabular}
     
    24742283% \end{figure}
    24752284Note, the ping/pong threads are globally declared, @pi@/@po@, and hence, start (and possibly complete) before the program main starts.
    2476 \end{comment}
    2477 
    2478 
    2479 \subsection{Execution Properties}
    2480 
    2481 Table~\ref{t:ObjectPropertyComposition} shows how the \CFA high-level constructs cover 3 fundamental execution properties: thread, stateful function, and mutual exclusion.
    2482 Case 1 is a basic object, with none of the new execution properties.
    2483 Case 2 allows @mutex@ calls to Case 1 to protect shared data.
    2484 Case 3 allows stateful functions to suspend/resume but restricts operations because the state is stackless.
    2485 Case 4 allows @mutex@ calls to Case 3 to protect shared data.
    2486 Cases 5 and 6 are the same as 3 and 4 without restriction because the state is stackful.
    2487 Cases 7 and 8 are rejected because a thread cannot execute without a stackful state in a preemptive environment when context switching from the signal handler.
    2488 Cases 9 and 10 have a stackful thread without and with @mutex@ calls.
    2489 For situations where threads do not require direct communication, case 9 provides faster creation/destruction by eliminating @mutex@ setup.
    2490 
    2491 \begin{table}[b]
    2492 \caption{Object property composition}
    2493 \centering
    2494 \label{t:ObjectPropertyComposition}
    2495 \renewcommand{\arraystretch}{1.25}
    2496 %\setlength{\tabcolsep}{5pt}
    2497 \begin{tabular}{c|c|l|l}
    2498 \multicolumn{2}{c|}{object properties} & \multicolumn{2}{c}{mutual exclusion} \\
    2499 \hline
    2500 thread  & stateful                              & \multicolumn{1}{c|}{No} & \multicolumn{1}{c}{Yes} \\
    2501 \hline
    2502 \hline
    2503 No              & No                                    & \textbf{1}\ \ \ aggregate type                & \textbf{2}\ \ \ @monitor@ aggregate type \\
    2504 \hline
    2505 No              & Yes (stackless)               & \textbf{3}\ \ \ @generator@                   & \textbf{4}\ \ \ @monitor@ generator \\
    2506 \hline
    2507 No              & Yes (stackful)                & \textbf{5}\ \ \ @coroutine@                   & \textbf{6}\ \ \ @monitor@ @coroutine@ \\
    2508 \hline
    2509 Yes             & No / Yes (stackless)  & \textbf{7}\ \ \ {\color{red}rejected} & \textbf{8}\ \ \ {\color{red}rejected} \\
    2510 \hline
    2511 Yes             & Yes (stackful)                & \textbf{9}\ \ \ @thread@                              & \textbf{10}\ \ @monitor@ @thread@ \\
    2512 \end{tabular}
    2513 \end{table}
    25142285
    25152286
     
    25172288
    25182289For completeness and efficiency, \CFA provides a standard set of low-level locks: recursive mutex, condition, semaphore, barrier, \etc, and atomic instructions: @fetchAssign@, @fetchAdd@, @testSet@, @compareSet@, \etc.
    2519 However, we strongly advocate using high-level concurrency mechanisms whenever possible.
    25202290
    25212291
    25222292\section{Parallelism}
    2523 \label{s:Parallelism}
    25242293
    25252294Historically, computer performance was about processor speeds.
    25262295However, with heat dissipation being a direct consequence of speed increase, parallelism is the new source for increased performance~\cite{Sutter05, Sutter05b}.
    2527 Therefore, high-performance applications must care about parallelism, which requires concurrency.
     2296Now, high-performance applications must care about parallelism, which requires concurrency.
    25282297The lowest-level approach of parallelism is to use \newterm{kernel threads} in combination with semantics like @fork@, @join@, \etc.
    25292298However, kernel threads are better as an implementation tool because of complexity and higher cost.
     
    25312300
    25322301
    2533 \subsection{User Threads}
     2302\subsection{User Threads with Preemption}
    25342303
    25352304A direct improvement on kernel threads is user threads, \eg Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
    2536 This approach provides an interface that matches the language paradigms, gives more control over concurrency by the language runtime, and an abstract (and portable) interface to the underlying kernel threads across operating systems.
     2305This approach provides an interface that matches the language paradigms, more control over concurrency by the language runtime, and an abstract (and portable) interface to the underlying kernel threads across operating systems.
    25372306In many cases, user threads can be used on a much larger scale (100,000 threads).
    2538 Like kernel threads, user threads support preemption, which maximizes nondeterminism, but increases the potential for concurrency errors: race, livelock, starvation, and deadlock.
     2307Like kernel threads, user threads support preemption, which maximizes nondeterminism, but introduces the same concurrency errors: race, livelock, starvation, and deadlock.
    25392308\CFA adopts user-threads as they represent the truest realization of concurrency and can build any other concurrency approach, \eg thread pools and actors~\cite{Actors}.
    25402309
    2541 A variant of user thread is \newterm{fibres}, which removes preemption, \eg Go~\cite{Go} @goroutine@s.
     2310
     2311\subsection{User Threads without Preemption (Fiber)}
     2312\label{s:fibers}
     2313
     2314A variant of user thread is \newterm{fibers}, which removes preemption, \eg Go~\cite{Go} @goroutine@s.
    25422315Like functional programming, which removes mutation and its associated problems, removing preemption from concurrency reduces nondeterminism, making race and deadlock errors more difficult to generate.
    2543 However, preemption is necessary for concurrency that relies on spinning, so there are a class of problems that cannot be programmed with fibres.
     2316However, preemption is necessary for concurrency that relies on spinning, so there are a class of problems that cannot be programmed without preemption.
    25442317
    25452318
    25462319\subsection{Thread Pools}
    25472320
    2548 In contrast to direct threading is indirect \newterm{thread pools}, \eg Java @executor@, where small jobs (work units) are inserted into a work pool for execution.
     2321In contrast to direct threading is indirect \newterm{thread pools}, where small jobs (work units) are inserted into a work pool for execution.
    25492322If the jobs are dependent, \ie interact, there is an implicit/explicit dependency graph that ties them together.
    25502323While removing direct concurrency, and hence the amount of context switching, thread pools significantly limit the interaction that can occur among jobs.
     
    25632336\centering
    25642337\input{RunTimeStructure}
    2565 \caption{\CFA Runtime structure}
     2338\caption{\CFA Runtime Structure}
    25662339\label{f:RunTimeStructure}
    25672340\end{figure}
     
    25952368Preemption occurs on virtual processors rather than user threads, via operating-system interrupts.
    25962369Thus virtual processors execute user threads, where preemption frequency applies to a virtual processor, so preemption occurs randomly across the executed user threads.
    2597 Turning off preemption transforms user threads into fibres.
     2370Turning off preemption transforms user threads into fibers.
    25982371
    25992372
     
    26072380\section{Implementation}
    26082381\label{s:Implementation}
     2382
     2383Currently, \CFA has fixed-sized stacks, where the stack size can be set at coroutine/thread creation but with no subsequent growth.
     2384Schemes exist for dynamic stack-growth, such as stack copying and chained stacks.
     2385However, stack copying requires pointer adjustment to items on the stack, which is impossible without some form of garbage collection.
     2386As well, chained stacks require all modules be recompiled to use this feature, which breaks backward compatibility with existing C libraries.
     2387In the long term, it is likely C libraries will migrate to stack chaining to support concurrency, at only a minimal cost to sequential programs.
     2388Nevertheless, experience teaching \uC~\cite{CS343} shows fixed-sized stacks are rarely an issue in most concurrent programs.
    26092389
    26102390A primary implementation challenge is avoiding contention from dynamically allocating memory because of bulk acquire, \eg the internal-scheduling design is (almost) free of allocations.
     
    26172397This array persists for the entire duration of the mutual exclusion and is used extensively for synchronization operations.
    26182398
    2619 To improve performance and simplicity, context switching occurs inside a function call, so only callee-saved registers are copied onto the stack and then the stack register is switched;
     2399To improve performance and simplicity, context switching occurs inside a routine call, so only callee-saved registers are copied onto the stack and then the stack register is switched;
    26202400the corresponding registers are then restored for the other context.
    2621 Note, the instruction pointer is untouched since the context switch is always inside the same function.
    2622 Experimental results (not presented) for a stackless or stackful scheduler (1 versus 2 context switches) (see Section~\ref{s:Concurrency}) show the performance is virtually equivalent, because both approaches are dominated by locking to prevent a race condition.
     2401Note, the instruction pointer is untouched since the context switch is always inside the same routine.
     2402Unlike coroutines, threads do not context switch among each other;
     2403they context switch to the cluster scheduler.
     2404This method is a 2-step context-switch and provides a clear distinction between user and kernel code, where scheduling and other system operations happen.
     2405The alternative 1-step context-switch uses the \emph{from} thread's stack to schedule and then context-switches directly to the \emph{to} thread's stack.
     2406Experimental results (not presented) show the performance of these two approaches is virtually equivalent, because both approaches are dominated by locking to prevent a race condition.
    26232407
26242408All kernel threads (@pthreads@) are created with a stack.
     
    26372421
    26382422However, on current UNIX systems:
    2639 \begin{cquote}
     2423\begin{quote}
    26402424A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked.
    26412425If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which to deliver the signal.
    26422426SIGNAL(7) - Linux Programmer's Manual
    2643 \end{cquote}
     2427\end{quote}
    26442428Hence, the timer-expiry signal, which is generated \emph{externally} by the UNIX kernel to the UNIX process, is delivered to any of its UNIX subprocesses (kernel threads).
    26452429To ensure each virtual processor receives its own preemption signals, a discrete-event simulation is run on a special virtual processor, and only it sets and receives timer events.
     
    26522436\label{results}
    26532437
    2654 To verify the implementation of the \CFA runtime, a series of microbenchmarks are performed comparing \CFA with Java OpenJDK-9, Go 1.9.2 and \uC 7.0.0.
    2655 The benchmark computer is an AMD Opteron\texttrademark\ 6380 NUMA 64-core, 8 socket, 2.5 GHz processor, running Ubuntu 16.04.3 LTS and \uC and \CFA are compiled with gcc 6.3.
    2656 
    2657 \begin{comment}
     2438To verify the implementation of the \CFA runtime, a series of microbenchmarks are performed comparing \CFA with other widely used programming languages with concurrency.
     2439Table~\ref{t:machine} shows the specifications of the computer used to run the benchmarks, and the versions of the software used in the comparison.
     2440
    26582441\begin{table}
    26592442\centering
     
    26862469\end{tabular}
    26872470\end{table}
    2688 \end{comment}
    2689 
    2690 All benchmarks are run using the following harness.
    2691 \newpage
     2471
     2472All benchmarks are run using the following harness:
    26922473\begin{cfa}
    26932474unsigned int N = 10_000_000;
    2694 #define BENCH( `run` ) Time before = getTimeNsec();  `run;` Duration result = (getTimeNsec() - before) / N;
     2475#define BENCH( run ) Time before = getTimeNsec(); run; Duration result = (getTimeNsec() - before) / N;
    26952476\end{cfa}
    26962477The method used to get time is @clock_gettime( CLOCK_REALTIME )@.
     
    27022483\paragraph{Context-Switching}
    27032484
    2704 In procedural programming, the cost of a function call is important as modularization (refactoring) increases.
    2705 (In many cases, a compiler inlines function calls to eliminate this cost.)
     2485In procedural programming, the cost of a routine call is important as modularization (refactoring) increases.
     2486(In many cases, a compiler inlines routine calls to eliminate this cost.)
    27062487Similarly, when modularization extends to coroutines/tasks, the time for a context switch becomes a relevant factor.
    2707 The coroutine test is from resumer to suspender and from suspender to resumer, which is two context switches.
    2708 The thread test is using yield to enter and return from the runtime kernel, which is two context switches.
     2488The coroutine context-switch is 2-step using resume/suspend, \ie from resumer to suspender and from suspender to resumer.
     2489The thread context switch is 2-step using yield, \ie enter and return from the runtime kernel.
     2490Figure~\ref{f:ctx-switch} shows the code for coroutines/threads with all results in Table~\ref{tab:ctx-switch}.
    27092491The difference in performance between coroutine and thread context-switch is the cost of scheduling for threads, whereas coroutines are self-scheduling.
    2710 Figure~\ref{f:ctx-switch} only shows the \CFA code for coroutines/threads (other systems are similar) with all results in Table~\ref{tab:ctx-switch}.
    2711 
    2712 \begin{multicols}{2}
     2492
     2493\begin{figure}
    27132494\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
     2495
     2496\newbox\myboxA
     2497\begin{lrbox}{\myboxA}
    27142498\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    27152499coroutine C {} c;
    27162500void main( C & ) { for ( ;; ) { @suspend();@ } }
    2717 int main() { // coroutine test
    2718         BENCH( for ( N ) { @resume( c );@ } )
     2501int main() {
     2502        BENCH(
     2503                for ( size_t i = 0; i < N; i += 1 ) { @resume( c );@ } )
    27192504        sout | result`ns;
    27202505}
    2721 int main() { // task test
    2722         BENCH( for ( N ) { @yield();@ } )
     2506\end{cfa}
     2507\end{lrbox}
     2508
     2509\newbox\myboxB
     2510\begin{lrbox}{\myboxB}
     2511\begin{cfa}[aboveskip=0pt,belowskip=0pt]
     2512
     2513
     2514int main() {
     2515        BENCH(
     2516                for ( size_t i = 0; i < N; i += 1 ) { @yield();@ } )
    27232517        sout | result`ns;
    27242518}
    27252519\end{cfa}
     2520\end{lrbox}
     2521
     2522\subfloat[Coroutine]{\usebox\myboxA}
     2523\quad
     2524\subfloat[Thread]{\usebox\myboxB}
    27262525\captionof{figure}{\CFA context-switch benchmark}
    27272526\label{f:ctx-switch}
    27282527
    2729 \columnbreak
    2730 
    2731 \vspace*{-16pt}
     2528\centering
     2529
    27322530\captionof{table}{Context switch comparison (nanoseconds)}
    27332531\label{tab:ctx-switch}
    2734 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
    2735 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    2736 Kernel Thread   & 333.5 & 332.96        & 4.1   \\
    2737 \CFA Coroutine  & 49    & 48.68         & 0.47  \\
    2738 \CFA Thread             & 105   & 105.57        & 1.37  \\
    2739 \uC Coroutine   & 44    & 44            & 0             \\
    2740 \uC Thread              & 100   & 99.29         & 0.96  \\
    2741 Goroutine               & 145   & 147.25        & 4.15  \\
    2742 Java Thread             & 373.5 & 375.14        & 8.72
     2532\bigskip
     2533\begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
     2534\cline{2-4}
     2535\multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
     2536\hline
     2537Kernel Thread   & 333.5 & 332.96        & 4.1 \\
     2538\CFA Coroutine  & 49            & 48.68 & 0.47    \\
     2539\CFA Thread             & 105           & 105.57        & 1.37 \\
     2540\uC Coroutine   & 44            & 44            & 0 \\
     2541\uC Thread              & 100           & 99.29 & 0.96 \\
     2542Goroutine               & 145           & 147.25        & 4.15 \\
     2543Java Thread             & 373.5 & 375.14        & 8.72 \\
     2544\hline
    27432545\end{tabular}
    2744 \end{multicols}
    2745 
    2746 
    2747 \paragraph{Mutual-Exclusion}
    2748 
    2749 Mutual exclusion is measured by entering/leaving a critical section.
    2750 For monitors, entering and leaving a monitor function is measured.
    2751 To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a @pthread_mutex@ lock is also measured.
    2752 Figure~\ref{f:mutex} shows the code for \CFA with all results in Table~\ref{tab:mutex}.
    2753 Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    2754 
    2755 \begin{multicols}{2}
     2546
     2547\bigskip
     2548\bigskip
     2549
    27562550\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    27572551\begin{cfa}
    2758 monitor M {} m1/*, m2, m3, m4*/;
    2759 void __attribute__((noinline))
    2760 do_call( M & mutex m/*, m2, m3, m4*/ ) {}
     2552monitor M { ... } m1/*, m2, m3, m4*/;
     2553void __attribute__((noinline)) do_call( M & mutex m/*, m2, m3, m4*/ ) {}
    27612554int main() {
    2762         BENCH(
    2763                 for( N ) @do_call( m1/*, m2, m3, m4*/ );@
    2764         )
     2555        BENCH( for( size_t i = 0; i < N; i += 1 ) { @do_call( m1/*, m2, m3, m4*/ );@ } )
    27652556        sout | result`ns;
    27662557}
     
    27692560\label{f:mutex}
    27702561
    2771 \columnbreak
    2772 
    2773 \vspace*{-16pt}
     2562\centering
     2563
    27742564\captionof{table}{Mutex comparison (nanoseconds)}
    27752565\label{tab:mutex}
    2776 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
    2777 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    2778 C function                                              & 2                     & 2             & 0             \\
    2779 FetchAdd + FetchSub                             & 26            & 26    & 0             \\
    2780 Pthreads Mutex Lock                             & 31            & 31.71 & 0.97  \\
    2781 \uC @monitor@ member rtn.               & 31            & 31    & 0             \\
    2782 \CFA @mutex@ function, 1 arg.   & 46            & 46.68 & 0.93  \\
    2783 \CFA @mutex@ function, 2 arg.   & 84            & 85.36 & 1.99  \\
    2784 \CFA @mutex@ function, 4 arg.   & 158           & 161   & 4.22  \\
    2785 Java synchronized function              & 27.5          & 29.79 & 2.93
     2566\bigskip
     2567
     2568\begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
     2569\cline{2-4}
     2570\multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
     2571\hline
     2572C routine                                       & 2             & 2             & 0    \\
     2573FetchAdd + FetchSub                     & 26            & 26            & 0    \\
     2574Pthreads Mutex Lock                     & 31            & 31.71 & 0.97 \\
     2575\uC @monitor@ member routine            & 31            & 31            & 0    \\
     2576\CFA @mutex@ routine, 1 argument        & 46            & 46.68 & 0.93  \\
     2577\CFA @mutex@ routine, 2 argument        & 84            & 85.36 & 1.99 \\
     2578\CFA @mutex@ routine, 4 argument        & 158           & 161           & 4.22 \\
     2579Java synchronized routine               & 27.5  & 29.79 & 2.93  \\
     2580\hline
    27862581\end{tabular}
    2787 \end{multicols}
     2582\end{figure}
     2583
     2584
     2585\paragraph{Mutual-Exclusion}
     2586
     2587Mutual exclusion is measured by entering/leaving a critical section.
     2588For monitors, entering and leaving a monitor routine is measured.
     2589Figure~\ref{f:mutex} shows the code for \CFA with all results in Table~\ref{tab:mutex}.
     2590To put the results in context, the cost of entering a non-inline routine and the cost of acquiring and releasing a @pthread_mutex@ lock is also measured.
     2591Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    27882592
    27892593
     
    27952599Java scheduling is significantly greater because the benchmark explicitly creates multiple threads in order to prevent the JIT from making the program sequential, \ie removing all locking.
    27962600
    2797 \begin{multicols}{2}
     2601\begin{figure}
    27982602\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    27992603\begin{cfa}
    28002604volatile int go = 0;
    2801 monitor M { condition c; } m;
    2802 void __attribute__((noinline))
    2803 do_call( M & mutex a1 ) { signal( c ); }
     2605condition c;
     2606monitor M { ... } m;
     2607void __attribute__((noinline)) do_call( M & mutex a1 ) { signal( c ); }
    28042608thread T {};
    28052609void main( T & this ) {
    2806         while ( go == 0 ) { yield(); }
     2610        while ( go == 0 ) { yield(); }  // wait for other thread to start
    28072611        while ( go == 1 ) { @do_call( m );@ }
    28082612}
    2809 int  __attribute__((noinline))
    2810 do_wait( M & mutex m ) with(m) {
     2613int  __attribute__((noinline)) do_wait( M & mutex m ) {
    28112614        go = 1; // continue other thread
    2812         BENCH( for ( N ) { @wait( c );@ } );
     2615        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @wait( c );@ } );
    28132616        go = 0; // stop other thread
    28142617        sout | result`ns;
     
    28222625\label{f:int-sched}
    28232626
    2824 \columnbreak
    2825 
    2826 \vspace*{-16pt}
     2627\centering
    28272628\captionof{table}{Internal-scheduling comparison (nanoseconds)}
    28282629\label{tab:int-sched}
    28292630\bigskip
    28302631
    2831 \begin{tabular}{@{}r*{3}{D{.}{.}{5.2}}@{}}
    2832 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    2833 Pthreads Cond. Variable         & 6005          & 5681.43       & 835.45        \\
    2834 \uC @signal@                            & 324           & 325.54        & 3.02          \\
    2835 \CFA @signal@, 1 @monitor@      & 368.5         & 370.61        & 4.77          \\
    2836 \CFA @signal@, 2 @monitor@      & 467           & 470.5         & 6.79          \\
    2837 \CFA @signal@, 4 @monitor@      & 700.5         & 702.46        & 7.23          \\
    2838 Java @notify@                           & 15471         & 172511        & 5689
     2632\begin{tabular}{|r|*{3}{D{.}{.}{5.2}|}}
     2633\cline{2-4}
     2634\multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
     2635\hline
     2636Pthreads Condition Variable             & 6005  & 5681.43       & 835.45 \\
     2637\uC @signal@                                    & 324           & 325.54        & 3.02   \\
     2638\CFA @signal@, 1 @monitor@              & 368.5         & 370.61        & 4.77   \\
     2639\CFA @signal@, 2 @monitor@              & 467           & 470.5 & 6.79   \\
     2640\CFA @signal@, 4 @monitor@              & 700.5         & 702.46        & 7.23  \\
     2641Java @notify@                                   & 15471 & 172511        & 5689 \\
     2642\hline
    28392643\end{tabular}
    2840 \end{multicols}
     2644\end{figure}
    28412645
    28422646
     
    28472651Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    28482652
    2849 \begin{multicols}{2}
     2653\begin{figure}
    28502654\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    2851 \vspace*{-16pt}
    28522655\begin{cfa}
    28532656volatile int go = 0;
    2854 monitor M {} m;
     2657monitor M { ... } m;
    28552658thread T {};
    2856 void __attribute__((noinline))
    2857 do_call( M & mutex ) {}
     2659void __attribute__((noinline)) do_call( M & mutex ) {}
    28582660void main( T & ) {
    2859         while ( go == 0 ) { yield(); }
     2661        while ( go == 0 ) { yield(); }  // wait for other thread to start
    28602662        while ( go == 1 ) { @do_call( m );@ }
    28612663}
    2862 int __attribute__((noinline))
    2863 do_wait( M & mutex m ) {
     2664int __attribute__((noinline)) do_wait( M & mutex m ) {
    28642665        go = 1; // continue other thread
    2865         BENCH( for ( N ) { @waitfor( do_call, m );@ } )
     2666        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @waitfor( do_call, m );@ } )
    28662667        go = 0; // stop other thread
    28672668        sout | result`ns;
     
    28752676\label{f:ext-sched}
    28762677
    2877 \columnbreak
    2878 
    2879 \vspace*{-16pt}
     2678\centering
     2679
    28802680\captionof{table}{External-scheduling comparison (nanoseconds)}
    28812681\label{tab:ext-sched}
    2882 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
    2883 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    2884 \uC @_Accept@                           & 358           & 359.11        & 2.53          \\
    2885 \CFA @waitfor@, 1 @monitor@     & 359           & 360.93        & 4.07          \\
    2886 \CFA @waitfor@, 2 @monitor@     & 450           & 449.39        & 6.62          \\
    2887 \CFA @waitfor@, 4 @monitor@     & 652           & 655.64        & 7.73
     2682\bigskip
     2683\begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
     2684\cline{2-4}
     2685\multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
     2686\hline
     2687\uC @_Accept@                           & 358           & 359.11        & 2.53  \\
     2688\CFA @waitfor@, 1 @monitor@     & 359           & 360.93        & 4.07  \\
     2689\CFA @waitfor@, 2 @monitor@     & 450           & 449.39        & 6.62  \\
     2690\CFA @waitfor@, 4 @monitor@     & 652           & 655.64        & 7.73 \\
     2691\hline
    28882692\end{tabular}
    2889 \end{multicols}
    2890 
     2693
     2694\bigskip
     2695\medskip
     2696
     2697\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
     2698\begin{cfa}
     2699thread MyThread {};
     2700void main( MyThread & ) {}
     2701int main() {
     2702        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @MyThread m;@ } )
     2703        sout | result`ns;
     2704}
     2705\end{cfa}
     2706\captionof{figure}{\CFA object-creation benchmark}
     2707\label{f:creation}
     2708
     2709\centering
     2710
     2711\captionof{table}{Creation comparison (nanoseconds)}
     2712\label{tab:creation}
     2713\bigskip
     2714
     2715\begin{tabular}{|r|*{3}{D{.}{.}{5.2}|}}
     2716\cline{2-4}
     2717\multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} & \multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
     2718\hline
     2719Pthreads                                & 28091         & 28073.39      & 163.1  \\
     2720\CFA Coroutine Lazy             & 6                     & 6.07          & 0.26   \\
     2721\CFA Coroutine Eager    & 520           & 520.61        & 2.04   \\
     2722\CFA Thread                             & 2032  & 2016.29       & 112.07  \\
     2723\uC Coroutine                   & 106           & 107.36        & 1.47   \\
     2724\uC Thread                              & 536.5 & 537.07        & 4.64   \\
     2725Goroutine                               & 3103  & 3086.29       & 90.25  \\
     2726Java Thread                             & 103416.5      & 103732.29     & 1137 \\
     2727\hline
     2728\end{tabular}
     2729\end{figure}
    28912730
    28922731
     
    28972736The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine to force stack creation, the creation cost is artificially low.
    28982737
    2899 \begin{multicols}{2}
    2900 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    2901 \begin{cfa}
    2902 thread MyThread {};
    2903 void main( MyThread & ) {}
    2904 int main() {
    2905         BENCH( for ( N ) { @MyThread m;@ } )
    2906         sout | result`ns;
    2907 }
    2908 \end{cfa}
    2909 \captionof{figure}{\CFA object-creation benchmark}
    2910 \label{f:creation}
    2911 
    2912 \columnbreak
    2913 
    2914 \vspace*{-16pt}
    2915 \captionof{table}{Object creation comparison (nanoseconds)}
    2916 \label{tab:creation}
    2917 
    2918 \begin{tabular}[t]{@{}r*{3}{D{.}{.}{5.2}}@{}}
    2919 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    2920 Pthreads                                & 28091         & 28073.39      & 163.1         \\
    2921 \CFA Coroutine Lazy             & 6                     & 6.07          & 0.26          \\
    2922 \CFA Coroutine Eager    & 520           & 520.61        & 2.04          \\
    2923 \CFA Thread                             & 2032          & 2016.29       & 112.07        \\
    2924 \uC Coroutine                   & 106           & 107.36        & 1.47          \\
    2925 \uC Thread                              & 536.5         & 537.07        & 4.64          \\
    2926 Goroutine                               & 3103          & 3086.29       & 90.25         \\
    2927 Java Thread                             & 103416.5      & 103732.29     & 1137          \\
    2928 \end{tabular}
    2929 \end{multicols}
    2930 
    29312738
    29322739\section{Conclusion}
    29332740
    2934 Advanced control-flow will always be difficult, especially when there is temporal ordering and nondeterminism.
    2935 However, many systems exacerbate the difficulty through their presentation mechanisms.
    2936 This paper shows it is possible to present a hierarchy of control-flow features, generator, coroutine, thread, and monitor, providing an integrated set of high-level, efficient, and maintainable control-flow features.
    2937 Eliminated from \CFA are spurious wakeup and barging, which are nonintuitive and lead to errors, and having to work with a bewildering set of low-level locks and acquisition techniques.
    2938 \CFA high-level race-free monitors and tasks provide the core mechanisms for mutual exclusion and synchronization, without having to resort to magic qualifiers like @volatile@/@atomic@.
    2939 Extending these mechanisms to handle high-level deadlock-free bulk acquire across both mutual exclusion and synchronization is a unique contribution.
    2940 The \CFA runtime provides concurrency based on a preemptive M:N user-level threading-system, executing in clusters, which encapsulate scheduling of work on multiple kernel threads providing parallelism.
     2741This paper demonstrates a concurrency API that is simple, efficient, and able to build higher-level concurrency features.
     2742The approach provides concurrency based on a preemptive M:N user-level threading-system, executing in clusters, which encapsulate scheduling of work on multiple kernel threads providing parallelism.
    29412743The M:N model is judged to be efficient and provide greater flexibility than a 1:1 threading model.
    2942 These concepts and the entire \CFA runtime-system are written in the \CFA language, extensively leveraging the \CFA type-system, which demonstrates the expressiveness of the \CFA language.
     2744High-level objects (monitor/task) are the core mechanism for mutual exclusion and synchronization.
     2745A novel aspect is allowing multiple mutex-objects to be accessed simultaneously reducing the potential for deadlock for this complex scenario.
     2746These concepts and the entire \CFA runtime-system are written in the \CFA language, demonstrating the expressiveness of the \CFA language.
    29432747Performance comparisons with other concurrent systems/languages show the \CFA approach is competitive across all low-level operations, which translates directly into good performance in well-written concurrent applications.
    2944 C programmers should feel comfortable using these mechanisms for developing complex control-flow in applications, with the ability to obtain maximum available performance by selecting mechanisms at the appropriate level of need.
     2748C programmers should feel comfortable using these mechanisms for developing concurrent applications, with the ability to obtain maximum available performance by selecting mechanisms at the appropriate level.
    29452749
    29462750
    29472751\section{Future Work}
    29482752
    2949 While control flow in \CFA has a strong start, development is still underway to complete a number of missing features.
     2753While concurrency in \CFA has a strong start, development is still underway and there are missing features.
    29502754
    29512755\paragraph{Flexible Scheduling}
     
    29552759Different scheduling algorithms can affect performance (both in terms of average and variation).
    29562760However, no single scheduler is optimal for all workloads and therefore there is value in being able to change the scheduler for given programs.
    2957 One solution is to offer various tuning options, allowing the scheduler to be adjusted to the requirements of the workload.
     2761One solution is to offer various tweaking options, allowing the scheduler to be adjusted to the requirements of the workload.
    29582762However, to be truly flexible, a pluggable scheduler is necessary.
    29592763Currently, the \CFA pluggable scheduler is too simple to handle complex scheduling, \eg quality of service and real-time, where the scheduler must interact with mutex objects to deal with issues like priority inversion.
     
    29752779While monitors offer flexible and powerful concurrency for \CFA, other concurrency tools are also necessary for a complete multi-paradigm concurrency package.
    29762780Examples of such tools can include futures and promises~\cite{promises}, executors and actors.
    2977 These additional features are useful for applications that can be constructed without shared data and direct blocking.
     2781These additional features are useful when monitors offer a level of abstraction that is inadequate for certain tasks.
    29782782As well, new \CFA extensions should make it possible to create a uniform interface for virtually all mutual exclusion, including monitors and low-level locks.
    29792783
  • doc/papers/concurrency/examples/PingPong.c

    raba20d2 r8b34df0  
    33typedef struct PingPong {
    44        const char * name;
     5        struct PingPong * partner;
    56        int N, i;
    6         struct PingPong * partner;
    77        void * next;
    88} PingPong;
    9 #define PPCtor( name, N ) { name, N, 0, NULL, NULL }
     9#define PPCtor( name, N ) { name, NULL, N, 0, NULL }
    1010void comain( PingPong * pp ) __attribute__(( noinline ));
    1111void comain( PingPong * pp ) {
  • doc/papers/concurrency/examples/Pingpong.cfa

    raba20d2 r8b34df0  
    88};
    99
    10 // void ?{}( PingPong & this, const char * name, unsigned int N, PingPong & part ) {
    11 //      this.[name, N] = [name, N];  &this.part = &part;
    12 // }
    13 // void ?{}( PingPong & this, const char * name, unsigned int N ) {
    14 //      this{ name, N, *0p };                                                           // call first constructor
    15 // }
     10void ?{}( PingPong & this, const char * name, unsigned int N, PingPong & part ) {
     11        this.[name, N] = [name, N];  &this.part = &part;
     12}
     13void ?{}( PingPong & this, const char * name, unsigned int N ) {
     14        this{ name, N, *0p };                                                           // call first constructor
     15}
    1616void cycle( PingPong & pingpong ) {
    1717        resume( pingpong );
  • doc/papers/concurrency/figures/ext_monitor.fig

    raba20d2 r8b34df0  
    88-2
    991200 2
    10 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 3600.000 1500 3300 1200 3600 1500 3900
    11 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 4500.000 1500 4200 1200 4500 1500 4800
    12 6 4200 2100 4500 2400
    13 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 2250 105 105 4350 2250 4455 2355
    14 4 1 -1 0 0 0 10 0.0000 2 105 90 4350 2310 d\001
     105 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1575.000 3450.000 1575 3150 1275 3450 1575 3750
     115 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1575.000 4350.000 1575 4050 1275 4350 1575 4650
     126 4275 1950 4575 2250
     131 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 2100 105 105 4425 2100 4530 2205
     144 1 -1 0 0 0 10 0.0000 2 105 90 4425 2160 d\001
    1515-6
    16 6 4200 1800 4500 2100
    17 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1950 105 105 4350 1950 4455 2055
    18 4 1 -1 0 0 0 10 0.0000 2 105 90 4350 2010 b\001
     166 4275 1650 4575 1950
     171 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 1800 105 105 4425 1800 4530 1905
     184 1 -1 0 0 0 10 0.0000 2 105 90 4425 1860 b\001
    1919-6
    20 6 1420 5595 5625 5805
    21 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 1500 5700 80 80 1500 5700 1580 5780
    22 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2850 5700 105 105 2850 5700 2955 5805
    23 1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 4350 5700 105 105 4350 5700 4455 5805
    24 4 0 -1 0 0 0 12 0.0000 2 135 1035 3075 5775 blocked task\001
    25 4 0 -1 0 0 0 12 0.0000 2 135 870 1650 5775 active task\001
    26 4 0 -1 0 0 0 12 0.0000 2 135 1050 4575 5775 routine mask\001
     206 1495 5445 5700 5655
     211 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 1575 5550 80 80 1575 5550 1655 5630
     221 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2925 5550 105 105 2925 5550 3030 5655
     231 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 4425 5550 105 105 4425 5550 4530 5655
     244 0 -1 0 0 0 12 0.0000 2 135 1035 3150 5625 blocked task\001
     254 0 -1 0 0 0 12 0.0000 2 135 870 1725 5625 active task\001
     264 0 -1 0 0 0 12 0.0000 2 135 1050 4650 5625 routine mask\001
    2727-6
    28 6 3450 1950 3750 2550
    29 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3600 2100 105 105 3600 2100 3705 2100
     286 3525 1800 3825 2400
     291 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3675 1950 105 105 3675 1950 3780 1950
    30302 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    31          3450 1950 3750 1950 3750 2550 3450 2550 3450 1950
    32 4 1 4 0 0 0 10 0.0000 2 105 120 3600 2160 Y\001
     31         3525 1800 3825 1800 3825 2400 3525 2400 3525 1800
     324 1 4 0 0 0 10 0.0000 2 105 120 3675 2010 Y\001
    3333-6
    34 6 3450 2250 3750 2550
    35 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3600 2400 105 105 3600 2400 3705 2400
    36 4 1 4 0 0 0 10 0.0000 2 105 120 3600 2445 X\001
     346 3525 2100 3825 2400
     351 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3675 2250 105 105 3675 2250 3780 2250
     364 1 4 0 0 0 10 0.0000 2 105 120 3675 2295 X\001
    3737-6
    38 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1650 3750 105 105 1650 3750 1755 3855
    39 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1950 3750 105 105 1950 3750 2055 3855
    40 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4950 4050 105 105 4950 4050 5055 4155
    41 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5250 4050 105 105 5250 4050 5355 4155
    42 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 2850 105 105 4350 2850 4455 2955
    43 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 2550 105 105 4350 2550 4455 2655
    44 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3450 4725 80 80 3450 4725 3530 4805
     381 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1725 3600 105 105 1725 3600 1830 3705
     391 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2025 3600 105 105 2025 3600 2130 3705
     401 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5025 3900 105 105 5025 3900 5130 4005
     411 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5325 3900 105 105 5325 3900 5430 4005
     421 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 2700 105 105 4425 2700 4530 2805
     431 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 2400 105 105 4425 2400 4530 2505
     441 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3525 4575 80 80 3525 4575 3605 4655
    45452 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    46          2400 3075 3825 3075 3825 3375 2400 3375 2400 3075
     46         2475 2925 3900 2925 3900 3225 2475 3225 2475 2925
    47472 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
    48          1500 3900 2100 3900 2100 4200 1500 4200
     48         1575 3750 2175 3750 2175 4050 1575 4050
    49492 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
    50          1500 3600 2100 3600 2250 3825
     50         1575 3450 2175 3450 2325 3675
    51512 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    52          2100 3300 1950 3525
     52         2175 3150 2025 3375
    53532 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
    54          1500 4500 2100 4500 2250 4725
     54         1575 4350 2175 4350 2325 4575
    55552 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    56          2100 4200 1950 4425
     56         2175 4050 2025 4275
    57572 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
    58          1500 4800 2100 4800 2100 5100 3300 5100
     58         1575 4650 2175 4650 2175 4950 3375 4950
    59592 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    60          4800 3900 4650 4125
     60         4875 3750 4725 3975
    61612 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    62          3300 5100 3525 5250
     62         3375 4950 3600 5100
    63632 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
    64          3600 5100 4800 5100 4800 4200 5400 4200 5400 3900 4800 3900
    65          4800 3000 4500 3000 4500 1800
     64         3675 4950 4875 4950 4875 4050 5475 4050 5475 3750 4875 3750
     65         4875 2850 4575 2850 4575 1650
    66662 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
    67          4200 4350 4200 3450 2700 3450 2700 4350 4200 4350
     67         4275 4200 4275 3300 2775 3300 2775 4200 4275 4200
    68682 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    6969        1 1 1.00 60.00 120.00
    70          3600 3225 3600 2550
     70         3675 3075 3675 2400
    71712 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    72          4050 3000 4500 3150
     72         4125 2850 4575 3000
    73732 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    74          1500 3300 2100 3300 2100 3000 4050 3000 4050 1800
    75 4 1 -1 0 0 0 10 0.0000 2 75 75 4350 2895 a\001
    76 4 1 -1 0 0 0 10 0.0000 2 75 75 4350 2595 c\001
    77 4 1 -1 0 0 0 12 0.0000 2 135 315 3450 5475 exit\001
    78 4 1 -1 0 0 0 12 0.0000 2 135 135 1650 3225 A\001
    79 4 1 -1 0 0 0 12 0.0000 2 135 795 1650 5025 condition\001
    80 4 1 -1 0 0 0 12 0.0000 2 135 135 1650 5250 B\001
    81 4 0 -1 0 0 0 12 0.0000 2 135 420 4950 3825 stack\001
    82 4 0 -1 0 0 0 12 0.0000 2 180 750 4950 3375 acceptor/\001
    83 4 0 -1 0 0 0 12 0.0000 2 180 750 4950 3600 signalled\001
    84 4 1 -1 0 0 0 12 0.0000 2 135 795 1650 3000 condition\001
    85 4 0 -1 0 0 0 12 0.0000 2 135 525 4650 2550 arrival\001
    86 4 0 -1 0 0 0 12 0.0000 2 135 630 4650 2325 order of\001
    87 4 1 -1 0 0 0 12 0.0000 2 135 525 3450 3825 shared\001
    88 4 1 -1 0 0 0 12 0.0000 2 135 735 3450 4125 variables\001
    89 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2025 X\001
    90 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2325 Y\001
    91 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2625 Y\001
    92 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2925 X\001
    93 4 0 -1 0 0 3 12 0.0000 2 150 540 4950 4425 urgent\001
    94 4 1 0 50 -1 0 11 0.0000 2 165 600 3075 3300 accepted\001
    95 4 1 -1 0 0 0 12 0.0000 2 165 960 4275 1725 entry queue\001
     74         1575 3150 2175 3150 2175 2850 4125 2850 4125 1650
     754 1 -1 0 0 0 10 0.0000 2 75 75 4425 2745 a\001
     764 1 -1 0 0 0 10 0.0000 2 75 75 4425 2445 c\001
     774 1 -1 0 0 0 12 0.0000 2 135 315 3525 5325 exit\001
     784 1 -1 0 0 0 12 0.0000 2 135 135 1725 3075 A\001
     794 1 -1 0 0 0 12 0.0000 2 135 795 1725 4875 condition\001
     804 1 -1 0 0 0 12 0.0000 2 135 135 1725 5100 B\001
     814 0 -1 0 0 0 12 0.0000 2 135 420 5025 3675 stack\001
     824 0 -1 0 0 0 12 0.0000 2 180 750 5025 3225 acceptor/\001
     834 0 -1 0 0 0 12 0.0000 2 180 750 5025 3450 signalled\001
     844 1 -1 0 0 0 12 0.0000 2 135 795 1725 2850 condition\001
     854 1 -1 0 0 0 12 0.0000 2 165 420 4425 1350 entry\001
     864 1 -1 0 0 0 12 0.0000 2 135 495 4425 1575 queue\001
     874 0 -1 0 0 0 12 0.0000 2 135 525 4725 2400 arrival\001
     884 0 -1 0 0 0 12 0.0000 2 135 630 4725 2175 order of\001
     894 1 -1 0 0 0 12 0.0000 2 135 525 3525 3675 shared\001
     904 1 -1 0 0 0 12 0.0000 2 135 735 3525 3975 variables\001
     914 0 4 50 -1 0 11 0.0000 2 120 135 4150 1875 X\001
     924 0 4 50 -1 0 11 0.0000 2 120 135 4150 2175 Y\001
     934 0 4 50 -1 0 11 0.0000 2 120 135 4150 2475 Y\001
     944 0 4 50 -1 0 11 0.0000 2 120 135 4150 2775 X\001
     954 0 -1 0 0 3 12 0.0000 2 150 540 5025 4275 urgent\001
     964 1 0 50 -1 0 11 0.0000 2 165 600 3150 3150 accepted\001
  • doc/papers/concurrency/figures/monitor.fig

    raba20d2 r8b34df0  
    88-2
    991200 2
     105 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 2700.000 1500 2400 1200 2700 1500 3000
    10115 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 3600.000 1500 3300 1200 3600 1500 3900
    11 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1500.000 4500.000 1500 4200 1200 4500 1500 4800
    12 6 2400 2400 2700 2700
    13 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 2550 105 105 2550 2550 2655 2550
    14 4 1 -1 0 0 0 10 0.0000 2 105 90 2550 2610 b\001
     126 4200 1200 4500 1500
     131 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1350 105 105 4350 1350 4455 1455
     144 1 -1 0 0 0 10 0.0000 2 105 90 4350 1410 d\001
    1515-6
    16 6 2400 2700 2700 3000
    17 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 2850 105 105 2550 2850 2655 2850
    18 4 1 -1 0 0 0 10 0.0000 2 75 75 2550 2895 a\001
     166 4200 900 4500 1200
     171 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1050 105 105 4350 1050 4455 1155
     184 1 -1 0 0 0 10 0.0000 2 105 90 4350 1110 b\001
    1919-6
    20 6 3300 2400 3600 2700
    21 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 2550 105 105 3450 2550 3555 2550
    22 4 1 -1 0 0 0 10 0.0000 2 105 90 3450 2610 d\001
     206 2400 1500 2700 1800
     211 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 1650 105 105 2550 1650 2655 1650
     224 1 -1 0 0 0 10 0.0000 2 105 90 2550 1710 b\001
    2323-6
    24 6 1350 5550 5325 5850
    25 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 1500 5700 80 80 1500 5700 1580 5780
    26 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2850 5700 105 105 2850 5700 2955 5805
    27 1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 4350 5700 105 105 4350 5700 4455 5805
    28 4 0 -1 0 0 0 12 0.0000 2 180 765 4575 5775 duplicate\001
    29 4 0 -1 0 0 0 12 0.0000 2 135 1035 3075 5775 blocked task\001
    30 4 0 -1 0 0 0 12 0.0000 2 135 870 1650 5775 active task\001
     246 2400 1800 2700 2100
     251 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 2550 1950 105 105 2550 1950 2655 1950
     264 1 -1 0 0 0 10 0.0000 2 75 75 2550 1995 a\001
    3127-6
    32 6 4200 2100 4500 2400
    33 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 2250 105 105 4350 2250 4455 2355
    34 4 1 -1 0 0 0 10 0.0000 2 105 90 4350 2310 d\001
     286 3300 1500 3600 1800
     291 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 1650 105 105 3450 1650 3555 1650
     304 1 -1 0 0 0 10 0.0000 2 105 90 3450 1710 d\001
    3531-6
    36 6 4200 1800 4500 2100
     326 1350 4650 5325 4950
     331 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 1500 4800 80 80 1500 4800 1580 4880
     341 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2850 4800 105 105 2850 4800 2955 4905
     351 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 4350 4800 105 105 4350 4800 4455 4905
     364 0 -1 0 0 0 12 0.0000 2 180 765 4575 4875 duplicate\001
     374 0 -1 0 0 0 12 0.0000 2 135 1035 3075 4875 blocked task\001
     384 0 -1 0 0 0 12 0.0000 2 135 870 1650 4875 active task\001
     39-6
     401 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1650 2850 105 105 1650 2850 1755 2955
     411 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1950 2850 105 105 1950 2850 2055 2955
     421 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4950 3150 105 105 4950 3150 5055 3255
     431 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5250 3150 105 105 5250 3150 5355 3255
    37441 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1950 105 105 4350 1950 4455 2055
    38 4 1 -1 0 0 0 10 0.0000 2 105 90 4350 2010 b\001
    39 -6
    40 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1650 3750 105 105 1650 3750 1755 3855
    41 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1950 3750 105 105 1950 3750 2055 3855
    42 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4950 4050 105 105 4950 4050 5055 4155
    43 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5250 4050 105 105 5250 4050 5355 4155
    44 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3450 4725 80 80 3450 4725 3530 4805
    45 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 2850 105 105 3450 2850 3555 2850
    46 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 2850 105 105 4350 2850 4455 2955
    47 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 2550 105 105 4350 2550 4455 2655
     451 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4350 1650 105 105 4350 1650 4455 1755
     461 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3450 3825 80 80 3450 3825 3530 3905
     471 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3450 1950 105 105 3450 1950 3555 1950
    48482 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    49          2400 3000 2625 3150
     49         2400 2100 2625 2250
    50502 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    51          3300 3000 3525 3150
     51         3300 2100 3525 2250
     522 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
     53         4200 2100 4425 2250
    52542 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 5
    53          1500 3300 2100 3300 2100 3000 2400 3000 2400 2400
     55         1500 2400 2100 2400 2100 2100 2400 2100 2400 1500
    54562 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
    55          1500 3900 2100 3900 2100 4200 1500 4200
     57         1500 3000 2100 3000 2100 3300 1500 3300
     582 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
     59         1500 2700 2100 2700 2250 2925
     602 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
     61         2100 2400 1950 2625
    56622 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
    5763         1500 3600 2100 3600 2250 3825
    58642 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    5965         2100 3300 1950 3525
    60 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
    61          1500 4500 2100 4500 2250 4725
     662 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
     67         1500 3900 2100 3900 2100 4200 3300 4200
    62682 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    63          2100 4200 1950 4425
     69         4800 3000 4650 3225
     702 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
     71         3300 4200 3525 4350
    64722 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
    65          1500 4800 2100 4800 2100 5100 3300 5100
    66 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    67          4800 3900 4650 4125
    68 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    69          3300 5100 3525 5250
     73         3600 1500 3600 2100 4200 2100 4200 900
     742 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
     75         3600 4200 4800 4200 4800 3300 5400 3300 5400 3000 4800 3000
     76         4800 2100 4500 2100 4500 900
    70772 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
    71          4200 4350 4200 3450 2700 3450 2700 4350 4200 4350
     78         4200 3450 4200 2550 2700 2550 2700 3450 4200 3450
    72792 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
    73          2700 2400 2700 3000 3300 3000 3300 2400
    74 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
    75          3600 2400 3600 3000 4050 3000 4050 1800
    76 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
    77          3600 5100 4800 5100 4800 4200 5400 4200 5400 3900 4800 3900
    78          4800 3000 4500 3000 4500 1800
    79 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    80          4050 3000 4500 3150
    81 4 1 -1 0 0 0 12 0.0000 2 135 315 3450 5475 exit\001
    82 4 1 -1 0 0 0 12 0.0000 2 135 135 1650 3225 A\001
    83 4 1 -1 0 0 0 12 0.0000 2 135 795 1650 5025 condition\001
    84 4 1 -1 0 0 0 12 0.0000 2 135 135 1650 5250 B\001
    85 4 0 -1 0 0 0 12 0.0000 2 135 420 4950 3825 stack\001
    86 4 0 -1 0 0 0 12 0.0000 2 180 750 4950 3375 acceptor/\001
    87 4 0 -1 0 0 0 12 0.0000 2 180 750 4950 3600 signalled\001
    88 4 1 -1 0 0 0 12 0.0000 2 135 795 1650 3000 condition\001
    89 4 1 4 0 0 0 12 0.0000 2 135 135 2550 2325 X\001
    90 4 1 4 0 0 0 12 0.0000 2 135 135 3450 2325 Y\001
    91 4 1 -1 0 0 0 12 0.0000 2 135 525 3450 3825 shared\001
    92 4 1 -1 0 0 0 12 0.0000 2 135 735 3450 4125 variables\001
    93 4 1 -1 0 0 0 10 0.0000 2 75 75 3450 2895 c\001
    94 4 1 -1 0 0 0 12 0.0000 2 165 1125 3000 2100 mutex queues\001
    95 4 0 -1 0 0 3 12 0.0000 2 150 540 4950 4425 urgent\001
    96 4 1 -1 0 0 0 10 0.0000 2 75 75 4350 2895 a\001
    97 4 1 -1 0 0 0 10 0.0000 2 75 75 4350 2595 c\001
    98 4 0 -1 0 0 0 12 0.0000 2 135 525 4650 2550 arrival\001
    99 4 0 -1 0 0 0 12 0.0000 2 135 630 4650 2325 order of\001
    100 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2025 X\001
    101 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2325 Y\001
    102 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2625 Y\001
    103 4 0 4 50 -1 0 11 0.0000 2 120 135 4075 2925 X\001
    104 4 1 -1 0 0 0 12 0.0000 2 165 960 4275 1725 entry queue\001
     80         2700 1500 2700 2100 3300 2100 3300 1500
     814 1 -1 0 0 0 10 0.0000 2 75 75 4350 1995 a\001
     824 1 -1 0 0 0 10 0.0000 2 75 75 4350 1695 c\001
     834 1 -1 0 0 0 12 0.0000 2 135 315 3450 4575 exit\001
     844 1 -1 0 0 0 12 0.0000 2 135 135 1650 2325 A\001
     854 1 -1 0 0 0 12 0.0000 2 135 795 1650 4125 condition\001
     864 1 -1 0 0 0 12 0.0000 2 135 135 1650 4350 B\001
     874 0 -1 0 0 0 12 0.0000 2 135 420 4950 2925 stack\001
     884 0 -1 0 0 0 12 0.0000 2 180 750 4950 2475 acceptor/\001
     894 0 -1 0 0 0 12 0.0000 2 180 750 4950 2700 signalled\001
     904 1 -1 0 0 0 12 0.0000 2 135 795 1650 2100 condition\001
     914 1 4 0 0 0 12 0.0000 2 135 135 2550 1425 X\001
     924 1 4 0 0 0 12 0.0000 2 135 135 3450 1425 Y\001
     934 1 -1 0 0 0 12 0.0000 2 165 420 4350 600 entry\001
     944 1 -1 0 0 0 12 0.0000 2 135 495 4350 825 queue\001
     954 0 -1 0 0 0 12 0.0000 2 135 525 4650 1650 arrival\001
     964 0 -1 0 0 0 12 0.0000 2 135 630 4650 1425 order of\001
     974 1 -1 0 0 0 12 0.0000 2 135 525 3450 2925 shared\001
     984 1 -1 0 0 0 12 0.0000 2 135 735 3450 3225 variables\001
     994 1 -1 0 0 0 12 0.0000 2 120 510 3000 975 mutex\001
     1004 1 -1 0 0 0 10 0.0000 2 75 75 3450 1995 c\001
     1014 1 -1 0 0 0 12 0.0000 2 135 570 3000 1200 queues\001
     1024 0 -1 0 0 3 12 0.0000 2 150 540 4950 3525 urgent\001
  • libcfa/src/iostream.cfa

    raba20d2 r8b34df0  
    1010// Created On       : Wed May 27 17:56:53 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Jun 13 17:21:10 2019
    13 // Update Count     : 812
     12// Last Modified On : Wed Jun 12 15:00:31 2019
     13// Update Count     : 819
    1414//
    1515
     
    740740        } // ?|?
    741741
    742         // istype & ?|?( istype & is, const char * fmt ) {
    743         //      fmt( is, fmt, "" );
    744         //      return is;
    745         // } // ?|?
     742        istype & ?|?( istype & is, const char * fmt ) {
     743                fmt( is, fmt, "" );
     744                return is;
     745        } // ?|?
    746746
    747747        istype & ?|?( istype & is, char * s ) {
     
    777777        // skip xxx
    778778        if ( ! f.s ) {
    779                 // printf( "skip %s %d\n", f.scanset, f.wd );
    780                 if ( f.wd == -1 ) fmt( is, f.scanset, "" ); // no input arguments
    781                 else for ( f.wd ) fmt( is, "%*c" );
     779                //printf( "skip %s %d\n", f.scanset, f.wd );
     780                if ( f.wd != -1 ) for ( f.wd ) fmt( is, "%*c" ); // no input arguments
     781                else fmt( is, f.scanset, "" );
    782782                return is;
    783783        } // if
  • libcfa/src/iostream.hfa

    raba20d2 r8b34df0  
    1010// Created On       : Wed May 27 17:56:53 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Jun 13 17:20:21 2019
    13 // Update Count     : 325
     12// Last Modified On : Wed Jun 12 13:35:42 2019
     13// Update Count     : 331
    1414//
    1515
     
    316316        istype & ?|?( istype &, long double _Complex & );
    317317
    318 //      istype & ?|?( istype &, const char * );
     318        istype & ?|?( istype &, const char * );
    319319        istype & ?|?( istype &, char * );
    320320
     
    350350        _Istream_Cstr ignore( const char * s ) { return (_Istream_Cstr)@{ s, 0p, -1, { .flags.ignore : true } }; }
    351351        _Istream_Cstr & ignore( _Istream_Cstr & fmt ) { fmt.flags.ignore = true; return fmt; }
    352         _Istream_Cstr wdi( unsigned int w, char * s ) { return (_Istream_Cstr)@{ s, 0p, w, { .all : 0 } }; }
    353         _Istream_Cstr & wdi( unsigned int w, _Istream_Cstr & fmt ) { fmt.wd = w; return fmt; }
     352        _Istream_Cstr wd( unsigned int w, char * s ) { return (_Istream_Cstr)@{ s, 0p, w, { .all : 0 } }; }
     353        _Istream_Cstr & wd( unsigned int w, _Istream_Cstr & fmt ) { fmt.wd = w; return fmt; }
    354354} // distribution
    355355forall( dtype istype | istream( istype ) ) istype & ?|?( istype & is, _Istream_Cstr f );
     
    377377        _Istream_Manip(T) & ignore( _Istream_Manip(T) & fmt ) { fmt.ignore = true; return fmt; } \
    378378        _Istream_Manip(T) wdi( unsigned int w, T & val ) { return (_Istream_Manip(T))@{ val, w, false }; } \
    379         _Istream_Manip(T) & wdi( unsigned int w, _Istream_Manip(T) & fmt ) { fmt.wd = w; return fmt; } \
     379        _Istream_Manip(T) & wd( unsigned int w, _Istream_Manip(T) & fmt ) { fmt.wd = w; return fmt; } \
    380380} /* distribution */ \
    381381forall( dtype istype | istream( istype ) ) { \
  • src/AST/Convert.cpp

    raba20d2 r8b34df0  
    734734        const ast::Expr * visit( const ast::VariableExpr * node ) override final {
    735735                auto expr = new VariableExpr();
     736                visitBaseExpr( node, expr );
    736737                expr->var = get<DeclarationWithType>().accept1(node->var);
    737                 visitBaseExpr( node, expr );
     738                Type * type = expr->var->get_type()->clone();
     739                if(FunctionType * ft = dynamic_cast<FunctionType*>(type)) {
     740                        if(node->result.as<ast::PointerType>()) {
     741                                type = new PointerType({}, ft);
     742                        }
     743                }
     744
     745                type->set_lvalue( true );
     746                expr->result = type ;
    738747                this->node = expr;
    739748                return nullptr;
     
    741750
    742751        const ast::Expr * visit( const ast::ConstantExpr * node ) override final {
    743                 // Old world:   two types: rslt->constant.type, rslt->result
    744                 // New workd:   one public type: node->result, plus node->underlyer only to support roundtrip conversion
    745                 //              preserving underlyer because the correct type for string literals is complicated to construct,
    746             //              and distinguishing a string from other literals using the type is hard to do accurately
    747                 // Both worlds: the outer, expression-level type can change during resolution
    748                 //              for a string, that's char[k] before-resolve and char * after
    749                 // Old world:   the inner Constant type stays what it was built with
    750                 //              for a string, that's char[k] always
    751                 // Both worlds: the "rep" field of a constant is the C source file fragment that compiles to the desired value
    752         //              for a string, that includes outer quotes, backslashes, et al cases from the Literals test
    753                 ConstantExpr *rslt = new ConstantExpr(Constant(
    754                         get<Type>().accept1(node->underlyer),
    755                         node->rep,
    756                         node->ival));
     752                ConstantExpr *rslt = nullptr;
     753                switch ( node->kind ) {
     754                case ast::ConstantExpr::Integer:
     755                        rslt = new ConstantExpr{Constant{
     756                                get<Type>().accept1( node->result ),
     757                                node->rep,
     758                                (unsigned long long) node->intValue()
     759                        }};
     760                        break;
     761                case ast::ConstantExpr::FloatingPoint:
     762                        rslt = new ConstantExpr{Constant{
     763                                get<Type>().accept1(node->result),
     764                                node->rep,
     765                                (double) node->floatValue()
     766                        }};
     767                        break;
     768                case ast::ConstantExpr::String:
     769                        rslt = new ConstantExpr{Constant{
     770                                get<Type>().accept1( node->result ),
     771                                node->rep,
     772                                (long long unsigned int)0
     773                        }};
     774                        break;
     775                }
     776                assert(rslt);
    757777                auto expr = visitBaseExpr( node, rslt );
    758778                this->node = expr;
     
    21332153                );
    21342154
     2155                visitBaseExpr_SkipResultType( old,
     2156                        expr
     2157                );
     2158
    21352159                expr->var = GET_ACCEPT_1(var, DeclWithType);
    2136                 visitBaseExpr( old, expr );
    2137 
    2138                 this->node = expr;
     2160                expr->result = expr->var->get_type();
     2161                if(const ast::FunctionType * ft = expr->result.as<ast::FunctionType>()) {
     2162                        if(dynamic_cast<PointerType *>(old->result)) {
     2163                                expr->result = new ast::PointerType(ft);
     2164                        }
     2165                }
     2166                add_qualifiers( expr->result, ast::CV::Lvalue );
     2167                this->node = expr;
     2168        }
     2169
     2170        bool isIntlikeConstantType(const Type *t) {
     2171                if ( const BasicType * basicType = dynamic_cast< const BasicType * >( t ) ) {
     2172                        if ( basicType->isInteger() ) {
     2173                                return true;
     2174                        }
     2175                } else if ( dynamic_cast< const OneType * >( t ) ) {
     2176                        return true;
     2177                } else if ( dynamic_cast< const ZeroType * >( t ) ) {
     2178                        return true;
     2179                } else if ( dynamic_cast< const PointerType * >( t ) ) {
     2180                        // null pointer constants, with zero int-values
     2181                        return true;
     2182                }
     2183                return false;
     2184        }
     2185
     2186        int isFloatlikeConstantType(const Type *t) {
     2187                if ( const BasicType * bty = dynamic_cast< const BasicType * >( t ) ) {
     2188                        if ( ! bty->isInteger() ) {
     2189                                return true;
     2190                        }
     2191                }
     2192                return false;
     2193        }
     2194
     2195        int isStringlikeConstantType(const Type *t) {
     2196                const Type *referentType = nullptr;
     2197                if ( const ArrayType * aty = dynamic_cast< const ArrayType * >( t ) ) {
     2198                        referentType = aty->base;
     2199                } else if ( const PointerType * pty = dynamic_cast< const PointerType * >( t ) ) {
     2200                        referentType = pty->base;
     2201                }
     2202                if (referentType) {
     2203                        if ( const BasicType * bty = dynamic_cast< const BasicType * >( referentType ) ) {
     2204                           if ( bty->kind == BasicType::Kind::Char ) {
     2205                                   return true;
     2206                           }
     2207                        }
     2208                }
     2209                return false;
    21392210        }
    21402211
    21412212        virtual void visit( ConstantExpr * old ) override final {
    2142                 ast::ConstantExpr *rslt = new ast::ConstantExpr(
    2143                         old->location,
    2144                         GET_ACCEPT_1(result, Type),
    2145                         old->constant.get_value(),
    2146                         old->constant.ival
    2147                 );
    2148                 rslt->underlyer = getAccept1< ast::Type, Type* >( old->constant.get_type() );
     2213                ast::ConstantExpr *rslt = nullptr;
     2214                if (isStringlikeConstantType(old->result)) {
     2215                        rslt = new ast::ConstantExpr(
     2216                                old->location,
     2217                                GET_ACCEPT_1(result, Type),
     2218                                old->constant.get_value(),
     2219                                0,
     2220                                ast::ConstantExpr::Kind::String
     2221                        );
     2222                } else if (isIntlikeConstantType(old->result)) {
     2223                        rslt = new ast::ConstantExpr(
     2224                                old->location,
     2225                                GET_ACCEPT_1(result, Type),
     2226                                old->constant.get_value(),
     2227                                (unsigned long long) old->intValue(),
     2228                                ast::ConstantExpr::Kind::Integer
     2229                        );
     2230                } else if (isFloatlikeConstantType(old->result)) {
     2231                        rslt = new ast::ConstantExpr(
     2232                                old->location,
     2233                                GET_ACCEPT_1(result, Type),
     2234                                old->constant.get_value(),
     2235                                (double) old->constant.get_dval()
     2236                        );
     2237                }
     2238                assert(rslt);
    21492239                this->node = visitBaseExpr( old, rslt );
    21502240        }
  • src/AST/Expr.cpp

    raba20d2 r8b34df0  
    99// Author           : Aaron B. Moss
    1010// Created On       : Wed May 15 17:00:00 2019
    11 // Last Modified By : Andrew Beach
    12 // Created On       : Thr Jun 13 13:38:00 2019
    13 // Update Count     : 2
     11// Last Modified By : Aaron B. Moss
     12// Created On       : Wed May 15 17:00:00 2019
     13// Update Count     : 1
    1414//
    1515
     
    194194        if ( const BasicType * bty = result.as< BasicType >() ) {
    195195                if ( bty->isInteger() ) {
    196                         assert(ival);
    197                         return ival.value();
     196                        return val.ival;
    198197                }
    199198        } else if ( result.as< ZeroType >() ) {
     
    205204}
    206205
     206double ConstantExpr::floatValue() const {
     207        if ( const BasicType * bty = result.as< BasicType >() ) {
     208                if ( ! bty->isInteger() ) {
     209                        return val.dval;
     210                }
     211        }
     212        SemanticError( this, "Constant expression of non-floating-point type " );
     213}
     214
    207215ConstantExpr * ConstantExpr::from_bool( const CodeLocation & loc, bool b ) {
    208216        return new ConstantExpr{
    209217                loc, new BasicType{ BasicType::Bool }, b ? "1" : "0", (unsigned long long)b };
     218}
     219
     220ConstantExpr * ConstantExpr::from_char( const CodeLocation & loc, char c ) {
     221        return new ConstantExpr{
     222                loc, new BasicType{ BasicType::Char }, std::to_string( c ), (unsigned long long)c };
    210223}
    211224
     
    219232                loc, new BasicType{ BasicType::LongUnsignedInt }, std::to_string( i ),
    220233                (unsigned long long)i };
     234}
     235
     236ConstantExpr * ConstantExpr::from_double( const CodeLocation & loc, double d ) {
     237        return new ConstantExpr{ loc, new BasicType{ BasicType::Double }, std::to_string( d ), d };
     238}
     239
     240ConstantExpr * ConstantExpr::from_string( const CodeLocation & loc, const std::string & s ) {
     241        return new ConstantExpr{
     242                loc,
     243                new ArrayType{
     244                        new BasicType{ BasicType::Char, CV::Const },
     245                        ConstantExpr::from_int( loc, s.size() + 1 /* null terminator */ ),
     246                        FixedLen, DynamicDim },
     247                std::string{"\""} + s + "\"",
     248                (unsigned long long)0,
     249                ConstantExpr::String };
    221250}
    222251
     
    353382
    354383UniqueExpr::UniqueExpr( const CodeLocation & loc, const Expr * e, unsigned long long i )
    355 : Expr( loc, e->result ), expr( e ), id( i ) {
     384: Expr( loc, e->result ), id( i ) {
    356385        assert( expr );
    357386        if ( id == -1ull ) {
  • src/AST/Expr.hpp

    raba20d2 r8b34df0  
    2222#include <utility>        // for move
    2323#include <vector>
    24 #include <optional>
    2524
    2625#include "Fwd.hpp"        // for UniqueId
     
    3332
    3433class ConverterOldToNew;
    35 class ConverterNewToOld;
    3634
    3735namespace ast {
     
    348346};
    349347
    350 /// A compile-time constant.
    351 /// Mostly carries C-source text from parse to code-gen, without interpretation.  E.g. strings keep their outer quotes and never have backslashes interpreted.
    352 /// Integer constants get special treatment, e.g. for verifying array operations, when an integer constant occurs as the length of an array.
     348/// A compile-time constant
    353349class ConstantExpr final : public Expr {
    354 public:
    355         // Representation of this constant, as it occurs in .cfa source and .cfa.cc result.
     350        union Val {
     351                unsigned long long ival;
     352                double dval;
     353
     354                Val( unsigned long long i ) : ival( i ) {}
     355                Val( double d ) : dval( d ) {}
     356        } val;
     357public:
    356358        std::string rep;
     359        enum Kind { Integer, FloatingPoint, String } kind;
    357360
    358361        ConstantExpr(
    359                 const CodeLocation & loc, const Type * ty, const std::string & r,
    360                         std::optional<unsigned long long> i )
    361         : Expr( loc, ty ), rep( r ), ival( i ) {}
    362 
    363         /// Gets the integer value of this constant, if one is appropriate to its type.
    364         /// Throws a SemanticError if the type is not appropriate for value-as-integer.
    365         /// Suffers an assertion failure the type is appropriate but no integer value was supplied to the constructor.
     362                const CodeLocation & loc, const Type * ty, const std::string & r, unsigned long long v,
     363                Kind k = Integer )
     364        : Expr( loc, ty ), val( v ), rep( r ), kind( k ) {}
     365        ConstantExpr( const CodeLocation & loc, const Type * ty, const std::string & r, double v )
     366        : Expr( loc, ty ), val( v ), rep( r ), kind( FloatingPoint ) {}
     367
     368        /// Gets the value of this constant as an integer
    366369        long long int intValue() const;
     370        /// Gets the value of this constant as floating point
     371        double floatValue() const;
    367372
    368373        /// generates a boolean constant of the given bool
    369374        static ConstantExpr * from_bool( const CodeLocation & loc, bool b );
     375        /// generates a char constant of the given char
     376        static ConstantExpr * from_char( const CodeLocation & loc, char c );
    370377        /// generates an integer constant of the given int
    371378        static ConstantExpr * from_int( const CodeLocation & loc, int i );
    372379        /// generates an integer constant of the given unsigned long int
    373380        static ConstantExpr * from_ulong( const CodeLocation & loc, unsigned long i );
     381        /// generates a floating point constant of the given double
     382        static ConstantExpr * from_double( const CodeLocation & loc, double d );
     383        /// generates an array of chars constant of the given string
     384        static ConstantExpr * from_string( const CodeLocation & loc, const std::string & s );
    374385        /// generates a null pointer value for the given type. void * if omitted.
    375386        static ConstantExpr * null( const CodeLocation & loc, const Type * ptrType = nullptr );
     
    379390        ConstantExpr * clone() const override { return new ConstantExpr{ *this }; }
    380391        MUTATE_FRIEND
    381 
    382         std::optional<unsigned long long> ival;
    383 
    384         // Intended only for legacy support of roundtripping the old AST.
    385         // Captures the very-locally inferred type, before the resolver modifies the type of this ConstantExpression.
    386         // In the old AST it's constExpr->constant.type
    387         ptr<Type> underlyer;
    388         friend class ::ConverterOldToNew;
    389         friend class ::ConverterNewToOld;
    390392};
    391393
     
    691693        unsigned long long id;
    692694
    693         UniqueExpr( const CodeLocation & loc, const Expr * e, unsigned long long i = -1ull );
     695        UniqueExpr( const CodeLocation & loc, const Expr * e, unsigned long long i = -1 );
    694696
    695697        const Expr * accept( Visitor & v ) const override { return v.visit( this ); }
  • src/Parser/ExpressionNode.cc

    raba20d2 r8b34df0  
    357357                                                                        new ConstantExpr( Constant::from_ulong( str.size() + 1 - 2 ) ), // +1 for '\0' and -2 for '"'
    358358                                                                        false, false );
    359         Expression * ret = new ConstantExpr( Constant( at, str, std::nullopt ) );
     359        Expression * ret = new ConstantExpr( Constant( at, str, (unsigned long long int)0 ) ); // constant 0 is ignored for pure string value
    360360        if ( units.length() != 0 ) {
    361361                ret = new UntypedExpr( new NameExpr( units ), { ret } );
  • src/ResolvExpr/Candidate.hpp

    raba20d2 r8b34df0  
    99// Author           : Aaron B. Moss
    1010// Created On       : Wed Jun 5 14:30:00 2019
    11 // Last Modified By : Andrew Beach
    12 // Last Modified On : Wed Jun 12 14:15:00 2019
    13 // Update Count     : 2
     11// Last Modified By : Aaron B. Moss
     12// Last Modified On : Wed Jun 5 14:30:00 2019
     13// Update Count     : 1
    1414//
    1515
     
    4949
    5050        Candidate() : expr(), cost( Cost::zero ), cvtCost( Cost::zero ), env(), open(), need() {}
    51 
     51       
    5252        Candidate( const ast::Expr * x, const ast::TypeEnvironment & e )
    5353        : expr( x ), cost( Cost::zero ), cvtCost( Cost::zero ), env( e ), open(), need() {}
    5454
    5555        Candidate( const Candidate & o, const ast::Expr * x )
    56         : expr( x ), cost( o.cost ), cvtCost( Cost::zero ), env( o.env ), open( o.open ),
     56        : expr( x ), cost( o.cost ), cvtCost( Cost::zero ), env( o.env ), open( o.open ), 
    5757          need( o.need ) {}
    58 
    59         Candidate(
    60                 const ast::Expr * x, ast::TypeEnvironment && e, ast::OpenVarSet && o,
    61                 ast::AssertionSet && n, const Cost & c, const Cost & cvt = Cost::zero )
    62         : expr( x ), cost( c ), cvtCost( cvt ), env( std::move( e ) ), open( std::move( o ) ),
     58       
     59        Candidate( 
     60                const ast::Expr * x, ast::TypeEnvironment && e, ast::OpenVarSet && o, 
     61                ast::AssertionSet && n, const Cost & c )
     62        : expr( x ), cost( c ), cvtCost( Cost::zero ), env( std::move( e ) ), open( std::move( o ) ),
    6363          need( n.begin(), n.end() ) {}
    6464};
  • src/SynTree/Constant.cc

    raba20d2 r8b34df0  
    2222#include "Type.h"    // for BasicType, Type, Type::Qualifiers, PointerType
    2323
    24 Constant::Constant( Type * type, std::string rep, std::optional<unsigned long long> ival ) : type( type ), rep( rep ), ival( ival ) {}
     24Constant::Constant( Type * type, std::string rep, unsigned long long val ) : type( type ), rep( rep ), val( val ) {}
     25Constant::Constant( Type * type, std::string rep, double val ) : type( type ), rep( rep ), val( val ) {}
    2526
    26 Constant::Constant( const Constant &other ) : BaseSyntaxNode( other ), rep( other.rep ), ival( other.ival ) {
     27Constant::Constant( const Constant &other ) : BaseSyntaxNode( other ), rep( other.rep ), val( other.val ) {
    2728        type = other.type->clone();
    2829}
     
    3435}
    3536
     37Constant Constant::from_char( char c ) {
     38        return Constant( new BasicType( Type::Qualifiers(), BasicType::Char ), std::to_string( c ), (unsigned long long int)c );
     39}
     40
    3641Constant Constant::from_int( int i ) {
    3742        return Constant( new BasicType( Type::Qualifiers(), BasicType::SignedInt ), std::to_string( i ), (unsigned long long int)i );
     
    4045Constant Constant::from_ulong( unsigned long i ) {
    4146        return Constant( new BasicType( Type::Qualifiers(), BasicType::LongUnsignedInt ), std::to_string( i ), (unsigned long long int)i );
     47}
     48
     49Constant Constant::from_double( double d ) {
     50        return Constant( new BasicType( Type::Qualifiers(), BasicType::Double ), std::to_string( d ), d );
     51}
     52
     53Constant Constant::from_string( std::string const & str ) {
     54        return Constant(
     55                new ArrayType(
     56                        noQualifiers,
     57                        new BasicType( Type::Qualifiers( Type::Const ), BasicType::Char ),
     58                        new ConstantExpr( Constant::from_int( str.size() + 1 /* \0 */ )),
     59                        false, false ),
     60                std::string("\"") + str + "\"", (unsigned long long int)0 );
    4261}
    4362
     
    5574unsigned long long Constant::get_ival() const {
    5675        assertf( strict_dynamic_cast<BasicType*>(type)->isInteger(), "Attempt to retrieve ival from non-integer constant." );
    57         return ival.value();
     76        return val.ival;
     77}
     78
     79double Constant::get_dval() const {
     80        assertf( ! strict_dynamic_cast<BasicType*>(type)->isInteger(), "Attempt to retrieve dval from integer constant." );
     81        return val.dval;
    5882}
    5983
    6084void Constant::print( std::ostream &os, Indenter ) const {
    61         os << "(" << rep << " " << (ival ? toString(ival.value()) : "") ;
     85        os << "(" << rep << " " << val.ival;
    6286        if ( type ) {
    6387                os << ": ";
  • src/SynTree/Constant.h

    raba20d2 r8b34df0  
    1818#include <iosfwd>     // for ostream
    1919#include <string>     // for string
    20 #include <optional>   // for optional
    2120
    2221#include "BaseSyntaxNode.h"
     
    2827class Constant : public BaseSyntaxNode {
    2928  public:
    30         Constant( Type * type, std::string rep, std::optional<unsigned long long> i );
     29        Constant( Type * type, std::string rep, unsigned long long val );
     30        Constant( Type * type, std::string rep, double val );
    3131        Constant( const Constant & other );
    3232        virtual ~Constant();
     
    3939        void set_value( std::string newValue ) { rep = newValue; }
    4040        unsigned long long get_ival() const;
     41        double get_dval() const;
    4142
    4243        /// generates a boolean constant of the given bool
    4344        static Constant from_bool( bool b );
     45        /// generates a char constant of the given char
     46        static Constant from_char( char c );
    4447        /// generates an integer constant of the given int
    4548        static Constant from_int( int i );
    4649        /// generates an integer constant of the given unsigned long int
    4750        static Constant from_ulong( unsigned long i );
     51        /// generates a floating point constant of the given double
     52        static Constant from_double( double d );
     53        /// generates an array of chars constant of the given string
     54        static Constant from_string( std::string const & s );
    4855
    4956        /// generates a null pointer value for the given type. void * if omitted.
     
    5663        Type * type;
    5764        std::string rep;
    58         std::optional<unsigned long long> ival;
    59 
    60         friend class ConverterOldToNew;
     65        union Val {
     66                unsigned long long ival;
     67                double dval;
     68                Val( unsigned long long ival ) : ival( ival ) {}
     69                Val( double dval ) : dval( dval ) {}
     70        } val;
    6171};
    6272
  • tests/.in/manipulatorsInput.txt

    raba20d2 r8b34df0  
    55abcyyy
    66aaaaaaaaxxxxxxxxaabbccbbdddwwwbbbbbbbbwwwwwwwwaaaaaaaawwwwwwww
     7abc
    78abc
    89xx
  • tests/io2.cfa

    raba20d2 r8b34df0  
    1010// Created On       : Wed Mar  2 16:56:02 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Jun 13 16:43:14 2019
    13 // Update Count     : 120
     12// Last Modified On : Sun Jun  9 08:07:42 2019
     13// Update Count     : 117
    1414//
    1515
     
    4949        in       | f | d | ld;                                                                  // floating point
    5050        in       | fc | dc | ldc;                                                               // floating-point complex
    51         in       | s1 | wdi( size, s2 );                                                // C string, length unchecked and checked
     51        in       | cstr( s1 ) | wd( size, cstr( s2 ) );                 // C string, length unchecked and checked
    5252        sout | nl;
    5353
  • tests/manipulatorsInput.cfa

    raba20d2 r8b34df0  
    77// Created On       : Sat Jun  8 17:58:54 2019
    88// Last Modified By : Peter A. Buhr
    9 // Last Modified On : Thu Jun 13 17:41:43 2019
    10 // Update Count     : 37
     9// Last Modified On : Mon Jun 10 18:38:35 2019
     10// Update Count     : 8
    1111//
    1212
     
    1717        {
    1818                char s[] = "yyyyyyyyyyyyyyyyyyyy";
    19                 const char sk[] = "abc";
    20                 scanf( "abc " ); scanf( sk ); for ( 5 ) scanf( "%*c" ); printf( "1 %s\n", s );
    21                 scanf( "%s", s );                                               printf( "2 %s\n", s );
    22                 scanf( "%*s" );                                                 printf( "3 %s\n", s );
    23                 scanf( "%8s", s );                                              printf( "4 %s\n", s );
    24                 scanf( "%*8s" );                                                printf( "5 %s\n", s );
     19                scanf( "abc" );                 printf( "1 %s\n", s );
     20                scanf( "%s", s );                                       printf( "2 %s\n", s );
     21                scanf( "%*s" );                                         printf( "3 %s\n", s );
     22                scanf( "%8s", s );                                      printf( "4 %s\n", s );
     23                scanf( "%*8s" );                                        printf( "5 %s\n", s );
    2524
    26                 scanf( "%[abc]", s );                                   printf( "6 %s\n", s );
    27                 scanf( "%[^abc]", s );                                  printf( "7 %s\n", s );
    28                 scanf( "%*[abc]" );                                             printf( "8 %s\n", s );
    29                 scanf( "%*[^abc]" );                                    printf( "9 %s\n", s );
    30                 scanf( "%8[abc]", s );                                  printf( "10 %s\n", s );
    31                 scanf( "%8[^abc]", s );                                 printf( "11 %s\n", s );
    32                 scanf( "%*8[abc]" );                                    printf( "12 %s\n", s );
    33                 scanf( "%*8[^abc]" );                                   printf( "13 %s\n", s );
     25                scanf( "%[abc]", s );                           printf( "6 %s\n", s );
     26                scanf( "%[^abc]", s );                          printf( "7 %s\n", s );
     27                scanf( "%*[abc]" );                                     printf( "8 %s\n", s );
     28                scanf( "%*[^abc]" );                            printf( "9 %s\n", s );
     29                scanf( "%8[abc]", s );                          printf( "10 %s\n", s );
     30                scanf( "%8[^abc]", s );                         printf( "11 %s\n", s );
     31                scanf( "%*8[abc]" );                            printf( "12 %s\n", s );
     32                scanf( "%*8[^abc]" );                           printf( "13 %s\n", s );
    3433        }
    3534        {
    3635                char s[] = "yyyyyyyyyyyyyyyyyyyy";
    37                 char sk[] = "abc";
    38                 sin /*| "abc "*/ | skip( sk ) | skip( 5 );      sout | "1" | s;
    39                 sin | s;                                                                sout | "2" | s;
    40                 sin | ignore( s );                                              sout | "3" | s;
    41                 sin | wdi( 8, s );                                              sout | "4" | s;
    42                 sin | ignore( wdi( 8, s ) );                    sout | "5" | s;
     36                sin | skip( "abc" );                                    sout | "1" | s;
     37                sin | cstr( s );                                                sout | "2" | s;
     38                sin | ignore( cstr( s ) );                              sout | "3" | s;
     39                sin | wd( 8, cstr( s ) );                               sout | "4" | s;
     40                sin | ignore( wd( 8, cstr( s ) ) );             sout | "5" | s;
    4341
    4442                sin | incl( "abc", s );                                 sout | "6" | s;
     
    4644                sin | ignore( incl( "abc", s ) );               sout | "8" | s;
    4745                sin | ignore( excl( "abc", s ) );               sout | "9" | s;
    48                 sin | wdi( 8, incl( "abc", s ) );               sout | "10" | s;
    49                 sin | wdi( 8, excl( "abc", s ) );               sout | "11" | s;
    50                 sin | ignore( wdi( 8, incl( "abc", s ) ) );     sout | "12" | s;
    51                 sin | ignore( wdi( 8, excl( "abc", s ) ) );     sout | "13" | s;
     46                sin | wd( 8, incl( "abc", s ) );                sout | "10" | s;
     47                sin | wd( 8, excl( "abc", s ) );                sout | "11" | s;
     48                sin | ignore( wd( 8, incl( "abc", s ) ) );      sout | "12" | s;
     49                sin | ignore( wd( 8, excl( "abc", s ) ) );      sout | "13" | s;
    5250        }
    5351        {
    5452                char c;
    55                 sin | c;                                                                sout | c;
    56                 sin | ignore( c );                                              sout | c;
     53                sin | c;                                                        sout | c;
     54                sin | ignore( c );                                      sout | c;
    5755
    5856                signed char sc;
    59                 sin | sc;                                                               sout | sc;
    60                 sin | wdi( 3, sc );                                             sout | sc;
    61                 sin | ignore( sc );                                             sout | sc;
    62                 sin | ignore( wdi( 3, sc ) );                   sout | sc;
     57                sin | sc;                                                       sout | sc;
     58                sin | wdi( 3, sc );                                     sout | sc;
     59                sin | ignore( sc );                                     sout | sc;
     60                sin | ignore( wdi( 3, sc ) );           sout | sc;
    6361
    6462                unsigned char usc;
    65                 sin | usc;                                                              sout | usc;
    66                 sin | wdi( 3, usc );                                    sout | usc;
    67                 sin | ignore( usc );                                    sout | usc;
    68                 sin | ignore( wdi( 3, usc ) );                  sout | usc;
     63                sin | usc;                                                      sout | usc;
     64                sin | wdi( 3, usc );                            sout | usc;
     65                sin | ignore( usc );                            sout | usc;
     66                sin | ignore( wdi( 3, usc ) );          sout | usc;
    6967
    7068                signed short int ssi;
    71                 sin | ssi;                                                              sout | ssi;
    72                 sin | wdi( 3, ssi );                                    sout | ssi;
    73                 sin | ignore( ssi );                                    sout | ssi;
    74                 sin | ignore( wdi( 3, ssi ) );                  sout | ssi;
     69                sin | ssi;                                                      sout | ssi;
     70                sin | wdi( 3, ssi );                            sout | ssi;
     71                sin | ignore( ssi );                            sout | ssi;
     72                sin | ignore( wdi( 3, ssi ) );          sout | ssi;
    7573
    7674                unsigned short int usi;
    77                 sin | usi;                                                              sout | usi;
    78                 sin | wdi( 3, usi );                                    sout | usi;
    79                 sin | ignore( usi );                                    sout | usi;
    80                 sin | ignore( wdi( 3, usi ) );                  sout | usi;
     75                sin | usi;                                                      sout | usi;
     76                sin | wdi( 3, usi );                            sout | usi;
     77                sin | ignore( usi );                            sout | usi;
     78                sin | ignore( wdi( 3, usi ) );          sout | usi;
    8179
    8280                signed int si;
    83                 sin | si;                                                               sout | si;
    84                 sin | wdi( 3, si );                                             sout | si;
    85                 sin | ignore( si );                                             sout | si;
    86                 sin | ignore( wdi( 3, si ) );                   sout | si;
     81                sin | si;                                                       sout | si;
     82                sin | wdi( 3, si );                                     sout | si;
     83                sin | ignore( si );                                     sout | si;
     84                sin | ignore( wdi( 3, si ) );           sout | si;
    8785
    8886                unsigned int ui;
    89                 sin | ui;                                                               sout | ui;
    90                 sin | wdi( 3, ui );                                             sout | ui;
    91                 sin | ignore( ui );                                             sout | ui;
    92                 sin | ignore( wdi( 3, ui ) );                   sout | ui;
     87                sin | ui;                                                       sout | ui;
     88                sin | wdi( 3, ui );                                     sout | ui;
     89                sin | ignore( ui );                                     sout | ui;
     90                sin | ignore( wdi( 3, ui ) );           sout | ui;
    9391
    9492                signed long int sli;
    95                 sin | sli;                                                              sout | sli;
    96                 sin | wdi( 3, sli );                                    sout | sli;
    97                 sin | ignore( sli );                                    sout | sli;
    98                 sin | ignore( wdi( 3, sli ) );                  sout | sli;
     93                sin | sli;                                                      sout | sli;
     94                sin | wdi( 3, sli );                            sout | sli;
     95                sin | ignore( sli );                            sout | sli;
     96                sin | ignore( wdi( 3, sli ) );          sout | sli;
    9997
    10098                unsigned long int uli;
    101                 sin | uli;                                                              sout | uli;
    102                 sin | wdi( 3, uli );                                    sout | uli;
    103                 sin | ignore( uli );                                    sout | uli;
    104                 sin | ignore( wdi( 3, uli ) );                  sout | uli;
     99                sin | uli;                                                      sout | uli;
     100                sin | wdi( 3, uli );                            sout | uli;
     101                sin | ignore( uli );                            sout | uli;
     102                sin | ignore( wdi( 3, uli ) );          sout | uli;
    105103
    106104                signed long long int slli;
    107                 sin | slli;                                                             sout | slli;
    108                 sin | wdi( 3, slli );                                   sout | slli;
    109                 sin | ignore( slli );                                   sout | slli;
    110                 sin | ignore( wdi( 3, slli ) );                 sout | slli;
     105                sin | slli;                                                     sout | slli;
     106                sin | wdi( 3, slli );                           sout | slli;
     107                sin | ignore( slli );                           sout | slli;
     108                sin | ignore( wdi( 3, slli ) );         sout | slli;
    111109
    112110                unsigned long long int ulli;
    113                 sin | ulli;                                                             sout | ulli;
    114                 sin | wdi( 3, ulli );                                   sout | ulli;
    115                 sin | ignore( ulli );                                   sout | ulli;
    116                 sin | ignore( wdi( 3, ulli ) );                 sout | ulli;
     111                sin | ulli;                                                     sout | ulli;
     112                sin | wdi( 3, ulli );                           sout | ulli;
     113                sin | ignore( ulli );                           sout | ulli;
     114                sin | ignore( wdi( 3, ulli ) );         sout | ulli;
    117115
    118116                float f;
    119                 sin | f;                                                                sout | f;
    120                 sin | wdi( 8, f );                                              sout | f;
    121                 sin | ignore( f );                                              sout | f;
    122                 sin | ignore( wdi( 8, f ) );                    sout | f;
     117                sin | f;                                                        sout | f;
     118                sin | wdi( 8, f );                                      sout | f;
     119                sin | ignore( f );                                      sout | f;
     120                sin | ignore( wdi( 8, f ) );            sout | f;
    123121
    124122                double d;
    125                 sin | d;                                                                sout | d;
    126                 sin | wdi( 8, d );                                              sout | d;
    127                 sin | ignore( d );                                              sout | d;
    128                 sin | ignore( wdi( 8, d ) );                    sout | d;
     123                sin | d;                                                        sout | d;
     124                sin | wdi( 8, d );                                      sout | d;
     125                sin | ignore( d );                                      sout | d;
     126                sin | ignore( wdi( 8, d ) );            sout | d;
    129127
    130128                long double ld;
    131                 sin | ld;                                                               sout | ld;
    132                 sin | wdi( 8, ld );                                             sout | ld;
    133                 sin | ignore( ld );                                             sout | ld;
    134                 sin | ignore( wdi( 8, ld ) );                   sout | ld;
     129                sin | ld;                                                       sout | ld;
     130                sin | wdi( 8, ld );                                     sout | ld;
     131                sin | ignore( ld );                                     sout | ld;
     132                sin | ignore( wdi( 8, ld ) );           sout | ld;
    135133
    136134                float _Complex fc;
    137                 sin | fc;                                                               sout | fc;
    138                 sin | wdi( 8, fc );                                             sout | fc;
    139                 sin | ignore( fc );                                             sout | fc;
    140                 sin | ignore( wdi( 8, fc ) );                   sout | fc;
     135                sin | fc;                                                       sout | fc;
     136                sin | wdi( 8, fc );                                     sout | fc;
     137                sin | ignore( fc );                                     sout | fc;
     138                sin | ignore( wdi( 8, fc ) );           sout | fc;
    141139
    142140                double _Complex dc;
    143                 sin | dc;                                                               sout | dc;
    144                 sin | wdi( 8, dc );                                             sout | dc;
    145                 sin | ignore( dc );                                             sout | dc;
    146                 sin | ignore( wdi( 8, dc ) );                   sout | dc;
     141                sin | dc;                                                       sout | dc;
     142                sin | wdi( 8, dc );                                     sout | dc;
     143                sin | ignore( dc );                                     sout | dc;
     144                sin | ignore( wdi( 8, dc ) );           sout | dc;
    147145
    148146                long double _Complex ldc;
    149                 sin | ldc;                                                              sout | ldc;
    150                 sin | wdi( 8, ldc );                                    sout | ldc;
    151                 sin | ignore( ldc );                                    sout | ldc;
    152                 sin | ignore( wdi( 8, ldc ) );                  sout | ldc;
     147                sin | ldc;                                                      sout | ldc;
     148                sin | wdi( 8, ldc );                            sout | ldc;
     149                sin | ignore( ldc );                            sout | ldc;
     150                sin | ignore( wdi( 8, ldc ) );          sout | ldc;
    153151        }
    154152} // main
     
    156154// Local Variables: //
    157155// tab-width: 4 //
    158 // compile-command: "cfa -Wall -Wextra manipulatorsInput.cfa" //
     156// compile-command: "cfa manipulatorsInput.cfa" //
    159157// End: //
Note: See TracChangeset for help on using the changeset viewer.