Changeset c653b37


Ignore:
Timestamp:
Jun 28, 2018, 4:04:11 PM (5 years ago)
Author:
Rob Schluntz <rschlunt@…>
Branches:
ADT, aaron-thesis, arm-eh, ast-experimental, cleanup-dtors, deferred_resn, demangler, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, no_list, persistent-indexer, pthread-emulation, qualifiedEnum
Children:
e3b2474
Parents:
a12c81f3 (diff), 944ce47 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:/u/cforall/software/cfa/cfa-cc

Files:
3 added
2 deleted
40 edited

Legend:

Unmodified
Added
Removed
  • doc/bibliography/pl.bib

    ra12c81f3 rc653b37  
    11831183        that is ``compiled''.
    11841184    },
    1185     comment     = {
    1186         Imagine the program, including the subroutines, spread out over a
    1187         table, with the compiler dropping Jello on the parts as they are
    1188         compiled.  At first little drops appear in seemingly random places.
    1189         These get bigger and combine with other drops to form growing
    1190         globs.  When two globs meet, ripples will go out through each as
    1191         they adjust to each other's presence, although the parts of the
    1192         globs that formed first are less affected by the ripples.  When
    1193         compilation is complete, there is one congealed mass.
    1194     }
    11951185}
    11961186
     
    13881378        Process-valued expressions and process variables.  Processes have
    13891379        execution priority: Create {\em process-type-name}(args) [with
    1390         priority(p)],
    1391         and the priority can be changed on the fly.  Complicated guard/
    1392         screen structure on accept: accept {\em transaction}(param names)
     1380        priority(p)], and the priority can be changed on the fly.  Complicated
     1381        guard/screen structure on accept: accept {\em transaction}(param names)
    13931382        [suchthat (exp)] [by (exp)] [compoundstatement].  Accepts cannot
    13941383        appear in functions!  Can specify timeouts on transaction calls.
     
    18331822}
    18341823
    1835 @article{Moore75,
    1836     keywords    = {approximation methods, integrated circuits},
    1837     contributer = {pabuhr@plg},
    1838     author      = {Gordon E. Moore},
    1839     title       = {Progress in Digital Integrated Electronics},
    1840     journal     = {Technical Digest, International Electron Devices Meeting, IEEE},
    1841     year        = 1975,
    1842     pages       = {11-13},
     1824@misc{CS343,
     1825    keywords    = {uC++ teaching},
     1826    contributer = {pabuhr@plg},
     1827    key         = {Peter Buhr},
     1828    title       = {CS343},
     1829    year        = 2017,
     1830    howpublished= {\href{https://www.student.cs.uwaterloo.ca/~cs343}{https://\-www.student.cs.uwaterloo.ca/\-~cs343}},
    18431831}
    18441832
     
    25682556    year        = 1979,
    25692557    pages       = {24-32}
     2558}
     2559
     2560@inproceedings{XaaS,
     2561    keywords    = {Everything as a Service, Anything as a Service, Cloud computing, SOA},
     2562    contributer = {pabuhr@plg},
     2563    author      = {Duan, Yucong and Fu, Guohua and Zhou, Nianjun and Sun, Xiaobing and Narendra, Nanjangud C. and Hu, Bo},
     2564    title       = {Everything As a Service (XaaS) on the Cloud: Origins, Current and Future Trends},
     2565    booktitle   = {Proceedings of the 2015 IEEE 8th International Conference on Cloud Computing},
     2566    series      = {CLOUD'15},
     2567    year        = {2015},
     2568    pages       = {621--628},
     2569    publisher   = {IEEE Computer Society},
     2570    address     = {Washington, DC, USA},
    25702571}
    25712572
     
    27792780    title       = {Extending Modula-2 to Build Large, Integrated Systems},
    27802781    journal     = {IEEE Software},
    2781     month       = nov, year = 1986,
    2782     volume      = 3, number = 6, pages = {46-57},
     2782    month       = nov,
     2783    year        = 1986,
     2784    volume      = 3,
     2785    number      = 6,
     2786    pages       = {46-57},
    27832787    comment     = {
    27842788        Exceptions can have a parameter.  Procedures can declare the
     
    48934897}
    48944898
    4895 @techreport{OpenMP,
     4899@manual{OpenMP,
    48964900    keywords    = {concurrency, openmp, spmd},
    48974901    contributer = {pabuhr@plg},
    4898     author      = {OpenMP Architecture Review Board},
    4899     title       = {OpenMP Application Program Interface, Version 4.0},
    4900     month       = jul,
    4901     year        = 2013,
    4902     note        = {\href{http://www.openmp.org/mp-documents/OpenMP4.0.0.pdf}{http://\-www.openmp.org/\-mp-documents/\-OpenMP4.0.0.pdf}},
     4902    key         = {OpenMP},
     4903    title       = {OpenMP Application Program Interface, Version 4.5},
     4904    month       = nov,
     4905    year        = 2015,
     4906    note        = {\href{https://www.openmp.org/wp-content/uploads/openmp-4.5.pdf}{https://\-www.openmp.org/\-wp-content/\-uploads/\-openmp-4.5.pdf}},
    49034907}
    49044908
     
    57535757}
    57545758
     5759@article{Moore75,
     5760    keywords    = {approximation methods, integrated circuits},
     5761    contributer = {pabuhr@plg},
     5762    author      = {Gordon E. Moore},
     5763    title       = {Progress in Digital Integrated Electronics},
     5764    journal     = {Technical Digest, International Electron Devices Meeting, IEEE},
     5765    year        = 1975,
     5766    pages       = {11-13},
     5767}
     5768
    57555769@article{promises,
    57565770    keywords    = {futures, Argus, call streams, rpc},
    57575771    contributer = {gjditchfield@plg},
    57585772    author      = {Barbara Liskov and Liuba Shrira},
    5759     title       = {Promises: Linguistic Support for Efficient Asynchronous
    5760           Procedure Calls in Distributed Systems},
     5773    title       = {Promises: Linguistic Support for Efficient Asynchronous Procedure Calls in Distributed Systems},
    57615774    journal     = sigplan,
    57625775    year        = 1988,
  • doc/papers/OOPSLA17/Makefile

    ra12c81f3 rc653b37  
    3333
    3434DOCUMENT = generic_types.pdf
     35BASE = ${basename ${DOCUMENT}}
    3536
    3637# Directives #
     
    4142
    4243clean :
    43         @rm -frv ${DOCUMENT} ${basename ${DOCUMENT}}.ps ${Build}
     44        @rm -frv ${DOCUMENT} ${BASE}.ps ${Build}
    4445
    4546# File Dependencies #
    4647
    47 ${DOCUMENT} : ${basename ${DOCUMENT}}.ps
     48${DOCUMENT} : ${BASE}.ps
    4849        ps2pdf $<
    4950
    50 ${basename ${DOCUMENT}}.ps : ${basename ${DOCUMENT}}.dvi
     51${BASE}.ps : ${BASE}.dvi
    5152        dvips ${Build}/$< -o $@
    5253
    53 ${basename ${DOCUMENT}}.dvi : Makefile ${Build} ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ../../bibliography/pl.bib
     54${BASE}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
     55                ../../bibliography/pl.bib | ${Build}
    5456        # Must have *.aux file containing citations for bibtex
    5557        if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename $@}.tex ; fi
     
    6365## Define the default recipes.
    6466
    65 ${Build}:
     67${Build} :
    6668        mkdir -p ${Build}
    6769
     
    6971        gnuplot -e Build="'${Build}/'" evaluation/timing.gp
    7072
    71 %.tex : %.fig
     73%.tex : %.fig | ${Build}
    7274        fig2dev -L eepic $< > ${Build}/$@
    7375
    74 %.ps : %.fig
     76%.ps : %.fig | ${Build}
    7577        fig2dev -L ps $< > ${Build}/$@
    7678
    77 %.pstex : %.fig
     79%.pstex : %.fig | ${Build}
    7880        fig2dev -L pstex $< > ${Build}/$@
    7981        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
  • doc/papers/concurrency/Makefile

    ra12c81f3 rc653b37  
    1515SOURCES = ${addsuffix .tex, \
    1616Paper \
    17 style/style \
    18 style/cfa-format \
    1917}
    2018
    2119FIGURES = ${addsuffix .tex, \
    22 monitor \
    23 ext_monitor \
    2420int_monitor \
    2521dependency \
     22RunTimeStructure \
    2623}
    2724
    2825PICTURES = ${addsuffix .pstex, \
     26monitor \
     27ext_monitor \
    2928system \
    3029monitor_structs \
     
    5958        dvips ${Build}/$< -o $@
    6059
    61 ${BASE}.dvi : Makefile ${Build} ${BASE}.out.ps WileyNJD-AMA.bst ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
    62                 annex/local.bib ../../bibliography/pl.bib
     60${BASE}.dvi : Makefile ${BASE}.out.ps WileyNJD-AMA.bst ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
     61                annex/local.bib ../../bibliography/pl.bib | ${Build}
    6362        # Must have *.aux file containing citations for bibtex
    6463        if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename $@}.tex ; fi
    65         ${BibTeX} ${Build}/${basename $@}
     64        -${BibTeX} ${Build}/${basename $@}
    6665        # Some citations reference others so run again to resolve these citations
    6766        ${LaTeX} ${basename $@}.tex
    68         ${BibTeX} ${Build}/${basename $@}
     67        -${BibTeX} ${Build}/${basename $@}
    6968        # Run again to finish citations
    7069        ${LaTeX} ${basename $@}.tex
     
    7271## Define the default recipes.
    7372
    74 ${Build}:
     73${Build} :
    7574        mkdir -p ${Build}
    7675
    77 ${BASE}.out.ps: ${Build}
     76${BASE}.out.ps : | ${Build}
    7877        ln -fs ${Build}/Paper.out.ps .
    7978
    80 WileyNJD-AMA.bst:
     79WileyNJD-AMA.bst :
    8180        ln -fs ../AMA/AMA-stix/ama/WileyNJD-AMA.bst .
    8281
    83 %.tex : %.fig ${Build}
     82%.tex : %.fig | ${Build}
    8483        fig2dev -L eepic $< > ${Build}/$@
    8584
    86 %.ps : %.fig ${Build}
     85%.ps : %.fig | ${Build}
    8786        fig2dev -L ps $< > ${Build}/$@
    8887
    89 %.pstex : %.fig ${Build}
     88%.pstex : %.fig | ${Build}
    9089        fig2dev -L pstex $< > ${Build}/$@
    9190        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
  • doc/papers/concurrency/Paper.tex

    ra12c81f3 rc653b37  
    2121\renewcommand{\thesubfigure}{(\Alph{subfigure})}
    2222\captionsetup{justification=raggedright,singlelinecheck=false}
    23 \usepackage{siunitx}
    24 \sisetup{binary-units=true}
     23\usepackage{dcolumn}                                            % align decimal points in tables
     24\usepackage{capt-of}
    2525
    2626\hypersetup{breaklinks=true}
     
    235235\CFA is a modern, polymorphic, \emph{non-object-oriented} extension of the C programming language.
    236236This paper discusses the design of the concurrency and parallelism features in \CFA, and the concurrent runtime-system.
    237 These features are created from scratch as ISO C lacks concurrency, relying largely on the pthreads library.
     237These features are created from scratch as ISO C lacks concurrency, relying largely on the pthreads library for concurrency.
    238238Coroutines and lightweight (user) threads are introduced into the language.
    239239In addition, monitors are added as a high-level mechanism for mutual exclusion and synchronization.
    240240A unique contribution is allowing multiple monitors to be safely acquired simultaneously.
    241241All features respect the expectations of C programmers, while being fully integrate with the \CFA polymorphic type-system and other language features.
    242 Finally, experimental results are presented to compare the performance of the new features with similar mechanisms in other concurrent programming-languages.
     242Finally, experimental results show comparable performance of the new features with similar mechanisms in other concurrent programming-languages.
    243243}%
    244244
     
    257257While the simplest concurrency system is a thread and a lock, this low-level approach is hard to master.
    258258An easier approach for programmers is to support higher-level constructs as the basis of concurrency.
    259 Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{Hochstein05}.
    260 Examples of high-level approaches are task (work) based~\cite{TBB}, implicit threading~\cite{OpenMP}, monitors~\cite{Java}, channels~\cite{CSP,Go}, and message passing~\cite{Erlang,MPI}.
     259Indeed, for highly-productive concurrent-programming, high-level approaches are much more popular~\cite{Hochstein05}.
     260Examples of high-level approaches are jobs (thread pool)~\cite{TBB}, implicit threading~\cite{OpenMP}, monitors~\cite{Java}, channels~\cite{CSP,Go}, and message passing~\cite{Erlang,MPI}.
    261261
    262262The following terminology is used.
    263263A \newterm{thread} is a fundamental unit of execution that runs a sequence of code and requires a stack to maintain state.
    264 Multiple simultaneous threads give rise to \newterm{concurrency}, which requires locking to ensure safe communication and access to shared data.
    265 % Correspondingly, concurrency is defined as the concepts and challenges that occur when multiple independent (sharing memory, timing dependencies, \etc) concurrent threads are introduced.
     264Multiple simultaneous threads give rise to \newterm{concurrency}, which requires locking to ensure access to shared data and safe communication.
    266265\newterm{Locking}, and by extension \newterm{locks}, are defined as a mechanism to prevent progress of threads to provide safety.
    267266\newterm{Parallelism} is running multiple threads simultaneously.
    268267Parallelism implies \emph{actual} simultaneous execution, where concurrency only requires \emph{apparent} simultaneous execution.
    269268As such, parallelism only affects performance, which is observed through differences in space and/or time at runtime.
    270 
    271269Hence, there are two problems to be solved: concurrency and parallelism.
    272270While these two concepts are often combined, they are distinct, requiring different tools~\cite[\S~2]{Buhr05a}.
    273 Concurrency tools handle synchronization and mutual exclusion, while parallelism tools handle performance, cost and resource utilization.
     271Concurrency tools handle mutual exclusion and synchronization, while parallelism tools handle performance, cost, and resource utilization.
    274272
    275273The proposed concurrency API is implemented in a dialect of C, called \CFA.
    276 The paper discusses how the language features are added to the \CFA translator with respect to parsing, semantic, and type checking, and the corresponding high-performance runtime-library to implement the concurrency features.
     274The paper discusses how the language features are added to the \CFA translator with respect to parsing, semantics, and type checking, and the corresponding high-performance runtime-library to implement the concurrent features.
    277275
    278276
     
    282280Extended versions and explanation of the following code examples are available at the \CFA website~\cite{Cforall} or in Moss~\etal~\cite{Moss18}.
    283281
    284 \CFA is an extension of ISO-C, and hence, supports all C paradigms.
     282\CFA is a non-object-oriented extension of ISO-C, and hence, supports all C paradigms.
    285283%It is a non-object-oriented system-language, meaning most of the major abstractions have either no runtime overhead or can be opted out easily.
    286 Like C, the basics of \CFA revolve around structures and routines.
     284Like C, the building blocks of \CFA are structures and routines.
    287285Virtually all of the code generated by the \CFA translator respects C memory layouts and calling conventions.
    288 While \CFA is not an object-oriented language, lacking the concept of a receiver (\eg @this@) and nominal inheritance-relationships, C does have a notion of objects: ``region of data storage in the execution environment, the contents of which can represent values''~\cite[3.15]{C11}.
    289 While some \CFA features are common in object-oriented programming-languages, they are an independent capability allowing \CFA to adopt them while retaining a procedural paradigm.
     286While \CFA is not object oriented, lacking the concept of a receiver (\eg @this@) and nominal inheritance-relationships, C does have a notion of objects: ``region of data storage in the execution environment, the contents of which can represent values''~\cite[3.15]{C11}.
     287While some \CFA features are common in object-oriented programming, they are independent capabilities allowing \CFA to adopt them while maintaining a procedural paradigm.
    290288
    291289
     
    296294int x = 1, y = 2, z = 3;
    297295int * p1 = &x, ** p2 = &p1,  *** p3 = &p2,      $\C{// pointers to x}$
    298         `&` r1 = x,  `&&` r2 = r1,  `&&&` r3 = r2;      $\C{// references to x}$
     296    `&` r1 = x,   `&&` r2 = r1,   `&&&` r3 = r2;        $\C{// references to x}$
    299297int * p4 = &z, `&` r4 = z;
    300298
     
    411409\end{cquote}
    412410Overloading is important for \CFA concurrency since the runtime system relies on creating different types to represent concurrency objects.
    413 Therefore, overloading is necessary to prevent the need for long prefixes and other naming conventions to prevent name clashes.
    414 As seen in Section~\ref{basics}, routine @main@ is heavily overloaded.
    415 
    416 Variable overloading is useful in the parallel semantics of the @with@ statement for fields with the same name:
     411Therefore, overloading eliminates long prefixes and other naming conventions to prevent name clashes.
     412As seen in Section~\ref{s:Concurrency}, routine @main@ is heavily overloaded.
     413For example, variable overloading is useful in the parallel semantics of the @with@ statement for fields with the same name:
    417414\begin{cfa}
    418415struct S { int `i`; int j; double m; } s;
     
    428425}
    429426\end{cfa}
    430 For parallel semantics, both @s.i@ and @t.i@ are visible the same type, so only @i@ is ambiguous without qualification.
     427For parallel semantics, both @s.i@ and @t.i@ are visible with the same type, so only @i@ is ambiguous without qualification.
    431428
    432429
     
    439436\begin{tabular}{@{}ll@{\hspace{\parindentlnth}}|@{\hspace{\parindentlnth}}l@{}}
    440437\begin{cfa}
    441 int ++? (int op);
    442 int ?++ (int op);
    443 int `?+?` (int op1, int op2);
     438int ++?(int op);
     439int ?++(int op);
     440int `?+?`(int op1, int op2);
    444441int ?<=?(int op1, int op2);
    445442int ?=? (int & op1, int op2);
     
    470467
    471468
     469\subsection{Constructors / Destructors}
     470
     471Object lifetime is a challenge in non-managed programming languages.
     472\CFA responds with \CC-like constructors and destructors:
     473\begin{cfa}
     474struct VLA { int len, * data; };                        $\C{// variable length array of integers}$
     475void ?{}( VLA & vla ) with ( vla ) { len = 10;  data = alloc( len ); }  $\C{// default constructor}$
     476void ?{}( VLA & vla, int size, char fill ) with ( vla ) { len = size;  data = alloc( len, fill ); } // initialization
     477void ?{}( VLA & vla, VLA other ) { vla.len = other.len;  vla.data = other.data; } $\C{// copy, shallow}$
     478void ^?{}( VLA & vla ) with ( vla ) { free( data ); } $\C{// destructor}$
     479{
     480        VLA  x,            y = { 20, 0x01 },     z = y; $\C{// z points to y}$
     481        // $\LstCommentStyle{\color{red}\ \ \ x\{\};\ \ \ \ \ \ \ \ \ y\{ 20, 0x01 \};\ \ \ \ \ \ \ \ \ \ z\{ z, y \};\ \ \ \ \ \ \ implicit calls}$
     482        ^x{};                                                                   $\C{// deallocate x}$
     483        x{};                                                                    $\C{// reallocate x}$
     484        z{ 5, 0xff };                                                   $\C{// reallocate z, not pointing to y}$
     485        ^y{};                                                                   $\C{// deallocate y}$
     486        y{ x };                                                                 $\C{// reallocate y, points to x}$
     487        x{};                                                                    $\C{// reallocate x, not pointing to y}$
     488}       //  $\LstCommentStyle{\color{red}\^{}z\{\};\ \ \^{}y\{\};\ \ \^{}x\{\};\ \ \ implicit calls}$
     489\end{cfa}
     490Like \CC, construction is implicit on allocation (stack/heap) and destruction is implicit on deallocation.
     491The object and all their fields are constructed/destructed.
     492\CFA also provides @new@ and @delete@, which behave like @malloc@ and @free@, in addition to constructing and destructing objects:
     493\begin{cfa}
     494{
     495        ... struct S s = {10}; ...                              $\C{// allocation, call constructor}$
     496}                                                                                       $\C{// deallocation, call destructor}$
     497struct S * s = new();                                           $\C{// allocation, call constructor}$
     498...
     499delete( s );                                                            $\C{// deallocation, call destructor}$
     500\end{cfa}
     501\CFA concurrency uses object lifetime as a means of mutual exclusion and/or synchronization.
     502
     503
    472504\subsection{Parametric Polymorphism}
    473505\label{s:ParametricPolymorphism}
    474506
    475 The signature feature of \CFA is parametric-polymorphic routines~\cite{} with routines generalized using a @forall@ clause (giving the language its name), which allow separately compiled routines to support generic usage over multiple types.
     507The signature feature of \CFA is parametric-polymorphic routines~\cite{Cforall} with routines generalized using a @forall@ clause (giving the language its name), which allow separately compiled routines to support generic usage over multiple types.
    476508For example, the following sum routine works for any type that supports construction from 0 and addition:
    477509\begin{cfa}
     
    486518int i = sum( sa, 5 );                                           $\C{// use S's 0 construction and +}$
    487519\end{cfa}
     520The builtin type @zero_t@ (and @one_t@) overload constant 0 (and 1) for a new types, where both 0 and 1 have special meaning in C.
    488521
    489522\CFA provides \newterm{traits} to name a group of type assertions, where the trait name allows specifying the same set of assertions in multiple locations, preventing repetition mistakes at each routine declaration:
     
    501534
    502535Assertions can be @otype@ or @dtype@.
    503 @otype@ refers to a ``complete'' object, \ie an object has a size, default constructor, copy constructor, destructor and an assignment operator.
    504 @dtype@ only guarantees an object has a size and alignment.
    505 
    506 Using the return type for discrimination, it is possible to write a type-safe @alloc@ based on the C @malloc@:
     536@otype@ refers to a \emph{complete} object, \ie an object has a size, alignment, default constructor, copy constructor, destructor and an assignment operator.
     537@dtype@ refers to an \emph{incomplete} object, \ie, an object only has a size and alignment.
     538
     539Using the return type for overload discrimination, it is possible to write a type-safe @alloc@ based on the C @malloc@:
    507540\begin{cfa}
    508541forall( dtype T | sized(T) ) T * alloc( void ) { return (T *)malloc( sizeof(T) ); }
     
    514547
    515548
    516 \subsection{Constructors / Destructors}
    517 
    518 Object lifetime is a challenge in non-managed programming languages.
    519 \CFA responds with \CC-like constructors and destructors:
    520 \begin{cfa}
    521 struct VLA { int len, * data; };                        $\C{// variable length array of integers}$
    522 void ?{}( VLA & vla ) with ( vla ) { len = 10;  data = alloc( len ); }  $\C{// default constructor}$
    523 void ?{}( VLA & vla, int size, char fill ) with ( vla ) { len = size;  data = alloc( len, fill ); } // initialization
    524 void ?{}( VLA & vla, VLA other ) { vla.len = other.len;  vla.data = other.data; } $\C{// copy, shallow}$
    525 void ^?{}( VLA & vla ) with ( vla ) { free( data ); } $\C{// destructor}$
    526 {
    527         VLA  x,            y = { 20, 0x01 },     z = y; $\C{// z points to y}$
    528         //    x{};         y{ 20, 0x01 };          z{ z, y };
    529         ^x{};                                                                   $\C{// deallocate x}$
    530         x{};                                                                    $\C{// reallocate x}$
    531         z{ 5, 0xff };                                                   $\C{// reallocate z, not pointing to y}$
    532         ^y{};                                                                   $\C{// deallocate y}$
    533         y{ x };                                                                 $\C{// reallocate y, points to x}$
    534         x{};                                                                    $\C{// reallocate x, not pointing to y}$
    535         //  ^z{};  ^y{};  ^x{};
    536 }
    537 \end{cfa}
    538 Like \CC, construction is implicit on allocation (stack/heap) and destruction is implicit on deallocation.
    539 The object and all their fields are constructed/destructed.
    540 \CFA also provides @new@ and @delete@, which behave like @malloc@ and @free@, in addition to constructing and destructing objects:
    541 \begin{cfa}
    542 {       struct S s = {10};                                              $\C{// allocation, call constructor}$
    543         ...
    544 }                                                                                       $\C{// deallocation, call destructor}$
    545 struct S * s = new();                                           $\C{// allocation, call constructor}$
    546 ...
    547 delete( s );                                                            $\C{// deallocation, call destructor}$
    548 \end{cfa}
    549 \CFA concurrency uses object lifetime as a means of synchronization and/or mutual exclusion.
    550 
    551 
    552 \section{Concurrency Basics}\label{basics}
     549\section{Concurrency}
     550\label{s:Concurrency}
    553551
    554552At its core, concurrency is based on multiple call-stacks and scheduling threads executing on these stacks.
    555553Multiple call stacks (or contexts) and a single thread of execution, called \newterm{coroutining}~\cite{Conway63,Marlin80}, does \emph{not} imply concurrency~\cite[\S~2]{Buhr05a}.
    556 In coroutining, the single thread is self-scheduling across the stacks, so execution is deterministic, \ie given fixed inputs, the execution path to the outputs is fixed and predictable.
     554In coroutining, the single thread is self-scheduling across the stacks, so execution is deterministic, \ie the execution path from input to output is fixed and predictable.
    557555A \newterm{stackless} coroutine executes on the caller's stack~\cite{Python} but this approach is restrictive, \eg preventing modularization and supporting only iterator/generator-style programming;
    558556a \newterm{stackfull} coroutine executes on its own stack, allowing full generality.
    559 Only stackfull coroutines are a stepping-stone to concurrency.
    560 
    561 The transition to concurrency, even for execution with a single thread and multiple stacks, occurs when coroutines also context switch to a scheduling oracle, introducing non-determinism from the coroutine perspective~\cite[\S~3]{Buhr05a}.
     557Only stackfull coroutines are a stepping stone to concurrency.
     558
     559The transition to concurrency, even for execution with a single thread and multiple stacks, occurs when coroutines also context switch to a \newterm{scheduling oracle}, introducing non-determinism from the coroutine perspective~\cite[\S~3]{Buhr05a}.
    562560Therefore, a minimal concurrency system is possible using coroutines (see Section \ref{coroutine}) in conjunction with a scheduler to decide where to context switch next.
    563561The resulting execution system now follows a cooperative threading-model, called \newterm{non-preemptive scheduling}.
     
    591589\subsection{Coroutines: A Stepping Stone}\label{coroutine}
    592590
    593 While the focus of this discussion is concurrency and parallelism, it is important to address coroutines, which are a significant building block of a concurrency system.
     591While the focus of this discussion is concurrency and parallelism, it is important to address coroutines, which are a significant building block of a concurrency system (but not concurrent among themselves).
    594592Coroutines are generalized routines allowing execution to be temporarily suspend and later resumed.
    595593Hence, unlike a normal routine, a coroutine may not terminate when it returns to its caller, allowing it to be restarted with the values and execution location present at the point of suspension.
     
    598596Therefore, the core \CFA coroutine-API for has two fundamental features: independent call-stacks and @suspend@/@resume@ operations.
    599597
    600 For example, a problem made easier with coroutines is unbounded generators, \eg generating an infinite sequence of Fibonacci numbers, where Figure~\ref{f:C-fibonacci} shows conventional approaches for writing a Fibonacci generator in C.
     598For example, a problem made easier with coroutines is unbounded generators, \eg generating an infinite sequence of Fibonacci numbers
    601599\begin{displaymath}
    602600\mathsf{fib}(n) = \left \{
     
    608606\right.
    609607\end{displaymath}
    610 Figure~\ref{f:GlobalVariables} illustrates the following problems:
    611 unique unencapsulated global variables necessary to retain state between calls;
    612 only one Fibonacci generator;
    613 execution state must be explicitly retained via explicit state variables.
    614 Figure~\ref{f:ExternalState} addresses these issues:
    615 unencapsulated program global variables become encapsulated structure variables;
    616 unique global variables are replaced by multiple Fibonacci objects;
    617 explicit execution state is removed by precomputing the first two Fibonacci numbers and returning $\mathsf{fib}(n-2)$.
     608where Figure~\ref{f:C-fibonacci} shows conventional approaches for writing a Fibonacci generator in C.
     609Figure~\ref{f:GlobalVariables} illustrates the following problems: unique unencapsulated global variables necessary to retain state between calls, only one Fibonacci generator, and execution state must be explicitly retained via explicit state variables.
     610Figure~\ref{f:ExternalState} addresses these issues: unencapsulated program global variables become encapsulated structure variables, unique global variables are replaced by multiple Fibonacci objects, and explicit execution state is removed by precomputing the first two Fibonacci numbers and returning $\mathsf{fib}(n-2)$.
    618611
    619612\begin{figure}
     
    723716\subfloat[1 State, internal variables]{\label{f:Coroutine1State}\usebox\myboxB}
    724717\caption{\CFA Coroutine Fibonacci Implementations}
    725 \label{f:fibonacci-cfa}
     718\label{f:cfa-fibonacci}
    726719\end{figure}
    727720
    728721Using a coroutine, it is possible to express the Fibonacci formula directly without any of the C problems.
    729 Figure~\ref{f:Coroutine3States} creates a @coroutine@ type:
    730 \begin{cfa}
    731 `coroutine` Fib { int fn; };
    732 \end{cfa}
    733 which provides communication, @fn@, for the \newterm{coroutine main}, @main@, which runs on the coroutine stack, and possibly multiple interface routines @next@.
     722Figure~\ref{f:Coroutine3States} creates a @coroutine@ type, @`coroutine` Fib { int fn; }@, which provides communication, @fn@, for the \newterm{coroutine main}, @main@, which runs on the coroutine stack, and possibly multiple interface routines, \eg @next@.
    734723Like the structure in Figure~\ref{f:ExternalState}, the coroutine type allows multiple instances, where instances of this type are passed to the (overloaded) coroutine main.
    735 The coroutine main's stack holds the state for the next generation, @f1@ and @f2@, and the code has the three suspend points, representing the three states in the Fibonacci formula, to context switch back to the caller's resume.
     724The coroutine main's stack holds the state for the next generation, @f1@ and @f2@, and the code has the three suspend points, representing the three states in the Fibonacci formula, to context switch back to the caller's @resume@.
    736725The interface routine @next@, takes a Fibonacci instance and context switches to it using @resume@;
    737726on restart, the Fibonacci field, @fn@, contains the next value in the sequence, which is returned.
    738 The first @resume@ is special because it cocalls the coroutine at its coroutine main and allocates the stack;
     727The first @resume@ is special because it allocates the coroutine stack and cocalls its coroutine main on that stack;
    739728when the coroutine main returns, its stack is deallocated.
    740729Hence, @Fib@ is an object at creation, transitions to a coroutine on its first resume, and transitions back to an object when the coroutine main finishes.
     
    758747\end{quote}
    759748The example takes advantage of resuming a coroutine in the constructor to prime the loops so the first character sent for formatting appears inside the nested loops.
    760 The destruction provides a newline if formatted text ends with a full line.
     749The destruction provides a newline, if formatted text ends with a full line.
     761750Figure~\ref{f:CFmt} shows the C equivalent formatter, where the loops of the coroutine are flattened (linearized) and rechecked on each call because execution location is not retained between calls.
     751(Linearized code is the bane of device drivers.)
    762752
    763753\begin{figure}
     
    843833\end{figure}
    844834
    845 The previous examples are \newterm{asymmetric (semi) coroutine}s because one coroutine always calls a resuming routine for another coroutine, and the resumed coroutine always suspends back to its last resumer, similar to call/return for normal routines
    846 However, there is no stack growth because @resume@/@suspend@ context switch to existing stack-frames rather than create new ones.
    847 \newterm{Symmetric (full) coroutine}s have a coroutine call a resuming routine for another coroutine, which eventually forms a resuming-call cycle.
     835The previous examples are \newterm{asymmetric (semi) coroutine}s because one coroutine always calls a resuming routine for another coroutine, and the resumed coroutine always suspends back to its last resumer, similar to call/return for normal routines.
     836However, @resume@ and @suspend@ context switch among existing stack-frames, rather than create new ones so there is no stack growth.
     837\newterm{Symmetric (full) coroutine}s have a coroutine call to a resuming routine for another coroutine, and its coroutine main has a call to another resuming routine, which eventually forms a resuming-call cycle.
    848838(The trivial cycle is a coroutine resuming itself.)
    849839This control flow is similar to recursion for normal routines, but again there is no stack growth from the context switch.
     
    927917Figure~\ref{f:ProdCons} shows a producer/consumer symmetric-coroutine performing bi-directional communication.
    928918Since the solution involves a full-coroutining cycle, the program main creates one coroutine in isolation, passes this coroutine to its partner, and closes the cycle at the call to @start@.
    929 The @start@ routine communicates both the number of elements to be produced and the consumer into the producer's coroutine structure.
     919The @start@ routine communicates both the number of elements to be produced and the consumer into the producer's coroutine-structure.
    930920Then the @resume@ to @prod@ creates @prod@'s stack with a frame for @prod@'s coroutine main at the top, and context switches to it.
    931921@prod@'s coroutine main starts, creates local variables that are retained between coroutine activations, and executes $N$ iterations, each generating two random values, calling the consumer to deliver the values, and printing the status returned from the consumer.
     
    933923The producer call to @delivery@ transfers values into the consumer's communication variables, resumes the consumer, and returns the consumer status.
    934924For the first resume, @cons@'s stack is initialized, creating local variables retained between subsequent activations of the coroutine.
    935 The consumer iterates until the @done@ flag is set, prints, increments status, and calls back to the producer via @payment@, and on return from @payment@, prints the receipt from the producer and increments @money@ (inflation).
    936 The call from the consumer to the @payment@ introduces the cycle between producer and consumer.
     925The consumer iterates until the @done@ flag is set, prints the values delivered by the producer, increments status, and calls back to the producer via @payment@, and on return from @payment@, prints the receipt from the producer and increments @money@ (inflation).
     926The call from the consumer to @payment@ introduces the cycle between producer and consumer.
    937927When @payment@ is called, the consumer copies values into the producer's communication variable and a resume is executed.
    938928The context switch restarts the producer at the point where it was last context switched, so it continues in @delivery@ after the resume.
     
    959949
    960950Object-oriented inheritance provides extra fields and code in a restricted context, but it requires programmers to explicitly perform the inheritance:
    961 \begin{cfa}
    962 struct mycoroutine $\textbf{\textsf{inherits}}$ baseCoroutine { ... }
     951\begin{cfa}[morekeywords={class,inherits}]
     952class mycoroutine inherits baseCoroutine { ... }
    963953\end{cfa}
    964954and the programming language (and possibly its tool set, \eg debugger) may need to understand @baseCoroutine@ because of the stack.
    965 Furthermore, the execution of constructs/destructors is in the wrong order for certain operations, \eg for threads;
    966 \eg, if the thread is implicitly started, it must start \emph{after} all constructors, because the thread relies on a completely initialized object, but the inherited constructor runs \emph{before} the derived.
      955Furthermore, the execution of constructors/destructors is in the wrong order for certain operations.
     956For example, for threads if the thread is implicitly started, it must start \emph{after} all constructors, because the thread relies on a completely initialized object, but the inherited constructor runs \emph{before} the derived.
    967957
     967957An alternative is composition:
     
    984974symmetric_coroutine<>::yield_type
    985975\end{cfa}
    986 Similarly, the canonical threading paradigm is often based on routine pointers, \eg @pthread@~\cite{pthreads}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}.
     976Similarly, the canonical threading paradigm is often based on routine pointers, \eg @pthreads@~\cite{Butenhof97}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}.
    987977However, the generic thread-handle (identifier) is limited (few operations), unless it is wrapped in a custom type.
    988978\begin{cfa}
     
    1001991Note, the type @coroutine_t@ must be an abstract handle to the coroutine, because the coroutine descriptor and its stack are non-copyable.
    1002992Copying the coroutine descriptor results in copies being out of date with the current state of the stack.
    1003 Correspondingly, copying the stack results is copies being out of date with coroutine descriptor, and pointers in the stack being out of date to data on the stack.
      993Correspondingly, copying the stack results in copies being out of date with the coroutine descriptor, and pointers in the stack being out of date to data on the stack.
    1004994(There is no mechanism in C to find all stack-specific pointers and update them as part of a copy.)
    1005995
     
     10151005Furthermore, implementing coroutines without language support also displays the power of a programming language.
    10161006While this is ultimately the option used for idiomatic \CFA code, coroutines and threads can still be constructed without using the language support.
    1017 The reserved keyword eases use for the common cases.
     1007The reserved keyword simply eases use for the common cases.
    10181008
    10191009Part of the mechanism to generalize coroutines is using a \CFA trait, which defines a coroutine as anything satisfying the trait @is_coroutine@, and this trait is used to restrict coroutine-manipulation routines:
     
    10301020The @main@ routine has no return value or additional parameters because the coroutine type allows an arbitrary number of interface routines with corresponding arbitrary typed input/output values versus fixed ones.
    10311021The generic routines @suspend@ and @resume@ can be redefined, but any object passed to them is a coroutine since it must satisfy the @is_coroutine@ trait to compile.
    1032 The advantage of this approach is that users can easily create different types of coroutines, for example, changing the memory layout of a coroutine is trivial when implementing the @get_coroutine@ routine, and possibly redefining @suspend@ and @resume@.
     1022The advantage of this approach is that users can easily create different types of coroutines, \eg changing the memory layout of a coroutine is trivial when implementing the @get_coroutine@ routine, and possibly redefining @suspend@ and @resume@.
    10331023The \CFA keyword @coroutine@ implicitly implements the getter and forward declarations required for implementing the coroutine main:
    10341024\begin{cquote}
     
    10941084\end{tabular}
    10951085\end{cquote}
    1096 (The qualifier @mutex@ for the destructor parameter is discussed in Section~\ref{s:Monitors}.)
     1086(The qualifier @mutex@ for the destructor parameter is discussed in Section~\ref{s:Monitor}.)
    10971087Like a coroutine, the statically-typed @main@ routine is the starting point (first stack frame) of a user thread.
    10981088The difference is that a coroutine borrows a thread from its caller, so the first thread resuming a coroutine creates an instance of @main@;
    10991089whereas, a user thread receives its own thread from the runtime system, which starts in @main@ as some point after the thread constructor is run.\footnote{
    1100 The \lstinline@main@ routine is already a special routine in C (where the program begins), so it is a natural extension of the semantics to use overloading to declare mains for different coroutines/threads (the normal main being the main of the initial thread).}
     1090The \lstinline@main@ routine is already a special routine in C, \ie where the program's initial thread begins, so it is a natural extension of this semantics to use overloading to declare \lstinline@main@s for user coroutines and threads.}
    11011091No return value or additional parameters are necessary for this routine because the task type allows an arbitrary number of interface routines with corresponding arbitrary typed input/output values.
    11021092
     
    11751165The program uses heap-based threads because each thread needs different constructor values.
    11761166(Python provides a simple iteration mechanism to initialize array elements to different values allowing stack allocation.)
    1177 The allocation/deallocation pattern appears unusual because allocated objects are immediately deleted without any intervening code.
     1167The allocation/deallocation pattern appears unusual because allocated objects are immediately deallocated without any intervening code.
    11781168However, for threads, the deletion provides implicit synchronization, which is the intervening code.
     11791169While the subtotals are added in linear order rather than completion order, which slightly inhibits concurrency, the computation is restricted by the critical-path thread (\ie the thread that takes the longest), and so any inhibited concurrency is very small as totalling the subtotals is trivial.
     
    11891179void main( Adder & adder ) with( adder ) {
    11901180    subtotal = 0;
    1191     for ( int c = 0; c < cols; c += 1 ) {
    1192                 subtotal += row[c];
    1193     }
     1181    for ( int c = 0; c < cols; c += 1 ) { subtotal += row[c]; }
    11941182}
    11951183int main() {
     
    12161204
    12171205Uncontrolled non-deterministic execution is meaningless.
    1218 To reestablish meaningful execution requires mechanisms to reintroduce determinism (\ie restrict non-determinism), called mutual exclusion and synchronization, where mutual exclusion is an access-control mechanism on data shared by threads, and synchronization is a timing relationship among threads~\cite[\S~4]{Buhr05a}.
     1206To reestablish meaningful execution requires mechanisms to reintroduce determinism, \ie restrict non-determinism, called mutual exclusion and synchronization, where mutual exclusion is an access-control mechanism on data shared by threads, and synchronization is a timing relationship among threads~\cite[\S~4]{Buhr05a}.
    12191207Since many deterministic challenges appear with the use of mutable shared state, some languages/libraries disallow it, \eg Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka~\cite{Akka} (Scala).
    1220 In these paradigms, interaction among concurrent objects is performed by stateless message-passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely relate to networking concepts (\eg channels~\cite{CSP,Go}).
    1221 However, in call/return-based languages, these approaches force a clear distinction (\ie introduce a new programming paradigm) between regular and concurrent computation (\ie routine call versus message passing).
      1208In these paradigms, interaction among concurrent objects is performed by stateless message-passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely related to networking concepts, \eg channels~\cite{CSP,Go}.
     1209However, in call/return-based languages, these approaches force a clear distinction, \ie introduce a new programming paradigm between regular and concurrent computation, \eg routine call versus message passing.
    12221210Hence, a programmer must learn and manipulate two sets of design patterns.
    12231211While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
     12241212In contrast, approaches based on stateful models more closely resemble the standard call/return programming-model, resulting in a single programming paradigm.
    12251213
    1226 At the lowest level, concurrent control is implemented by atomic operations, upon which different kinds of locks mechanism are constructed, \eg semaphores~\cite{Dijkstra68b}, barriers, and path expressions~\cite{Campbell74}.
     1214At the lowest level, concurrent control is implemented by atomic operations, upon which different kinds of locking mechanisms are constructed, \eg semaphores~\cite{Dijkstra68b}, barriers, and path expressions~\cite{Campbell74}.
    12271215However, for productivity it is always desirable to use the highest-level construct that provides the necessary efficiency~\cite{Hochstein05}.
    12281216A newer approach for restricting non-determinism is transactional memory~\cite{Herlihy93}.
     
    12441232However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use.
    12451233Methods range from low-level locks, which are fast and flexible but require significant attention for correctness, to higher-level concurrency techniques, which sacrifice some performance to improve ease of use.
    1246 Ease of use comes by either guaranteeing some problems cannot occur (\eg deadlock free), or by offering a more explicit coupling between shared data and critical section.
    1247 For example, the \CC @std::atomic<T>@ offers an easy way to express mutual-exclusion on a restricted set of operations (\eg reading/writing) for numerical types.
     1234Ease of use comes by either guaranteeing some problems cannot occur, \eg deadlock free, or by offering a more explicit coupling between shared data and critical section.
     1235For example, the \CC @std::atomic<T>@ offers an easy way to express mutual-exclusion on a restricted set of operations, \eg reading/writing, for numerical types.
    12481236However, a significant challenge with locks is composability because it takes careful organization for multiple locks to be used while preventing deadlock.
    12491237Easing composability is another feature higher-level mutual-exclusion mechanisms can offer.
     
    12541242Synchronization enforces relative ordering of execution, and synchronization tools provide numerous mechanisms to establish these timing relationships.
    12551243Low-level synchronization primitives offer good performance and flexibility at the cost of ease of use;
    1256 higher-level mechanisms often simplify usage by adding better coupling between synchronization and data (\eg message passing), or offering a simpler solution to otherwise involved challenges, \eg barrier lock.
     1244higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, \eg message passing, or offering a simpler solution to otherwise involved challenges, \eg barrier lock.
    12571245Often synchronization is used to order access to a critical section, \eg ensuring a reader thread is the next kind of thread to enter a critical section.
    12581246If a writer thread is scheduled for next access, but another reader thread acquires the critical section first, that reader has \newterm{barged}.
     
    12651253
    12661254
    1267 \section{Monitors}
    1268 \label{s:Monitors}
     1255\section{Monitor}
     1256\label{s:Monitor}
    12691257
    12701258A \textbf{monitor} is a set of routines that ensure mutual exclusion when accessing shared state.
     
    12721260The strong association with the call/return paradigm eases programmability, readability and maintainability, at a slight cost in flexibility and efficiency.
    12731261
    1274 Note, like coroutines/threads, both locks and monitors require an abstract handle to reference them, because at their core, both mechanisms are manipulating non-copyable shared state.
     1262Note, like coroutines/threads, both locks and monitors require an abstract handle to reference them, because at their core, both mechanisms are manipulating non-copyable shared-state.
    12751263Copying a lock is insecure because it is possible to copy an open lock and then use the open copy when the original lock is closed to simultaneously access the shared data.
    12761264Copying a monitor is secure because both the lock and shared data are copies, but copying the shared data is meaningless because it no longer represents a unique entity.
     
    13261314\end{cfa}
    13271315
    1328 Mandatory monitor qualifiers have the benefit of being self-documented, but requiring both @mutex@ and \lstinline[morekeywords=nomutex]@nomutex@ for all monitor parameter is redundant.
      1316Mandatory monitor qualifiers have the benefit of being self-documenting, but requiring both @mutex@ and \lstinline[morekeywords=nomutex]@nomutex@ for all monitor parameters is redundant.
     13291317Instead, one of the qualifier semantics can be the default, and the other required.
    1330 For example, assume the safe @mutex@ option for a monitor parameter because assuming \lstinline[morekeywords=nomutex]@nomutex@ may cause subtle errors.
    1331 On the other hand, assuming \lstinline[morekeywords=nomutex]@nomutex@ is the \emph{normal} parameter behaviour, stating explicitly ``this parameter is not special''.
     1318For example, assume the safe @mutex@ qualifier for all monitor parameters because assuming \lstinline[morekeywords=nomutex]@nomutex@ may cause subtle errors.
     1319On the other hand, assuming \lstinline[morekeywords=nomutex]@nomutex@ is the \emph{normal} parameter behaviour, stating explicitly \emph{this parameter is not special}.
    13321320Providing a default qualifier implies knowing whether a parameter is a monitor.
    13331321Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred.
     
    13751363\end{cfa}
    13761364(While object-oriented monitors can be extended with a mutex qualifier for multiple-monitor members, no prior example of this feature could be found.)
    1377 In practice, writing multi-locking routines that do not deadlocks is tricky.
     1365In practice, writing multi-locking routines that do not deadlock is tricky.
    13781366Having language support for such a feature is therefore a significant asset for \CFA.
    13791367
    13801368The capability to acquire multiple locks before entering a critical section is called \newterm{bulk acquire}.
    1381 In previous example, \CFA guarantees the order of acquisition is consistent across calls to different routines using the same monitors as arguments.
     1369In the previous example, \CFA guarantees the order of acquisition is consistent across calls to different routines using the same monitors as arguments.
    13821370This consistent ordering means acquiring multiple monitors is safe from deadlock.
    13831371However, users can force the acquiring order.
     
    13951383In the calls to @bar@ and @baz@, the monitors are acquired in opposite order.
    13961384
    1397 However, such use leads to lock acquiring order problems resulting in deadlock~\cite{Lister77}, where detecting it requires dynamically tracking of monitor calls, and dealing with it requires implement rollback semantics~\cite{Dice10}.
     1385However, such use leads to lock acquiring order problems resulting in deadlock~\cite{Lister77}, where detecting it requires dynamic tracking of monitor calls, and dealing with it requires rollback semantics~\cite{Dice10}.
    13981386In \CFA, safety is guaranteed by using bulk acquire of all monitors to shared objects, whereas other monitor systems provide no aid.
    13991387While \CFA provides only a partial solution, the \CFA partial solution handles many useful cases.
     
    14061394}
    14071395\end{cfa}
    1408 This example shows a trivial solution to the bank-account transfer problem~\cite{BankTransfer}.
     1396This example shows a trivial solution to the bank-account transfer problem.
    14091397Without multi- and bulk acquire, the solution to this problem requires careful engineering.
    14101398
     
    14151403Like Java, \CFA offers an alternative @mutex@ statement to reduce refactoring and naming.
    14161404\begin{cquote}
    1417 \begin{tabular}{@{}c|@{\hspace{\parindentlnth}}c@{}}
    1418 routine call & @mutex@ statement \\
     1405\begin{tabular}{@{}l@{\hspace{3\parindentlnth}}l@{}}
    14191406\begin{cfa}
    14201407monitor M {};
     
    14361423
    14371424\end{cfa}
     1425\\
     1426\multicolumn{1}{c}{\textbf{routine call}} & \multicolumn{1}{c}{\lstinline@mutex@ \textbf{statement}}
    14381427\end{tabular}
    14391428\end{cquote}
    14401429
    14411430
    1442 \section{Internal Scheduling}
    1443 \label{s:InternalScheduling}
     1431\section{Scheduling}
     1432\label{s:Scheduling}
    14441433
    14451434While monitor mutual-exclusion provides safe access to shared data, the monitor data may indicate that a thread accessing it cannot proceed.
     
    14501439\newterm{Internal scheduling} is characterized by each thread entering the monitor and making an individual decision about proceeding or blocking, while \newterm{external scheduling} is characterized by an entering thread making a decision about proceeding for itself and on behalf of other threads attempting entry.
    14511440
    1452 Figure~\ref{f:BBInt} shows a \CFA bounded-buffer with internal scheduling, where producers/consumers enter the monitor, see the buffer is full/empty, and block on an appropriate condition lock, @full@/@empty@.
     1441Figure~\ref{f:BBInt} shows a \CFA generic bounded-buffer with internal scheduling, where producers/consumers enter the monitor, see the buffer is full/empty, and block on an appropriate condition lock, @full@/@empty@.
    14531442The @wait@ routine atomically blocks the calling thread and implicitly releases the monitor lock(s) for all monitors in the routine's parameter list.
    14541443The appropriate condition lock is signalled to unblock an opposite kind of thread after an element is inserted/removed from the buffer.
    14551444Signalling is unconditional, because signalling an empty condition lock does nothing.
     1445
    14561446Signalling semantics cannot have the signaller and signalled thread in the monitor simultaneously, which means:
    14571447\begin{enumerate}
     
     14631453The signalling thread blocks but is marked for urgent unblocking at the next scheduling point and the signalled thread continues.
    14641454\end{enumerate}
    1465 The first approach is too restrictive, as it precludes solving a reasonable class of problems (\eg dating service).
     1455The first approach is too restrictive, as it precludes solving a reasonable class of problems, \eg dating service (see Figure~\ref{f:DatingService}).
    14661456\CFA supports the next two semantics as both are useful.
    14671457Finally, while it is common to store a @condition@ as a field of the monitor, in \CFA, a @condition@ variable can be created/stored independently.
    1468 Furthermore, a condition variable is tied to a \emph{group} of monitors on first use (called \newterm{branding}), which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors.
     1458Furthermore, a condition variable is tied to a \emph{group} of monitors on first use, called \newterm{branding}, which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors.
    14691459
    14701460\begin{figure}
     
    15351525\end{figure}
    15361526
    1537 Figure~\ref{f:BBExt} shows a \CFA bounded-buffer with external scheduling, where producers/consumers detecting a full/empty buffer block and prevent more producers/consumers from entering the monitor until the buffer has a free/empty slot.
     1527Figure~\ref{f:BBExt} shows a \CFA generic bounded-buffer with external scheduling, where producers/consumers detecting a full/empty buffer block and prevent more producers/consumers from entering the monitor until the buffer has a free/empty slot.
    15381528External scheduling is controlled by the @waitfor@ statement, which atomically blocks the calling thread, releases the monitor lock, and restricts the routine calls that can next acquire mutual exclusion.
    15391529If the buffer is full, only calls to @remove@ can acquire the buffer, and if the buffer is empty, only calls to @insert@ can acquire the buffer.
    1540 Threads making calls to routines that are currently excluded block outside (external) of the monitor on a calling queue, versus blocking on condition queues inside (internal) of the monitor.
     1530Threads making calls to routines that are currently excluded, block outside (external) of the monitor on a calling queue, versus blocking on condition queues inside (internal) of the monitor.
     1531% External scheduling is more constrained and explicit, which helps programmers reduce the non-deterministic nature of concurrency.
     1532External scheduling allows users to wait for events from other threads without concern of unrelated events occurring.
      1533The mechanism can be done in terms of control flow, \eg Ada @accept@ or \uC @_Accept@, or in terms of data, \eg Go channels.
     1534While both mechanisms have strengths and weaknesses, this project uses a control-flow mechanism to stay consistent with other language semantics.
     1535Two challenges specific to \CFA for external scheduling are loose object-definitions (see Section~\ref{s:LooseObjectDefinitions}) and multiple-monitor routines (see Section~\ref{s:Multi-MonitorScheduling}).
    15411536
    15421537For internal scheduling, non-blocking signalling (as in the producer/consumer example) is used when the signaller is providing the cooperation for a waiting thread;
     15431538the signaller enters the monitor and changes state, detects a waiting thread that can use the state, performs a non-blocking signal on the condition queue for the waiting thread, and exits the monitor to run concurrently.
    1544 The waiter unblocks next, takes the state, and exits the monitor.
     1539The waiter unblocks next from the urgent queue, uses/takes the state, and exits the monitor.
    15451540Blocking signalling is the reverse, where the waiter is providing the cooperation for the signalling thread;
    15461541the signaller enters the monitor, detects a waiting thread providing the necessary state, performs a blocking signal to place it on the urgent queue and unblock the waiter.
    1547 The waiter changes state and exits the monitor, and the signaller unblocks next from the urgent queue to take the state.
    1548 
    1549 Figure~\ref{f:DatingService} shows a dating service demonstrating the two forms of signalling: non-blocking and blocking.
     1542The waiter changes state and exits the monitor, and the signaller unblocks next from the urgent queue to use/take the state.
     1543
     1544Figure~\ref{f:DatingService} shows a dating service demonstrating non-blocking and blocking signalling.
    15501545The dating service matches girl and boy threads with matching compatibility codes so they can exchange phone numbers.
    15511546A thread blocks until an appropriate partner arrives.
    1552 The complexity is exchanging phone number in the monitor,
    1553 While the non-barging monitor prevents a caller from stealing a phone number, the monitor mutual-exclusion property
    1554 
    1555 The dating service is an example of a monitor that cannot be written using external scheduling because:
    1556 
    1557 The example in table \ref{tbl:datingservice} highlights the difference in behaviour.
    1558 As mentioned, @signal@ only transfers ownership once the current critical section exits; this behaviour requires additional synchronization when a two-way handshake is needed.
    1559 To avoid this explicit synchronization, the @condition@ type offers the @signal_block@ routine, which handles the two-way handshake as shown in the example.
    1560 This feature removes the need for a second condition variables and simplifies programming.
    1561 Like every other monitor semantic, @signal_block@ uses barging prevention, which means mutual-exclusion is baton-passed both on the front end and the back end of the call to @signal_block@, meaning no other thread can acquire the monitor either before or after the call.
     1547The complexity is exchanging phone numbers in the monitor because of the mutual-exclusion property.
     1548For signal scheduling, the @exchange@ condition is necessary to block the thread finding the match, while the matcher unblocks to take the opposite number, post its phone number, and unblock the partner.
     1549For signal-block scheduling, the implicit urgent-queue replaces the explicit @exchange@-condition and @signal_block@ puts the finding thread on the urgent condition and unblocks the matcher.
     1550
     1551The dating service is an example of a monitor that cannot be written using external scheduling because it requires knowledge of calling parameters to make scheduling decisions, and parameters of waiting threads are unavailable;
     1552as well, an arriving thread may not find a partner and must wait, which requires a condition variable, and condition variables imply internal scheduling.
    15621553
    15631554\begin{figure}
     
    15761567                wait( Girls[ccode] );
    15771568                GirlPhNo = phNo;
    1578                 exchange.signal();
     1569                `exchange.signal();`
    15791570        } else {
    15801571                GirlPhNo = phNo;
    1581                 signal( Boys[ccode] );
    1582                 exchange.wait();
     1572                `signal( Boys[ccode] );`
     1573                `exchange.wait();`
    15831574        } // if
    15841575        return BoyPhNo;
     
    16061597        } else {
    16071598                GirlPhNo = phNo; // make phone number available
    1608                 signal_block( Boys[ccode] ); // restart boy
     1599                `signal_block( Boys[ccode] );` // restart boy
    16091600
    16101601        } // if
     
    16551646}
    16561647\end{cfa}
    1657 must have acquired monitor locks that are greater than or equal to the number of locks for the waiting thread signalled from the front of the condition queue.
    1658 In general, the signaller does not know the order of waiting threads, so in general, it must acquire the maximum number of mutex locks for the worst-case waiting thread.
     1648must have acquired monitor locks that are greater than or equal to the number of locks for the waiting thread signalled from the condition queue.
    16591649
    16601650Similarly, for @waitfor( rtn )@, the default semantics is to atomically block the acceptor and release all acquired mutex types in the parameter list, \ie @waitfor( rtn, m1, m2 )@.
    16611651To override the implicit multi-monitor wait, specific mutex parameter(s) can be specified, \eg @waitfor( rtn, m1 )@.
    1662 Waitfor statically verifies the released monitors are the same as the acquired mutex-parameters of the given routine or routine pointer.
     1652@waitfor@ statically verifies the released monitors are the same as the acquired mutex-parameters of the given routine or routine pointer.
    16631653To statically verify the released monitors match with the accepted routine's mutex parameters, the routine (pointer) prototype must be accessible.
     1654
     1655When an overloaded routine appears in a @waitfor@ statement, calls to any routine with that name are accepted.
     1656The rationale is that members with the same name should perform a similar function, and therefore, all should be eligible to accept a call.
     1657As always, overloaded routines can be disambiguated using a cast:
     1658\begin{cfa}
     1659void rtn( M & mutex m );
     1660`int` rtn( M & mutex m );
     1661waitfor( (`int` (*)( M & mutex ))rtn, m1, m2 );
     1662\end{cfa}
    16641663
    16651664Given the ability to release a subset of acquired monitors can result in a \newterm{nested monitor}~\cite{Lister77} deadlock.
     
    16671666void foo( M & mutex m1, M & mutex m2 ) {
    16681667        ... wait( `e, m1` ); ...                                $\C{// release m1, keeping m2 acquired )}$
    1669 void baz( M & mutex m1, M & mutex m2 ) {        $\C{// must acquire m1 and m2 )}$
     1668void bar( M & mutex m1, M & mutex m2 ) {        $\C{// must acquire m1 and m2 )}$
    16701669        ... signal( `e` ); ...
    16711670\end{cfa}
    1672 The @wait@ only releases @m1@ so the signalling thread cannot acquire both @m1@ and @m2@ to  enter @baz@ to get to the @signal@.
     1671The @wait@ only releases @m1@ so the signalling thread cannot acquire both @m1@ and @m2@ to  enter @bar@ to get to the @signal@.
    16731672While deadlock issues can occur with multiple/nesting acquisition, this issue results from the fact that locks, and by extension monitors, are not perfectly composable.
    16741673
     
    16821681For example, there are no loops in either bounded buffer solution in Figure~\ref{f:GenericBoundedBuffer}.
    16831682Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency.
     1683Furthermore, \CFA concurrency has no spurious wakeup~\cite[\S~9]{Buhr05a}, which eliminates an implicit form of barging.
    16841684
    16851685
     
    17551755However, Figure~\ref{f:OtherWaitingThread} shows this solution is complex depending on other waiters, resulting in choices when the signaller finishes the inner mutex-statement.
    17561756The signaller can retain @m2@ until completion of the outer mutex statement and pass the locks to waiter W1, or it can pass @m2@ to waiter W2 after completing the inner mutex-statement, while continuing to hold @m1@.
    1757 In the latter case, waiter W2 must eventually pass @m2@ to waiter W1, which is complex because W2 may have waited before W1 so it is unaware of W1.
     1757In the latter case, waiter W2 must eventually pass @m2@ to waiter W1, which is complex because W1 may have waited before W2, so W2 is unaware of it.
    17581758Furthermore, there is an execution sequence where the signaller always finds waiter W2, and hence, waiter W1 starves.
    17591759
    17601760While a number of approaches were examined~\cite[\S~4.3]{Delisle18}, the solution chosen for \CFA is a novel technique called \newterm{partial signalling}.
    1761 Signalled threads are moved to an urgent queue and the waiter at the front defines the set of monitors necessary for it to unblock.
     1761Signalled threads are moved to the urgent queue and the waiter at the front defines the set of monitors necessary for it to unblock.
    17621762Partial signalling transfers ownership of monitors to the front waiter.
    17631763When the signaller thread exits or waits in the monitor the front waiter is unblocked if all its monitors are released.
    17641764This solution has the benefit that complexity is encapsulated into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met.
    17651765
    1766 \begin{comment}
    1767 Figure~\ref{f:dependency} shows a slightly different example where a third thread is waiting on monitor @A@, using a different condition variable.
    1768 Because the third thread is signalled when secretly holding @B@, the goal  becomes unreachable.
    1769 Depending on the order of signals (listing \ref{f:dependency} line \ref{line:signal-ab} and \ref{line:signal-a}) two cases can happen:
    1770 
    1771 \paragraph{Case 1: thread $\alpha$ goes first.} In this case, the problem is that monitor @A@ needs to be passed to thread $\beta$ when thread $\alpha$ is done with it.
    1772 \paragraph{Case 2: thread $\beta$ goes first.} In this case, the problem is that monitor @B@ needs to be retained and passed to thread $\alpha$ along with monitor @A@, which can be done directly or possibly using thread $\beta$ as an intermediate.
    1773 \\
    1774 
    1775 Note that ordering is not determined by a race condition but by whether signalled threads are enqueued in FIFO or FILO order.
    1776 However, regardless of the answer, users can move line \ref{line:signal-a} before line \ref{line:signal-ab} and get the reverse effect for listing \ref{f:dependency}.
    1777 
    1778 In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means knowing when to release a group becomes complex and inefficient (see next section) and therefore effectively precludes this approach.
    1779 
    1780 
    1781 \subsubsection{Dependency graphs}
    1782 
    1783 \begin{figure}
    1784 \begin{multicols}{3}
    1785 Thread $\alpha$
    1786 \begin{cfa}[numbers=left, firstnumber=1]
    1787 acquire A
    1788         acquire A & B
    1789                 wait A & B
    1790         release A & B
    1791 release A
    1792 \end{cfa}
    1793 \columnbreak
    1794 Thread $\gamma$
    1795 \begin{cfa}[numbers=left, firstnumber=6, escapechar=|]
    1796 acquire A
    1797         acquire A & B
    1798                 |\label{line:signal-ab}|signal A & B
    1799         |\label{line:release-ab}|release A & B
    1800         |\label{line:signal-a}|signal A
    1801 |\label{line:release-a}|release A
    1802 \end{cfa}
    1803 \columnbreak
    1804 Thread $\beta$
    1805 \begin{cfa}[numbers=left, firstnumber=12, escapechar=|]
    1806 acquire A
    1807         wait A
    1808 |\label{line:release-aa}|release A
    1809 \end{cfa}
    1810 \end{multicols}
    1811 \begin{cfa}[caption={Pseudo-code for the three thread example.},label={f:dependency}]
    1812 \end{cfa}
    1813 \begin{center}
    1814 \input{dependency}
    1815 \end{center}
    1816 \caption{Dependency graph of the statements in listing \ref{f:dependency}}
    1817 \label{fig:dependency}
    1818 \end{figure}
    1819 
    1820 In listing \ref{f:int-bulk-cfa}, there is a solution that satisfies both barging prevention and mutual exclusion.
    1821 If ownership of both monitors is transferred to the waiter when the signaller releases @A & B@ and then the waiter transfers back ownership of @A@ back to the signaller when it releases it, then the problem is solved (@B@ is no longer in use at this point).
    1822 Dynamically finding the correct order is therefore the second possible solution.
    1823 The problem is effectively resolving a dependency graph of ownership requirements.
    1824 Here even the simplest of code snippets requires two transfers and has a super-linear complexity.
    1825 This complexity can be seen in listing \ref{f:explosion}, which is just a direct extension to three monitors, requires at least three ownership transfer and has multiple solutions.
    1826 Furthermore, the presence of multiple solutions for ownership transfer can cause deadlock problems if a specific solution is not consistently picked; In the same way that multiple lock acquiring order can cause deadlocks.
    1827 \begin{figure}
    1828 \begin{multicols}{2}
    1829 \begin{cfa}
    1830 acquire A
    1831         acquire B
    1832                 acquire C
    1833                         wait A & B & C
    1834                 release C
    1835         release B
    1836 release A
    1837 \end{cfa}
    1838 
    1839 \columnbreak
    1840 
    1841 \begin{cfa}
    1842 acquire A
    1843         acquire B
    1844                 acquire C
    1845                         signal A & B & C
    1846                 release C
    1847         release B
    1848 release A
    1849 \end{cfa}
    1850 \end{multicols}
    1851 \begin{cfa}[caption={Extension to three monitors of listing \ref{f:int-bulk-cfa}},label={f:explosion}]
    1852 \end{cfa}
    1853 \end{figure}
    1854 
    1855 Given the three threads example in listing \ref{f:dependency}, figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows the dependency of that statement (\eg $\alpha1$ must happen before $\alpha2$).
    1856 The extra challenge is that this dependency graph is effectively post-mortem, but the runtime system needs to be able to build and solve these graphs as the dependencies unfold.
    1857 Resolving dependency graphs being a complex and expensive endeavour, this solution is not the preferred one.
    1858 
    1859 \subsubsection{Partial Signalling} \label{partial-sig}
    1860 \end{comment}
    1861 
    1862 
    1863 \section{External scheduling} \label{extsched}
    1864 
    1865 An alternative to internal scheduling is external scheduling (see Table~\ref{tbl:sched}).
    1866 
    1867 \begin{comment}
    1868 \begin{table}
    1869 \begin{tabular}{|c|c|c|}
    1870 Internal Scheduling & External Scheduling & Go\\
    1871 \hline
    1872 \begin{uC++}[tabsize=3]
    1873 _Monitor Semaphore {
    1874         condition c;
    1875         bool inUse;
    1876 public:
    1877         void P() {
    1878                 if(inUse)
    1879                         wait(c);
    1880                 inUse = true;
    1881         }
    1882         void V() {
    1883                 inUse = false;
    1884                 signal(c);
    1885         }
    1886 }
    1887 \end{uC++}&\begin{uC++}[tabsize=3]
    1888 _Monitor Semaphore {
    1889 
    1890         bool inUse;
    1891 public:
    1892         void P() {
    1893                 if(inUse)
    1894                         _Accept(V);
    1895                 inUse = true;
    1896         }
    1897         void V() {
    1898                 inUse = false;
    1899 
    1900         }
    1901 }
    1902 \end{uC++}&\begin{Go}[tabsize=3]
    1903 type MySem struct {
    1904         inUse bool
    1905         c     chan bool
    1906 }
    1907 
    1908 // acquire
    1909 func (s MySem) P() {
    1910         if s.inUse {
    1911                 select {
    1912                 case <-s.c:
    1913                 }
    1914         }
    1915         s.inUse = true
    1916 }
    1917 
    1918 // release
    1919 func (s MySem) V() {
    1920         s.inUse = false
    1921 
    1922         // This actually deadlocks
    1923         // when single thread
    1924         s.c <- false
    1925 }
    1926 \end{Go}
    1927 \end{tabular}
    1928 \caption{Different forms of scheduling.}
    1929 \label{tbl:sched}
    1930 \end{table}
    1931 \end{comment}
    1932 
    1933 This method is more constrained and explicit, which helps users reduce the non-deterministic nature of concurrency.
    1934 Indeed, as the following examples demonstrate, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring.
    1935 External scheduling can generally be done either in terms of control flow (\eg Ada with @accept@, \uC with @_Accept@) or in terms of data (\eg Go with channels).
    1936 Of course, both of these paradigms have their own strengths and weaknesses, but for this project, control-flow semantics was chosen to stay consistent with the rest of the languages semantics.
    1937 Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multiple-monitor routines.
    1938 The previous example shows a simple use @_Accept@ versus @wait@/@signal@ and its advantages.
    1939 Note that while other languages often use @accept@/@select@ as the core external scheduling keyword, \CFA uses @waitfor@ to prevent name collisions with existing socket \textbf{api}s.
    1940 
    1941 For the @P@ member above using internal scheduling, the call to @wait@ only guarantees that @V@ is the last routine to access the monitor, allowing a third routine, say @isInUse()@, acquire mutual exclusion several times while routine @P@ is waiting.
    1942 On the other hand, external scheduling guarantees that while routine @P@ is waiting, no other routine than @V@ can acquire the monitor.
    1943 
    1944 % ======================================================================
    1945 % ======================================================================
     1766
    19461767\subsection{Loose Object Definitions}
    1947 % ======================================================================
    1948 % ======================================================================
    1949 In \uC, a monitor class declaration includes an exhaustive list of monitor operations.
    1950 Since \CFA is not object oriented, monitors become both more difficult to implement and less clear for a user:
    1951 
    1952 \begin{cfa}
    1953 monitor A {};
    1954 
    1955 void f(A & mutex a);
    1956 void g(A & mutex a) {
    1957         waitfor(f); // Obvious which f() to wait for
    1958 }
    1959 
    1960 void f(A & mutex a, int); // New different F added in scope
    1961 void h(A & mutex a) {
    1962         waitfor(f); // Less obvious which f() to wait for
    1963 }
    1964 \end{cfa}
    1965 
    1966 Furthermore, external scheduling is an example where implementation constraints become visible from the interface.
    1967 Here is the cfa-code for the entering phase of a monitor:
    1968 \begin{center}
    1969 \begin{tabular}{l}
    1970 \begin{cfa}
    1971         if monitor is free
    1972                 enter
    1973         elif already own the monitor
    1974                 continue
    1975         elif monitor accepts me
    1976                 enter
    1977         else
    1978                 block
    1979 \end{cfa}
    1980 \end{tabular}
    1981 \end{center}
     1768\label{s:LooseObjectDefinitions}
     1769
     1770In an object-oriented programming-language, a class includes an exhaustive list of operations.
     1771However, new members can be added via static inheritance or dynamic members, \eg JavaScript~\cite{JavaScript}.
     1772Similarly, monitor routines can be added at any time in \CFA, making it less clear for programmers and more difficult to implement.
     1773\begin{cfa}
     1774monitor M {};
     1775void `f`( M & mutex m );
     1776void g( M & mutex m ) { waitfor( `f` ); }       $\C{// clear which f}$
     1777void `f`( M & mutex m, int );                           $\C{// different f}$
     1778void h( M & mutex m ) { waitfor( `f` ); }       $\C{// unclear which f}$
     1779\end{cfa}
     1780Hence, the cfa-code for the entering a monitor looks like:
     1781\begin{cfa}
     1782if ( $\textrm{\textit{monitor is free}}$ ) $\LstCommentStyle{// \color{red}enter}$
     1783else if ( $\textrm{\textit{already own monitor}}$ ) $\LstCommentStyle{// \color{red}continue}$
     1784else if ( $\textrm{\textit{monitor accepts me}}$ ) $\LstCommentStyle{// \color{red}enter}$
     1785else $\LstCommentStyle{// \color{red}block}$
     1786\end{cfa}
    19821787For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions.
    1983 However, a fast check for @monitor accepts me@ is much harder to implement depending on the constraints put on the monitors.
    1984 Indeed, monitors are often expressed as an entry queue and some acceptor queue as in Figure~\ref{fig:ClassicalMonitor}.
     1788However, a fast check for \emph{monitor accepts me} is much harder to implement depending on the constraints put on the monitors.
     1789Figure~\ref{fig:ClassicalMonitor} shows monitors are often expressed as an entry (calling) queue, some acceptor queues, and an urgent stack/queue.
    19851790
    19861791\begin{figure}
    19871792\centering
    1988 \subfloat[Classical Monitor] {
     1793\subfloat[Classical monitor] {
    19891794\label{fig:ClassicalMonitor}
    1990 {\resizebox{0.45\textwidth}{!}{\input{monitor}}}
     1795{\resizebox{0.45\textwidth}{!}{\input{monitor.pstex_t}}}
    19911796}% subfloat
    1992 \qquad
    1993 \subfloat[bulk acquire Monitor] {
     1797\quad
     1798\subfloat[Bulk acquire monitor] {
    19941799\label{fig:BulkMonitor}
    1995 {\resizebox{0.45\textwidth}{!}{\input{ext_monitor}}}
     1800{\resizebox{0.45\textwidth}{!}{\input{ext_monitor.pstex_t}}}
    19961801}% subfloat
    1997 \caption{External Scheduling Monitor}
     1802\caption{Monitor Implementation}
     1803\label{f:MonitorImplementation}
    19981804\end{figure}
    19991805
    2000 There are other alternatives to these pictures, but in the case of the left picture, implementing a fast accept check is relatively easy.
    2001 Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (\eg 128) of mutex members.
    2002 This approach requires a unique dense ordering of routines with an upper-bound and that ordering must be consistent across translation units.
    2003 For OO languages these constraints are common, since objects only offer adding member routines consistently across translation units via inheritance.
    2004 However, in \CFA users can extend objects with mutex routines that are only visible in certain translation unit.
    2005 This means that establishing a program-wide dense-ordering among mutex routines can only be done in the program linking phase, and still could have issues when using dynamically shared objects.
    2006 
    2007 The alternative is to alter the implementation as in Figure~\ref{fig:BulkMonitor}.
    2008 Here, the mutex routine called is associated with a thread on the entry queue while a list of acceptable routines is kept separate.
    2009 Generating a mask dynamically means that the storage for the mask information can vary between calls to @waitfor@, allowing for more flexibility and extensions.
    2010 Storing an array of accepted routine pointers replaces the single instruction bitmask comparison with dereferencing a pointer followed by a linear search.
    2011 Furthermore, supporting nested external scheduling (\eg listing \ref{f:nest-ext}) may now require additional searches for the @waitfor@ statement to check if a routine is already queued.
     1806For a fixed (small) number of mutex routines (\eg 128), the accept check reduces to a bitmask of allowed callers, which can be checked with a single instruction.
     1807This approach requires a unique dense ordering of routines with a small upper-bound and the ordering must be consistent across translation units.
     1808For object-oriented languages these constraints are common, but \CFA mutex routines can be added in any scope and are only visible in certain translation units, precluding program-wide dense-ordering among mutex routines.
     1809
     1810Figure~\ref{fig:BulkMonitor} shows the \CFA monitor implementation.
     1811The mutex routine called is associated with each thread on the entry queue, while a list of acceptable routines is kept separately.
     1812The accepted list is a variable-sized array of accepted routine pointers, so the single instruction bitmask comparison is replaced by dereferencing a pointer followed by a (usually short) linear search.
     1813
     1814
     1815\subsection{Multi-Monitor Scheduling}
     1816\label{s:Multi-MonitorScheduling}
     1817
     1818External scheduling, like internal scheduling, becomes significantly more complex for multi-monitor semantics.
     1819Even in the simplest case, new semantics needs to be established.
     1820\begin{cfa}
     1821monitor M {};
     1822void f( M & mutex m1 );
     1823void g( M & mutex m1, M & mutex m2 ) {
     1824        waitfor( f );                                                   $\C{\color{red}// pass m1 or m2 to f?}$
     1825}
     1826\end{cfa}
     1827The solution is for the programmer to disambiguate:
     1828\begin{cfa}
     1829        waitfor( f, m2 );                                               $\C{\color{red}// wait for call to f with argument m2}$
     1830\end{cfa}
     1831Routine @g@ has acquired both locks, so when routine @f@ is called, the lock for monitor @m2@ is passed from @g@ to @f@, while @g@ still holds lock @m1@.
     1832This behaviour can be extended to the multi-monitor @waitfor@ statement.
     1833\begin{cfa}
     1834monitor M {};
     1835void f( M & mutex m1, M & mutex m2 );
     1836void g( M & mutex m1, M & mutex m2 ) {
     1837        waitfor( f, m1, m2 );                                   $\C{\color{red}// wait for call to f with arguments m1 and m2}$
     1838}
     1839\end{cfa}
     1840Again, the set of monitors passed to the @waitfor@ statement must be entirely contained in the set of monitors already acquired by the accepting routine.
     1841
     1842Note, for internal and external scheduling with multiple monitors, a signalling or accepting thread must match exactly, \ie partial matching results in waiting.
     1843\begin{cquote}
     1844\lstDeleteShortInline@%
     1845\begin{tabular}{@{}l@{\hspace{\parindentlnth}}|@{\hspace{\parindentlnth}}l@{}}
     1846\begin{cfa}
     1847monitor M1 {} m11, m12;
     1848monitor M2 {} m2;
     1849condition c;
     1850void f( M1 & mutex m1, M2 & mutex m2 ) {
     1851        signal( c );
     1852}
     1853void g( M1 & mutex m1, M2 & mutex m2 ) {
     1854        wait( c );
     1855}
     1856g( `m11`, m2 ); // block on accept
     1857f( `m12`, m2 ); // cannot fulfil
     1858\end{cfa}
     1859&
     1860\begin{cfa}
     1861monitor M1 {} m11, m12;
     1862monitor M2 {} m2;
     1863
     1864void f( M1 & mutex m1, M2 & mutex m2 ) {
     1865
     1866}
     1867void g( M1 & mutex m1, M2 & mutex m2 ) {
     1868        waitfor( f, m1, m2 );
     1869}
     1870g( `m11`, m2 ); // block on accept
     1871f( `m12`, m2 ); // cannot fulfil
     1872\end{cfa}
     1873\end{tabular}
     1874\lstMakeShortInline@%
     1875\end{cquote}
     1876For both internal and external scheduling, the set of monitors is disjoint so unblocking is impossible.
     1877
     1878
     1879\subsection{Extended \protect\lstinline@waitfor@}
     1880
     1881The extended form of the @waitfor@ statement conditionally accepts one of a group of mutex routines and allows a specific action to be performed \emph{after} the mutex routine finishes.
     1882\begin{cfa}
     1883`when` ( $\emph{conditional-expression}$ )      $\C{// optional guard}$
     1884        waitfor( $\emph{mutex-member-name}$ )
     1885                $\emph{statement}$                                      $\C{// action after call}$
     1886`or` `when` ( $\emph{conditional-expression}$ ) $\C{// optional guard}$
     1887        waitfor( $\emph{mutex-member-name}$ )
     1888                $\emph{statement}$                                      $\C{// action after call}$
     1889`or`    ...                                                                     $\C{// list of waitfor clauses}$
     1890`when` ( $\emph{conditional-expression}$ )      $\C{// optional guard}$
     1891        `timeout`                                                               $\C{// optional terminating timeout clause}$
     1892                $\emph{statement}$                                      $\C{// action after timeout}$
     1893`when` ( $\emph{conditional-expression}$ )      $\C{// optional guard}$
     1894        `else`                                                                  $\C{// optional terminating clause}$
     1895                $\emph{statement}$                                      $\C{// action when no immediate calls}$
     1896\end{cfa}
     1897For a @waitfor@ clause to be executed, its @when@ must be true and an outstanding call to its corresponding member(s) must exist.
     1898The \emph{conditional-expression} of a @when@ may call a routine, but the routine must not block or context switch.
     1899If there are several mutex calls that can be accepted, selection occurs top-to-bottom in the @waitfor@ clauses versus non-deterministically.
     1900If some accept guards are true and there are no outstanding calls to these members, the acceptor is accept-blocked until a call to one of these members is made.
     1901If all the accept guards are false, the statement does nothing, unless there is a terminating @else@ clause with a true guard, which is executed instead.
     1902Hence, the terminating @else@ clause allows a conditional attempt to accept a call without blocking.
     1903If there is a @timeout@ clause, it provides an upper bound on waiting, and can only appear with a conditional @else@, otherwise the timeout cannot be triggered.
     1904In all cases, the statement following is executed \emph{after} a clause is executed to know which of the clauses executed.
     1905
     1906Note, a group of conditional @waitfor@ clauses is \emph{not} the same as a group of @if@ statements, e.g.:
     1907\begin{cfa}
     1908if ( C1 ) waitfor( mem1 );                       when ( C1 ) waitfor( mem1 );
     1909else if ( C2 ) waitfor( mem2 );         or when ( C2 ) waitfor( mem2 );
     1910\end{cfa}
     1911The left example accepts only @mem1@ if @C1@ is true or only @mem2@ if @C2@ is true.
     1912The right example accepts either @mem1@ or @mem2@ if @C1@ and @C2@ are true.
     1913
     1914An interesting use of @waitfor@ is accepting the @mutex@ destructor to know when an object is deallocated.
     1915\begin{cfa}
     1916void insert( Buffer(T) & mutex buffer, T elem ) with( buffer ) {
     1917        if ( count == BufferSize )
     1918                waitfor( remove, buffer ) {
     1919                        elements[back] = elem;
     1920                        back = ( back + 1 ) % BufferSize;
     1921                        count += 1;
     1922                } or `waitfor( ^?{}, buffer )` throw insertFail;
     1923}
     1924\end{cfa}
     1925When the buffer is deallocated, the current waiter is unblocked and informed, so it can perform an appropriate action.
     1926However, the basic @waitfor@ semantics do not support this functionality, since using an object after its destructor is called is undefined.
     1927Therefore, to make this useful capability work, the semantics for accepting the destructor is the same as @signal@, \ie the call to the destructor is placed on the urgent queue and the acceptor continues execution, which throws an exception to the acceptor and then the caller is unblocked from the urgent queue to deallocate the object.
     1928Accepting the destructor is an idiomatic way to terminate a thread in \CFA.
     1929
     1930
     1931\subsection{\protect\lstinline@mutex@ Threads}
     1932
     1933Threads in \CFA are monitors to allow direct communication among threads, \ie threads can have mutex routines that are called by other threads.
     1934Hence, all monitor features are available when using threads.
     1935Figure~\ref{f:pingpong} shows an example of two threads directly calling each other and accepting calls from each other in a cycle.
     1936Note, both ping/pong threads are globally declared, @pi@/@po@, and hence, start (and possibly complete) before the program main starts.
    20121937
    20131938\begin{figure}
    2014 \begin{cfa}[caption={Example of nested external scheduling},label={f:nest-ext}]
    2015 monitor M {};
    2016 void foo( M & mutex a ) {}
    2017 void bar( M & mutex b ) {
    2018         // Nested in the waitfor(bar, c) call
    2019         waitfor(foo, b);
    2020 }
    2021 void baz( M & mutex c ) {
    2022         waitfor(bar, c);
    2023 }
    2024 
    2025 \end{cfa}
     1939\lstDeleteShortInline@%
     1940\begin{cquote}
     1941\begin{cfa}
     1942thread Ping {} pi;
     1943thread Pong {} po;
     1944void ping( Ping & mutex ) {}
     1945void pong( Pong & mutex ) {}
     1946int main() {}
     1947\end{cfa}
     1948\begin{tabular}{@{}l@{\hspace{3\parindentlnth}}l@{}}
     1949\begin{cfa}
     1950void main( Ping & pi ) {
     1951        for ( int i = 0; i < 10; i += 1 ) {
     1952                `waitfor( ping, pi );`
     1953                `pong( po );`
     1954        }
     1955}
     1956\end{cfa}
     1957&
     1958\begin{cfa}
     1959void main( Pong & po ) {
     1960        for ( int i = 0; i < 10; i += 1 ) {
     1961                `ping( pi );`
     1962                `waitfor( pong, po );`
     1963        }
     1964}
     1965\end{cfa}
     1966\end{tabular}
     1967\lstMakeShortInline@%
     1968\end{cquote}
     1969\caption{Threads ping/pong using external scheduling}
     1970\label{f:pingpong}
    20261971\end{figure}
    20271972
    2028 Note that in the right picture, tasks need to always keep track of the monitors associated with mutex routines, and the routine mask needs to have both a routine pointer and a set of monitors, as is discussed in the next section.
    2029 These details are omitted from the picture for the sake of simplicity.
    2030 
    2031 At this point, a decision must be made between flexibility and performance.
    2032 Many design decisions in \CFA achieve both flexibility and performance, for example polymorphic routines add significant flexibility but inlining them means the optimizer can easily remove any runtime cost.
    2033 Here, however, the cost of flexibility cannot be trivially removed.
    2034 In the end, the most flexible approach has been chosen since it allows users to write programs that would otherwise be  hard to write.
    2035 This decision is based on the assumption that writing fast but inflexible locks is closer to a solved problem than writing locks that are as flexible as external scheduling in \CFA.
    2036 
    2037 % ======================================================================
    2038 % ======================================================================
    2039 \subsection{Multi-Monitor Scheduling}
    2040 % ======================================================================
    2041 % ======================================================================
    2042 
    2043 External scheduling, like internal scheduling, becomes significantly more complex when introducing multi-monitor syntax.
    2044 Even in the simplest possible case, some new semantics needs to be established:
    2045 \begin{cfa}
    2046 monitor M {};
    2047 
    2048 void f(M & mutex a);
    2049 
    2050 void g(M & mutex b, M & mutex c) {
    2051         waitfor(f); // two monitors M => unknown which to pass to f(M & mutex)
    2052 }
    2053 \end{cfa}
    2054 The obvious solution is to specify the correct monitor as follows:
    2055 
    2056 \begin{cfa}
    2057 monitor M {};
    2058 
    2059 void f(M & mutex a);
    2060 
    2061 void g(M & mutex a, M & mutex b) {
    2062         // wait for call to f with argument b
    2063         waitfor(f, b);
    2064 }
    2065 \end{cfa}
    2066 This syntax is unambiguous.
    2067 Both locks are acquired and kept by @g@.
    2068 When routine @f@ is called, the lock for monitor @b@ is temporarily transferred from @g@ to @f@ (while @g@ still holds lock @a@).
    2069 This behaviour can be extended to the multi-monitor @waitfor@ statement as follows.
    2070 
    2071 \begin{cfa}
    2072 monitor M {};
    2073 
    2074 void f(M & mutex a, M & mutex b);
    2075 
    2076 void g(M & mutex a, M & mutex b) {
    2077         // wait for call to f with arguments a and b
    2078         waitfor(f, a, b);
    2079 }
    2080 \end{cfa}
    2081 
    2082 Note that the set of monitors passed to the @waitfor@ statement must be entirely contained in the set of monitors already acquired in the routine. @waitfor@ used in any other context is undefined behaviour.
    2083 
    2084 An important behaviour to note is when a set of monitors only match partially:
    2085 
    2086 \begin{cfa}
    2087 mutex struct A {};
    2088 
    2089 mutex struct B {};
    2090 
    2091 void g(A & mutex a, B & mutex b) {
    2092         waitfor(f, a, b);
    2093 }
    2094 
    2095 A a1, a2;
    2096 B b;
    2097 
    2098 void foo() {
    2099         g(a1, b); // block on accept
    2100 }
    2101 
    2102 void bar() {
    2103         f(a2, b); // fulfill cooperation
    2104 }
    2105 \end{cfa}
    2106 While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables.
    2107 In both cases, partially matching monitor sets does not wakeup the waiting thread.
    2108 It is also important to note that in the case of external scheduling the order of parameters is irrelevant; @waitfor(f,a,b)@ and @waitfor(f,b,a)@ are indistinguishable waiting conditions.
    2109 
    2110 % ======================================================================
    2111 % ======================================================================
    2112 \subsection{\protect\lstinline|waitfor| Semantics}
    2113 % ======================================================================
    2114 % ======================================================================
    2115 
    2116 Syntactically, the @waitfor@ statement takes a routine identifier and a set of monitors.
    2117 While the set of monitors can be any list of expressions, the routine name is more restricted because the compiler validates at compile time the validity of the routine type and the parameters used with the @waitfor@ statement.
    2118 It checks that the set of monitors passed in matches the requirements for a routine call.
    2119 Figure~\ref{f:waitfor} shows various usages of the waitfor statement and which are acceptable.
    2120 The choice of the routine type is made ignoring any non-@mutex@ parameter.
    2121 One limitation of the current implementation is that it does not handle overloading, but overloading is possible.
     1973\section{Parallelism}
     1974
     1975Historically, computer performance was about processor speeds.
     1976However, with heat dissipation being a direct consequence of speed increase, parallelism has become the new source for increased performance~\cite{Sutter05, Sutter05b}.
     1977Now, high-performance applications must care about parallelism, which requires concurrency.
     1978The lowest-level approach of parallelism is to use \newterm{kernel threads} in combination with semantics like @fork@, @join@, \etc.
     1980However, kernel threads are better suited as an implementation tool because of their complexity and higher cost.
     1980Therefore, different abstractions are often layered onto kernel threads to simplify them, \eg pthreads.
     1981
     1982
     1983\subsection{User Threads with Preemption}
     1984
     1985A direct improvement on kernel threads is user threads, \eg Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
     1986This approach provides an interface that matches the language paradigms, more control over concurrency by the language runtime, and an abstract (and portable) interface to the underlying kernel threads across operating systems.
     1987In many cases, user threads can be used on a much larger scale (100,000 threads).
     1988Like kernel threads, user threads support preemption, which maximizes nondeterminism, but introduces the same concurrency errors: race, livelock, starvation, and deadlock.
     1989\CFA adopts user-threads as they represent the truest realization of concurrency and can build any other concurrency approach, \eg thread pools and actors~\cite{Actors}.
     1990
     1991
     1992\subsection{User Threads without Preemption (Fiber)}
     1993\label{s:fibers}
     1994
     1995A variant of user thread is \newterm{fibers}, which removes preemption, \eg Go~\cite{Go}.
     1996Like functional programming, which removes mutation and its associated problems, removing preemption from concurrency reduces nondeterminism, hence race and deadlock errors are more difficult to generate.
     1997However, preemption is necessary for concurrency that relies on spinning, so there is a class of problems that cannot be programmed without preemption.
     1998
     1999
     2000\subsection{Thread Pools}
     2001
     2002In contrast to direct threading is indirect \newterm{thread pools}, where small jobs (work units) are inserted into a work pool for execution.
     2003If the jobs are dependent, \ie interact, there is an implicit/explicit dependency graph that ties them together.
     2004While removing direct concurrency, and hence the amount of context switching, thread pools significantly limit the interaction that can occur among jobs.
     2005Indeed, jobs should not block because that also blocks the underlying thread, which effectively means the CPU utilization, and therefore throughput, suffers.
     2006While it is possible to tune the thread pool with sufficient threads, it becomes difficult to obtain high throughput and good core utilization as job interaction increases.
     2007As well, concurrency errors return, which thread pools are supposed to mitigate.
     2008Intel's TBB library~\cite{TBB} is the gold standard for thread pools.
     2009
     2010
     2011\section{\protect\CFA Runtime Structure}
     2012
     2013Figure~\ref{f:RunTimeStructure} illustrates the runtime structure of a \CFA program.
     2014In addition to the new kinds of objects introduced by \CFA, there are two more runtime entities used to control parallel execution: cluster and (virtual) processor.
     2015An executing thread is illustrated by its containment in a processor.
     2016
    21222017\begin{figure}
    2123 \begin{cfa}[caption={Various correct and incorrect uses of the waitfor statement},label={f:waitfor}]
    2124 monitor A{};
    2125 monitor B{};
    2126 
    2127 void f1( A & mutex );
    2128 void f2( A & mutex, B & mutex );
    2129 void f3( A & mutex, int );
    2130 void f4( A & mutex, int );
    2131 void f4( A & mutex, double );
    2132 
    2133 void foo( A & mutex a1, A & mutex a2, B & mutex b1, B & b2 ) {
    2134         A * ap = & a1;
    2135         void (*fp)( A & mutex ) = f1;
    2136 
    2137         waitfor(f1, a1);     // Correct : 1 monitor case
    2138         waitfor(f2, a1, b1); // Correct : 2 monitor case
    2139         waitfor(f3, a1);     // Correct : non-mutex arguments are ignored
    2140         waitfor(f1, *ap);    // Correct : expression as argument
    2141 
    2142         waitfor(f1, a1, b1); // Incorrect : Too many mutex arguments
    2143         waitfor(f2, a1);     // Incorrect : Too few mutex arguments
    2144         waitfor(f2, a1, a2); // Incorrect : Mutex arguments don't match
    2145         waitfor(f1, 1);      // Incorrect : 1 not a mutex argument
    2146         waitfor(f9, a1);     // Incorrect : f9 routine does not exist
    2147         waitfor(*fp, a1 );   // Incorrect : fp not an identifier
    2148         waitfor(f4, a1);     // Incorrect : f4 ambiguous
    2149 
    2150         waitfor(f2, a1, b2); // Undefined behaviour : b2 not mutex
    2151 }
    2152 \end{cfa}
     2018\centering
     2019\input{RunTimeStructure}
     2020\caption{\CFA Runtime Structure}
     2021\label{f:RunTimeStructure}
    21532022\end{figure}
    21542023
    2155 Finally, for added flexibility, \CFA supports constructing a complex @waitfor@ statement using the @or@, @timeout@ and @else@.
    2156 Indeed, multiple @waitfor@ clauses can be chained together using @or@; this chain forms a single statement that uses baton pass to any routine that fits one of the routine+monitor set passed in.
    2157 To enable users to tell which accepted routine executed, @waitfor@s are followed by a statement (including the null statement @;@) or a compound statement, which is executed after the clause is triggered.
    2158 A @waitfor@ chain can also be followed by a @timeout@, to signify an upper bound on the wait, or an @else@, to signify that the call should be non-blocking, which checks for a matching routine call already arrived and otherwise continues.
    2159 Any and all of these clauses can be preceded by a @when@ condition to dynamically toggle the accept clauses on or off based on some current state.
    2160 Figure~\ref{f:waitfor2} demonstrates several complex masks and some incorrect ones.
    2161 
    2162 \begin{figure}
    2163 \lstset{language=CFA,deletedelim=**[is][]{`}{`}}
    2164 \begin{cfa}
    2165 monitor A{};
    2166 
    2167 void f1( A & mutex );
    2168 void f2( A & mutex );
    2169 
    2170 void foo( A & mutex a, bool b, int t ) {
    2171         waitfor(f1, a);                                                 $\C{// Correct : blocking case}$
    2172 
    2173         waitfor(f1, a) {                                                $\C{// Correct : block with statement}$
    2174                 sout | "f1" | endl;
    2175         }
    2176         waitfor(f1, a) {                                                $\C{// Correct : block waiting for f1 or f2}$
    2177                 sout | "f1" | endl;
    2178         } or waitfor(f2, a) {
    2179                 sout | "f2" | endl;
    2180         }
    2181         waitfor(f1, a); or else;                                $\C{// Correct : non-blocking case}$
    2182 
    2183         waitfor(f1, a) {                                                $\C{// Correct : non-blocking case}$
    2184                 sout | "blocked" | endl;
    2185         } or else {
    2186                 sout | "didn't block" | endl;
    2187         }
    2188         waitfor(f1, a) {                                                $\C{// Correct : block at most 10 seconds}$
    2189                 sout | "blocked" | endl;
    2190         } or timeout( 10`s) {
    2191                 sout | "didn't block" | endl;
    2192         }
    2193         // Correct : block only if b == true if b == false, don't even make the call
    2194         when(b) waitfor(f1, a);
    2195 
    2196         // Correct : block only if b == true if b == false, make non-blocking call
    2197         waitfor(f1, a); or when(!b) else;
    2198 
    2199         // Correct : block only of t > 1
    2200         waitfor(f1, a); or when(t > 1) timeout(t); or else;
    2201 
    2202         // Incorrect : timeout clause is dead code
    2203         waitfor(f1, a); or timeout(t); or else;
    2204 
    2205         // Incorrect : order must be waitfor [or waitfor... [or timeout] [or else]]
    2206         timeout(t); or waitfor(f1, a); or else;
    2207 }
    2208 \end{cfa}
    2209 \caption{Correct and incorrect uses of the or, else, and timeout clause around a waitfor statement}
    2210 \label{f:waitfor2}
    2211 \end{figure}
    2212 
    2213 % ======================================================================
    2214 % ======================================================================
    2215 \subsection{Waiting For The Destructor}
    2216 % ======================================================================
    2217 % ======================================================================
    2218 An interesting use for the @waitfor@ statement is destructor semantics.
    2219 Indeed, the @waitfor@ statement can accept any @mutex@ routine, which includes the destructor (see section \ref{data}).
    2220 However, with the semantics discussed until now, waiting for the destructor does not make any sense, since using an object after its destructor is called is undefined behaviour.
    2221 The simplest approach is to disallow @waitfor@ on a destructor.
    2222 However, a more expressive approach is to flip ordering of execution when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current @mutex@ routine, similarly to how a condition is signalled.
    2223 \begin{figure}
    2224 \begin{cfa}[caption={Example of an executor which executes action in series until the destructor is called.},label={f:dtor-order}]
    2225 monitor Executer {};
    2226 struct  Action;
    2227 
    2228 void ^?{}   (Executer & mutex this);
    2229 void execute(Executer & mutex this, const Action & );
    2230 void run    (Executer & mutex this) {
    2231         while(true) {
    2232                    waitfor(execute, this);
    2233                 or waitfor(^?{}   , this) {
    2234                         break;
    2235                 }
    2236         }
    2237 }
    2238 \end{cfa}
    2239 \end{figure}
    2240 For example, listing \ref{f:dtor-order} shows an example of an executor with an infinite loop, which waits for the destructor to break out of this loop.
    2241 Switching the semantic meaning introduces an idiomatic way to terminate a task and/or wait for its termination via destruction.
    2242 
    2243 
    2244 % ######     #    ######     #    #       #       ####### #       ###  #####  #     #
    2245 % #     #   # #   #     #   # #   #       #       #       #        #  #     # ##   ##
    2246 % #     #  #   #  #     #  #   #  #       #       #       #        #  #       # # # #
    2247 % ######  #     # ######  #     # #       #       #####   #        #   #####  #  #  #
    2248 % #       ####### #   #   ####### #       #       #       #        #        # #     #
    2249 % #       #     # #    #  #     # #       #       #       #        #  #     # #     #
    2250 % #       #     # #     # #     # ####### ####### ####### ####### ###  #####  #     #
    2251 \section{Parallelism}
    2252 Historically, computer performance was about processor speeds and instruction counts.
    2253 However, with heat dissipation being a direct consequence of speed increase, parallelism has become the new source for increased performance~\cite{Sutter05, Sutter05b}.
    2254 In this decade, it is no longer reasonable to create a high-performance application without caring about parallelism.
    2255 Indeed, parallelism is an important aspect of performance and more specifically throughput and hardware utilization.
    2256 The lowest-level approach of parallelism is to use \textbf{kthread} in combination with semantics like @fork@, @join@, \etc.
    2257 However, since these have significant costs and limitations, \textbf{kthread} are now mostly used as an implementation tool rather than a user oriented one.
    2258 There are several alternatives to solve these issues that all have strengths and weaknesses.
    2259 While there are many variations of the presented paradigms, most of these variations do not actually change the guarantees or the semantics, they simply move costs in order to achieve better performance for certain workloads.
    2260 
    2261 \section{Paradigms}
    2262 \subsection{User-Level Threads}
    2263 A direct improvement on the \textbf{kthread} approach is to use \textbf{uthread}.
    2264 These threads offer most of the same features that the operating system already provides but can be used on a much larger scale.
    2265 This approach is the most powerful solution as it allows all the features of multithreading, while removing several of the more expensive costs of kernel threads.
    2266 The downside is that almost none of the low-level threading problems are hidden; users still have to think about data races, deadlocks and synchronization issues.
    2267 These issues can be somewhat alleviated by a concurrency toolkit with strong guarantees, but the parallelism toolkit offers very little to reduce complexity in itself.
    2268 
    2269 Examples of languages that support \textbf{uthread} are Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
    2270 
    2271 \subsection{Fibers : User-Level Threads Without Preemption} \label{fibers}
    2272 A popular variant of \textbf{uthread} is what is often referred to as \textbf{fiber}.
    2273 However, \textbf{fiber} do not present meaningful semantic differences with \textbf{uthread}.
    2274 The significant difference between \textbf{uthread} and \textbf{fiber} is the lack of \textbf{preemption} in the latter.
    2275 Advocates of \textbf{fiber} list their high performance and ease of implementation as major strengths, but the performance difference between \textbf{uthread} and \textbf{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design.
    2276 Therefore this proposal largely ignores fibers.
    2277 
    2278 An example of a language that uses fibers is Go~\cite{Go}
    2279 
    2280 \subsection{Jobs and Thread Pools}
    2281 An approach on the opposite end of the spectrum is to base parallelism on \textbf{pool}.
    2282 Indeed, \textbf{pool} offer limited flexibility but at the benefit of a simpler user interface.
    2283 In \textbf{pool} based systems, users express parallelism as units of work, called jobs, and a dependency graph (either explicit or implicit) that ties them together.
    2284 This approach means users need not worry about concurrency but significantly limit the interaction that can occur among jobs.
    2285 Indeed, any \textbf{job} that blocks also blocks the underlying worker, which effectively means the CPU utilization, and therefore throughput, suffers noticeably.
    2286 It can be argued that a solution to this problem is to use more workers than available cores.
    2287 However, unless the number of jobs and the number of workers are comparable, having a significant number of blocked jobs always results in idle cores.
    2288 
    2289 The gold standard of this implementation is Intel's TBB library~\cite{TBB}.
    2290 
    2291 \subsection{Paradigm Performance}
    2292 While the choice between the three paradigms listed above may have significant performance implications, it is difficult to pin down the performance implications of choosing a model at the language level.
    2293 Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload.
    2294 Having a large amount of mostly independent units of work to execute almost guarantees equivalent performance across paradigms and that the \textbf{pool}-based system has the best efficiency thanks to the lower memory overhead (\ie no thread stack per job).
    2295 However, interactions among jobs can easily exacerbate contention.
    2296 User-level threads allow fine-grain context switching, which results in better resource utilization, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance.
    2297 Finally, if the units of uninterrupted work are large, enough the paradigm choice is largely amortized by the actual work done.
    2298 
    2299 \section{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel}
    2300 A \textbf{cfacluster} is a group of \textbf{kthread} executed in isolation. \textbf{uthread} are scheduled on the \textbf{kthread} of a given \textbf{cfacluster}, allowing organization between \textbf{uthread} and \textbf{kthread}.
    2301 It is important that \textbf{kthread} belonging to a same \textbf{cfacluster} have homogeneous settings, otherwise migrating a \textbf{uthread} from one \textbf{kthread} to the other can cause issues.
    2302 A \textbf{cfacluster} also offers a pluggable scheduler that can optimize the workload generated by the \textbf{uthread}.
    2303 
    2304 \textbf{cfacluster} have not been fully implemented in the context of this paper.
    2305 Currently \CFA only supports one \textbf{cfacluster}, the initial one.
    2306 
    2307 \subsection{Future Work: Machine Setup}\label{machine}
    2308 While this was not done in the context of this paper, another important aspect of clusters is affinity.
    2309 While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups.
    2310 For example, a system using \textbf{numa} configurations may benefit from users being able to tie clusters and/or kernel threads to certain CPU cores.
    2311 OS support for CPU affinity is now common~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
    2312 
    2313 \subsection{Paradigms}\label{cfaparadigms}
    2314 Given these building blocks, it is possible to reproduce all three of the popular paradigms.
    2315 Indeed, \textbf{uthread} is the default paradigm in \CFA.
    2316 However, disabling \textbf{preemption} on a cluster means threads effectively become fibers.
    2317 Since several \textbf{cfacluster} with different scheduling policy can coexist in the same application, this allows \textbf{fiber} and \textbf{uthread} to coexist in the runtime of an application.
    2318 Finally, it is possible to build executors for thread pools from \textbf{uthread} or \textbf{fiber}, which includes specialized jobs like actors~\cite{Actors}.
    2319 
    2320 
    2321 
    2322 \section{Behind the Scenes}
    2323 There are several challenges specific to \CFA when implementing concurrency.
    2324 These challenges are a direct result of bulk acquire and loose object definitions.
    2325 These two constraints are the root cause of most design decisions in the implementation.
    2326 Furthermore, to avoid contention from dynamically allocating memory in a concurrent environment, the internal-scheduling design is (almost) entirely free of mallocs.
    2327 This approach avoids the chicken and egg problem~\cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime.
    2328 This extra goal means that memory management is a constant concern in the design of the system.
    2329 
    2330 The main memory concern for concurrency is queues.
    2331 All blocking operations are made by parking threads onto queues and all queues are designed with intrusive nodes, where each node has pre-allocated link fields for chaining, to avoid the need for memory allocation.
    2332 Since several concurrency operations can use an unbound amount of memory (depending on bulk acquire), statically defining information in the intrusive fields of threads is insufficient. The only way to use a variable amount of memory without requiring memory allocation is to pre-allocate large buffers of memory eagerly and store the information in these buffers.
    2333 Conveniently, the call stack fits that description and is easy to use, which is why it is used heavily in the implementation of internal scheduling, particularly variable-length arrays.
    2334 Since stack allocation is based on scopes, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable-length array.
    2335 The threads and the condition both have a fixed amount of memory, while @mutex@ routines and blocking calls allow for an unbound amount, within the stack size.
    2336 
    2337 Note that since the major contributions of this paper are extending monitor semantics to bulk acquire and loose object definitions, any challenges that are not resulting of these characteristics of \CFA are considered as solved problems and therefore not discussed.
    2338 
    2339 % ======================================================================
    2340 % ======================================================================
    2341 \section{Mutex Routines}
    2342 % ======================================================================
    2343 % ======================================================================
    2344 
    2345 The first step towards the monitor implementation is simple @mutex@ routines.
    2346 In the single monitor case, mutual-exclusion is done using the entry/exit procedure in listing \ref{f:entry1}.
    2347 The entry/exit procedures do not have to be extended to support multiple monitors.
    2348 Indeed it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlock~\cite{Havender68}.
    2349 In \CFA, ordering of monitor acquisition relies on memory ordering.
    2350 This approach is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is undefined behaviour.
    2351 When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values.
     2024
     2025\subsection{Cluster}
     2026\label{s:RuntimeStructureCluster}
     2027
     2028A \newterm{cluster} is a collection of threads and virtual processors (abstract kernel-thread) that execute the threads (like a virtual machine).
     2029The purpose of a cluster is to control the amount of parallelism that is possible among threads, plus scheduling and other execution defaults.
     2030The default cluster-scheduler is single-queue multi-server, which provides automatic load-balancing of threads on processors.
     2031However, the scheduler is pluggable, supporting alternative schedulers.
     2032If several clusters exist, both threads and virtual processors, can be explicitly migrated from one cluster to another.
     2033No automatic load balancing among clusters is performed by \CFA.
     2034
     2035When a \CFA program begins execution, it creates a user cluster with a single processor and a special processor to handle preemption that does not execute user threads.
     2036The user cluster is created to contain the application user-threads.
     2037Having all threads execute on the one cluster often maximizes utilization of processors, which minimizes runtime.
     2038However, because of limitations of the underlying operating system, heterogeneous hardware, or scheduling requirements (real-time), it is sometimes necessary to have multiple clusters.
     2039
     2040
     2041\subsection{Virtual Processor}
     2042\label{s:RuntimeStructureProcessor}
     2043
     2044A virtual processor is implemented by a kernel thread (\eg UNIX process), which is subsequently scheduled for execution on a hardware processor by the underlying operating system.
     2045Programs may use more virtual processors than hardware processors.
     2046On a multiprocessor, kernel threads are distributed across the hardware processors resulting in virtual processors executing in parallel.
     2047(It is possible to use affinity to lock a virtual processor onto a particular hardware processor~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which is used when caching issues occur or for heterogeneous hardware processors.)
     2048The \CFA runtime attempts to block unused processors and unblock processors as the system load increases;
     2049balancing the workload with processors is difficult.
     2050Preemption occurs on virtual processors rather than user threads, via operating-system interrupts.
     2051Thus virtual processors execute user threads, where preemption frequency applies to a virtual processor, so preemption occurs randomly across the executed user threads.
     2052Turning off preemption transforms user threads into fibers.
     2053
     2054
     2055\subsection{Debug Kernel}
     2056
     2057There are two versions of the \CFA runtime kernel: debug and non-debug.
     2058The debugging version has many runtime checks and internal assertions, \eg stack (non-writable) guard page, and checks for stack overflow whenever context switches occur among coroutines and threads, which catches most stack overflows.
     2059After a program is debugged, the non-debugging version can be used to decrease space and increase performance.
     2060
     2061
     2062\section{Implementation}
     2063
     2064Currently, \CFA has fixed-sized stacks, where the stack size can be set at coroutine/thread creation but with no subsequent growth.
     2065Schemes exist for dynamic stack-growth, such as stack copying and chained stacks.
      2066However, stack copying requires pointer adjustment to items on the stack, which is impossible without some form of garbage collection.
     2067As well, chained stacks require all modules be recompiled to use this feature, which breaks backward compatibility with existing C libraries.
     2068In the long term, it is likely C libraries will migrate to stack chaining to support concurrency, at only a minimal cost to sequential programs.
     2069Nevertheless, experience teaching \uC~\cite{CS343} shows fixed-sized stacks are rarely an issue in most concurrent programs.
     2070
     2071A primary implementation challenge is avoiding contention from dynamically allocating memory because of bulk acquire, \eg the internal-scheduling design is (almost) free of allocations.
     2072All blocking operations are made by parking threads onto queues, therefore all queues are designed with intrusive nodes, where each node has preallocated link fields for chaining.
     2073Furthermore, several bulk-acquire operations need a variable amount of memory.
     2074This storage is allocated at the base of a thread's stack before blocking, which means programmers must add a small amount of extra space for stacks.
     2075
     2076In \CFA, ordering of monitor acquisition relies on memory ordering to prevent deadlock~\cite{Havender68}, because all objects are guaranteed to have distinct non-overlapping memory layouts, and mutual-exclusion for a monitor is only defined for its lifetime.
     2077When a mutex call is made, pointers to the concerned monitors are aggregated into a variable-length array and sorted.
     23522078This array persists for the entire duration of the mutual-exclusion and its ordering is reused extensively.
    2353 \begin{figure}
    2354 \begin{multicols}{2}
    2355 Entry
    2356 \begin{cfa}
    2357 if monitor is free
    2358         enter
    2359 elif already own the monitor
    2360         continue
    2361 else
    2362         block
    2363 increment recursions
    2364 \end{cfa}
    2365 \columnbreak
    2366 Exit
    2367 \begin{cfa}
    2368 decrement recursion
    2369 if recursion == 0
    2370         if entry queue not empty
    2371                 wake-up thread
    2372 \end{cfa}
    2373 \end{multicols}
    2374 \begin{cfa}[caption={Initial entry and exit routine for monitors},label={f:entry1}]
    2375 \end{cfa}
    2376 \end{figure}
    2377 
    2378 \subsection{Details: Interaction with polymorphism}
    2379 Depending on the choice of semantics for when monitor locks are acquired, interaction between monitors and \CFA's concept of polymorphism can be more complex to support.
    2380 However, it is shown that entry-point locking solves most of the issues.
    2381 
    2382 First of all, interaction between @otype@ polymorphism (see Section~\ref{s:ParametricPolymorphism}) and monitors is impossible since monitors do not support copying.
    2383 Therefore, the main question is how to support @dtype@ polymorphism.
    2384 It is important to present the difference between the two acquiring options: \textbf{callsite-locking} and entry-point locking, \ie acquiring the monitors before making a mutex routine-call or as the first operation of the mutex routine-call.
    2385 For example:
    2386 \begin{table}
    2387 \begin{center}
    2388 \begin{tabular}{|c|c|c|}
    2389 Mutex & \textbf{callsite-locking} & \textbf{entry-point-locking} \\
    2390 call & cfa-code & cfa-code \\
    2391 \hline
    2392 \begin{cfa}[tabsize=3]
    2393 void foo(monitor& mutex a){
    2394 
    2395         // Do Work
    2396         //...
    2397 
    2398 }
    2399 
    2400 void main() {
    2401         monitor a;
    2402 
    2403         foo(a);
    2404 
    2405 }
    2406 \end{cfa} & \begin{cfa}[tabsize=3]
    2407 foo(& a) {
    2408 
    2409         // Do Work
    2410         //...
    2411 
    2412 }
    2413 
    2414 main() {
    2415         monitor a;
    2416         acquire(a);
    2417         foo(a);
    2418         release(a);
    2419 }
    2420 \end{cfa} & \begin{cfa}[tabsize=3]
    2421 foo(& a) {
    2422         acquire(a);
    2423         // Do Work
    2424         //...
    2425         release(a);
    2426 }
    2427 
    2428 main() {
    2429         monitor a;
    2430 
    2431         foo(a);
    2432 
    2433 }
    2434 \end{cfa}
    2435 \end{tabular}
    2436 \end{center}
    2437 \caption{Call-site vs entry-point locking for mutex calls}
    2438 \label{tbl:locking-site}
    2439 \end{table}
    2440 
    2441 Note the @mutex@ keyword relies on the type system, which means that in cases where a generic monitor-routine is desired, writing the mutex routine is possible with the proper trait, \eg:
    2442 \begin{cfa}
    2443 // Incorrect: T may not be monitor
    2444 forall(dtype T)
    2445 void foo(T * mutex t);
    2446 
    2447 // Correct: this routine only works on monitors (any monitor)
    2448 forall(dtype T | is_monitor(T))
    2449 void bar(T * mutex t));
    2450 \end{cfa}
    2451 
    2452 Both entry point and \textbf{callsite-locking} are feasible implementations.
    2453 The current \CFA implementation uses entry-point locking because it requires less work when using \textbf{raii}, effectively transferring the burden of implementation to object construction/destruction.
    2454 It is harder to use \textbf{raii} for call-site locking, as it does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, \ie the routine body.
    2455 For example, the monitor call can appear in the middle of an expression.
    2456 Furthermore, entry-point locking requires less code generation since any useful routine is called multiple times but there is only one entry point for many call sites.
    2457 
    2458 % ======================================================================
    2459 % ======================================================================
    2460 \section{Threading} \label{impl:thread}
    2461 % ======================================================================
    2462 % ======================================================================
    2463 
     2464 Figure \ref{fig:system1} shows a high-level picture of the \CFA runtime system in regards to concurrency.
     2465 Each component of the picture is explained in detail in the following sections.
    2466 
    2467 \begin{figure}
    2468 \begin{center}
    2469 {\resizebox{\textwidth}{!}{\input{system.pstex_t}}}
    2470 \end{center}
    2471 \caption{Overview of the entire system}
    2472 \label{fig:system1}
    2473 \end{figure}
    2474 
    2475 \subsection{Processors}
    2476 Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically @pthread@s in the current implementation of \CFA.
    2477 Indeed, any parallelism must go through operating-system libraries.
    2478 However, \textbf{uthread} are still the main source of concurrency, processors are simply the underlying source of parallelism.
    2479 Indeed, processor \textbf{kthread} simply fetch a \textbf{uthread} from the scheduler and run it; they are effectively executers for user-threads.
    2480 The main benefit of this approach is that it offers a well-defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling.
    2481 Processors internally use coroutines to take advantage of the existing context-switching semantics.
    2482 
    2483 \subsection{Stack Management}
    2484 One of the challenges of this system is to reduce the footprint as much as possible.
    2485 Specifically, all @pthread@s created also have a stack created with them, which should be used as much as possible.
    2486 Normally, coroutines also create their own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the \textbf{kthread} stack, effectively stealing the processor stack.
    2487 The exception to this rule is the Main Processor, \ie the initial \textbf{kthread} that is given to any program.
    2488 In order to respect C user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor, which can grow very large.
    2489 
    2490 \subsection{Context Switching}
    2491 As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading, because they share the same mechanism for context-switching between different stacks.
    2492 To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific routine call.
    2493 This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread.
     2494 Note that the instruction pointer can be left untouched since the context-switch is always inside the same routine.
    2495 Threads, however, do not context-switch between each other directly.
    2496 They context-switch to the scheduler.
    2497 This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen.
    2498 Obviously, this doubles the context-switch cost because threads must context-switch to an intermediate stack.
    2499 The alternative 1-step context-switch uses the stack of the ``from'' thread to schedule and then context-switches directly to the ``to'' thread.
    2500 However, the performance of the 2-step context-switch is still superior to a @pthread_yield@ (see section \ref{results}).
    2501 Additionally, for users in need for optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch (akin to the Microsoft @SwitchToFiber@~\cite{switchToWindows} routine).
    2502 This option is not currently present in \CFA, but the changes required to add it are strictly additive.
    2503 
    2504 \subsection{Preemption} \label{preemption}
    2505 Finally, an important aspect for any complete threading system is preemption.
    2506 As mentioned in section \ref{basics}, preemption introduces an extra degree of uncertainty, which enables users to have multiple threads interleave transparently, rather than having to cooperate among threads for proper scheduling and CPU distribution.
    2507 Indeed, preemption is desirable because it adds a degree of isolation among threads.
     2508 In a fully cooperative system, any thread that runs a long loop can starve other threads, while in a preemptive system, starvation can still occur but it does not rely on every thread having to yield or block on a regular basis, which significantly reduces the programmer's burden.
    2509 Obviously, preemption is not optimal for every workload.
    2510 However any preemptive system can become a cooperative system by making the time slices extremely large.
    2511 Therefore, \CFA uses a preemptive threading system.
    2512 
    2513 Preemption in \CFA\footnote{Note that the implementation of preemption is strongly tied with the underlying threading system.
     2514 For this reason, only the Linux implementation is covered; \CFA does not run on Windows at the time of writing} is based on kernel timers, which are used to run a discrete-event simulation.
    2515 Every processor keeps track of the current time and registers an expiration time with the preemption system.
    2516 When the preemption system receives a change in preemption, it inserts the time in a sorted order and sets a kernel timer for the closest one, effectively stepping through preemption events on each signal sent by the timer.
    2517 These timers use the Linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread.
    2518 This results in an implementation problem, because when delivering signals to a process, the kernel can deliver the signal to any kernel thread for which the signal is not blocked, \ie:
     2079
      2080To improve performance and simplicity, context switching occurs inside a routine call, so only callee-saved registers are copied onto the stack and then the stack register is switched;
     2081the corresponding registers are then restored for the other context.
     2082Note, the instruction pointer is untouched since the context switch is always inside the same routine.
     2083Unlike coroutines, threads do not context switch among each other;
     2084they context switch to the cluster scheduler.
     2085This method is a 2-step context-switch and provides a clear distinction between user and kernel code, where scheduling and other system operations happen.
     2086The alternative 1-step context-switch uses the \emph{from} thread's stack to schedule and then context-switches directly to the \emph{to} thread's stack.
     2087Experimental results (not presented) show the performance difference between these two approaches is virtually equivalent, because the 1-step performance is dominated by a lock instruction to prevent a race condition.
     2088
      2089All kernel threads (@pthreads@) are created with a stack.
     2090Each \CFA virtual processor is implemented as a coroutine and these coroutines run directly on the kernel-thread stack, effectively stealing this stack.
     2091The exception to this rule is the program main, \ie the initial kernel thread that is given to any program.
     2092In order to respect C expectations, the stack of the initial kernel thread is used by program main rather than the main processor, allowing it to grow dynamically as in a normal C program.
     2093
     2094Finally, an important aspect for a complete threading system is preemption, which introduces extra non-determinism via transparent interleaving, rather than cooperation among threads for proper scheduling and processor fairness from long-running threads.
     2095Because preemption frequency is usually long, 1 millisecond, performance cost is negligible.
     2096Preemption is normally handled by setting a count-down timer on each virtual processor.
     2097When the timer expires, an interrupt is delivered, and the interrupt handler resets the count-down timer, and if the virtual processor is executing in user code, the signal handler performs a user-level context-switch, or if executing in the language runtime-kernel, the preemption is ignored or rolled forward to the point where the runtime kernel context switches back to user code.
     2098Multiple signal handlers may be pending.
     2099When control eventually switches back to the signal handler, it returns normally, and execution continues in the interrupted user thread, even though the return from the signal handler may be on a different kernel thread than the one where the signal was delivered.
     2100The only issue with this approach is that signal masks from one kernel thread may be restored on another as part of returning from the signal handler;
     2101therefore, all virtual processors in a cluster need to have the same signal mask.
     2102
     2103However, on current UNIX systems:
    25192104\begin{quote}
    25202105A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked.
     
    25222107SIGNAL(7) - Linux Programmer's Manual
    25232108\end{quote}
    2524 For the sake of simplicity, and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every kernel thread except one.
    2525 
    2526 Now because of how involuntary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread.
    2527 Hence, involuntary context-switching is done by sending signal {\tt SIGUSR1} to the corresponding proces\-sor and having the thread yield from inside the signal handler.
    2528 This approach effectively context-switches away from the signal handler back to the kernel and the signal handler frame is eventually unwound when the thread is scheduled again.
    2529 As a result, a signal handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread).
    2530 It is important to note that signal handlers save and restore signal masks because user-thread migration can cause a signal mask to migrate from one kernel thread to another.
    2531 This behaviour is only a problem if all kernel threads, among which a user thread can migrate, differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' routines from other routines}.
    2532 However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel-alarm thread can cause deadlocks.
    2533 For this reason, the alarm thread is in a tight loop around a system call to @sigwaitinfo@, requiring very little CPU time for preemption.
    2534 One final detail about the alarm thread is how to wake it when additional communication is required (\eg on thread termination).
    2535 This unblocking is also done using {\tt SIGALRM}, but sent through the @pthread_sigqueue@.
    2536 Indeed, @sigwait@ can differentiate signals sent from @pthread_sigqueue@ from signals sent from alarms or the kernel.
    2537 
    2538 \subsection{Scheduler}
    2539 Finally, an aspect that was not mentioned yet is the scheduling algorithm.
    2540 Currently, the \CFA scheduler uses a single ready queue for all processors, which is the simplest approach to scheduling.
    2541 Further discussion on scheduling is present in section \ref{futur:sched}.
    2542 
    2543 % ======================================================================
    2544 % ======================================================================
    2545 \section{Internal Scheduling} \label{impl:intsched}
    2546 % ======================================================================
    2547 % ======================================================================
    2548 The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:ClassicalMonitor} for convenience):
    2549 
    2550 \begin{figure}
    2551 \begin{center}
    2552 {\resizebox{0.4\textwidth}{!}{\input{monitor}}}
    2553 \end{center}
    2554 \caption{Traditional illustration of a monitor}
    2555 \end{figure}
    2556 
    2557 This picture has several components, the two most important being the entry queue and the AS-stack.
    2558 The entry queue is an (almost) FIFO list where threads waiting to enter are parked, while the acceptor/signaller (AS) stack is a FILO list used for threads that have been signalled or otherwise marked as running next.
    2559 
    2560 For \CFA, this picture does not have support for blocking multiple monitors on a single condition.
    2561 To support bulk acquire two changes to this picture are required.
    2562 First, it is no longer helpful to attach the condition to \emph{a single} monitor.
    2563 Secondly, the thread waiting on the condition has to be separated across multiple monitors, seen in figure \ref{fig:monitor_cfa}.
    2564 
    2565 \begin{figure}
    2566 \begin{center}
    2567 {\resizebox{0.8\textwidth}{!}{\input{int_monitor}}}
    2568 \end{center}
    2569 \caption{Illustration of \CFA Monitor}
    2570 \label{fig:monitor_cfa}
    2571 \end{figure}
    2572 
    2573 This picture and the proper entry and leave algorithms (see listing \ref{f:entry2}) is the fundamental implementation of internal scheduling.
    2574 Note that when a thread is moved from the condition to the AS-stack, it is conceptually split into N pieces, where N is the number of monitors specified in the parameter list.
    2575 The thread is woken up when all the pieces have popped from the AS-stacks and made active.
    2576 In this picture, the threads are split into halves but this is only because there are two monitors.
    2577 For a specific signalling operation every monitor needs a piece of thread on its AS-stack.
    2578 
    2579 \begin{figure}
    2580 \begin{multicols}{2}
    2581 Entry
    2582 \begin{cfa}
    2583 if monitor is free
    2584         enter
    2585 elif already own the monitor
    2586         continue
    2587 else
    2588         block
    2589 increment recursion
    2590 
    2591 \end{cfa}
    2592 \columnbreak
    2593 Exit
    2594 \begin{cfa}
    2595 decrement recursion
    2596 if recursion == 0
    2597         if signal_stack not empty
    2598                 set_owner to thread
    2599                 if all monitors ready
    2600                         wake-up thread
    2601 
    2602         if entry queue not empty
    2603                 wake-up thread
    2604 \end{cfa}
    2605 \end{multicols}
    2606 \begin{cfa}[caption={Entry and exit routine for monitors with internal scheduling},label={f:entry2}]
    2607 \end{cfa}
    2608 \end{figure}
    2609 
    2610 The solution discussed in \ref{s:InternalScheduling} can be seen in the exit routine of listing \ref{f:entry2}.
    2611 Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership.
    2612 This solution is deadlock safe as well as preventing any potential barging.
    2613 The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the call stack of the @wait@ and @signal_block@ routines.
    2614 
    2615 \begin{figure}
    2616 \begin{center}
    2617 {\resizebox{0.8\textwidth}{!}{\input{monitor_structs.pstex_t}}}
    2618 \end{center}
    2619 \caption{Data structures involved in internal/external scheduling}
    2620 \label{fig:structs}
    2621 \end{figure}
    2622 
    2623 Figure \ref{fig:structs} shows a high-level representation of these data structures.
     2624 The main idea behind them is that a thread cannot contain an arbitrary number of intrusive ``next'' pointers for linking onto monitors.
    2625 The @condition node@ is the data structure that is queued onto a condition variable and, when signalled, the condition queue is popped and each @condition criterion@ is moved to the AS-stack.
    2626 Once all the criteria have been popped from their respective AS-stacks, the thread is woken up, which is what is shown in listing \ref{f:entry2}.
    2627 
    2628 % ======================================================================
    2629 % ======================================================================
    2630 \section{External Scheduling}
    2631 % ======================================================================
    2632 % ======================================================================
    2633 Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}.
    2634 For internal scheduling, these queues are part of condition variables, which are still unique for a given scheduling operation (\ie no signal statement uses multiple conditions).
    2635 However, in the case of external scheduling, there is no equivalent object which is associated with @waitfor@ statements.
    2636 This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired.
     2637 These monitors are the only objects that have sufficient lifetime and are available on both sides of the @waitfor@ statement.
    2638 This requires an algorithm to choose which monitor holds the relevant queue.
    2639 It is also important that said algorithm be independent of the order in which users list parameters.
    2640 The proposed algorithm is to fall back on monitor lock ordering (sorting by address) and specify that the monitor that is acquired first is the one with the relevant waiting queue.
    2641 This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint.
    2642 
    2643 This algorithm choice has two consequences:
    2644 \begin{itemize}
    2645         \item The queue of the monitor with the lowest address is no longer a true FIFO queue because threads can be moved to the front of the queue.
    2646 These queues need to contain a set of monitors for each of the waiting threads.
    2647 Therefore, another thread whose set contains the same lowest address monitor but different lower priority monitors may arrive first but enter the critical section after a thread with the correct pairing.
    2648         \item The queue of the lowest priority monitor is both required and potentially unused.
    2649 Indeed, since it is not known at compile time which monitor is the monitor which has the lowest address, every monitor needs to have the correct queues even though it is possible that some queues go unused for the entire duration of the program, for example if a monitor is only used in a specific pair.
    2650 \end{itemize}
    2651 Therefore, the following modifications need to be made to support external scheduling:
    2652 \begin{itemize}
    2653         \item The threads waiting on the entry queue need to keep track of which routine they are trying to enter, and using which set of monitors.
    2654 The @mutex@ routine already has all the required information on its stack, so the thread only needs to keep a pointer to that information.
    2655         \item The monitors need to keep a mask of acceptable routines.
    2656 This mask contains for each acceptable routine, a routine pointer and an array of monitors to go with it.
    2657 It also needs storage to keep track of which routine was accepted.
    2658 Since this information is not specific to any monitor, the monitors actually contain a pointer to an integer on the stack of the waiting thread.
    2659 Note that if a thread has acquired two monitors but executes a @waitfor@ with only one monitor as a parameter, setting the mask of acceptable routines to both monitors will not cause any problems since the extra monitor will not change ownership regardless.
    2660 This becomes relevant when @when@ clauses affect the number of monitors passed to a @waitfor@ statement.
    2661         \item The entry/exit routines need to be updated as shown in listing \ref{f:entry3}.
    2662 \end{itemize}
    2663 
    2664 \subsection{External Scheduling - Destructors}
    2665 Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine.
    2666 This routine is needed because of the storage requirements of the call order inversion.
    2667 Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for.
    2668 For regular @waitfor@ statements, the call stack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later.
    2669 The @waitfor@ semantics can then be adjusted correspondingly, as seen in listing \ref{f:entry-dtor}
    2670 
    2671 \begin{figure}
    2672 \begin{multicols}{2}
    2673 Entry
    2674 \begin{cfa}
    2675 if monitor is free
    2676         enter
    2677 elif already own the monitor
    2678         continue
    2679 elif matches waitfor mask
    2680         push criteria to AS-stack
    2681         continue
    2682 else
    2683         block
    2684 increment recursion
    2685 \end{cfa}
    2686 \columnbreak
    2687 Exit
    2688 \begin{cfa}
    2689 decrement recursion
    2690 if recursion == 0
    2691         if signal_stack not empty
    2692                 set_owner to thread
    2693                 if all monitors ready
    2694                         wake-up thread
    2695                 endif
    2696         endif
    2697 
    2698         if entry queue not empty
    2699                 wake-up thread
    2700         endif
    2701 \end{cfa}
    2702 \end{multicols}
    2703 \begin{cfa}[caption={Entry and exit routine for monitors with internal scheduling and external scheduling},label={f:entry3}]
    2704 \end{cfa}
    2705 \end{figure}
    2706 
    2707 \begin{figure}
    2708 \begin{multicols}{2}
    2709 Destructor Entry
    2710 \begin{cfa}
    2711 if monitor is free
    2712         enter
    2713 elif already own the monitor
    2714         increment recursion
    2715         return
    2716 create wait context
    2717 if matches waitfor mask
    2718         reset mask
    2719         push self to AS-stack
    2720         baton pass
    2721 else
    2722         wait
    2723 increment recursion
    2724 \end{cfa}
    2725 \columnbreak
    2726 Waitfor
    2727 \begin{cfa}
    2728 if matching thread is already there
    2729         if found destructor
    2730                 push destructor to AS-stack
    2731                 unlock all monitors
    2732         else
    2733                 push self to AS-stack
    2734                 baton pass
    2735         endif
    2736         return
    2737 endif
    2738 if non-blocking
    2739         Unlock all monitors
    2740         Return
    2741 endif
    2742 
    2743 push self to AS-stack
    2744 set waitfor mask
    2745 block
    2746 return
    2747 \end{cfa}
    2748 \end{multicols}
    2749 \begin{cfa}[caption={Pseudo code for the \protect\lstinline|waitfor| routine and the \protect\lstinline|mutex| entry routine for destructors},label={f:entry-dtor}]
    2750 \end{cfa}
    2751 \end{figure}
    2752 
    2753 
    2754 % ======================================================================
    2755 % ======================================================================
    2756 \section{Putting It All Together}
    2757 % ======================================================================
    2758 % ======================================================================
    2759 
    2760 
    2761 \section{Threads As Monitors}
     2762 As was subtly alluded to in section \ref{threads}, @thread@s in \CFA are in fact monitors, which means that all monitor features are available when using threads.
    2763 For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine:
    2764 \begin{figure}
    2765 \begin{cfa}[caption={Toy simulator using \protect\lstinline|thread|s and \protect\lstinline|monitor|s.},label={f:engine-v1}]
    2766 // Visualization declaration
    2767 thread Renderer {} renderer;
    2768 Frame * simulate( Simulator & this );
    2769 
    2770 // Simulation declaration
    2771 thread Simulator{} simulator;
    2772 void render( Renderer & this );
    2773 
    2774 // Blocking call used as communication
    2775 void draw( Renderer & mutex this, Frame * frame );
    2776 
    2777 // Simulation loop
    2778 void main( Simulator & this ) {
    2779         while( true ) {
    2780                 Frame * frame = simulate( this );
    2781                 draw( renderer, frame );
    2782         }
    2783 }
    2784 
    2785 // Rendering loop
    2786 void main( Renderer & this ) {
    2787         while( true ) {
    2788                 waitfor( draw, this );
    2789                 render( this );
    2790         }
    2791 }
    2792 \end{cfa}
    2793 \end{figure}
    2794 An obvious complaint about the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever.
    2795 Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner:
    2796 \begin{figure}
    2797 \begin{cfa}[caption={Same toy simulator with proper termination condition.},label={f:engine-v2}]
    2798 // Visualization declaration
    2799 thread Renderer {} renderer;
    2800 Frame * simulate( Simulator & this );
    2801 
    2802 // Simulation declaration
    2803 thread Simulator{} simulator;
    2804 void render( Renderer & this );
    2805 
    2806 // Blocking call used as communication
    2807 void draw( Renderer & mutex this, Frame * frame );
    2808 
    2809 // Simulation loop
    2810 void main( Simulator & this ) {
    2811         while( true ) {
    2812                 Frame * frame = simulate( this );
    2813                 draw( renderer, frame );
    2814 
    2815                 // Exit main loop after the last frame
    2816                 if( frame->is_last ) break;
    2817         }
    2818 }
    2819 
    2820 // Rendering loop
    2821 void main( Renderer & this ) {
    2822         while( true ) {
    2823                    waitfor( draw, this );
    2824                 or waitfor( ^?{}, this ) {
    2825                         // Add an exit condition
    2826                         break;
    2827                 }
    2828 
    2829                 render( this );
    2830         }
    2831 }
    2832 
    2833 // Call destructor for simulator once simulator finishes
    2834 // Call destructor for renderer to signify shutdown
    2835 \end{cfa}
    2836 \end{figure}
    2837 
    2838 \section{Fibers \& Threads}
    2839 As mentioned in section \ref{preemption}, \CFA uses preemptive threads by default but can use fibers on demand.
    2840 Currently, fibers are used by adding the following line of code to the program:
    2841 \begin{cfa}
    2842 unsigned int default_preemption() {
    2843         return 0;
    2844 }
    2845 \end{cfa}
    2846 This routine is called by the kernel to fetch the default preemption rate, where 0 signifies an infinite time-slice, \ie no preemption.
    2847 However, once clusters are fully implemented, it will be possible to create fibers and \textbf{uthread} in the same system, as shown in Listing~\ref{f:fiber-uthread}.
    2848 \begin{figure}
    2849 \lstset{language=CFA,deletedelim=**[is][]{`}{`}}
    2850 \begin{cfa}[caption={Using fibers and \textbf{uthread} side-by-side in \CFA},label={f:fiber-uthread}]
    2851 // Cluster forward declaration
    2852 struct cluster;
    2853 
    2854 // Processor forward declaration
    2855 struct processor;
    2856 
    2857 // Construct clusters with a preemption rate
    2858 void ?{}(cluster& this, unsigned int rate);
    2859 // Construct processor and add it to cluster
    2860 void ?{}(processor& this, cluster& cluster);
    2861 // Construct thread and schedule it on cluster
    2862 void ?{}(thread& this, cluster& cluster);
    2863 
    2864 // Declare two clusters
    2865 cluster thread_cluster = { 10`ms };                     // Preempt every 10 ms
    2866 cluster fibers_cluster = { 0 };                         // Never preempt
    2867 
    2868 // Construct 4 processors
    2869 processor processors[4] = {
    2870         //2 for the thread cluster
    2871         thread_cluster;
    2872         thread_cluster;
    2873         //2 for the fibers cluster
    2874         fibers_cluster;
    2875         fibers_cluster;
    2876 };
    2877 
    2878 // Declares thread
    2879 thread UThread {};
    2880 void ?{}(UThread& this) {
    2881         // Construct underlying thread to automatically
    2882         // be scheduled on the thread cluster
    2883         (this){ thread_cluster }
    2884 }
    2885 
    2886 void main(UThread & this);
    2887 
    2888 // Declares fibers
    2889 thread Fiber {};
    2890 void ?{}(Fiber& this) {
    2891         // Construct underlying thread to automatically
    2892         // be scheduled on the fiber cluster
    2893         (this.__thread){ fibers_cluster }
    2894 }
    2895 
    2896 void main(Fiber & this);
    2897 \end{cfa}
    2898 \end{figure}
    2899 
    2900 
    2901 % ======================================================================
    2902 % ======================================================================
    2903 \section{Performance Results} \label{results}
    2904 % ======================================================================
    2905 % ======================================================================
    2906 \section{Machine Setup}
    2907 Table \ref{tab:machine} shows the characteristics of the machine used to run the benchmarks.
    2908 All tests were run on this machine.
     2109Hence, the timer-expiry signal, which is generated \emph{externally} by the UNIX kernel to the UNIX process, is delivered to any of its UNIX subprocesses (kernel threads).
     2110To ensure each virtual processor receives its own preemption signals, a discrete-event simulation is run on a special virtual processor, and only it sets and receives timer events.
     2111Virtual processors register an expiration time with the discrete-event simulator, which is inserted in sorted order.
     2112The simulation sets the count-down timer to the value at the head of the event list, and when the timer expires, all events less than or equal to the current time are processed.
     2113Processing a preemption event sends an \emph{internal} @SIGUSR1@ signal to the registered virtual processor, which is always delivered to that processor.
     2114
     2115
     2116\section{Performance}
     2117\label{results}
     2118
     2119To verify the implementation of the \CFA runtime, a series of microbenchmarks are performed comparing \CFA with other widely used programming languages with concurrency.
     2120Table~\ref{t:machine} shows the specifications of the computer used to run the benchmarks, and the versions of the software used in the comparison.
     2121
    29092122\begin{table}
    2910 \begin{center}
    2911 \begin{tabular}{| l | r | l | r |}
     2123\centering
     2124\caption{Experiment environment}
     2125\label{t:machine}
     2126
     2127\begin{tabular}{|l|r||l|r|}
    29122128\hline
    2913 Architecture            & x86\_64                       & NUMA node(s)  & 8 \\
     2129Architecture            & x86\_64                               & NUMA node(s)  & 8 \\
    29142130\hline
    2915 CPU op-mode(s)          & 32-bit, 64-bit                & Model name    & AMD Opteron\texttrademark  Processor 6380 \\
     2131CPU op-mode(s)          & 32-bit, 64-bit                & Model name    & AMD Opteron\texttrademark\ Processor 6380 \\
    29162132\hline
    2917 Byte Order                      & Little Endian                 & CPU Freq              & 2.5\si{\giga\hertz} \\
     2133Byte Order                      & Little Endian                 & CPU Freq              & 2.5 GHz \\
    29182134\hline
    2919 CPU(s)                  & 64                            & L1d cache     & \SI{16}{\kibi\byte} \\
     2135CPU(s)                          & 64                                    & L1d cache     & 16 KiB \\
    29202136\hline
    2921 Thread(s) per core      & 2                             & L1i cache     & \SI{64}{\kibi\byte} \\
     2137Thread(s) per core      & 2                                     & L1i cache     & 64 KiB \\
    29222138\hline
    2923 Core(s) per socket      & 8                             & L2 cache              & \SI{2048}{\kibi\byte} \\
     2139Core(s) per socket      & 8                                     & L2 cache              & 2048 KiB \\
    29242140\hline
    2925 Socket(s)                       & 4                             & L3 cache              & \SI{6144}{\kibi\byte} \\
     2141Socket(s)                       & 4                                     & L3 cache              & 6144 KiB \\
    29262142\hline
    29272143\hline
    2928 Operating system                & Ubuntu 16.04.3 LTS    & Kernel                & Linux 4.4-97-generic \\
     2144Operating system        & Ubuntu 16.04.3 LTS    & Kernel                & Linux 4.4-97-generic \\
    29292145\hline
    2930 Compiler                        & GCC 6.3               & Translator    & CFA 1 \\
     2146gcc                                     & 6.3                                   & \CFA                  & 1.0.0 \\
    29312147\hline
    2932 Java version            & OpenJDK-9             & Go version    & 1.9.2 \\
     2148Java                            & OpenJDK-9                     & Go                    & 1.9.2 \\
    29332149\hline
    29342150\end{tabular}
    2935 \end{center}
    2936 \caption{Machine setup used for the tests}
    2937 \label{tab:machine}
    29382151\end{table}
    29392152
    2940 \section{Micro Benchmarks}
    2941 All benchmarks are run using the same harness to produce the results, seen as the @BENCH()@ macro in the following examples.
    2942 This macro uses the following logic to benchmark the code:
    2943 \begin{cfa}
    2944 #define BENCH(run, result) \
    2945         before = gettime(); \
    2946         run; \
    2947         after  = gettime(); \
    2948         result = (after - before) / N;
    2949 \end{cfa}
    2950 The method used to get time is @clock_gettime(CLOCK_THREAD_CPUTIME_ID);@.
    2951 Each benchmark is using many iterations of a simple call to measure the cost of the call.
    2952 The specific number of iterations depends on the specific benchmark.
    2953 
    2954 \subsection{Context-Switching}
    2955 The first interesting benchmark is to measure how long context-switches take.
    2956 The simplest approach to do this is to yield on a thread, which executes a 2-step context switch.
    2957 Yielding causes the thread to context-switch to the scheduler and back, more precisely: from the \textbf{uthread} to the \textbf{kthread} then from the \textbf{kthread} back to the same \textbf{uthread} (or a different one in the general case).
    2958 In order to make the comparison fair, coroutines also execute a 2-step context-switch by resuming another coroutine which does nothing but suspending in a tight loop, which is a resume/suspend cycle instead of a yield.
    2959 Figure~\ref{f:ctx-switch} shows the code for coroutines and threads with the results in table \ref{tab:ctx-switch}.
    2960 All omitted tests are functionally identical to one of these tests.
    2961 The difference between coroutines and threads can be attributed to the cost of scheduling.
     2153All benchmarks are run using the following harness:
     2154\begin{cfa}
     2155unsigned int N = 10_000_000;
     2156#define BENCH( run ) Time before = getTimeNsec(); run; Duration result = (getTimeNsec() - before) / N;
     2157\end{cfa}
     2158The method used to get time is @clock_gettime( CLOCK_REALTIME )@.
     2159Each benchmark is performed @N@ times, where @N@ varies depending on the benchmark, the total time is divided by @N@ to obtain the average time for a benchmark.
     2160All omitted tests for other languages are functionally identical to the shown \CFA test.
     2161
     2162
     2163\paragraph{Context-Switching}
     2164
     2165In procedural programming, the cost of a routine call is important as modularization (refactoring) increases.
     2166(In many cases, a compiler inlines routine calls to eliminate this cost.)
     2167Similarly, when modularization extends to coroutines/tasks, the time for a context switch becomes a relevant factor.
     2168The coroutine context-switch is 2-step using resume/suspend, \ie from resumer to suspender and from suspender to resumer.
     2169The thread context switch is 2-step using yield, \ie enter and return from the runtime kernel.
     2170Figure~\ref{f:ctx-switch} shows the code for coroutines/threads with all results in Table~\ref{tab:ctx-switch}.
     2171The difference in performance between coroutine and thread context-switch is the cost of scheduling for threads, whereas coroutines are self-scheduling.
     2172
    29622173\begin{figure}
    2963 \begin{multicols}{2}
    2964 \CFA Coroutines
    2965 \begin{cfa}
    2966 coroutine GreatSuspender {};
    2967 void main(GreatSuspender& this) {
    2968         while(true) { suspend(); }
    2969 }
     2174\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
     2175
     2176\newbox\myboxA
     2177\begin{lrbox}{\myboxA}
     2178\begin{cfa}[aboveskip=0pt,belowskip=0pt]
     2179coroutine C {} c;
     2180void main( C & ) { for ( ;; ) { @suspend();@ } }
    29702181int main() {
    2971         GreatSuspender s;
    2972         resume(s);
    29732182        BENCH(
    2974                 for(size_t i=0; i<n; i++) {
    2975                         resume(s);
    2976                 },
    2977                 result
    2978         )
    2979         printf("%llu\n", result);
    2980 }
    2981 \end{cfa}
    2982 \columnbreak
    2983 \CFA Threads
    2984 \begin{cfa}
    2985 
    2986 
     2183                for ( size_t i = 0; i < N; i += 1 ) { @resume( c );@ } )
     2184        sout | result`ns | endl;
     2185}
     2186\end{cfa}
     2187\end{lrbox}
     2188
     2189\newbox\myboxB
     2190\begin{lrbox}{\myboxB}
     2191\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    29872192
    29882193
    29892194int main() {
    2990 
    2991 
    29922195        BENCH(
    2993                 for(size_t i=0; i<n; i++) {
    2994                         yield();
    2995                 },
    2996                 result
    2997         )
    2998         printf("%llu\n", result);
    2999 }
    3000 \end{cfa}
    3001 \end{multicols}
    3002 \begin{cfa}[caption={\CFA benchmark code used to measure context-switches for coroutines and threads.},label={f:ctx-switch}]
    3003 \end{cfa}
    3004 \end{figure}
    3005 
    3006 \begin{table}
    3007 \begin{center}
    3008 \begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
     2196                for ( size_t i = 0; i < N; i += 1 ) { @yield();@ } )
     2197        sout | result`ns | endl;
     2198}
     2199\end{cfa}
     2200\end{lrbox}
     2201
     2202\subfloat[Coroutine]{\usebox\myboxA}
     2203\quad
     2204\subfloat[Thread]{\usebox\myboxB}
     2205\captionof{figure}{\CFA context-switch benchmark}
     2206\label{f:ctx-switch}
     2207
     2208\centering
     2209
     2210\captionof{table}{Context switch comparison (nanoseconds)}
     2211\label{tab:ctx-switch}
     2212\bigskip
     2213\begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
    30092214\cline{2-4}
    3010 \multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
     2215\multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
    30112216\hline
    3012 Kernel Thread   & 241.5 & 243.86        & 5.08 \\
     2217Kernel Thread   & 241.5         & 243.86        & 5.08 \\
    30132218\CFA Coroutine  & 38            & 38            & 0    \\
    30142219\CFA Thread             & 103           & 102.96        & 2.96 \\
    3015 \uC Coroutine   & 46            & 45.86 & 0.35 \\
    3016 \uC Thread              & 98            & 99.11 & 1.42 \\
     2220\uC Coroutine   & 46            & 45.86         & 0.35 \\
     2221\uC Thread              & 98            & 99.11         & 1.42 \\
    30172222Goroutine               & 150           & 149.96        & 3.16 \\
    30182223Java Thread             & 289           & 290.68        & 8.72 \\
    30192224\hline
    30202225\end{tabular}
    3021 \end{center}
    3022 \caption{Context Switch comparison.
    3023 All numbers are in nanoseconds(\si{\nano\second})}
    3024 \label{tab:ctx-switch}
    3025 \end{table}
    3026 
    3027 \subsection{Mutual-Exclusion}
    3028 The next interesting benchmark is to measure the overhead to enter/leave a critical-section.
    3029 For monitors, the simplest approach is to measure how long it takes to enter and leave a monitor routine.
    3030 Figure~\ref{f:mutex} shows the code for \CFA.
    3031 To put the results in context, the cost of entering a non-inline routine and the cost of acquiring and releasing a @pthread_mutex@ lock is also measured.
    3032 The results can be shown in table \ref{tab:mutex}.
    3033 
    3034 \begin{figure}
    3035 \begin{cfa}[caption={\CFA benchmark code used to measure mutex routines.},label={f:mutex}]
    3036 monitor M {};
    3037 void __attribute__((noinline)) call( M & mutex m /*, m2, m3, m4*/ ) {}
    3038 
     2226
     2227\bigskip
     2228\bigskip
     2229
     2230\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
     2231\begin{cfa}
     2232monitor M {} m1/*, m2, m3, m4*/;
     2233void __attribute__((noinline)) do_call( M & mutex m/*, m2, m3, m4*/ ) {}
    30392234int main() {
    3040         M m/*, m2, m3, m4*/;
    3041         BENCH(
    3042                 for(size_t i=0; i<n; i++) {
    3043                         call(m/*, m2, m3, m4*/);
    3044                 },
    3045                 result
    3046         )
    3047         printf("%llu\n", result);
    3048 }
    3049 \end{cfa}
    3050 \end{figure}
    3051 
    3052 \begin{table}
    3053 \begin{center}
    3054 \begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
     2235        BENCH( for( size_t i = 0; i < N; i += 1 ) { @do_call( m1/*, m2, m3, m4*/ );@ } )
     2236        sout | result`ns | endl;
     2237}
     2238\end{cfa}
     2239\captionof{figure}{\CFA acquire/release mutex benchmark}
     2240\label{f:mutex}
     2241
     2242\centering
     2243
     2244\captionof{table}{Mutex comparison (nanoseconds)}
     2245\label{tab:mutex}
     2246\bigskip
     2247
     2248\begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
    30552249\cline{2-4}
    3056 \multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
     2250\multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
    30572251\hline
    3058 C routine                                               & 2             & 2             & 0    \\
    3059 FetchAdd + FetchSub                             & 26            & 26            & 0    \\
    3060 Pthreads Mutex Lock                             & 31            & 31.86 & 0.99 \\
     2252C routine                                                       & 2                     & 2                     & 0    \\
     2253FetchAdd + FetchSub                                     & 26            & 26            & 0    \\
     2254Pthreads Mutex Lock                                     & 31            & 31.86         & 0.99 \\
    30612255\uC @monitor@ member routine            & 30            & 30            & 0    \\
    3062 \CFA @mutex@ routine, 1 argument        & 41            & 41.57 & 0.9  \\
    3063 \CFA @mutex@ routine, 2 argument        & 76            & 76.96 & 1.57 \\
     2256\CFA @mutex@ routine, 1 argument        & 41            & 41.57         & 0.9  \\
     2257\CFA @mutex@ routine, 2 argument        & 76            & 76.96         & 1.57 \\
    30642258\CFA @mutex@ routine, 4 argument        & 145           & 146.68        & 3.85 \\
    3065 Java synchronized routine                       & 27            & 28.57 & 2.6  \\
     2259Java synchronized routine                       & 27            & 28.57         & 2.6  \\
    30662260\hline
    30672261\end{tabular}
    3068 \end{center}
    3069 \caption{Mutex routine comparison.
    3070 All numbers are in nanoseconds(\si{\nano\second})}
    3071 \label{tab:mutex}
    3072 \end{table}
    3073 
    3074 \subsection{Internal Scheduling}
    3075 The internal-scheduling benchmark measures the cost of waiting on and signalling a condition variable.
    3076 Figure~\ref{f:int-sched} shows the code for \CFA, with results table \ref{tab:int-sched}.
    3077 As with all other benchmarks, all omitted tests are functionally identical to one of these tests.
     2262\end{figure}
     2263
     2264
     2265\paragraph{Mutual-Exclusion}
     2266
     2267Mutual exclusion is measured by entering/leaving a critical section.
     2268For monitors, entering and leaving a monitor routine is measured.
     2269Figure~\ref{f:mutex} shows the code for \CFA with all results in Table~\ref{tab:mutex}.
     2270To put the results in context, the cost of entering a non-inline routine and the cost of acquiring and releasing a @pthread_mutex@ lock is also measured.
     2271Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
     2272
     2273
     2274\paragraph{Internal Scheduling}
     2275
     2276Internal scheduling is measured by waiting on and signalling a condition variable.
     2277Figure~\ref{f:int-sched} shows the code for \CFA, with results in Table~\ref{tab:int-sched}.
     2278Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    30782279
    30792280\begin{figure}
    3080 \begin{cfa}[caption={Benchmark code for internal scheduling},label={f:int-sched}]
     2281\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
     2282\begin{cfa}
    30812283volatile int go = 0;
    30822284condition c;
    3083 monitor M {};
    3084 M m1;
    3085 
    3086 void __attribute__((noinline)) do_call( M & mutex a1 ) { signal(c); }
    3087 
     2285monitor M {} m;
     2286void __attribute__((noinline)) do_call( M & mutex a1 ) { signal( c ); }
    30882287thread T {};
    3089 void ^?{}( T & mutex this ) {}
    30902288void main( T & this ) {
    3091         while(go == 0) { yield(); }
    3092         while(go == 1) { do_call(m1); }
    3093 }
    3094 int  __attribute__((noinline)) do_wait( M & mutex a1 ) {
    3095         go = 1;
    3096         BENCH(
    3097                 for(size_t i=0; i<n; i++) {
    3098                         wait(c);
    3099                 },
    3100                 result
    3101         )
    3102         printf("%llu\n", result);
    3103         go = 0;
    3104         return 0;
     2289        while ( go == 0 ) { yield(); }  // wait for other thread to start
     2290        while ( go == 1 ) { @do_call( m );@ }
     2291}
     2292int  __attribute__((noinline)) do_wait( M & mutex m ) {
     2293        go = 1; // continue other thread
     2294        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @wait( c );@ } );
     2295        go = 0; // stop other thread
     2296        sout | result`ns | endl;
    31052297}
    31062298int main() {
    31072299        T t;
    3108         return do_wait(m1);
    3109 }
    3110 \end{cfa}
    3111 \end{figure}
    3112 
    3113 \begin{table}
    3114 \begin{center}
    3115 \begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
     2300        do_wait( m );
     2301}
     2302\end{cfa}
     2303\captionof{figure}{\CFA Internal scheduling benchmark}
     2304\label{f:int-sched}
     2305
     2306\centering
     2307\captionof{table}{Internal scheduling comparison (nanoseconds)}
     2308\label{tab:int-sched}
     2309\bigskip
     2310
     2311\begin{tabular}{|r|*{3}{D{.}{.}{5.2}|}}
    31162312\cline{2-4}
    3117 \multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
     2313\multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
    31182314\hline
    3119 Pthreads Condition Variable                     & 5902.5        & 6093.29       & 714.78 \\
    3120 \uC @signal@                                    & 322           & 323   & 3.36   \\
    3121 \CFA @signal@, 1 @monitor@      & 352.5 & 353.11        & 3.66   \\
    3122 \CFA @signal@, 2 @monitor@      & 430           & 430.29        & 8.97   \\
    3123 \CFA @signal@, 4 @monitor@      & 594.5 & 606.57        & 18.33  \\
    3124 Java @notify@                           & 13831.5       & 15698.21      & 4782.3 \\
     2315Pthreads Condition Variable             & 5902.5        & 6093.29       & 714.78 \\
     2316\uC @signal@                                    & 322           & 323           & 3.36   \\
     2317\CFA @signal@, 1 @monitor@              & 352.5         & 353.11        & 3.66   \\
     2318\CFA @signal@, 2 @monitor@              & 430           & 430.29        & 8.97   \\
     2319\CFA @signal@, 4 @monitor@              & 594.5         & 606.57        & 18.33  \\
     2320Java @notify@                                   & 13831.5       & 15698.21      & 4782.3 \\
    31252321\hline
    31262322\end{tabular}
    3127 \end{center}
    3128 \caption{Internal scheduling comparison.
    3129 All numbers are in nanoseconds(\si{\nano\second})}
    3130 \label{tab:int-sched}
    3131 \end{table}
    3132 
    3133 \subsection{External Scheduling}
    3134 The Internal scheduling benchmark measures the cost of the @waitfor@ statement (@_Accept@ in \uC).
    3135 Figure~\ref{f:ext-sched} shows the code for \CFA, with results in table \ref{tab:ext-sched}.
    3136 As with all other benchmarks, all omitted tests are functionally identical to one of these tests.
     2323\end{figure}
     2324
     2325
     2326\paragraph{External Scheduling}
     2327
     2328External scheduling is measured by accepting a call using the @waitfor@ statement (@_Accept@ in \uC).
     2329Figure~\ref{f:ext-sched} shows the code for \CFA, with results in Table~\ref{tab:ext-sched}.
     2330Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    31372331
    31382332\begin{figure}
    3139 \begin{cfa}[caption={Benchmark code for external scheduling},label={f:ext-sched}]
     2333\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
     2334\begin{cfa}
    31402335volatile int go = 0;
    3141 monitor M {};
    3142 M m1;
     2336monitor M {} m;
    31432337thread T {};
    3144 
    3145 void __attribute__((noinline)) do_call( M & mutex a1 ) {}
    3146 
    3147 void ^?{}( T & mutex this ) {}
    3148 void main( T & this ) {
    3149         while(go == 0) { yield(); }
    3150         while(go == 1) { do_call(m1); }
    3151 }
    3152 int  __attribute__((noinline)) do_wait( M & mutex a1 ) {
    3153         go = 1;
    3154         BENCH(
    3155                 for(size_t i=0; i<n; i++) {
    3156                         waitfor(call, a1);
    3157                 },
    3158                 result
    3159         )
    3160         printf("%llu\n", result);
    3161         go = 0;
    3162         return 0;
     2338void __attribute__((noinline)) do_call( M & mutex ) {}
     2339void main( T & ) {
     2340        while ( go == 0 ) { yield(); }  // wait for other thread to start
     2341        while ( go == 1 ) { @do_call( m );@ }
     2342}
     2343int __attribute__((noinline)) do_wait( M & mutex m ) {
     2344        go = 1; // continue other thread
     2345        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @waitfor( do_call, m );@ } )
     2346        go = 0; // stop other thread
     2347        sout | result`ns | endl;
    31632348}
    31642349int main() {
    31652350        T t;
    3166         return do_wait(m1);
    3167 }
    3168 \end{cfa}
    3169 \end{figure}
    3170 
    3171 \begin{table}
    3172 \begin{center}
    3173 \begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
     2351        do_wait( m );
     2352}
     2353\end{cfa}
     2354\captionof{figure}{\CFA external scheduling benchmark}
     2355\label{f:ext-sched}
     2356
     2357\centering
     2358
     2359\captionof{table}{External scheduling comparison (nanoseconds)}
     2360\label{tab:ext-sched}
     2361\bigskip
     2362\begin{tabular}{|r|*{3}{D{.}{.}{3.2}|}}
    31742363\cline{2-4}
    3175 \multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
     2364\multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} &\multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
    31762365\hline
    3177 \uC @Accept@                                    & 350           & 350.61        & 3.11  \\
    3178 \CFA @waitfor@, 1 @monitor@     & 358.5 & 358.36        & 3.82  \\
     2366\uC @_Accept@                           & 350           & 350.61        & 3.11  \\
     2367\CFA @waitfor@, 1 @monitor@     & 358.5         & 358.36        & 3.82  \\
    31792368\CFA @waitfor@, 2 @monitor@     & 422           & 426.79        & 7.95  \\
    3180 \CFA @waitfor@, 4 @monitor@     & 579.5 & 585.46        & 11.25 \\
     2369\CFA @waitfor@, 4 @monitor@     & 579.5         & 585.46        & 11.25 \\
    31812370\hline
    31822371\end{tabular}
    3183 \end{center}
    3184 \caption{External scheduling comparison.
    3185 All numbers are in nanoseconds(\si{\nano\second})}
    3186 \label{tab:ext-sched}
    3187 \end{table}
    3188 
    3189 
    3190 \subsection{Object Creation}
    3191 Finally, the last benchmark measures the cost of creation for concurrent objects.
    3192 Figure~\ref{f:creation} shows the code for @pthread@s and \CFA threads, with results shown in table \ref{tab:creation}.
    3193 As with all other benchmarks, all omitted tests are functionally identical to one of these tests.
    3194 The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine, the creation cost is very low.
    3195 
    3196 \begin{figure}
    3197 \begin{center}
    3198 @pthread@
    3199 \begin{cfa}
     2372
     2373\bigskip
     2374\medskip
     2375
     2376\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
     2377\begin{cfa}
     2378thread MyThread {};
     2379void main( MyThread & ) {}
    32002380int main() {
    3201         BENCH(
    3202                 for(size_t i=0; i<n; i++) {
    3203                         pthread_t thread;
    3204                         if(pthread_create(&thread,NULL,foo,NULL)<0) {
    3205                                 perror( "failure" );
    3206                                 return 1;
    3207                         }
    3208 
    3209                         if(pthread_join(thread, NULL)<0) {
    3210                                 perror( "failure" );
    3211                                 return 1;
    3212                         }
    3213                 },
    3214                 result
    3215         )
    3216         printf("%llu\n", result);
    3217 }
    3218 \end{cfa}
    3219 
    3220 
    3221 
    3222 \CFA Threads
    3223 \begin{cfa}
    3224 int main() {
    3225         BENCH(
    3226                 for(size_t i=0; i<n; i++) {
    3227                         MyThread m;
    3228                 },
    3229                 result
    3230         )
    3231         printf("%llu\n", result);
    3232 }
    3233 \end{cfa}
    3234 \end{center}
    3235 \caption{Benchmark code for \protect\lstinline|pthread|s and \CFA to measure object creation}
     2381        BENCH( for ( size_t i = 0; i < N; i += 1 ) { @MyThread m;@ } )
     2382        sout | result`ns | endl;
     2383}
     2384\end{cfa}
     2385\captionof{figure}{\CFA object creation benchmark}
    32362386\label{f:creation}
    3237 \end{figure}
    3238 
    3239 \begin{table}
    3240 \begin{center}
    3241 \begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
     2387
     2388\centering
     2389
     2390\captionof{table}{Creation comparison (nanoseconds)}
     2391\label{tab:creation}
     2392\bigskip
     2393
     2394\begin{tabular}{|r|*{3}{D{.}{.}{5.2}|}}
    32422395\cline{2-4}
    3243 \multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
     2396\multicolumn{1}{c|}{} & \multicolumn{1}{c|}{Median} & \multicolumn{1}{c|}{Average} & \multicolumn{1}{c|}{Std Dev} \\
    32442397\hline
    3245 Pthreads                        & 26996 & 26984.71      & 156.6  \\
    3246 \CFA Coroutine Lazy     & 6             & 5.71  & 0.45   \\
     2398Pthreads                                & 26996         & 26984.71      & 156.6  \\
     2399\CFA Coroutine Lazy             & 6                     & 5.71          & 0.45   \\
    32472400\CFA Coroutine Eager    & 708           & 706.68        & 4.82   \\
    3248 \CFA Thread                     & 1173.5        & 1176.18       & 15.18  \\
    3249 \uC Coroutine           & 109           & 107.46        & 1.74   \\
    3250 \uC Thread                      & 526           & 530.89        & 9.73   \\
    3251 Goroutine                       & 2520.5        & 2530.93       & 61,56  \\
    3252 Java Thread                     & 91114.5       & 92272.79      & 961.58 \\
     2401\CFA Thread                             & 1173.5        & 1176.18       & 15.18  \\
     2402\uC Coroutine                   & 109           & 107.46        & 1.74   \\
     2403\uC Thread                              & 526           & 530.89        & 9.73   \\
     2404Goroutine                               & 2520.5        & 2530.93       & 61.56  \\
     2405Java Thread                             & 91114.5       & 92272.79      & 961.58 \\
    32532406\hline
    32542407\end{tabular}
    3255 \end{center}
    3256 \caption{Creation comparison.
    3257 All numbers are in nanoseconds(\si{\nano\second}).}
    3258 \label{tab:creation}
    3259 \end{table}
    3260 
     2408\end{figure}
     2409
     2410
     2411\paragraph{Object Creation}
     2412
     2413Object creation is measured by creating/deleting the specific kind of concurrent object.
     2414Figure~\ref{f:creation} shows the code for \CFA, with results in Table~\ref{tab:creation}.
     2415The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine to force stack creation, the creation cost is artificially low.
    32612416
    32622417
    32632418\section{Conclusion}
    3264 This paper has achieved a minimal concurrency \textbf{api} that is simple, efficient and usable as the basis for higher-level features.
    3265 The approach presented is based on a lightweight thread-system for parallelism, which sits on top of clusters of processors.
    3266 This M:N model is judged to be both more efficient and allow more flexibility for users.
    3267 Furthermore, this document introduces monitors as the main concurrency tool for users.
    3268 This paper also offers a novel approach allowing multiple monitors to be accessed simultaneously without running into the Nested Monitor Problem~\cite{Lister77}.
    3269 It also offers a full implementation of the concurrency runtime written entirely in \CFA, effectively the largest \CFA code base to date.
    3270 
    3271 
    3272 % ======================================================================
    3273 % ======================================================================
     2419
     2420This paper demonstrates a concurrency API that is simple, efficient, and able to build higher-level concurrency features.
     2421The approach provides concurrency based on a preemptive M:N user-level threading-system, executing in clusters, which encapsulate scheduling of work on multiple kernel threads providing parallelism.
     2422The M:N model is judged to be efficient and provide greater flexibility than a 1:1 threading model.
     2423High-level objects (monitor/task) are the core mechanism for mutual exclusion and synchronization.
     2424A novel aspect is allowing multiple mutex-objects to be accessed simultaneously reducing the potential for deadlock for this complex scenario.
     2425These concepts and the entire \CFA runtime-system are written in the \CFA language, demonstrating the expressiveness of the \CFA language.
     2426Performance comparisons with other concurrent systems/languages show the \CFA approach is competitive across all low-level operations, which translates directly into good performance in well-written concurrent applications.
     2427C programmers should feel comfortable using these mechanisms for developing concurrent applications, with the ability to obtain maximum available performance by mechanisms at the appropriate level.
     2428
     2429
    32742430\section{Future Work}
    3275 % ======================================================================
    3276 % ======================================================================
    3277 
    3278 \subsection{Performance} \label{futur:perf}
    3279 This paper presents a first implementation of the \CFA concurrency runtime.
    3280 Therefore, there is still significant work to improve performance.
    3281 Many of the data structures and algorithms may change in the future to more efficient versions.
    3282 For example, the number of monitors in a single bulk acquire is only bound by the stack size, this is probably unnecessarily generous.
    3283 It may be possible that limiting the number helps increase performance.
    3284 However, it is not obvious that the benefit would be significant.
    3285 
    3286 \subsection{Flexible Scheduling} \label{futur:sched}
     2431
     2432While concurrency in \CFA has a strong start, development is still underway and there are missing features.
     2433
     2434\paragraph{Flexible Scheduling}
     2435\label{futur:sched}
     2436
    32872437An important part of concurrency is scheduling.
    32882438Different scheduling algorithms can affect performance (both in terms of average and variation).
    32892439However, no single scheduler is optimal for all workloads and therefore there is value in being able to change the scheduler for given programs.
    3290 One solution is to offer various tweaking options to users, allowing the scheduler to be adjusted to the requirements of the workload.
    3291 However, in order to be truly flexible, it would be interesting to allow users to add arbitrary data and arbitrary scheduling algorithms.
    3292 For example, a web server could attach Type-of-Service information to threads and have a ``ToS aware'' scheduling algorithm tailored to this specific web server.
    3293 This path of flexible schedulers will be explored for \CFA.
    3294 
    3295 \subsection{Non-Blocking I/O} \label{futur:nbio}
    3296 While most of the parallelism tools are aimed at data parallelism and control-flow parallelism, many modern workloads are not bound on computation but on IO operations, a common case being web servers and XaaS (anything as a service).
    3297 These types of workloads often require significant engineering around amortizing costs of blocking IO operations.
    3298 At its core, non-blocking I/O is an operating system level feature that allows queuing IO operations (\eg network operations) and registering for notifications instead of waiting for requests to complete.
    3299 In this context, the role of the language makes Non-Blocking IO easily available and with low overhead.
    3300 The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises, which can be seen in frameworks like Node.js~\cite{NodeJs} for JavaScript, Spring MVC~\cite{SpringMVC} for Java and Django~\cite{Django} for Python.
    3301 However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear.
    3302 
    3303 \subsection{Other Concurrency Tools} \label{futur:tools}
    3304 While monitors offer a flexible and powerful concurrent core for \CFA, other concurrency tools are also necessary for a complete multi-paradigm concurrency package.
    3305 Examples of such tools can include simple locks and condition variables, futures and promises~\cite{promises}, executors and actors.
     2440One solution is to offer various tweaking options, allowing the scheduler to be adjusted to the requirements of the workload.
     2441However, to be truly flexible, it is necessary to have a pluggable scheduler.
     2442Currently, the \CFA pluggable scheduler is too simple to handle complex scheduling, \eg quality of service and real-time, where the scheduler must interact with mutex objects to deal with issues like priority inversion.
     2443
     2444\paragraph{Non-Blocking I/O}
     2445\label{futur:nbio}
     2446
     2447Many modern workloads are not bound by computation but IO operations, a common case being web servers and XaaS~\cite{XaaS} (anything as a service).
     2448These types of workloads require significant engineering to amortize costs of blocking IO-operations.
     2449At its core, non-blocking I/O is an operating-system level feature queuing IO operations, \eg network operations, and registering for notifications instead of waiting for requests to complete.
     2450Current trends use asynchronous programming like callbacks, futures, and/or promises, \eg Node.js~\cite{NodeJs} for JavaScript, Spring MVC~\cite{SpringMVC} for Java, and Django~\cite{Django} for Python.
     2451However, these solutions lead to code that is hard to create, read and maintain.
     2452A better approach is to tie non-blocking I/O into the concurrency system to provide ease of use with low overhead, \eg thread-per-connection web-services.
     2453A non-blocking I/O library is currently under development for \CFA.
     2454
     2455\paragraph{Other Concurrency Tools}
     2456\label{futur:tools}
     2457
     2458While monitors offer a flexible and powerful concurrent core for \CFA, other concurrency tools are also necessary for a complete multi-paradigm concurrency package.
     2459Examples of such tools can include futures and promises~\cite{promises}, executors and actors.
    33062460These additional features are useful when monitors offer a level of abstraction that is inadequate for certain tasks.
    33072461
    3308 \subsection{Implicit Threading} \label{futur:implcit}
    3309 Simpler applications can benefit greatly from having implicit parallelism.
    3310 That is, parallelism that does not rely on the user to write concurrency.
    3311 This type of parallelism can be achieved both at the language level and at the library level.
    3312 The canonical example of implicit parallelism is parallel for loops, which are the simplest example of a divide and conquer algorithms~\cite{uC++book}.
    3313 Table \ref{f:parfor} shows three different code examples that accomplish point-wise sums of large arrays.
    3314 Note that none of these examples explicitly declare any concurrency or parallelism objects.
    3315 
    3316 \begin{table}
    3317 \begin{center}
    3318 \begin{tabular}[t]{|c|c|c|}
    3319 Sequential & Library Parallel & Language Parallel \\
    3320 \begin{cfa}[tabsize=3]
    3321 void big_sum(
    3322         int* a, int* b,
    3323         int* o,
    3324         size_t len)
    3325 {
    3326         for(
    3327                 int i = 0;
    3328                 i < len;
    3329                 ++i )
    3330         {
    3331                 o[i]=a[i]+b[i];
    3332         }
    3333 }
    3334 
    3335 
    3336 
    3337 
    3338 
    3339 int* a[10000];
    3340 int* b[10000];
    3341 int* c[10000];
    3342 //... fill in a & b
    3343 big_sum(a,b,c,10000);
    3344 \end{cfa} &\begin{cfa}[tabsize=3]
    3345 void big_sum(
    3346         int* a, int* b,
    3347         int* o,
    3348         size_t len)
    3349 {
    3350         range ar(a, a+len);
    3351         range br(b, b+len);
    3352         range or(o, o+len);
    3353         parfor( ai, bi, oi,
    3354         [](     int* ai,
    3355                 int* bi,
    3356                 int* oi)
    3357         {
    3358                 oi=ai+bi;
    3359         });
    3360 }
    3361 
    3362 
    3363 int* a[10000];
    3364 int* b[10000];
    3365 int* c[10000];
    3366 //... fill in a & b
    3367 big_sum(a,b,c,10000);
    3368 \end{cfa}&\begin{cfa}[tabsize=3]
    3369 void big_sum(
    3370         int* a, int* b,
    3371         int* o,
    3372         size_t len)
    3373 {
    3374         parfor (ai,bi,oi)
    3375             in (a, b, o )
    3376         {
    3377                 oi = ai + bi;
    3378         }
    3379 }
    3380 
    3381 
    3382 
    3383 
    3384 
    3385 
    3386 
    3387 int* a[10000];
    3388 int* b[10000];
    3389 int* c[10000];
    3390 //... fill in a & b
    3391 big_sum(a,b,c,10000);
    3392 \end{cfa}
    3393 \end{tabular}
    3394 \end{center}
    3395 \caption{For loop to sum numbers: Sequential, using library parallelism and language parallelism.}
    3396 \label{f:parfor}
    3397 \end{table}
    3398 
    3399 Implicit parallelism is a restrictive solution and therefore has its limitations.
    3400 However, it is a quick and simple approach to parallelism, which may very well be sufficient for smaller applications and reduces the amount of boilerplate needed to start benefiting from parallelism in modern CPUs.
    3401 
    3402 
    3403 % A C K N O W L E D G E M E N T S
    3404 % -------------------------------
     2462\paragraph{Implicit Threading}
     2463\label{futur:implcit}
     2464
     2465Basic concurrent (embarrassingly parallel) applications can benefit greatly from implicit concurrency, where sequential programs are converted to concurrent, possibly with some help from pragmas to guide the conversion.
     2466This type of concurrency can be achieved both at the language level and at the library level.
     2467The canonical example of implicit concurrency is concurrent nested @for@ loops, which are amenable to divide and conquer algorithms~\cite{uC++book}.
     2468The \CFA language features should make it possible to develop a reasonable number of implicit concurrency mechanisms to solve basic HPC data-concurrency problems.
     2469However, implicit concurrency is a restrictive solution and has its limitations, so it can never replace explicit concurrent programming.
     2470
     2471
    34052472\section{Acknowledgements}
    34062473
    3407 Thanks to Aaron Moss, Rob Schluntz and Andrew Beach for their work on the \CFA project as well as all the discussions which helped concretize the ideas in this paper.
    3408 Partial funding was supplied by the Natural Sciences and Engineering Research Council of Canada and a corporate partnership with Huawei Ltd.
    3409 
    3410 
    3411 % B I B L I O G R A P H Y
    3412 % -----------------------------
    3413 %\bibliographystyle{plain}
     2474The authors would like to recognize the design assistance of Aaron Moss, Rob Schluntz and Andrew Beach on the features described in this paper.
     2475Funding for this project has been provided by Huawei Ltd.\ (\url{http://www.huawei.com}), and Peter Buhr is partially funded by the Natural Sciences and Engineering Research Council of Canada.
     2476
     2477{%
     2478\fontsize{9bp}{12bp}\selectfont%
    34142479\bibliography{pl,local}
    3415 
     2480}%
    34162481
    34172482\end{document}
  • doc/papers/concurrency/annex/local.bib

    ra12c81f3 rc653b37  
    4646    title       = {Thread Building Blocks},
    4747    howpublished= {Intel, \url{https://www.threadingbuildingblocks.org}},
    48     note        = {Accessed: 2018-3},
     48    optnote     = {Accessed: 2018-3},
    4949}
    5050
  • doc/papers/concurrency/figures/ext_monitor.fig

    ra12c81f3 rc653b37  
    88-2
    991200 2
    10 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 3150.000 3450.000 3150 3150 2850 3450 3150 3750
    11 5 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 3150.000 4350.000 3150 4050 2850 4350 3150 4650
    12 6 5850 1950 6150 2250
    13 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 2100 105 105 6000 2100 6105 2205
    14 4 1 -1 0 0 0 10 0.0000 2 105 90 6000 2160 d\001
     105 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1575.000 3450.000 1575 3150 1275 3450 1575 3750
     115 1 0 1 -1 -1 0 0 -1 0.000 0 1 0 0 1575.000 4350.000 1575 4050 1275 4350 1575 4650
     126 4275 1950 4575 2250
     131 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 2100 105 105 4425 2100 4530 2205
     144 1 -1 0 0 0 10 0.0000 2 105 90 4425 2160 d\001
    1515-6
    16 6 5100 2100 5400 2400
    17 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 5250 2250 105 105 5250 2250 5355 2250
    18 4 1 -1 0 0 0 10 0.0000 2 105 120 5250 2295 X\001
     166 4275 1650 4575 1950
     171 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 1800 105 105 4425 1800 4530 1905
     184 1 -1 0 0 0 10 0.0000 2 105 90 4425 1860 b\001
    1919-6
    20 6 5100 1800 5400 2100
    21 1 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 5250 1950 105 105 5250 1950 5355 1950
    22 4 1 -1 0 0 0 10 0.0000 2 105 120 5250 2010 Y\001
     206 1495 5445 5700 5655
     211 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 1575 5550 80 80 1575 5550 1655 5630
     221 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2925 5550 105 105 2925 5550 3030 5655
     231 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 4425 5550 105 105 4425 5550 4530 5655
     244 0 -1 0 0 0 12 0.0000 2 135 1035 3150 5625 blocked task\001
     254 0 -1 0 0 0 12 0.0000 2 135 870 1725 5625 active task\001
     264 0 -1 0 0 0 12 0.0000 2 135 1050 4650 5625 routine mask\001
    2327-6
    24 6 5850 1650 6150 1950
    25 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 1800 105 105 6000 1800 6105 1905
    26 4 1 -1 0 0 0 10 0.0000 2 105 90 6000 1860 b\001
     286 3525 1800 3825 2400
     291 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3675 1950 105 105 3675 1950 3780 1950
     302 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
     31         3525 1800 3825 1800 3825 2400 3525 2400 3525 1800
     324 1 4 0 0 0 10 0.0000 2 105 120 3675 2010 Y\001
    2733-6
    28 6 3070 5445 7275 5655
    29 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3150 5550 80 80 3150 5550 3230 5630
    30 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4500 5550 105 105 4500 5550 4605 5655
    31 1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 6000 5550 105 105 6000 5550 6105 5655
    32 4 0 -1 0 0 0 12 0.0000 2 135 1035 4725 5625 blocked task\001
    33 4 0 -1 0 0 0 12 0.0000 2 135 870 3300 5625 active task\001
    34 4 0 -1 0 0 0 12 0.0000 2 135 1050 6225 5625 routine mask\001
     346 3525 2100 3825 2400
     351 3 0 1 -1 -1 1 0 4 0.000 1 0.0000 3675 2250 105 105 3675 2250 3780 2250
     364 1 4 0 0 0 10 0.0000 2 105 120 3675 2295 X\001
    3537-6
    36 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3300 3600 105 105 3300 3600 3405 3705
    37 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3600 3600 105 105 3600 3600 3705 3705
    38 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6600 3900 105 105 6600 3900 6705 4005
    39 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6900 3900 105 105 6900 3900 7005 4005
    40 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 2700 105 105 6000 2700 6105 2805
    41 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6000 2400 105 105 6000 2400 6105 2505
    42 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 5100 4575 80 80 5100 4575 5180 4655
     381 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 1725 3600 105 105 1725 3600 1830 3705
     391 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 2025 3600 105 105 2025 3600 2130 3705
     401 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5025 3900 105 105 5025 3900 5130 4005
     411 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5325 3900 105 105 5325 3900 5430 4005
     421 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 2700 105 105 4425 2700 4530 2805
     431 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 2400 105 105 4425 2400 4530 2505
     441 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3525 4575 80 80 3525 4575 3605 4655
    43452 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    44          4050 2925 5475 2925 5475 3225 4050 3225 4050 2925
     46         2475 2925 3900 2925 3900 3225 2475 3225 2475 2925
    45472 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
    46          3150 3750 3750 3750 3750 4050 3150 4050
     48         1575 3750 2175 3750 2175 4050 1575 4050
    47492 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
    48          3150 3450 3750 3450 3900 3675
     50         1575 3450 2175 3450 2325 3675
    49512 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    50          3750 3150 3600 3375
     52         2175 3150 2025 3375
    51532 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 3
    52          3150 4350 3750 4350 3900 4575
     54         1575 4350 2175 4350 2325 4575
    53552 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    54          3750 4050 3600 4275
     56         2175 4050 2025 4275
    55572 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
    56          3150 4650 3750 4650 3750 4950 4950 4950
     58         1575 4650 2175 4650 2175 4950 3375 4950
    57592 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    58          6450 3750 6300 3975
     60         4875 3750 4725 3975
    59612 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    60          4950 4950 5175 5100
     62         3375 4950 3600 5100
    61632 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
    62          5250 4950 6450 4950 6450 4050 7050 4050 7050 3750 6450 3750
    63          6450 2850 6150 2850 6150 1650
     64         3675 4950 4875 4950 4875 4050 5475 4050 5475 3750 4875 3750
     65         4875 2850 4575 2850 4575 1650
    64662 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
    65          5850 4200 5850 3300 4350 3300 4350 4200 5850 4200
    66 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 1 2
     67         4275 4200 4275 3300 2775 3300 2775 4200 4275 4200
     682 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
    6769        1 1 1.00 60.00 120.00
    68         7 1 1.00 60.00 120.00
    69          5250 3150 5250 2400
     70         3675 3075 3675 2400
     712 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     72         4125 2850 4575 3000
    70732 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    71          3150 3150 3750 3150 3750 2850 5700 2850 5700 1650
    72 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    73          5700 2850 6150 3000
    74 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
    75          5100 1800 5400 1800 5400 2400 5100 2400 5100 1800
    76 4 1 -1 0 0 0 10 0.0000 2 75 75 6000 2745 a\001
    77 4 1 -1 0 0 0 10 0.0000 2 75 75 6000 2445 c\001
    78 4 1 -1 0 0 0 12 0.0000 2 135 315 5100 5325 exit\001
    79 4 1 -1 0 0 0 12 0.0000 2 135 135 3300 3075 A\001
    80 4 1 -1 0 0 0 12 0.0000 2 135 795 3300 4875 condition\001
    81 4 1 -1 0 0 0 12 0.0000 2 135 135 3300 5100 B\001
    82 4 0 -1 0 0 0 12 0.0000 2 135 420 6600 3675 stack\001
    83 4 0 -1 0 0 0 12 0.0000 2 180 750 6600 3225 acceptor/\001
    84 4 0 -1 0 0 0 12 0.0000 2 180 750 6600 3450 signalled\001
    85 4 1 -1 0 0 0 12 0.0000 2 135 795 3300 2850 condition\001
    86 4 1 -1 0 0 0 12 0.0000 2 165 420 6000 1350 entry\001
    87 4 1 -1 0 0 0 12 0.0000 2 135 495 6000 1575 queue\001
    88 4 0 -1 0 0 0 12 0.0000 2 135 525 6300 2400 arrival\001
    89 4 0 -1 0 0 0 12 0.0000 2 135 630 6300 2175 order of\001
    90 4 1 -1 0 0 0 12 0.0000 2 135 525 5100 3675 shared\001
    91 4 1 -1 0 0 0 12 0.0000 2 135 735 5100 3975 variables\001
    92 4 0 0 50 -1 0 11 0.0000 2 165 855 4275 3150 Acceptables\001
    93 4 0 0 50 -1 0 11 0.0000 2 120 165 5775 2700 W\001
    94 4 0 0 50 -1 0 11 0.0000 2 120 135 5775 2400 X\001
    95 4 0 0 50 -1 0 11 0.0000 2 120 105 5775 2100 Z\001
    96 4 0 0 50 -1 0 11 0.0000 2 120 135 5775 1800 Y\001
     74         1575 3150 2175 3150 2175 2850 4125 2850 4125 1650
     754 1 -1 0 0 0 10 0.0000 2 75 75 4425 2745 a\001
     764 1 -1 0 0 0 10 0.0000 2 75 75 4425 2445 c\001
     774 1 -1 0 0 0 12 0.0000 2 135 315 3525 5325 exit\001
     784 1 -1 0 0 0 12 0.0000 2 135 135 1725 3075 A\001
     794 1 -1 0 0 0 12 0.0000 2 135 795 1725 4875 condition\001
     804 1 -1 0 0 0 12 0.0000 2 135 135 1725 5100 B\001
     814 0 -1 0 0 0 12 0.0000 2 135 420 5025 3675 stack\001
     824 0 -1 0 0 0 12 0.0000 2 180 750 5025 3225 acceptor/\001
     834 0 -1 0 0 0 12 0.0000 2 180 750 5025 3450 signalled\001
     844 1 -1 0 0 0 12 0.0000 2 135 795 1725 2850 condition\001
     854 1 -1 0 0 0 12 0.0000 2 165 420 4425 1350 entry\001
     864 1 -1 0 0 0 12 0.0000 2 135 495 4425 1575 queue\001
     874 0 -1 0 0 0 12 0.0000 2 135 525 4725 2400 arrival\001
     884 0 -1 0 0 0 12 0.0000 2 135 630 4725 2175 order of\001
     894 1 -1 0 0 0 12 0.0000 2 135 525 3525 3675 shared\001
     904 1 -1 0 0 0 12 0.0000 2 135 735 3525 3975 variables\001
     914 0 4 50 -1 0 11 0.0000 2 120 135 4150 1875 Y\001
     924 0 4 50 -1 0 11 0.0000 2 120 105 4150 2175 Z\001
     934 0 4 50 -1 0 11 0.0000 2 120 135 4150 2475 X\001
     944 0 4 50 -1 0 11 0.0000 2 120 165 4150 2775 W\001
     954 0 -1 0 0 3 12 0.0000 2 150 540 5025 4275 urgent\001
     964 1 0 50 -1 0 11 0.0000 2 165 600 3150 3150 accepted\001
  • doc/papers/concurrency/figures/monitor.fig

    ra12c81f3 rc653b37  
    72722 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
    7373         3600 1500 3600 2100 4200 2100 4200 900
    74 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
    75          2700 1500 2700 2100 3300 2100 3300 1500
    76742 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 9
    7775         3600 4200 4800 4200 4800 3300 5400 3300 5400 3000 4800 3000
     
    79772 2 1 1 -1 -1 0 0 -1 4.000 0 0 0 0 0 5
    8078         4200 3450 4200 2550 2700 2550 2700 3450 4200 3450
     792 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
     80         2700 1500 2700 2100 3300 2100 3300 1500
    81814 1 -1 0 0 0 10 0.0000 2 75 75 4350 1995 a\001
    82824 1 -1 0 0 0 10 0.0000 2 75 75 4350 1695 c\001
     
    89894 0 -1 0 0 0 12 0.0000 2 180 750 4950 2700 signalled\001
    90904 1 -1 0 0 0 12 0.0000 2 135 795 1650 2100 condition\001
    91 4 1 -1 0 0 0 12 0.0000 2 135 135 2550 1425 X\001
    92 4 1 -1 0 0 0 12 0.0000 2 135 135 3450 1425 Y\001
     914 1 4 0 0 0 12 0.0000 2 135 135 2550 1425 X\001
     924 1 4 0 0 0 12 0.0000 2 135 135 3450 1425 Y\001
    93934 1 -1 0 0 0 12 0.0000 2 165 420 4350 600 entry\001
    94944 1 -1 0 0 0 12 0.0000 2 135 495 4350 825 queue\001
     
    1001004 1 -1 0 0 0 10 0.0000 2 75 75 3450 1995 c\001
    1011014 1 -1 0 0 0 12 0.0000 2 135 570 3000 1200 queues\001
     1024 0 -1 0 0 3 12 0.0000 2 150 540 4950 3525 urgent\001
  • doc/papers/general/Makefile

    ra12c81f3 rc653b37  
    5959        dvips ${Build}/$< -o $@
    6060
    61 ${BASE}.dvi : Makefile ${Build} ${BASE}.out.ps WileyNJD-AMA.bst ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
    62                 ../../bibliography/pl.bib
     61${BASE}.dvi : Makefile ${BASE}.out.ps WileyNJD-AMA.bst ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
     62                ../../bibliography/pl.bib | ${Build}
    6363        # Must have *.aux file containing citations for bibtex
    6464        if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename $@}.tex ; fi
     
    7575        mkdir -p ${Build}
    7676
    77 ${BASE}.out.ps : ${Build}
     77${BASE}.out.ps : | ${Build}
    7878        ln -fs ${Build}/Paper.out.ps .
    7979
     
    8484        gnuplot -e Build="'${Build}/'" evaluation/timing.gp
    8585
    86 %.tex : %.fig ${Build}
     86%.tex : %.fig | ${Build}
    8787        fig2dev -L eepic $< > ${Build}/$@
    8888
    89 %.ps : %.fig ${Build}
     89%.ps : %.fig | ${Build}
    9090        fig2dev -L ps $< > ${Build}/$@
    9191
    92 %.pstex : %.fig ${Build}
     92%.pstex : %.fig | ${Build}
    9393        fig2dev -L pstex $< > ${Build}/$@
    9494        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
  • doc/proposals/ctordtor/Makefile

    ra12c81f3 rc653b37  
    1 ## Define the appropriate configuration variables.
     1## Define the configuration variables.
    22
    3 MACROS = ../../LaTeXmacros
    4 BIB = ../../bibliography
     3Build = build
     4Figures = figures
     5Macros = ../../LaTeXmacros
     6Bib = ../../bibliography
    57
    6 TeXLIB = .:$(MACROS):$(MACROS)/listings:$(MACROS)/enumitem:$(BIB)/:
    7 LaTeX  = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error
     8TeXLIB = .:${Macros}:${MACROS}/listings:${MACROS}/enumitem:${Bib}/:
     9LaTeX  = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=${Build}
    810BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex
     11
     12MAKEFLAGS = --no-print-directory # --silent
     13VPATH = ${Build} ${Figures}
    914
    1015## Define the text source files.
     
    2934
    3035DOCUMENT = ctor.pdf
     36BASE = ${basename ${DOCUMENT}}
    3137
    3238# Directives #
     39
     40.PHONY : all clean                                      # not file names
    3341
    3442all : ${DOCUMENT}
    3543
    3644clean :
    37         rm -f *.bbl *.aux *.dvi *.idx *.ilg *.ind *.brf *.out *.log *.toc *.blg *.pstex_t *.cf \
    38                 ${FIGURES} ${PICTURES} ${PROGRAMS} ${GRAPHS} ${basename ${DOCUMENT}}.ps ${DOCUMENT}
     45        @rm -frv ${DOCUMENT} ${BASE}.ps ${Build}
    3946
    4047# File Dependencies #
    4148
    42 ${DOCUMENT} : ${basename ${DOCUMENT}}.ps
     49${DOCUMENT} : ${BASE}.ps
    4350        ps2pdf $<
    4451
    45 ${basename ${DOCUMENT}}.ps : ${basename ${DOCUMENT}}.dvi
    46         dvips $< -o $@
     52${BASE}.ps : ${BASE}.dvi
     53        dvips ${Build}/$< -o $@
    4754
    48 ${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex \
    49                 $(MACROS)/common.tex $(MACROS)/indexstyle $(BIB)/cfa.bib
     55${BASE}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
     56                ${Macros}/common.tex ${Macros}/indexstyle ${Bib}/pl.bib | ${Build}
    5057        # Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
    51         if [ ! -r ${basename $@}.ind ] ; then touch ${basename $@}.ind ; fi
     58        #if [ ! -r ${basename $@}.ind ] ; then touch ${Build}/${basename $@}.ind ; fi
    5259        # Must have *.aux file containing citations for bibtex
    5360        if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename $@}.tex ; fi
    54         -${BibTeX} ${basename $@}
    55         # Some citations reference others so run steps again to resolve these citations
     61        -${BibTeX} ${Build}/${basename $@}
     62        # Some citations reference others so run again to resolve these citations
    5663        ${LaTeX} ${basename $@}.tex
    57         -${BibTeX} ${basename $@}
     64        -${BibTeX} ${Build}/${basename $@}
    5865        # Make index from *.aux entries and input index at end of document
    59         makeindex -s $(MACROS)/indexstyle ${basename $@}.idx
     66        #makeindex -s ${Macros}/indexstyle ${Build}/${basename $@}.idx
     67        # Run again to finish citations
    6068        ${LaTeX} ${basename $@}.tex
    6169        # Run again to get index title into table of contents
     
    6775## Define the default recipes.
    6876
    69 %.tex : %.fig
    70         fig2dev -L eepic $< > $@
     77${Build}:
     78        mkdir -p ${Build}
    7179
    72 %.ps : %.fig
    73         fig2dev -L ps $< > $@
     80%.tex : %.fig | ${Build}
     81        fig2dev -L eepic $< > ${Build}/$@
    7482
    75 %.pstex : %.fig
    76         fig2dev -L pstex $< > $@
    77         fig2dev -L pstex_t -p $@ $< > $@_t
     83%.ps : %.fig | ${Build}
     84        fig2dev -L ps $< > ${Build}/$@
     85
     86%.pstex : %.fig | ${Build}
     87        fig2dev -L pstex $< > ${Build}/$@
     88        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
    7889
    7990# Local Variables: #
  • doc/proposals/ctordtor/ctor.tex

    ra12c81f3 rc653b37  
    1 % inline code ©...© (copyright symbol) emacs: C-q M-)
    2 % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
    3 % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
    4 % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
    5 % LaTex escape §...§ (section symbol) emacs: C-q M-'
    6 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
    7 % math escape $...$ (dollar symbol)
    8 
    91\documentclass[twoside,11pt]{article}
    102
     
    157\usepackage{textcomp}
    168\usepackage[latin1]{inputenc}
     9
    1710\usepackage{fullpage,times,comment}
    1811\usepackage{epic,eepic}
    19 \usepackage{upquote}                                                                    % switch curled `'" to straight
     12\usepackage{upquote}                                    % switch curled `'" to straight
    2013\usepackage{calc}
    2114\usepackage{xspace}
    2215\usepackage{graphicx}
    23 \usepackage{varioref}                                                                   % extended references
    24 \usepackage{listings}                                                                   % format program code
    25 \usepackage[flushmargin]{footmisc}                                              % support label/reference in footnote
     16\usepackage{varioref}                                   % extended references
     17\usepackage{listings}                                   % format program code
     18\usepackage[flushmargin]{footmisc}                      % support label/reference in footnote
    2619\usepackage{latexsym}                                   % \Box glyph
    2720\usepackage{mathptmx}                                   % better math font with "times"
     
    3427\renewcommand{\UrlFont}{\small\sf}
    3528
    36 \setlength{\topmargin}{-0.45in}                                                 % move running title into header
     29\setlength{\topmargin}{-0.45in}                         % move running title into header
    3730\setlength{\headsep}{0.25in}
    3831
     
    4336
    4437\interfootnotelinepenalty=10000
     38
     39\CFAStyle                                               % use default CFA format-style
     40% inline code ©...© (copyright symbol) emacs: C-q M-)
     41% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
     42% blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
     43% green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
     44% LaTex escape §...§ (section symbol) emacs: C-q M-'
     45% keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
     46% math escape $...$ (dollar symbol)
     47
    4548
    4649\title{
     
    8386\thispagestyle{plain}
    8487\pagenumbering{arabic}
    85 
    8688
    8789
  • doc/proposals/tuples/Makefile

    ra12c81f3 rc653b37  
    1 ## Define the appropriate configuration variables.
     1## Define the configuration variables.
    22
    3 MACROS = ../../LaTeXmacros
    4 BIB = ../../bibliography
     3Build = build
     4Figures = figures
     5Macros = ../../LaTeXmacros
     6Bib = ../../bibliography
    57
    6 TeXLIB = .:$(MACROS):$(MACROS)/listings:$(MACROS)/enumitem:$(BIB)/:
    7 LaTeX  = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error
     8TeXLIB = .:${Macros}:${MACROS}/listings:${MACROS}/enumitem:${Bib}/:
     9LaTeX  = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=${Build}
    810BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex
     11
     12MAKEFLAGS = --no-print-directory --silent #
     13VPATH = ${Build} ${Figures}
    914
    1015## Define the text source files.
     
    2934
    3035DOCUMENT = tuples.pdf
     36BASE = ${basename ${DOCUMENT}}
    3137
    3238# Directives #
     39
     40.PHONY : all clean                                      # not file names
    3341
    3442all : ${DOCUMENT}
    3543
    3644clean :
    37         rm -f *.bbl *.aux *.dvi *.idx *.ilg *.ind *.brf *.out *.log *.toc *.blg *.pstex_t *.cf \
    38                 ${FIGURES} ${PICTURES} ${PROGRAMS} ${GRAPHS} ${basename ${DOCUMENT}}.ps ${DOCUMENT}
     45        @rm -frv ${DOCUMENT} ${BASE}.ps ${Build}
    3946
    4047# File Dependencies #
    4148
    42 ${DOCUMENT} : ${basename ${DOCUMENT}}.ps
     49${DOCUMENT} : ${BASE}.ps
    4350        ps2pdf $<
    4451
    45 ${basename ${DOCUMENT}}.ps : ${basename ${DOCUMENT}}.dvi
    46         dvips $< -o $@
     52${BASE}.ps : ${BASE}.dvi
     53        dvips ${Build}/$< -o $@
    4754
    48 ${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex \
    49                 $(MACROS)/common.tex $(MACROS)/indexstyle $(BIB)/cfa.bib
     55${BASE}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
     56                ${Macros}/common.tex ${Macros}/indexstyle ${Bib}/pl.bib | ${Build}
    5057        # Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
    51         if [ ! -r ${basename $@}.ind ] ; then touch ${basename $@}.ind ; fi
     58        #if [ ! -r ${basename $@}.ind ] ; then touch ${Build}/${basename $@}.ind ; fi
    5259        # Must have *.aux file containing citations for bibtex
    5360        if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename $@}.tex ; fi
    54         -${BibTeX} ${basename $@}
    55         # Some citations reference others so run steps again to resolve these citations
     61        -${BibTeX} ${Build}/${basename $@}
     62        # Some citations reference others so run again to resolve these citations
    5663        ${LaTeX} ${basename $@}.tex
    57         -${BibTeX} ${basename $@}
     64        -${BibTeX} ${Build}/${basename $@}
    5865        # Make index from *.aux entries and input index at end of document
    59         makeindex -s $(MACROS)/indexstyle ${basename $@}.idx
     66        #makeindex -s ${Macros}/indexstyle ${Build}/${basename $@}.idx
     67        # Run again to finish citations
    6068        ${LaTeX} ${basename $@}.tex
    6169        # Run again to get index title into table of contents
     
    6775## Define the default recipes.
    6876
    69 %.tex : %.fig
    70         fig2dev -L eepic $< > $@
     77${Build}:
     78        mkdir -p ${Build}
    7179
    72 %.ps : %.fig
    73         fig2dev -L ps $< > $@
     80%.tex : %.fig | ${Build}
     81        fig2dev -L eepic $< > ${Build}/$@
    7482
    75 %.pstex : %.fig
    76         fig2dev -L pstex $< > $@
    77         fig2dev -L pstex_t -p $@ $< > $@_t
     83%.ps : %.fig | ${Build}
     84        fig2dev -L ps $< > ${Build}/$@
     85
     86%.pstex : %.fig | ${Build}
     87        fig2dev -L pstex $< > ${Build}/$@
     88        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
    7889
    7990# Local Variables: #
  • doc/proposals/tuples/tuples.tex

    ra12c81f3 rc653b37  
    1 % inline code ©...© (copyright symbol) emacs: C-q M-)
    2 % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
    3 % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
    4 % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
    5 % LaTex escape §...§ (section symbol) emacs: C-q M-'
    6 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
    7 % math escape $...$ (dollar symbol)
    8 
    91\documentclass[twoside,11pt]{article}
    102
     
    157\usepackage{textcomp}
    168\usepackage[latin1]{inputenc}
     9
    1710\usepackage{fullpage,times,comment}
    1811\usepackage{epic,eepic}
    19 \usepackage{upquote}                                                                    % switch curled `'" to straight
     12\usepackage{upquote}                                    % switch curled `'" to straight
    2013\usepackage{calc}
    2114\usepackage{xspace}
    2215\usepackage{graphicx}
    23 \usepackage{varioref}                                                                   % extended references
    24 \usepackage{listings}                                                                   % format program code
    25 \usepackage[flushmargin]{footmisc}                                              % support label/reference in footnote
     16\usepackage{varioref}                                   % extended references
     17\usepackage{listings}                                   % format program code
     18\usepackage[flushmargin]{footmisc}                      % support label/reference in footnote
    2619\usepackage{latexsym}                                   % \Box glyph
    2720\usepackage{mathptmx}                                   % better math font with "times"
     
    3427\renewcommand{\UrlFont}{\small\sf}
    3528
    36 \setlength{\topmargin}{-0.45in}                                                 % move running title into header
     29\setlength{\topmargin}{-0.45in}                         % move running title into header
    3730\setlength{\headsep}{0.25in}
    3831
     
    4235
    4336\interfootnotelinepenalty=10000
     37
     38\CFAStyle                                               % use default CFA format-style
     39% inline code ©...© (copyright symbol) emacs: C-q M-)
     40% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
     41% blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
     42% green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
     43% LaTex escape §...§ (section symbol) emacs: C-q M-'
     44% keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
     45% math escape $...$ (dollar symbol)
     46
    4447
    4548\title{
  • doc/refrat/Makefile

    ra12c81f3 rc653b37  
    5353        dvips ${Build}/$< -o $@
    5454
    55 ${BASE}.dvi : Makefile ${Build} ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
    56                 ${Macros}/common.tex ${Macros}/lstlang.sty ${Macros}/indexstyle ../bibliography/pl.bib
     55${BASE}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
     56                ${Macros}/common.tex ${Macros}/lstlang.sty ${Macros}/indexstyle ../bibliography/pl.bib | ${Build}
    5757        # Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
    5858        if [ ! -r ${basename $@}.ind ] ; then touch ${Build}/${basename $@}.ind ; fi
     
    7878        mkdir -p ${Build}
    7979
    80 %.tex : %.fig ${Build}
     80%.tex : %.fig | ${Build}
    8181        fig2dev -L eepic $< > ${Build}/$@
    8282
    83 %.ps : %.fig ${Build}
     83%.ps : %.fig | ${Build}
    8484        fig2dev -L ps $< > ${Build}/$@
    8585
    86 %.pstex : %.fig ${Build}
     86%.pstex : %.fig | ${Build}
    8787        fig2dev -L pstex $< > ${Build}/$@
    8888        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
  • doc/theses/aaron_moss/comp_II/Makefile

    ra12c81f3 rc653b37  
    3232
    3333DOCUMENT = comp_II.pdf
     34BASE = ${basename ${DOCUMENT}}
    3435
    3536# Directives #
     
    4041
    4142clean :
    42         @rm -frv ${DOCUMENT} ${basename ${DOCUMENT}}.ps ${Build}
     43        @rm -frv ${DOCUMENT} ${BASE}.ps ${Build}
    4344
    4445# File Dependencies #
    4546
    46 ${DOCUMENT} : ${basename ${DOCUMENT}}.ps
     47${DOCUMENT} : ${BASE}.ps
    4748        ps2pdf $<
    4849
    49 ${basename ${DOCUMENT}}.ps : ${basename ${DOCUMENT}}.dvi
     50${BASE}.ps : ${BASE}.dvi
    5051        dvips ${Build}/$< -o $@
    5152
    52 ${basename ${DOCUMENT}}.dvi : Makefile ${Build} ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
    53                 ${Macros}/common.tex ${Macros}/indexstyle ../../../bibliography/pl.bib
     53${BASE}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
     54                ${Macros}/common.tex ${Macros}/indexstyle ../../../bibliography/pl.bib | ${Build}
    5455        # Must have *.aux file containing citations for bibtex
    5556        if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename $@}.tex ; fi
     
    6667        mkdir -p ${Build}
    6768
    68 %.tex : %.fig
     69%.tex : %.fig ${Build}
    6970        fig2dev -L eepic $< > ${Build}/$@
    7071
    71 %.ps : %.fig
     72%.ps : %.fig | ${Build}
    7273        fig2dev -L ps $< > ${Build}/$@
    7374
    74 %.pstex : %.fig
     75%.pstex : %.fig | ${Build}
    7576        fig2dev -L pstex $< > ${Build}/$@
    7677        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
  • doc/theses/thierry_delisle/Makefile

    ra12c81f3 rc653b37  
    5151
    5252DOCUMENT = thesis.pdf
     53BASE = ${basename ${DOCUMENT}}
    5354
    5455# Directives #
     
    5960
    6061clean :
    61         @rm -frv ${DOCUMENT} ${basename ${DOCUMENT}}.ps ${Build}
     62        @rm -frv ${DOCUMENT} ${BASE}.ps ${Build}
    6263
    6364# File Dependencies #
    6465
    65 ${DOCUMENT} : ${basename ${DOCUMENT}}.ps
     66${DOCUMENT} : ${BASE}.ps
    6667        ps2pdf $<
    6768
    68 ${basename ${DOCUMENT}}.ps : ${basename ${DOCUMENT}}.dvi
     69${BASE}.ps : ${BASE}.dvi
    6970        dvips ${Build}/$< -o $@
    7071
    71 ${basename ${DOCUMENT}}.dvi : Makefile ${Build} ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
    72                 ${Macros}/common.tex ${Macros}/indexstyle annex/local.bib ../../bibliography/pl.bib
     72${BASE}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
     73                ${Macros}/common.tex ${Macros}/indexstyle annex/local.bib ../../bibliography/pl.bib | ${Build}
    7374        # Must have *.aux file containing citations for bibtex
    7475        if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename $@}.tex ; fi
     
    9192        fig2dev -L eepic $< > ${Build}/$@
    9293
    93 %.ps : %.fig ${Build}
     94%.ps : %.fig | ${Build}
    9495        fig2dev -L ps $< > ${Build}/$@
    9596
    96 %.pstex : %.fig ${Build}
     97%.pstex : %.fig | ${Build}
    9798        fig2dev -L pstex $< > ${Build}/$@
    9899        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
  • doc/user/Makefile

    ra12c81f3 rc653b37  
    5757        dvips ${Build}/$< -o $@
    5858
    59 ${BASE}.dvi : Makefile ${Build} ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
    60                 ${Macros}/common.tex ${Macros}/lstlang.sty ${Macros}/indexstyle ../bibliography/pl.bib
     59${BASE}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
     60                ${Macros}/common.tex ${Macros}/lstlang.sty ${Macros}/indexstyle ../bibliography/pl.bib | ${Build}
    6161        # Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
    6262        if [ ! -r ${basename $@}.ind ] ; then touch ${Build}/${basename $@}.ind ; fi
     
    7979        mkdir -p ${Build}
    8080
    81 %.tex : %.fig ${Build}
     81%.tex : %.fig | ${Build}
    8282        fig2dev -L eepic $< > ${Build}/$@
    8383
    84 %.ps : %.fig ${Build}
     84%.ps : %.fig | ${Build}
    8585        fig2dev -L ps $< > ${Build}/$@
    8686
    87 %.pstex : %.fig ${Build}
     87%.pstex : %.fig | ${Build}
    8888        fig2dev -L pstex $< > ${Build}/$@
    8989        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
  • src/Parser/TypedefTable.cc

    ra12c81f3 rc653b37  
    1010// Created On       : Sat May 16 15:20:13 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Jun  7 13:17:56 2018
    13 // Update Count     : 192
     12// Last Modified On : Fri Jun 22 06:14:39 2018
     13// Update Count     : 206
    1414//
    1515
     
    7878        debugPrint( cerr << "Adding current at " << locn << " " << identifier << " as " << kindName( kind ) << " scope " << scope << endl );
    7979        auto ret = kindTable.insertAt( scope, identifier, kind );
    80         if ( ! ret.second ) ret.first->second = kind;           // exists => update
     80        //if ( ! ret.second ) ret.first->second = kind;         // exists => update
     81        assert( ret.first->second == kind );                            // exists
    8182} // TypedefTable::addToScope
    8283
    8384void TypedefTable::addToEnclosingScope( const string & identifier, int kind, const char * locn __attribute__((unused)) ) {
    84         assert( kindTable.currentScope() >= 1 );
    85         auto scope = kindTable.currentScope() - 1;
    86         debugPrint( cerr << "Adding enclosing at " << locn << " " << identifier << " as " << kindName( kind ) << " scope " << scope << endl );
     85        assert( kindTable.currentScope() >= 1 + level );
     86        auto scope = kindTable.currentScope() - 1 - level;
     87        debugPrint( cerr << "Adding enclosing at " << locn << " " << identifier << " as " << kindName( kind ) << " scope " << scope << " level " << level << endl );
    8788        auto ret = kindTable.insertAt( scope, identifier, kind );
    8889        if ( ! ret.second ) ret.first->second = kind;           // exists => update
     
    9192void TypedefTable::enterScope() {
    9293        kindTable.beginScope();
    93         debugPrint( cerr << "Entering scope " << kindTable.currentScope() << endl );
    94         debugPrint( print() );
     94        debugPrint( cerr << "Entering scope " << kindTable.currentScope() << endl; print() );
    9595} // TypedefTable::enterScope
    9696
    9797void TypedefTable::leaveScope() {
    98         debugPrint( cerr << "Leaving scope " << kindTable.currentScope() << endl );
    99         debugPrint( print() );
     98        debugPrint( cerr << "Leaving scope " << kindTable.currentScope() << endl; print() );
    10099        kindTable.endScope();
    101100} // TypedefTable::leaveScope
     
    114113                --scope;
    115114                debugPrint( cerr << endl << "[" << scope << "]" );
    116         }
     115        } // while
    117116        debugPrint( cerr << endl );
    118 }
     117} // TypedefTable::print
    119118
    120119// Local Variables: //
  • src/Parser/TypedefTable.h

    ra12c81f3 rc653b37  
    1010// Created On       : Sat May 16 15:24:36 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Jun  7 12:10:17 2018
    13 // Update Count     : 85
     12// Last Modified On : Fri Jun 22 05:29:58 2018
     13// Update Count     : 86
    1414//
    1515
     
    2525        typedef ScopedMap< std::string, int > KindTable;
    2626        KindTable kindTable;   
     27        unsigned int level = 0;
    2728  public:
    2829        ~TypedefTable();
     
    3738        void leaveScope();
    3839
     40        void up() { level += 1; }
     41        void down() { level -= 1; }
     42
    3943        void print( void ) const;
    4044}; // TypedefTable
  • src/Parser/lex.ll

    ra12c81f3 rc653b37  
    1010 * Created On       : Sat Sep 22 08:58:10 2001
    1111 * Last Modified By : Peter A. Buhr
    12  * Last Modified On : Thu Jun  7 08:27:40 2018
    13  * Update Count     : 679
     12 * Last Modified On : Wed Jun 20 09:08:28 2018
     13 * Update Count     : 682
    1414 */
    1515
     
    2525//**************************** Includes and Defines ****************************
    2626
     27// trigger before each matching rule's action
     28#define YY_USER_ACTION \
     29        yylloc.first_line = yylineno; \
     30        yylloc.first_column = column; \
     31        column += yyleng; \
     32        yylloc.last_column = column; \
     33        yylloc.last_line = yylineno; \
     34        yylloc.filename = yyfilename ? yyfilename : "";
    2735unsigned int column = 0;                                                                // position of the end of the last token parsed
    28 #define YY_USER_ACTION yylloc.first_line = yylineno; yylloc.first_column = column; column += yyleng; yylloc.last_column = column; yylloc.last_line = yylineno; yylloc.filename = yyfilename ? yyfilename : "";                          // trigger before each matching rule's action
    2936
    3037#include <string>
     
    4956#define NUMERIC_RETURN(x)       rm_underscore(); RETURN_VAL( x ) // numeric constant
    5057#define KEYWORD_RETURN(x)       RETURN_CHAR( x )                        // keyword
    51 #define QKEYWORD_RETURN(x)      typedefTable.isKind( yytext ); RETURN_VAL(x); // quasi-keyword
     58#define QKEYWORD_RETURN(x)      RETURN_VAL(x);                          // quasi-keyword
    5259#define IDENTIFIER_RETURN()     RETURN_VAL( typedefTable.isKind( yytext ) )
    5360#define ATTRIBUTE_RETURN()      RETURN_VAL( ATTR_IDENTIFIER )
  • src/Parser/parser.yy

    ra12c81f3 rc653b37  
    1010// Created On       : Sat Sep  1 20:22:55 2001
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Jun  7 10:07:12 2018
    13 // Update Count     : 3527
     12// Last Modified On : Sun Jun 24 10:41:10 2018
     13// Update Count     : 3587
    1414//
    1515
     
    136136} // build_postfix_name
    137137
    138 bool forall = false, xxx = false;                                               // aggregate have one or more forall qualifiers ?
     138bool forall = false, xxx = false, yyy = false;                  // aggregate have one or more forall qualifiers ?
    139139
    140140// https://www.gnu.org/software/bison/manual/bison.html#Location-Type
     
    304304%type<en> enumerator_value_opt
    305305
    306 %type<decl> exception_declaration external_definition external_definition_list external_definition_list_no_pop_push external_definition_list_opt
     306%type<decl> external_definition external_definition_list external_definition_list_opt
     307
     308%type<decl> exception_declaration
    307309
    308310%type<decl> field_declaration field_declaration_list_opt field_declarator_opt field_declaring_list
     
    503505                { $$ = new ExpressionNode( build_func( new ExpressionNode( build_postfix_name( $5 ) ), $2 ) ); }
    504506        | type_name '.' no_attr_identifier                                      // CFA, nested type
    505                 // { SemanticError( yylloc, "Qualified name is currently unimplemented." ); $$ = nullptr; }
    506                 { $$ = nullptr; }
     507                { SemanticError( yylloc, "Qualified name is currently unimplemented." ); $$ = nullptr; }
    507508        | type_name '.' '[' field_list ']'                                      // CFA, nested type / tuple field selector
    508                 // { SemanticError( yylloc, "Qualified name is currently unimplemented." ); $$ = nullptr; }
    509                 { $$ = nullptr; }
     509                { SemanticError( yylloc, "Qualified name is currently unimplemented." ); $$ = nullptr; }
    510510        | GENERIC '(' assignment_expression ',' generic_assoc_list ')' // C11
    511511                {
     
    18211821        ;
    18221822
     1823fred:
     1824        // empty
     1825                { yyy = false; }
     1826        ;
     1827
    18231828aggregate_type:                                                                                 // struct, union
    18241829        aggregate_key attribute_list_opt '{' field_declaration_list_opt '}'
    18251830                { $$ = DeclarationNode::newAggregate( $1, new string( DeclarationNode::anonymous.newName() ), nullptr, $4, true )->addQualifiers( $2 ); }
    1826         | aggregate_key attribute_list_opt no_attr_identifier
     1831        | aggregate_key attribute_list_opt no_attr_identifier fred
    18271832                {
    18281833                        typedefTable.makeTypedef( *$3, forall ? TYPEGENname : TYPEDEFname ); // create typedef
    1829                         //if ( forall ) typedefTable.changeKind( *$3, TYPEGENname ); // possibly update
    18301834                        forall = false;                                                         // reset
    18311835                }
    18321836          '{' field_declaration_list_opt '}'
    1833                 { $$ = DeclarationNode::newAggregate( $1, $3, nullptr, $6, true )->addQualifiers( $2 ); }
    1834         | aggregate_key attribute_list_opt type_name
     1837                { $$ = DeclarationNode::newAggregate( $1, $3, nullptr, $7, true )->addQualifiers( $2 ); }
     1838        | aggregate_key attribute_list_opt type_name fred
    18351839                {
    18361840                        typedefTable.makeTypedef( *$3->type->symbolic.name, forall ? TYPEGENname : TYPEDEFname ); // create typedef
    1837                         //if ( forall ) typedefTable.changeKind( *$3->type->symbolic.name, TYPEGENname ); // possibly update
    18381841                        forall = false;                                                         // reset
    18391842                }
    18401843          '{' field_declaration_list_opt '}'
    1841                 { $$ = DeclarationNode::newAggregate( $1, $3->type->symbolic.name, nullptr, $6, true )->addQualifiers( $2 ); }
     1844                { $$ = DeclarationNode::newAggregate( $1, $3->type->symbolic.name, nullptr, $7, true )->addQualifiers( $2 ); }
    18421845        | aggregate_key attribute_list_opt '(' type_list ')' '{' field_declaration_list_opt '}' // CFA
    18431846                { $$ = DeclarationNode::newAggregate( $1, new string( DeclarationNode::anonymous.newName() ), $4, $7, false )->addQualifiers( $2 ); }
     
    18461849
    18471850aggregate_type_nobody:                                                                  // struct, union - {...}
    1848         aggregate_key attribute_list_opt no_attr_identifier
     1851        aggregate_key attribute_list_opt no_attr_identifier fred
    18491852                {
    18501853                        typedefTable.makeTypedef( *$3, forall ? TYPEGENname : TYPEDEFname );
    1851                         //if ( forall ) typedefTable.changeKind( *$3, TYPEGENname ); // possibly update
    18521854                        forall = false;                                                         // reset
    18531855                        $$ = DeclarationNode::newAggregate( $1, $3, nullptr, nullptr, false )->addQualifiers( $2 );
    18541856                }
    1855         | aggregate_key attribute_list_opt type_name
     1857        | aggregate_key attribute_list_opt type_name fred
    18561858                {
    18571859                        // Create new generic declaration with same name as previous forward declaration, where the IDENTIFIER is
     
    18671869aggregate_key:
    18681870        STRUCT
    1869                 { $$ = DeclarationNode::Struct; }
     1871                { yyy = true; $$ = DeclarationNode::Struct; }
    18701872        | UNION
    1871                 { $$ = DeclarationNode::Union; }
     1873                { yyy = true; $$ = DeclarationNode::Union; }
    18721874        | EXCEPTION
    1873                 { $$ = DeclarationNode::Exception; }
     1875                { yyy = true; $$ = DeclarationNode::Exception; }
    18741876        | COROUTINE
    1875                 { $$ = DeclarationNode::Coroutine; }
     1877                { yyy = true; $$ = DeclarationNode::Coroutine; }
    18761878        | MONITOR
    1877                 { $$ = DeclarationNode::Monitor; }
     1879                { yyy = true; $$ = DeclarationNode::Monitor; }
    18781880        | THREAD
    1879                 { $$ = DeclarationNode::Thread; }
     1881                { yyy = true; $$ = DeclarationNode::Thread; }
    18801882        ;
    18811883
     
    23202322
    23212323translation_unit:
    2322         // empty
    2323                 {}                                                                                              // empty input file
     2324        // empty, input file
    23242325        | external_definition_list
    23252326                { parseTree = parseTree ? parseTree->appendList( $1 ) : $1;     }
     
    23352336        ;
    23362337
    2337         // SKULLDUGGERY: Declarations in extern "X" and distribution need to be added to the current lexical scope.
    2338         // However, external_definition_list creates a new scope around each external_definition, but the pop loses all the
    2339         // types in the extern "X" and distribution at the end of the block. This version of external_definition_list does
    2340 
    2341         // not do push/pop for declarations at the level of the extern "X" and distribution block. Any recursive uses of
    2342         // external_definition_list within the extern "X" and distribution block correctly pushes/pops for that scope level.
    2343 external_definition_list_no_pop_push:
    2344         external_definition
    2345         | external_definition_list_no_pop_push
    2346                 { forall = xxx; }
    2347           external_definition
    2348                 { $$ = $1 ? $1->appendList( $3 ) : $3; }
    2349         ;
    2350 
    23512338external_definition_list_opt:
    23522339        // empty
    23532340                { $$ = nullptr; }
    2354         | external_definition_list_no_pop_push
     2341        | external_definition_list
     2342        ;
     2343
     2344up:
     2345                { typedefTable.up(); }
     2346        ;
     2347
     2348down:
     2349                { typedefTable.down(); }
    23552350        ;
    23562351
     
    23722367                        linkage = LinkageSpec::linkageUpdate( yylloc, linkage, $2 );
    23732368                }
    2374           '{' external_definition_list_opt '}'
     2369          '{' up external_definition_list_opt down '}'
    23752370                {
    23762371                        linkage = linkageStack.top();
    23772372                        linkageStack.pop();
    2378                         $$ = $5;
     2373                        $$ = $6;
    23792374                }
    23802375        | type_qualifier_list
    2381                 { if ( $1->type->forall ) xxx = forall = true; } // remember generic type
    2382           '{' external_definition_list_opt '}'                          // CFA, namespace
    2383                 {
    2384                         for ( DeclarationNode * iter = $4; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) {
     2376                {
     2377                        if ( $1->type->qualifiers.val ) { SemanticError( yylloc, "CV qualifiers cannot be distributed; only storage-class and forall qualifiers." ); }
     2378                        if ( $1->type->forall ) xxx = forall = true; // remember generic type
     2379                }
     2380          '{' up external_definition_list_opt down '}'          // CFA, namespace
     2381                {
     2382                        for ( DeclarationNode * iter = $5; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) {
    23852383                                if ( isMangled( iter->linkage ) ) {             // ignore extern "C"
    23862384                                        iter->addQualifiers( $1->clone() );
     
    23892387                        xxx = false;
    23902388                        delete $1;
    2391                         $$ = $4;
     2389                        $$ = $5;
    23922390                }
    23932391        | declaration_qualifier_list
    2394                 { if ( $1->type->forall ) xxx = forall = true; } // remember generic type
    2395           '{' external_definition_list_opt '}'                          // CFA, namespace
    2396                 {
    2397                         for ( DeclarationNode * iter = $4; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) {
     2392                {
     2393                        if ( $1->type->qualifiers.val ) { SemanticError( yylloc, "CV qualifiers cannot be distributed; only storage-class and forall qualifiers." ); }
     2394                        if ( $1->type->forall ) xxx = forall = true; // remember generic type
     2395                }
     2396          '{' up external_definition_list_opt down '}'          // CFA, namespace
     2397                {
     2398                        for ( DeclarationNode * iter = $5; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) {
    23982399                                if ( isMangled( iter->linkage ) ) {             // ignore extern "C"
    23992400                                        iter->addQualifiers( $1->clone() );
     
    24022403                        xxx = false;
    24032404                        delete $1;
    2404                         $$ = $4;
     2405                        $$ = $5;
    24052406                }
    24062407        | declaration_qualifier_list type_qualifier_list
    24072408                {
    2408                         // forall must be in the type_qualifier_list
    2409                         if ( $2->type->forall ) xxx = forall = true; // remember generic type
    2410                 }
    2411           '{' external_definition_list_opt '}'                          // CFA, namespace
    2412                 {
    2413                         for ( DeclarationNode * iter = $5; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) {
     2409                        if ( ($1->type && $1->type->qualifiers.val) || $2->type->qualifiers.val ) { SemanticError( yylloc, "CV qualifiers cannot be distributed; only storage-class and forall qualifiers." ); }
     2410                        if ( ($1->type && $1->type->forall) || $2->type->forall ) xxx = forall = true; // remember generic type
     2411                }
     2412          '{' up external_definition_list_opt down '}'          // CFA, namespace
     2413                {
     2414                        for ( DeclarationNode * iter = $6; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) {
    24142415                                if ( isMangled( iter->linkage ) && isMangled( $2->linkage ) ) { // ignore extern "C"
    24152416                                        iter->addQualifiers( $1->clone() );
     
    24202421                        delete $1;
    24212422                        delete $2;
    2422                         $$ = $5;
     2423                        $$ = $6;
    24232424                }
    24242425        ;
  • src/ResolvExpr/AlternativeFinder.cc

    ra12c81f3 rc653b37  
    10991099                        argExpansions.emplace_back();
    11001100                        auto& argE = argExpansions.back();
    1101                         argE.reserve( arg.alternatives.size() );
     1101                        // argE.reserve( arg.alternatives.size() );
    11021102
    11031103                        for ( const Alternative& actual : arg ) {
  • src/ResolvExpr/CommonType.cc

    ra12c81f3 rc653b37  
    2424#include "SynTree/Type.h"                // for BasicType, BasicType::Kind::...
    2525#include "SynTree/Visitor.h"             // for Visitor
    26 #include "Unify.h"                       // for unifyExact, bindVar, WidenMode
     26#include "Unify.h"                       // for unifyExact, WidenMode
    2727#include "typeops.h"                     // for isFtype
    2828
     
    238238                                AssertionSet need, have;
    239239                                WidenMode widen( widenFirst, widenSecond );
    240                                 if ( entry != openVars.end() && ! bindVar(var, voidPointer->get_base(), entry->second, env, need, have, openVars, widen, indexer ) ) return;
     240                                if ( entry != openVars.end() && ! env.bindVar(var, voidPointer->get_base(), entry->second, need, have, openVars, widen, indexer ) ) return;
    241241                        }
    242242                }
  • src/ResolvExpr/ExplodedActual.h

    ra12c81f3 rc653b37  
    3232
    3333                ExplodedActual() : env(), cost(Cost::zero), exprs() {}
    34 
    3534                ExplodedActual( const Alternative& actual, const SymTab::Indexer& indexer );
     35                ExplodedActual(ExplodedActual&&) = default;
     36                ExplodedActual& operator= (ExplodedActual&&) = default;
    3637        };
    3738}
  • src/ResolvExpr/Resolver.cc

    ra12c81f3 rc653b37  
    582582
    583583                                                        // Make sure we don't widen any existing bindings
    584                                                         for ( auto & i : resultEnv ) {
    585                                                                 i.allowWidening = false;
    586                                                         }
    587 
     584                                                        resultEnv.forbidWidening();
     585                                                       
    588586                                                        // Find any unbound type variables
    589587                                                        resultEnv.extractOpenVars( openVars );
  • src/ResolvExpr/TypeEnvironment.cc

    ra12c81f3 rc653b37  
    99// Author           : Richard C. Bilson
    1010// Created On       : Sun May 17 12:19:47 2015
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sun May 17 12:23:36 2015
    13 // Update Count     : 3
     11// Last Modified By : Aaron B. Moss
     12// Last Modified On : Mon Jun 18 11:58:00 2018
     13// Update Count     : 4
    1414//
    1515
     
    1717#include <algorithm>                   // for copy, set_intersection
    1818#include <iterator>                    // for ostream_iterator, insert_iterator
     19#include <memory>                      // for unique_ptr
    1920#include <utility>                     // for pair, move
    2021
     
    2223#include "SynTree/Type.h"              // for Type, FunctionType, Type::Fora...
    2324#include "SynTree/TypeSubstitution.h"  // for TypeSubstitution
     25#include "Tuples/Tuples.h"             // for isTtype
    2426#include "TypeEnvironment.h"
     27#include "typeops.h"                   // for occurs
     28#include "Unify.h"                     // for unifyInexact
    2529
    2630namespace ResolvExpr {
     
    6569        }
    6670
     71        EqvClass::EqvClass( EqvClass &&other )
     72        : vars{std::move(other.vars)}, type{other.type},
     73          allowWidening{std::move(other.allowWidening)}, data{std::move(other.data)} {
     74                  other.type = nullptr;
     75        }
     76
    6777        EqvClass &EqvClass::operator=( const EqvClass &other ) {
    6878                if ( this == &other ) return *this;
     
    7282        }
    7383
     84        EqvClass &EqvClass::operator=( EqvClass &&other ) {
     85                if ( this == &other ) return *this;
     86                delete type;
     87               
     88                vars = std::move(other.vars);
     89                type = other.type;
     90                other.type = nullptr;
     91                allowWidening = std::move(other.allowWidening);
     92                data = std::move(other.data);
     93
     94                return *this;
     95        }
     96
    7497        EqvClass::~EqvClass() {
    7598                delete type;
     99        }
     100
     101        void EqvClass::set_type( Type* ty ) {
     102                if ( ty == type ) return;
     103                delete type;
     104                type = ty;
    76105        }
    77106
     
    92121        const EqvClass* TypeEnvironment::lookup( const std::string &var ) const {
    93122                for ( std::list< EqvClass >::const_iterator i = env.begin(); i != env.end(); ++i ) {
    94                         if ( i->vars.find( var ) != i->vars.end() ) {
    95 ///       std::cout << var << " is in class ";
    96 ///       i->print( std::cout );
    97                                 return &*i;
    98                         }
    99 ///     std::cout << var << " is not in class ";
    100 ///     i->print( std::cout );
     123                        if ( i->vars.find( var ) != i->vars.end() ) return &*i;
    101124                } // for
    102125                return nullptr;
     
    116139        }
    117140
    118         void TypeEnvironment::add( const EqvClass &eqvClass ) {
    119                 filterOverlappingClasses( env, eqvClass );
    120                 env.push_back( eqvClass );
    121         }
    122 
    123141        void TypeEnvironment::add( EqvClass &&eqvClass ) {
    124142                filterOverlappingClasses( env, eqvClass );
     
    131149                        newClass.vars.insert( (*i)->get_name() );
    132150                        newClass.data = TypeDecl::Data{ (*i) };
    133                         env.push_back( newClass );
     151                        env.push_back( std::move(newClass) );
    134152                } // for
    135153        }
     
    145163                        // transition to TypeSubstitution
    146164                        newClass.data = TypeDecl::Data{ TypeDecl::Dtype, false };
    147                         add( newClass );
     165                        add( std::move(newClass) );
    148166                }
    149167        }
     
    152170                for ( std::list< EqvClass >::const_iterator theClass = env.begin(); theClass != env.end(); ++theClass ) {
    153171                        for ( std::set< std::string >::const_iterator theVar = theClass->vars.begin(); theVar != theClass->vars.end(); ++theVar ) {
    154 ///       std::cerr << "adding " << *theVar;
    155172                                if ( theClass->type ) {
    156 ///         std::cerr << " bound to ";
    157 ///         theClass->type->print( std::cerr );
    158 ///         std::cerr << std::endl;
    159173                                        sub.add( *theVar, theClass->type );
    160174                                } else if ( theVar != theClass->vars.begin() ) {
    161175                                        TypeInstType *newTypeInst = new TypeInstType( Type::Qualifiers(), *theClass->vars.begin(), theClass->data.kind == TypeDecl::Ftype );
    162 ///         std::cerr << " bound to variable " << *theClass->vars.begin() << std::endl;
    163176                                        sub.add( *theVar, newTypeInst );
    164177                                        delete newTypeInst;
     
    166179                        } // for
    167180                } // for
    168 ///   std::cerr << "input env is:" << std::endl;
    169 ///   print( std::cerr, 8 );
    170 ///   std::cerr << "sub is:" << std::endl;
    171 ///   sub.print( std::cerr, 8 );
    172181                sub.normalize();
    173182        }
     
    181190        std::list< EqvClass >::iterator TypeEnvironment::internal_lookup( const std::string &var ) {
    182191                for ( std::list< EqvClass >::iterator i = env.begin(); i != env.end(); ++i ) {
    183                         if ( i->vars.find( var ) == i->vars.end() ) {
    184                                 return i;
    185                         } // if
     192                        if ( i->vars.count( var ) ) return i;
    186193                } // for
    187194                return env.end();
     
    190197        void TypeEnvironment::simpleCombine( const TypeEnvironment &second ) {
    191198                env.insert( env.end(), second.env.begin(), second.env.end() );
    192         }
    193 
    194         void TypeEnvironment::combine( const TypeEnvironment &second, Type *(*combineFunc)( Type*, Type* ) ) {
    195                 TypeEnvironment secondCopy( second );
    196                 for ( std::list< EqvClass >::iterator firstClass = env.begin(); firstClass != env.end(); ++firstClass ) {
    197                         EqvClass &newClass = *firstClass;
    198                         std::set< std::string > newVars;
    199                         for ( std::set< std::string >::const_iterator var = firstClass->vars.begin(); var != firstClass->vars.end(); ++var ) {
    200                                 std::list< EqvClass >::iterator secondClass = secondCopy.internal_lookup( *var );
    201                                 if ( secondClass != secondCopy.env.end() ) {
    202                                         newVars.insert( secondClass->vars.begin(), secondClass->vars.end() );
    203                                         if ( secondClass->type ) {
    204                                                 if ( newClass.type ) {
    205                                                         Type *newType = combineFunc( newClass.type, secondClass->type );
    206                                                         delete newClass.type;
    207                                                         newClass.type = newType;
    208                                                         newClass.allowWidening = newClass.allowWidening && secondClass->allowWidening;
    209                                                 } else {
    210                                                         newClass.type = secondClass->type->clone();
    211                                                         newClass.allowWidening = secondClass->allowWidening;
    212                                                 } // if
    213                                         } // if
    214                                         secondCopy.env.erase( secondClass );
    215                                 } // if
    216                         } // for
    217                         newClass.vars.insert( newVars.begin(), newVars.end() );
    218                 } // for
    219                 for ( std::list< EqvClass >::iterator secondClass = secondCopy.env.begin(); secondClass != secondCopy.env.end(); ++secondClass ) {
    220                         env.push_back( *secondClass );
    221                 } // for
    222199        }
    223200
     
    241218        }
    242219
     220        bool isFtype( Type *type ) {
     221                if ( dynamic_cast< FunctionType* >( type ) ) {
     222                        return true;
     223                } else if ( TypeInstType *typeInst = dynamic_cast< TypeInstType* >( type ) ) {
     224                        return typeInst->get_isFtype();
     225                } // if
     226                return false;
     227        }
     228
     229        bool tyVarCompatible( const TypeDecl::Data & data, Type *type ) {
     230                switch ( data.kind ) {
     231                  case TypeDecl::Dtype:
     232                        // to bind to an object type variable, the type must not be a function type.
     233                        // if the type variable is specified to be a complete type then the incoming
     234                        // type must also be complete
     235                        // xxx - should this also check that type is not a tuple type and that it's not a ttype?
     236                        return ! isFtype( type ) && (! data.isComplete || type->isComplete() );
     237                  case TypeDecl::Ftype:
     238                        return isFtype( type );
     239                  case TypeDecl::Ttype:
     240                        // ttype unifies with any tuple type
     241                        return dynamic_cast< TupleType * >( type ) || Tuples::isTtype( type );
     242                } // switch
     243                return false;
     244        }
     245
     246        bool TypeEnvironment::bindVar( TypeInstType *typeInst, Type *bindTo, const TypeDecl::Data & data, AssertionSet &need, AssertionSet &have, const OpenVarSet &openVars, WidenMode widenMode, const SymTab::Indexer &indexer ) {
     247               
     248                // remove references from other, so that type variables can only bind to value types
     249                bindTo = bindTo->stripReferences();
     250                OpenVarSet::const_iterator tyvar = openVars.find( typeInst->get_name() );
     251                assert( tyvar != openVars.end() );
     252                if ( ! tyVarCompatible( tyvar->second, bindTo ) ) {
     253                        return false;
     254                } // if
     255                if ( occurs( bindTo, typeInst->get_name(), *this ) ) {
     256                        return false;
     257                } // if
     258                auto curClass = internal_lookup( typeInst->get_name() );
     259                if ( curClass != env.end() ) {
     260                        if ( curClass->type ) {
     261                                Type *common = 0;
     262                                // attempt to unify equivalence class type (which has qualifiers stripped, so they must be restored) with the type to bind to
     263                                std::unique_ptr< Type > newType( curClass->type->clone() );
     264                                newType->get_qualifiers() = typeInst->get_qualifiers();
     265                                if ( unifyInexact( newType.get(), bindTo, *this, need, have, openVars, widenMode & WidenMode( curClass->allowWidening, true ), indexer, common ) ) {
     266                                        if ( common ) {
     267                                                common->get_qualifiers() = Type::Qualifiers{};
     268                                                curClass->set_type( common );
     269                                        } // if
     270                                } else return false;
     271                        } else {
     272                                Type* newType = bindTo->clone();
     273                                newType->get_qualifiers() = Type::Qualifiers{};
     274                                curClass->set_type( newType );
     275                                curClass->allowWidening = widenMode.widenFirst && widenMode.widenSecond;
     276                        } // if
     277                } else {
     278                        EqvClass newClass;
     279                        newClass.vars.insert( typeInst->get_name() );
     280                        newClass.type = bindTo->clone();
     281                        newClass.type->get_qualifiers() = Type::Qualifiers();
     282                        newClass.allowWidening = widenMode.widenFirst && widenMode.widenSecond;
     283                        newClass.data = data;
     284                        env.push_back( std::move(newClass) );
     285                } // if
     286                return true;
     287        }
     288
     289        bool TypeEnvironment::bindVarToVar( TypeInstType *var1, TypeInstType *var2, const TypeDecl::Data & data, AssertionSet &need, AssertionSet &have, const OpenVarSet &openVars, WidenMode widenMode, const SymTab::Indexer &indexer ) {
     290
     291                auto class1 = internal_lookup( var1->get_name() );
     292                auto class2 = internal_lookup( var2->get_name() );
     293               
     294                // exit early if variables already bound together
     295                if ( class1 != env.end() && class1 == class2 ) {
     296                        class1->allowWidening &= widenMode;
     297                        return true;
     298                }
     299
     300                bool widen1 = false, widen2 = false;
     301                const Type *type1 = nullptr, *type2 = nullptr;
     302
     303                // check for existing bindings, perform occurs check
     304                if ( class1 != env.end() ) {
     305                        if ( class1->type ) {
     306                                if ( occurs( class1->type, var2->get_name(), *this ) ) return false;
     307                                type1 = class1->type;
     308                        } // if
     309                        widen1 = widenMode.widenFirst && class1->allowWidening;
     310                } // if
     311                if ( class2 != env.end() ) {
     312                        if ( class2->type ) {
     313                                if ( occurs( class2->type, var1->get_name(), *this ) ) return false;
     314                                type2 = class2->type;
     315                        } // if
     316                        widen2 = widenMode.widenSecond && class2->allowWidening;
     317                } // if
     318
     319                if ( type1 && type2 ) {
     320                        // both classes bound, merge if bound types can be unified
     321                        std::unique_ptr<Type> newType1{ type1->clone() }, newType2{ type2->clone() };
     322                        WidenMode newWidenMode{ widen1, widen2 };
     323                        Type *common = 0;
     324                        if ( unifyInexact( newType1.get(), newType2.get(), *this, need, have, openVars, newWidenMode, indexer, common ) ) {
     325                                class1->vars.insert( class2->vars.begin(), class2->vars.end() );
     326                                class1->allowWidening = widen1 && widen2;
     327                                if ( common ) {
     328                                        common->get_qualifiers() = Type::Qualifiers{};
     329                                        class1->set_type( common );
     330                                }
     331                                env.erase( class2 );
     332                        } else return false;
     333                } else if ( class1 != env.end() && class2 != env.end() ) {
     334                        // both classes exist, at least one unbound, merge unconditionally
     335                        if ( type1 ) {
     336                                class1->vars.insert( class2->vars.begin(), class2->vars.end() );
     337                                class1->allowWidening = widen1;
     338                                env.erase( class2 );
     339                        } else {
     340                                class2->vars.insert( class1->vars.begin(), class1->vars.end() );
     341                                class2->allowWidening = widen2;
     342                                env.erase( class1 );
     343                        } // if
     344                } else if ( class1 != env.end() ) {
     345                        // var2 unbound, add to class1
     346                        class1->vars.insert( var2->get_name() );
     347                        class1->allowWidening = widen1;
     348                } else if ( class2 != env.end() ) {
     349                        // var1 unbound, add to class2
     350                        class2->vars.insert( var1->get_name() );
     351                        class2->allowWidening = widen2;
     352                } else {
     353                        // neither var bound, create new class
     354                        EqvClass newClass;
     355                        newClass.vars.insert( var1->get_name() );
     356                        newClass.vars.insert( var2->get_name() );
     357                        newClass.allowWidening = widen1 && widen2;
     358                        newClass.data = data;
     359                        env.push_back( std::move(newClass) );
     360                } // if
     361                return true;
     362        }
     363
     364        void TypeEnvironment::forbidWidening() {
     365                for ( EqvClass& c : env ) c.allowWidening = false;
     366        }
     367
    243368        std::ostream & operator<<( std::ostream & out, const TypeEnvironment & env ) {
    244369                env.print( out );
  • src/ResolvExpr/TypeEnvironment.h

    ra12c81f3 rc653b37  
    99// Author           : Richard C. Bilson
    1010// Created On       : Sun May 17 12:24:58 2015
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sat Jul 22 09:35:45 2017
    13 // Update Count     : 3
     11// Last Modified By : Aaron B. Moss
     12// Last Modified On : Mon Jun 18 11:58:00 2018
     13// Update Count     : 4
    1414//
    1515
     
    2121#include <set>                         // for set
    2222#include <string>                      // for string
     23#include <utility>                     // for move, swap
     24
     25#include "WidenMode.h"                 // for WidenMode
    2326
    2427#include "SynTree/Declaration.h"       // for TypeDecl::Data, DeclarationWit...
     
    7780                EqvClass( const EqvClass &other );
    7881                EqvClass( const EqvClass &other, const Type *ty );
     82                EqvClass( EqvClass &&other );
    7983                EqvClass &operator=( const EqvClass &other );
     84                EqvClass &operator=( EqvClass &&other );
    8085                ~EqvClass();
    8186                void print( std::ostream &os, Indenter indent = {} ) const;
     87
     88                /// Takes ownership of `ty`, freeing old `type`
     89                void set_type(Type* ty);
    8290        };
    8391
     
    8593          public:
    8694                const EqvClass* lookup( const std::string &var ) const;
    87                 void add( const EqvClass &eqvClass );
     95          private:
    8896                void add( EqvClass &&eqvClass  );
     97          public:
    8998                void add( const Type::ForallList &tyDecls );
    9099                void add( const TypeSubstitution & sub );
     
    94103                bool isEmpty() const { return env.empty(); }
    95104                void print( std::ostream &os, Indenter indent = {} ) const;
    96                 void combine( const TypeEnvironment &second, Type *(*combineFunc)( Type*, Type* ) );
     105                // void combine( const TypeEnvironment &second, Type *(*combineFunc)( Type*, Type* ) );
    97106                void simpleCombine( const TypeEnvironment &second );
    98107                void extractOpenVars( OpenVarSet &openVars ) const;
     
    103112                void addActual( const TypeEnvironment& actualEnv, OpenVarSet& openVars );
    104113
    105                 typedef std::list< EqvClass >::iterator iterator;
    106                 iterator begin() { return env.begin(); }
    107                 iterator end() { return env.end(); }
    108                 typedef std::list< EqvClass >::const_iterator const_iterator;
    109                 const_iterator begin() const { return env.begin(); }
    110                 const_iterator end() const { return env.end(); }
     114                /// Binds the type class represented by `typeInst` to the type `bindTo`; will add
     115                /// the class if needed. Returns false on failure.
     116                bool bindVar( TypeInstType *typeInst, Type *bindTo, const TypeDecl::Data & data, AssertionSet &need, AssertionSet &have, const OpenVarSet &openVars, WidenMode widenMode, const SymTab::Indexer &indexer );
     117               
     118                /// Binds the type classes represented by `var1` and `var2` together; will add
     119                /// one or both classes if needed. Returns false on failure.
     120                bool bindVarToVar( TypeInstType *var1, TypeInstType *var2, const TypeDecl::Data & data, AssertionSet &need, AssertionSet &have, const OpenVarSet &openVars, WidenMode widenMode, const SymTab::Indexer &indexer );
     121
     122                /// Disallows widening for all bindings in the environment
     123                void forbidWidening();
     124
     125                using iterator = std::list< EqvClass >::const_iterator;
     126                iterator begin() const { return env.begin(); }
     127                iterator end() const { return env.end(); }
     128
    111129          private:
    112130                std::list< EqvClass > env;
     131               
    113132                std::list< EqvClass >::iterator internal_lookup( const std::string &var );
    114133        };
  • src/ResolvExpr/Unify.cc

    ra12c81f3 rc653b37  
    99// Author           : Richard C. Bilson
    1010// Created On       : Sun May 17 12:27:10 2015
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Mar 16 16:22:54 2017
    13 // Update Count     : 42
     11// Last Modified By : Aaron B. Moss
     12// Last Modified On : Mon Jun 18 11:58:00 2018
     13// Update Count     : 43
    1414//
    1515
     
    129129        }
    130130
    131         bool isFtype( Type *type ) {
    132                 if ( dynamic_cast< FunctionType* >( type ) ) {
    133                         return true;
    134                 } else if ( TypeInstType *typeInst = dynamic_cast< TypeInstType* >( type ) ) {
    135                         return typeInst->get_isFtype();
    136                 } // if
    137                 return false;
    138         }
    139 
    140         bool tyVarCompatible( const TypeDecl::Data & data, Type *type ) {
    141                 switch ( data.kind ) {
    142                   case TypeDecl::Dtype:
    143                         // to bind to an object type variable, the type must not be a function type.
    144                         // if the type variable is specified to be a complete type then the incoming
    145                         // type must also be complete
    146                         // xxx - should this also check that type is not a tuple type and that it's not a ttype?
    147                         return ! isFtype( type ) && (! data.isComplete || type->isComplete() );
    148                   case TypeDecl::Ftype:
    149                         return isFtype( type );
    150                   case TypeDecl::Ttype:
    151                         // ttype unifies with any tuple type
    152                         return dynamic_cast< TupleType * >( type ) || Tuples::isTtype( type );
    153                 } // switch
    154                 return false;
    155         }
    156 
    157         bool bindVar( TypeInstType *typeInst, Type *other, const TypeDecl::Data & data, TypeEnvironment &env, AssertionSet &needAssertions, AssertionSet &haveAssertions, const OpenVarSet &openVars, WidenMode widenMode, const SymTab::Indexer &indexer ) {
    158                 // remove references from other, so that type variables can only bind to value types
    159                 other = other->stripReferences();
    160                 OpenVarSet::const_iterator tyvar = openVars.find( typeInst->get_name() );
    161                 assert( tyvar != openVars.end() );
    162                 if ( ! tyVarCompatible( tyvar->second, other ) ) {
    163                         return false;
    164                 } // if
    165                 if ( occurs( other, typeInst->get_name(), env ) ) {
    166                         return false;
    167                 } // if
    168                 if ( const EqvClass *curClass = env.lookup( typeInst->get_name() ) ) {
    169                         if ( curClass->type ) {
    170                                 Type *common = 0;
    171                                 // attempt to unify equivalence class type (which has qualifiers stripped, so they must be restored) with the type to bind to
    172                                 std::unique_ptr< Type > newType( curClass->type->clone() );
    173                                 newType->get_qualifiers() = typeInst->get_qualifiers();
    174                                 if ( unifyInexact( newType.get(), other, env, needAssertions, haveAssertions, openVars, widenMode & WidenMode( curClass->allowWidening, true ), indexer, common ) ) {
    175                                         if ( common ) {
    176                                                 common->get_qualifiers() = Type::Qualifiers();
    177                                                 env.add( EqvClass{ *curClass, common } );
    178                                         } // if
    179                                         return true;
    180                                 } else {
    181                                         return false;
    182                                 } // if
    183                         } else {
    184                                 EqvClass newClass { *curClass, other };
    185                                 newClass.type->get_qualifiers() = Type::Qualifiers();
    186                                 newClass.allowWidening = widenMode.widenFirst && widenMode.widenSecond;
    187                                 env.add( std::move(newClass) );
    188                         } // if
    189                 } else {
    190                         EqvClass newClass;
    191                         newClass.vars.insert( typeInst->get_name() );
    192                         newClass.type = other->clone();
    193                         newClass.type->get_qualifiers() = Type::Qualifiers();
    194                         newClass.allowWidening = widenMode.widenFirst && widenMode.widenSecond;
    195                         newClass.data = data;
    196                         env.add( newClass );
    197                 } // if
    198                 return true;
    199         }
    200 
    201         bool bindVarToVar( TypeInstType *var1, TypeInstType *var2, const TypeDecl::Data & data, TypeEnvironment &env, AssertionSet &needAssertions, AssertionSet &haveAssertions, const OpenVarSet &openVars, WidenMode widenMode, const SymTab::Indexer &indexer ) {
    202                 bool result = true;
    203                 const EqvClass *class1 = env.lookup( var1->get_name() );
    204                 const EqvClass *class2 = env.lookup( var2->get_name() );
    205                 bool widen1 = false, widen2 = false;
    206                 Type *type1 = nullptr, *type2 = nullptr;
    207 
    208                 if ( class1 ) {
    209                         if ( class1->type ) {
    210                                 if ( occurs( class1->type, var2->get_name(), env ) ) {
    211                                         return false;
    212                                 } // if
    213                                 type1 = class1->type->clone();
    214                         } // if
    215                         widen1 = widenMode.widenFirst && class1->allowWidening;
    216                 } // if
    217                 if ( class2 ) {
    218                         if ( class2->type ) {
    219                                 if ( occurs( class2->type, var1->get_name(), env ) ) {
    220                                         return false;
    221                                 } // if
    222                                 type2 = class2->type->clone();
    223                         } // if
    224                         widen2 = widenMode.widenSecond && class2->allowWidening;
    225                 } // if
    226 
    227                 if ( type1 && type2 ) {
    228 //    std::cerr << "has type1 && type2" << std::endl;
    229                         WidenMode newWidenMode ( widen1, widen2 );
    230                         Type *common = 0;
    231                         if ( unifyInexact( type1, type2, env, needAssertions, haveAssertions, openVars, newWidenMode, indexer, common ) ) {
    232                                 EqvClass newClass1 = *class1;
    233                                 newClass1.vars.insert( class2->vars.begin(), class2->vars.end() );
    234                                 newClass1.allowWidening = widen1 && widen2;
    235                                 if ( common ) {
    236                                         common->get_qualifiers() = Type::Qualifiers();
    237                                         delete newClass1.type;
    238                                         newClass1.type = common;
    239                                 } // if
    240                                 env.add( std::move(newClass1) );
    241                         } else {
    242                                 result = false;
    243                         } // if
    244                 } else if ( class1 && class2 ) {
    245                         if ( type1 ) {
    246                                 EqvClass newClass1 = *class1;
    247                                 newClass1.vars.insert( class2->vars.begin(), class2->vars.end() );
    248                                 newClass1.allowWidening = widen1;
    249                                 env.add( std::move(newClass1) );
    250                         } else {
    251                                 EqvClass newClass2 = *class2;
    252                                 newClass2.vars.insert( class1->vars.begin(), class1->vars.end() );
    253                                 newClass2.allowWidening = widen2;
    254                                 env.add( std::move(newClass2) );
    255                         } // if
    256                 } else if ( class1 ) {
    257                         EqvClass newClass1 = *class1;
    258                         newClass1.vars.insert( var2->get_name() );
    259                         newClass1.allowWidening = widen1;
    260                         env.add( std::move(newClass1) );
    261                 } else if ( class2 ) {
    262                         EqvClass newClass2 = *class2;
    263                         newClass2.vars.insert( var1->get_name() );
    264                         newClass2.allowWidening = widen2;
    265                         env.add( std::move(newClass2) );
    266                 } else {
    267                         EqvClass newClass;
    268                         newClass.vars.insert( var1->get_name() );
    269                         newClass.vars.insert( var2->get_name() );
    270                         newClass.allowWidening = widen1 && widen2;
    271                         newClass.data = data;
    272                         env.add( newClass );
    273                 } // if
    274                 delete type1;
    275                 delete type2;
    276                 return result;
    277         }
    278 
    279131        bool unify( Type *type1, Type *type2, TypeEnvironment &env, AssertionSet &needAssertions, AssertionSet &haveAssertions, OpenVarSet &openVars, const SymTab::Indexer &indexer ) {
    280132                OpenVarSet closedVars;
     
    321173
    322174                if ( isopen1 && isopen2 && entry1->second == entry2->second ) {
    323                         result = bindVarToVar( var1, var2, entry1->second, env, needAssertions, haveAssertions, openVars, widenMode, indexer );
     175                        result = env.bindVarToVar( var1, var2, entry1->second, needAssertions, haveAssertions, openVars, widenMode, indexer );
    324176                } else if ( isopen1 ) {
    325                         result = bindVar( var1, type2, entry1->second, env, needAssertions, haveAssertions, openVars, widenMode, indexer );
     177                        result = env.bindVar( var1, type2, entry1->second, needAssertions, haveAssertions, openVars, widenMode, indexer );
    326178                } else if ( isopen2 ) { // TODO: swap widenMode values in call, since type positions are flipped?
    327                         result = bindVar( var2, type1, entry2->second, env, needAssertions, haveAssertions, openVars, widenMode, indexer );
     179                        result = env.bindVar( var2, type1, entry2->second, needAssertions, haveAssertions, openVars, widenMode, indexer );
    328180                } else {
    329181                        PassVisitor<Unify> comparator( type2, env, needAssertions, haveAssertions, openVars, widenMode, indexer );
  • src/ResolvExpr/Unify.h

    ra12c81f3 rc653b37  
    99// Author           : Richard C. Bilson
    1010// Created On       : Sun May 17 13:09:04 2015
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Jul 21 23:09:34 2017
    13 // Update Count     : 3
     11// Last Modified By : Aaron B. Moss
     12// Last Modified On : Mon Jun 18 11:58:00 2018
     13// Update Count     : 4
    1414//
    1515
     
    2121#include "SynTree/Declaration.h"  // for TypeDecl, TypeDecl::Data
    2222#include "TypeEnvironment.h"      // for AssertionSet, OpenVarSet
     23#include "WidenMode.h"            // for WidenMode
    2324
    2425class Type;
     
    2930
    3031namespace ResolvExpr {
    31         struct WidenMode {
    32                 WidenMode( bool widenFirst, bool widenSecond ): widenFirst( widenFirst ), widenSecond( widenSecond ) {}
    33                 WidenMode &operator|=( const WidenMode &other ) { widenFirst |= other.widenFirst; widenSecond |= other.widenSecond; return *this; }
    34                 WidenMode &operator&=( const WidenMode &other ) { widenFirst &= other.widenFirst; widenSecond &= other.widenSecond; return *this; }
    35                 WidenMode operator|( const WidenMode &other ) { WidenMode newWM( *this ); newWM |= other; return newWM; }
    36                 WidenMode operator&( const WidenMode &other ) { WidenMode newWM( *this ); newWM &= other; return newWM; }
    37                 operator bool() { return widenFirst && widenSecond; }
    38 
    39                 bool widenFirst : 1, widenSecond : 1;
    40         };
    41 
    42         bool bindVar( TypeInstType *typeInst, Type *other, const TypeDecl::Data & data, TypeEnvironment &env, AssertionSet &needAssertions, AssertionSet &haveAssertions, const OpenVarSet &openVars, WidenMode widenMode, const SymTab::Indexer &indexer );
    4332        bool unify( Type *type1, Type *type2, TypeEnvironment &env, AssertionSet &needAssertions, AssertionSet &haveAssertions, OpenVarSet &openVars, const SymTab::Indexer &indexer );
    4433        bool unify( Type *type1, Type *type2, TypeEnvironment &env, AssertionSet &needAssertions, AssertionSet &haveAssertions, OpenVarSet &openVars, const SymTab::Indexer &indexer, Type *&commonType );
    4534        bool unifyExact( Type *type1, Type *type2, TypeEnvironment &env, AssertionSet &needAssertions, AssertionSet &haveAssertions, OpenVarSet &openVars, const SymTab::Indexer &indexer );
     35        bool unifyInexact( Type *type1, Type *type2, TypeEnvironment &env, AssertionSet &needAssertions, AssertionSet &haveAssertions, const OpenVarSet &openVars, WidenMode widenMode, const SymTab::Indexer &indexer, Type *&common );
    4636
    4737        template< typename Iterator1, typename Iterator2 >
  • src/SynTree/Type.cc

    ra12c81f3 rc653b37  
    1010// Created On       : Mon May 18 07:44:20 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Sep 25 15:16:32 2017
    13 // Update Count     : 38
     12// Last Modified On : Fri Jun 22 10:17:19 2018
     13// Update Count     : 39
    1414//
    1515#include "Type.h"
     
    6969
    7070// These must remain in the same order as the corresponding bit fields.
    71 const char * Type::FuncSpecifiersNames[] = { "inline", "fortran", "_Noreturn" };
     71const char * Type::FuncSpecifiersNames[] = { "inline", "_Noreturn", "fortran" };
    7272const char * Type::StorageClassesNames[] = { "extern", "static", "auto", "register", "_Thread_local" };
    7373const char * Type::QualifiersNames[] = { "const", "restrict", "volatile", "lvalue", "mutex", "_Atomic" };
  • src/tests/.gitignore

    ra12c81f3 rc653b37  
    11.out/
    22.err/
     3.type
  • src/tests/concurrent/coroutineYield.c

    ra12c81f3