Changeset 9cdfb4d0


Ignore:
Timestamp:
Jan 22, 2018, 3:09:01 PM (5 years ago)
Author:
Aaron Moss <a3moss@…>
Branches:
ADT, aaron-thesis, arm-eh, cleanup-dtors, deferred_resn, demangler, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, new-env, no_list, persistent-indexer, pthread-emulation, qualifiedEnum, resolv-new, with_gc
Children:
e23d20b
Parents:
326cd2b (diff), 4bf3b2b (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' of plg.uwaterloo.ca:software/cfa/cfa-cc

Files:
7 added
12 deleted
93 edited
26 moved

Legend:

Unmodified
Added
Removed
  • doc/proposals/concurrency/.gitignore

    r326cd2b r9cdfb4d0  
    2424build/*.toc
    2525*.pdf
     26*.png
     27figures/*.tex
    2628
    2729examples
  • doc/proposals/concurrency/Makefile

    r326cd2b r9cdfb4d0  
    11## Define the appropriate configuration variables.
    22
    3 TeXLIB = .:./style:./text:./annex:./build:../../LaTeXmacros:../../LaTeXmacros/listings:../../LaTeXmacros/enumitem:~/bibliographies:
     3TeXLIB = .:./style:./text:./annex:./build:../../LaTeXmacros:../../LaTeXmacros/listings:../../LaTeXmacros/enumitem:~/bibliographies:/usr/local/bibliographies:
    44LaTeX  = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=build -interaction=nonstopmode
    55BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex -terse
     
    7575        build/*.tex     \
    7676        build/*.toc     \
     77        build/*.lof     \
     78        build/*.lol     \
     79        build/*.lot     \
     80        figures/*.tex   \
     81        *.png           \
    7782
    7883
     
    117122        fig2dev -L pstex_t -p $@ $< > $@_t
    118123
     124figures/%.tex: build/%.pstex
     125        echo -n         "\documentclass[preview]{standalone}\n"         \
     126                        "\usepackage[T1]{fontenc}\n"                    \
     127                        "\usepackage[usenames]{color}\n"                \
     128                        "\usepackage{graphicx}\n"                       \
     129                        "\usepackage{listings}\n"                       \
     130                        "\usepackage{xspace}\n"                         \
     131                        "\input{style}\n"                               \
     132                        "\\\\begin{document}\n"                         \
     133                        "{\\\\resizebox{3\\\\textwidth}{!}{\input{${basename ${notdir $@}}.pstex_t}}}\n" \
     134                        "\end{document}" > $@
     135
     136%.png : build/%.pstex figures/%.tex
     137        echo ${basename $@}
     138        ${LaTeX} figures/${basename $@}.tex
     139        dvips build/${basename $@}.dvi -o build/${basename $@}.ps
     140        ps2pdf build/${basename $@}.ps
     141        convert -negate ${basename $@}.pdf $@
     142
     143
    119144
    120145# Local Variables: #
  • doc/proposals/concurrency/annex/glossary.tex

    r326cd2b r9cdfb4d0  
    9898\newacronym{tls}{TLS}{Thread Local Storage}
    9999\newacronym{api}{API}{Application Program Interface}
    100 \newacronym{raii}{RAII}{Ressource Acquisition Is Initialization}
     100\newacronym{raii}{RAII}{Resource Acquisition Is Initialization}
    101101\newacronym{numa}{NUMA}{Non-Uniform Memory Access}
  • doc/proposals/concurrency/annex/local.bib

    r326cd2b r9cdfb4d0  
    3636
    3737@article{TBB,
     38        key     = {TBB},
    3839        keywords        = {Intel, TBB},
    3940        title   = {Intel Thread Building Blocks},
     
    4243
    4344@manual{www-cfa,
     45        key     = {CFA},
    4446        keywords        = {Cforall},
    45         title   = {Cforall Programmming Language},
    46         address = {https://plg.uwaterloo.ca/~cforall/}
     47        author  = {C$\forall$},
     48        title   = {C$\forall$ Programmming Language},
     49        note    = {\url{https://plg.uwaterloo.ca/~cforall}},
    4750}
    4851
    49 @article{rob-thesis,
     52@mastersthesis{rob-thesis,
    5053        keywords        = {Constructors, Destructors, Tuples},
    5154        author  = {Rob Schluntz},
    5255        title   = {Resource Management and Tuples in Cforall},
    53         year            = 2017
     56        year            = 2017,
     57        school  = {University of Waterloo},
     58        note    = {\url{https://uwspace.uwaterloo.ca/handle/10012/11830}},
    5459}
    5560
     
    6469
    6570@article{BankTransfer,
     71        key     = {Bank Transfer},
    6672        keywords        = {Bank Transfer},
    6773        title   = {Bank Account Transfer Problem},
     
    8995}
    9096
    91 @book{Herlihy93,
    92         title={Transactional memory: Architectural support for lock-free data structures},
    93         author={Herlihy, Maurice and Moss, J Eliot B},
    94         volume={21},
    95         number={2},
    96         year={1993},
    97         publisher={ACM}
     97@article{Herlihy93,
     98        author  = {Herlihy, Maurice and Moss, J. Eliot B.},
     99        title   = {Transactional memory: architectural support for lock-free data structures},
     100        journal = {SIGARCH Comput. Archit. News},
     101        issue_date      = {May 1993},
     102        volume  = {21},
     103        number  = {2},
     104        month   = may,
     105        year    = {1993},
     106        pages   = {289--300},
     107        numpages        = {12},
     108        publisher       = {ACM},
     109        address = {New York, NY, USA},
    98110}
    99111
    100112@manual{affinityLinux,
     113        key     = {TBB},
    101114        title           = "{Linux man page - sched\_setaffinity(2)}"
    102115}
     
    122135}
    123136
    124 
    125137@misc{NodeJs,
    126138        title           = "{Node.js}",
  • doc/proposals/concurrency/figures/system.fig

    r326cd2b r9cdfb4d0  
    93934 0 0 50 -1 0 11 0.0000 2 120 570 4125 7126 thread 4\001
    9494-6
     956 6975 4050 9525 7875
     962 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
     97         7125 5400 7575 5400 7575 5850 7125 5850 7125 5400
     982 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5
     99         7125 4200 7575 4200 7575 4650 7125 4650 7125 4200
     1002 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
     101         7125 4800 7575 4800 7575 5250 7125 5250 7125 4800
     1022 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
     103         6975 4050 9525 4050 9525 7875 6975 7875 6975 4050
     1043 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
     105         7350 6900 7500 6975 7200 7050 7500 7125 7200 7200 7500 7275
     106         7200 7350 7500 7425 7200 7500 7350 7575
     107         0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
     108         -1.000 0.000
     1093 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
     110         7350 6000 7500 6075 7200 6150 7500 6225 7200 6300 7500 6375
     111         7200 6450 7500 6525 7200 6600 7350 6675
     112         0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
     113         -1.000 0.000
     1144 0 0 50 -1 0 11 0.0000 2 120 945 7725 4500 Pthread stack\001
     1154 0 0 50 -1 0 11 0.0000 2 150 1530 7725 5100 Pthread stack (stolen)\001
     1164 0 0 50 -1 0 11 0.0000 2 120 540 7725 6375 Pthread\001
     1174 0 0 50 -1 0 11 0.0000 2 150 1065 7725 7275 $\\CFA$ thread\001
     1184 0 0 50 -1 0 11 0.0000 2 150 990 7725 5700 $\\CFA$ stack\001
     119-6
    951201 2 0 1 0 7 50 -1 -1 0.000 1 3.1416 3150 5250 750 450 2400 4800 3900 5700
    961212 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
     
    1001252 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
    101126         5550 3900 3825 5025
    102 2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
    103          7125 3525 7575 3525 7575 3975 7125 3975 7125 3525
    104 2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
    105          6975 2175 9525 2175 9525 6000 6975 6000 6975 2175
    1061272 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
    107128         900 6225 2400 5400
     
    1221432 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
    123144         5100 975 5850 975 5850 2625 5100 2625 5100 975
    124 2 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5
    125          7125 2325 7575 2325 7575 2775 7125 2775 7125 2325
    126 2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
    127          7125 2925 7575 2925 7575 3375 7125 3375 7125 2925
    1281452 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
    129146         525 7425 1275 7425 1275 9075 525 9075 525 7425
     
    1431602 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5
    144161         2025 2625 2775 2625 2775 975 2025 975 2025 2625
    145 3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
    146          7350 5025 7500 5100 7200 5175 7500 5250 7200 5325 7500 5400
    147          7200 5475 7500 5550 7200 5625 7350 5700
    148          0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
    149          -1.000 0.000
    150 3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
    151          7350 4125 7500 4200 7200 4275 7500 4350 7200 4425 7500 4500
    152          7200 4575 7500 4650 7200 4725 7350 4800
    153          0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
    154          -1.000 0.000
    1551624 0 0 50 -1 0 18 0.0000 2 30 225 4500 3150 ...\001
    1561634 0 0 50 -1 0 18 0.0000 2 30 225 3750 4500 ...\001
    1571644 0 0 50 -1 0 11 0.0000 2 120 705 2775 5325 Scheduler\001
    158 4 0 0 50 -1 0 11 0.0000 2 120 945 7725 2625 Pthread stack\001
    159 4 0 0 50 -1 0 11 0.0000 2 150 1530 7725 3225 Pthread stack (stolen)\001
    160 4 0 0 50 -1 0 11 0.0000 2 120 540 7725 4500 Pthread\001
    161 4 0 0 50 -1 0 11 0.0000 2 150 1065 7725 5400 $\\CFA$ thread\001
    1621654 0 0 50 -1 0 18 0.0000 2 30 225 4950 6600 ...\001
    1631664 0 0 50 -1 0 18 0.0000 2 30 225 4200 5850 ...\001
    164 4 0 0 50 -1 0 11 0.0000 2 150 990 7725 3825 $\\CFA$ stack\001
  • doc/proposals/concurrency/text/basics.tex

    r326cd2b r9cdfb4d0  
    99At its core, concurrency is based on having multiple call-stacks and scheduling among threads of execution executing on these stacks. Concurrency without parallelism only requires having multiple call stacks (or contexts) for a single thread of execution.
    1010
    11 Execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining. Execution with a single and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread perspective) across the stacks is called concurrency.
    12 
    13 Therefore, a minimal concurrency system can be achieved by creating coroutines, which instead of context-switching among each other, always ask an oracle where to context-switch next. While coroutines can execute on the caller?s stack-frame, stack-full coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model (aka non-preemptive scheduling). The oracle/scheduler can either be a stack-less or stack-full entity and correspondingly require one or two context-switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption.
     11Execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining. Execution with a single and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread's perspective) across the stacks is called concurrency.
     12
     13Therefore, a minimal concurrency system can be achieved by creating coroutines (see Section \ref{coroutine}), which instead of context-switching among each other, always ask an oracle where to context-switch next. While coroutines can execute on the caller's stack-frame, stack-full coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model (a.k.a., non-preemptive scheduling). The oracle/scheduler can either be a stack-less or stack-full entity and correspondingly require one or two context-switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption.
    1414
    1515A scheduler introduces order of execution uncertainty, while preemption introduces uncertainty about where context switches occur. Mutual exclusion and synchronization are ways of limiting non-determinism in a concurrent system. Now it is important to understand that uncertainty is desirable; uncertainty can be used by runtime systems to significantly increase performance and is often the basis of giving a user the illusion that tasks are running in parallel. Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows.
    1616
    1717\section{\protect\CFA's Thread Building Blocks}
    18 One of the important features that are missing in C is threading. On modern architectures, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write efficient concurrent programs to take advantage of parallelism. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay.
     18One of the important features that are missing in C is threading\footnote{While the C11 standard defines a ``threads.h'' header, it is minimal and defined as optional. As such, library support for threading is far from widespread. At the time of writing the thesis, neither \texttt{gcc} nor \texttt{clang} support ``threads.h'' in their respective standard libraries.}. On modern architectures, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write efficient concurrent programs to take advantage of parallelism. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay.
    1919
    2020\section{Coroutines: A Stepping Stone}\label{coroutine}
    21 While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. Coroutines need to deal with context switches and other context-management operations. Therefore, this proposal includes coroutines both as an intermediate step for the implementation of threads, and a first-class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolves around two features: independent call-stacks and \code{suspend}/\code{resume}.
     21While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. \textbf{Coroutine}s are generalized routines which have predefined points where execution is suspended and can be resumed at a later time. Therefore, they need to deal with context switches and other context-management operations. This proposal includes coroutines both as an intermediate step for the implementation of threads, and a first-class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolves around two features: independent call-stacks and \code{suspend}/\code{resume}.
    2222
    2323\begin{table}
     
    129129\end{tabular}
    130130\end{center}
    131 \caption{Different implementations of a Fibonacci sequence generator in C.},
     131\caption{Different implementations of a Fibonacci sequence generator in C.}
    132132\label{lst:fibonacci-c}
    133133\end{table}
    134134
    135 A good example of a problem made easier with coroutines is generators, like the Fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Table \ref{lst:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approach suffer from strong coupling. The left and centre approaches require that the generator have knowledge of how the sequence is used, while the rightmost approach requires holding internal state between calls on behalf of the generator and makes it much harder to handle corner cases like the Fibonacci seed.
     135A good example of a problem made easier with coroutines is generators, e.g., generating the Fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Listing \ref{lst:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approach suffer from strong coupling. The left and centre approaches require that the generator have knowledge of how the sequence is used, while the rightmost approach requires holding internal state between calls on behalf of the generator and makes it much harder to handle corner cases like the Fibonacci seed.
    136136
    137137Listing \ref{lst:fibonacci-cfa} is an example of a solution to the Fibonacci problem using \CFA coroutines, where the coroutine stack holds sufficient state for the next generation. This solution has the advantage of having very strong decoupling between how the sequence is generated and how it is used. Indeed, this version is as easy to use as the \code{fibonacci_state} solution, while the implementation is very similar to the \code{fibonacci_func} example.
     
    233233One important design challenge for implementing coroutines and threads (shown in section \ref{threads}) is that the runtime system needs to run code after the user-constructor runs to connect the fully constructed object into the system. In the case of coroutines, this challenge is simpler since there is no non-determinism from preemption or scheduling. However, the underlying challenge remains the same for coroutines and threads.
    234234
    235 The runtime system needs to create the coroutine?s stack and more importantly prepare it for the first resumption. The timing of the creation is non-trivial since users both expect to have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. There are several solutions to this problem but the chosen option effectively forces the design of the coroutine.
     235The runtime system needs to create the coroutine's stack and, more importantly, prepare it for the first resumption. The timing of the creation is non-trivial since users expect both to have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. There are several solutions to this problem but the chosen option effectively forces the design of the coroutine.
    236236
    237237Furthermore, \CFA faces an extra challenge as polymorphic routines create invisible thunks when cast to non-polymorphic routines and these thunks have function scope. For example, the following code, while looking benign, can run into undefined behaviour because of thunks:
     
    268268}
    269269\end{ccode}
    270 The problem in this example is a storage management issue, the function pointer \code{_thunk0} is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes Undefined Behaviour; i.e., the stack-based thunk being destroyed before it can be used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that nested routine cannot be passed outside of the declaration scope. The case of coroutines and threads is simply an extension of this problem to multiple call stacks.
     270The problem in this example is a storage management issue, the function pointer \code{_thunk0} is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes undefined behaviour; i.e., the stack-based thunk being destroyed before it can be used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that nested routine cannot be passed outside of the declaration scope. The case of coroutines and threads is simply an extension of this problem to multiple call stacks.
    271271
    272272\subsection{Alternative: Composition}
     
    310310symmetric_coroutine<>::yield_type
    311311\end{cfacode}
    312 Often, the canonical threading paradigm in languages is based on function pointers, pthread being one of the most well-known examples. The main problem of this approach is that the thread usage is limited to a generic handle that must otherwise be wrapped in a custom type. Since the custom type is simple to write in \CFA and solves several issues, added support for routine/lambda based coroutines adds very little.
    313 
    314 A variation of this would be to use a simple function pointer in the same way pthread does for threads :
     312Often, the canonical threading paradigm in languages is based on function pointers, \texttt{pthread} being one of the most well-known examples. The main problem of this approach is that the thread usage is limited to a generic handle that must otherwise be wrapped in a custom type. Since the custom type is simple to write in \CFA and solves several issues, added support for routine/lambda based coroutines adds very little.
     313
     314A variation of this would be to use a simple function pointer in the same way \texttt{pthread} does for threads:
    315315\begin{cfacode}
    316316void foo( coroutine_t cid, void* arg ) {
     
    329329\subsection{Alternative: Trait-Based Coroutines}
    330330
    331 Finally, the underlying approach, which is the one closest to \CFA idioms, is to use trait-based lazy coroutines. This approach defines a coroutine as anything that satisfies the trait \code{is_coroutine} and is used as a coroutine.
     331Finally, the underlying approach, which is the one closest to \CFA idioms, is to use trait-based lazy coroutines. This approach defines a coroutine as anything that satisfies the trait \code{is_coroutine} (as defined below) and is used as a coroutine.
    332332
    333333\begin{cfacode}
     
    340340forall( dtype T | is_coroutine(T) ) void resume (T&);
    341341\end{cfacode}
    342 This ensures an object is not a coroutine until \code{resume} is called on the object. Correspondingly, any object that is passed to \code{resume} is a coroutine since it must satisfy the \code{is_coroutine} trait to compile. The advantage of this approach is that users can easily create different types of coroutines, for example, changing the memory layout of a coroutine is trivial when implementing the \code{get_coroutine} routine. The \CFA keyword \code{coroutine} only has the effect of implementing the getter and forward declarations required for users to implement the main routine.
     342This ensures that an object is not a coroutine until \code{resume} is called on the object. Correspondingly, any object that is passed to \code{resume} is a coroutine since it must satisfy the \code{is_coroutine} trait to compile. The advantage of this approach is that users can easily create different types of coroutines, for example, changing the memory layout of a coroutine is trivial when implementing the \code{get_coroutine} routine. The \CFA keyword \code{coroutine} simply has the effect of implementing the getter and forward declarations required for users to implement the main routine.
    343343
    344344\begin{center}
     
    385385\end{cfacode}
    386386
    387 Obviously, for this thread implementation to be useful it must run some user code. Several other threading interfaces use a function-pointer representation as the interface of threads (for example \Csharp~\cite{Csharp} and Scala~\cite{Scala}). However, this proposal considers that statically tying a \code{main} routine to a thread supersedes this approach. Since the \code{main} routine is already a special routine in \CFA (where the program begins), it is a natural extension of the semantics using overloading to declare mains for different threads (the normal main being the main of the initial thread). As such the \code{main} routine of a thread can be defined as
     387Obviously, for this thread implementation to be useful it must run some user code. Several other threading interfaces use a function-pointer representation as the interface of threads (for example \Csharp~\cite{Csharp} and Scala~\cite{Scala}). However, this proposal considers that statically tying a \code{main} routine to a thread supersedes this approach. Since the \code{main} routine is already a special routine in \CFA (where the program begins), it is a natural extension of the semantics to use overloading to declare mains for different threads (the normal main being the main of the initial thread). As such the \code{main} routine of a thread can be defined as
    388388\begin{cfacode}
    389389thread foo {};
     
    425425A consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \acrshort{api}.
    426426
    427 Of course for threads to be useful, it must be possible to start and stop threads and wait for them to complete execution. While using an \acrshort{api} such as \code{fork} and \code{join} is relatively common in the literature, such an interface is unnecessary. Indeed, the simplest approach is to use \acrshort{raii} principles and have threads \code{fork} after the constructor has completed and \code{join} before the destructor runs.
     427Of course, for threads to be useful, it must be possible to start and stop threads and wait for them to complete execution. While using an \acrshort{api} such as \code{fork} and \code{join} is relatively common in the literature, such an interface is unnecessary. Indeed, the simplest approach is to use \acrshort{raii} principles and have threads \code{fork} after the constructor has completed and \code{join} before the destructor runs.
    428428\begin{cfacode}
    429429thread World;
     
    466466\end{cfacode}
    467467
    468 However, one of the drawbacks of this approach is that threads always form a lattice, i.e., they are always destroyed in the opposite order of construction because of block structure. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created.
     468However, one of the drawbacks of this approach is that threads always form a tree where nodes must always outlive their children, i.e., they are always destroyed in the opposite order of construction because of C scoping rules. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created.
    469469
    470470\begin{cfacode}
  • doc/proposals/concurrency/text/cforall.tex

    r326cd2b r9cdfb4d0  
    5252% ======================================================================
    5353\section{Operators}
    54 Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation occur, e.g.:
     54Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation appear, e.g.:
    5555\begin{cfacode}
    5656int ++? (int op);                       //unary prefix increment
     
    7272% ======================================================================
    7373\section{Constructors/Destructors}
    74 Object lifetime is often a challenge in concurrency. \CFA uses the approach of giving concurrent meaning to object lifetime as a means of synchronization and/or mutual exclusion. Since \CFA relies heavily on the lifetime of objects, constructors and destructors is a core feature required for concurrency and parallelism. \CFA uses the following syntax for constructors and destructors :
     74Object lifetime is often a challenge in concurrency. \CFA uses the approach of giving concurrent meaning to object lifetime as a means of synchronization and/or mutual exclusion. Since \CFA relies heavily on the lifetime of objects, constructors and destructors is a core feature required for concurrency and parallelism. \CFA uses the following syntax for constructors and destructors:
    7575\begin{cfacode}
    7676struct S {
     
    107107% ======================================================================
    108108\section{Parametric Polymorphism}
    109 Routines in \CFA can also be reused for multiple types. This capability is done using the \code{forall} clause, which gives \CFA its name. \code{forall} clauses allow separately compiled routines to support generic usage over multiple types. For example, the following sum function works for any type that supports construction from 0 and addition :
     109\label{s:ParametricPolymorphism}
     110Routines in \CFA can also be reused for multiple types. This capability is done using the \code{forall} clauses, which allow separately compiled routines to support generic usage over multiple types. For example, the following sum function works for any type that supports construction from 0 and addition:
    110111\begin{cfacode}
    111112//constraint type, 0 and +
     
    124125Since writing constraints on types can become cumbersome for more constrained functions, \CFA also has the concept of traits. Traits are named collection of constraints that can be used both instead and in addition to regular constraints:
    125126\begin{cfacode}
    126 trait sumable( otype T ) {
     127trait summable( otype T ) {
    127128        void ?{}(T *, zero_t);          //constructor from 0 literal
    128129        T ?+?(T, T);                            //assortment of additions
     
    131132        T ?++(T *);
    132133};
    133 forall( otype T | sumable(T) )  //use trait
     134forall( otype T | summable(T) ) //use trait
    134135T sum(T a[], size_t size);
    135136\end{cfacode}
     
    139140% ======================================================================
    140141\section{with Clause/Statement}
    141 Since \CFA lacks the concept of a receiver, certain functions end-up needing to repeat variable names often. To remove this inconvenience, \CFA provides the \code{with} statement, which opens an aggregate scope making its fields directly accessible (like Pascal).
     142Since \CFA lacks the concept of a receiver, certain functions end up needing to repeat variable names often. To remove this inconvenience, \CFA provides the \code{with} statement, which opens an aggregate scope making its fields directly accessible (like Pascal).
    142143\begin{cfacode}
    143144struct S { int i, j; };
  • doc/proposals/concurrency/text/concurrency.tex

    r326cd2b r9cdfb4d0  
    44% ======================================================================
    55% ======================================================================
    6 Several tools can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared-state, some languages and libraries simply disallow mutable shared-state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely relate to networking concepts (channels~\cite{CSP,Go} for example). However, in languages that use routine calls as their core abstraction mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine calls). This distinction in turn means that, in order to be effective, programmers need to learn two sets of design patterns. While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
     6Several tools can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared state, some languages and libraries simply disallow mutable shared state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely relate to networking concepts (channels~\cite{CSP,Go} for example). However, in languages that use routine calls as their core abstraction mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine calls). This distinction in turn means that, in order to be effective, programmers need to learn two sets of design patterns. While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
    77
    88Approaches based on shared memory are more closely related to non-concurrent paradigms since they often rely on basic constructs like routine calls and shared objects. At the lowest level, concurrent paradigms are implemented as atomic operations and locks. Many such mechanisms have been proposed, including semaphores~\cite{Dijkstra68b} and path expressions~\cite{Campbell74}. However, for productivity reasons it is desirable to have a higher-level construct be the core concurrency paradigm~\cite{HPP:Study}.
     
    1616
    1717\subsection{Mutual-Exclusion}
    18 As mentioned above, mutual-exclusion is the guarantee that only a fix number of threads can enter a critical section at once. However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use. Methods range from low-level locks, which are fast and flexible but require significant attention to be correct, to  higher-level mutual-exclusion methods, which sacrifice some performance in order to improve ease of use. Ease of use comes by either guaranteeing some problems cannot occur (e.g., being deadlock free) or by offering a more explicit coupling between data and corresponding critical section. For example, the \CC \code{std::atomic<T>} offers an easy way to express mutual-exclusion on a restricted set of operations (e.g.: reading/writing large types atomically). Another challenge with low-level locks is composability. Locks have restricted composability because it takes careful organizing for multiple locks to be used while preventing deadlocks. Easing composability is another feature higher-level mutual-exclusion mechanisms often offer.
     18As mentioned above, mutual-exclusion is the guarantee that only a fix number of threads can enter a critical section at once. However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use. Methods range from low-level locks, which are fast and flexible but require significant attention to be correct, to  higher-level concurrency techniques, which sacrifice some performance in order to improve ease of use. Ease of use comes by either guaranteeing some problems cannot occur (e.g., being deadlock free) or by offering a more explicit coupling between data and corresponding critical section. For example, the \CC \code{std::atomic<T>} offers an easy way to express mutual-exclusion on a restricted set of operations (e.g., reading/writing large types atomically). Another challenge with low-level locks is composability. Locks have restricted composability because it takes careful organizing for multiple locks to be used while preventing deadlocks. Easing composability is another feature higher-level mutual-exclusion mechanisms often offer.
    1919
    2020\subsection{Synchronization}
    21 As for mutual-exclusion, low-level synchronization primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, e.g.: message passing, or offering a simpler solution to otherwise involved challenges. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronization happens within a critical section, where threads must acquire mutual-exclusion in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property is called barging. For example, where event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. The classic example is the thread that finishes using a resource and unblocks a thread waiting to use the resource, but the unblocked thread must compete again to acquire the resource. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use flag variables to detect barging threads are said to be using barging avoidance, while algorithms that baton-pass locks~\cite{Andrews89} between threads instead of releasing the locks are said to be using barging prevention.
     21As with mutual-exclusion, low-level synchronization primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanisms often simplify usage by adding either better coupling between synchronization and data (e.g., message passing) or offering a simpler solution to otherwise involved challenges. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronization happens within a critical section, where threads must acquire mutual-exclusion in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property is called \textbf{barging}. For example, where event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. The classic example is the thread that finishes using a resource and unblocks a thread waiting to use the resource, but the unblocked thread must compete to acquire the resource. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use flag variables to detect barging threads are said to be using barging avoidance, while algorithms that baton-pass locks~\cite{Andrews89} between threads instead of releasing the locks are said to be using barging prevention.
    2222
    2323% ======================================================================
     
    2626% ======================================================================
    2727% ======================================================================
    28 A monitor is a set of routines that ensure mutual exclusion when accessing shared state. This concept is generally associated with Object-Oriented Languages like Java~\cite{Java} or \uC~\cite{uC++book} but does not strictly require OO semantics. The only requirement is the ability to declare a handle to a shared object and a set of routines that act on it :
     28A \textbf{monitor} is a set of routines that ensure mutual-exclusion when accessing shared state. More precisely, a monitor is a programming technique that associates mutual-exclusion to routine scopes, as opposed to mutex locks, where mutual-exclusion is defined by lock/release calls independently of any scoping of the calling routine. This strong association eases readability and maintainability, at the cost of flexibility. Note that both monitors and mutex locks, require an abstract handle to identify them. This concept is generally associated with object-oriented languages like Java~\cite{Java} or \uC~\cite{uC++book} but does not strictly require OO semantics. The only requirement is the ability to declare a handle to a shared object and a set of routines that act on it:
    2929\begin{cfacode}
    3030typedef /*some monitor type*/ monitor;
    31 int f(monitor& m);
     31int f(monitor & m);
    3232
    3333int main() {
     
    4242% ======================================================================
    4343% ======================================================================
    44 The above monitor example displays some of the intrinsic characteristics. First, it is necessary to use pass-by-reference over pass-by-value for monitor routines. This semantics is important, because at their core, monitors are implicit mutual-exclusion objects (locks), and these objects cannot be copied. Therefore, monitors are implicitly non-copy-able objects (\code{dtype}).
    45 
    46 Another aspect to consider is when a monitor acquires its mutual exclusion. For example, a monitor may need to be passed through multiple helper routines that do not acquire the monitor mutual-exclusion on entry. Pass through can occur for generic helper routines (\code{swap}, \code{sort}, etc.) or specific helper routines like the following to implement an atomic counter :
     44The above monitor example displays some of the intrinsic characteristics. First, it is necessary to use pass-by-reference over pass-by-value for monitor routines. This semantics is important, because at their core, monitors are implicit mutual-exclusion objects (locks), and these objects cannot be copied. Therefore, monitors are non-copy-able objects (\code{dtype}).
     45
     46Another aspect to consider is when a monitor acquires its mutual exclusion. For example, a monitor may need to be passed through multiple helper routines that do not acquire the monitor mutual-exclusion on entry. Passthrough can occur for generic helper routines (\code{swap}, \code{sort}, etc.) or specific helper routines like the following to implement an atomic counter:
    4747
    4848\begin{cfacode}
     
    7171\end{tabular}
    7272\end{center}
    73 Notice how the counter is used without any explicit synchronization and yet supports thread-safe semantics for both reading and writing, which is similar in usage to \CC \code{atomic} template.
     73Notice how the counter is used without any explicit synchronization and yet supports thread-safe semantics for both reading and writing, which is similar in usage to the \CC template \code{std::atomic}.
    7474
    7575Here, the constructor (\code{?\{\}}) uses the \code{nomutex} keyword to signify that it does not acquire the monitor mutual-exclusion when constructing. This semantics is because an object not yet con\-structed should never be shared and therefore does not require mutual exclusion. Furthermore, it allows the implementation greater freedom when it initializes the monitor locking. The prefix increment operator uses \code{mutex} to protect the incrementing process from race conditions. Finally, there is a conversion operator from \code{counter_t} to \code{size_t}. This conversion may or may not require the \code{mutex} keyword depending on whether or not reading a \code{size_t} is an atomic operation.
     
    9393\end{figure}
    9494
    95 Having both \code{mutex} and \code{nomutex} keywords is redundant based on the meaning of a routine having neither of these keywords. For example, it is reasonable that it should default to the safest option (\code{mutex}) when given a routine without qualifiers \code{void foo(counter_t & this)}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. On the other hand, \code{nomutex} is the ``normal'' parameter behaviour, it effectively states explicitly that ``this routine is not special''. Another alternative is making exactly one of these keywords mandatory, which provides the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor or not. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}.
     95Having both \code{mutex} and \code{nomutex} keywords can be redundant, depending on the meaning of a routine having neither of these keywords. For example, it is reasonable that it should default to the safest option (\code{mutex}) when given a routine without qualifiers \code{void foo(counter_t & this)}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. On the other hand, \code{nomutex} is the ``normal'' parameter behaviour, it effectively states explicitly that ``this routine is not special''. Another alternative is making exactly one of these keywords mandatory, which provides the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor or not. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}.
    9696
    9797The next semantic decision is to establish when \code{mutex} may be used as a type qualifier. Consider the following declarations:
    9898\begin{cfacode}
    99 int f1(monitor& mutex m);
     99int f1(monitor & mutex m);
    100100int f2(const monitor & mutex m);
    101 int f3(monitor** mutex m);
    102 int f4(monitor* mutex m []);
    103 int f5(graph(monitor*) & mutex m);
     101int f3(monitor ** mutex m);
     102int f4(monitor * mutex m []);
     103int f5(graph(monitor *) & mutex m);
    104104\end{cfacode}
    105105The problem is to identify which object(s) should be acquired. Furthermore, each object needs to be acquired only once. In the case of simple routines like \code{f1} and \code{f2} it is easy to identify an exhaustive list of objects to acquire on entry. Adding indirections (\code{f3}) still allows the compiler and programmer to identify which object is acquired. However, adding in arrays (\code{f4}) makes it much harder. Array lengths are not necessarily known in C, and even then, making sure objects are only acquired once becomes none-trivial. This problem can be extended to absurd limits like \code{f5}, which uses a graph of monitors. To make the issue tractable, this project imposes the requirement that a routine may only acquire one monitor per parameter and it must be the type of the parameter with at most one level of indirection (ignoring potential qualifiers). Also note that while routine \code{f3} can be supported, meaning that monitor \code{**m} is acquired, passing an array to this routine would be type-safe and yet result in undefined behaviour because only the first element of the array is acquired. However, this ambiguity is part of the C type-system with respects to arrays. For this reason, \code{mutex} is disallowed in the context where arrays may be passed:
    106106\begin{cfacode}
    107 int f1(monitor& mutex m);   //Okay : recommended case
    108 int f2(monitor* mutex m);   //Okay : could be an array but probably not
     107int f1(monitor & mutex m);    //Okay : recommended case
     108int f2(monitor * mutex m);    //Not Okay : Could be an array
    109109int f3(monitor mutex m []);  //Not Okay : Array of unknown length
    110 int f4(monitor** mutex m);  //Not Okay : Could be an array
    111 int f5(monitor* mutex m []); //Not Okay : Array of unknown length
     110int f4(monitor ** mutex m);   //Not Okay : Could be an array
     111int f5(monitor * mutex m []); //Not Okay : Array of unknown length
    112112\end{cfacode}
    113113Note that not all array functions are actually distinct in the type system. However, even if the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic.
    114114
    115 Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to acquire mutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls.
     115Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to specify the object that acquires mutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls.
    116116\begin{cfacode}
    117117int f(MonitorA & mutex a, MonitorB & mutex b);
     
    137137The \gls{multi-acq} monitor lock allows a monitor lock to be acquired by both \code{bar} or \code{baz} and acquired again in \code{foo}. In the calls to \code{bar} and \code{baz} the monitors are acquired in opposite order.
    138138
    139 However, such use leads to the lock acquiring order problems. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle difference means that calling these routines concurrently may lead to deadlock and is therefore Undefined Behaviour. As shown~\cite{Lister77}, solving this problem requires:
     139However, such use leads to lock acquiring order problems. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle difference means that calling these routines concurrently may lead to deadlock and is therefore undefined behaviour. As shown~\cite{Lister77}, solving this problem requires:
    140140\begin{enumerate}
    141         \item Dynamically tracking of the monitor-call order.
     141        \item Dynamically tracking the monitor-call order.
    142142        \item Implement rollback semantics.
    143143\end{enumerate}
     
    159159\subsection{\code{mutex} statement} \label{mutex-stmt}
    160160
    161 The call semantics discussed above have one software engineering issue, only a named routine can acquire the mutual-exclusion of a set of monitor. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem~\cite{2FTwoHardThings}. Table \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitor is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
     161The call semantics discussed above have one software engineering issue: only a routine can acquire the mutual-exclusion of a set of monitor. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem~\cite{2FTwoHardThings}. Table \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitor is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
    162162
    163163\begin{table}
     
    216216\end{cfacode}
    217217
    218 Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the \code{monitor} keyword. The monitor trait is :
     218Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the \code{monitor} keyword. The monitor trait is:
    219219\begin{cfacode}
    220220trait is_monitor(dtype T) {
     
    223223};
    224224\end{cfacode}
    225 Note that the destructor of a monitor must be a \code{mutex} routine to prevent deallocation while a thread is accessing the monitor. As with any object, calls to a monitor, using \code{mutex} or otherwise, is Undefined Behaviour after the destructor has run.
     225Note that the destructor of a monitor must be a \code{mutex} routine to prevent deallocation while a thread is accessing the monitor. As with any object, calls to a monitor, using \code{mutex} or otherwise, is undefined behaviour after the destructor has run.
    226226
    227227% ======================================================================
     
    230230% ======================================================================
    231231% ======================================================================
    232 In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronization. With monitors, this capability is generally achieved with internal or external scheduling as in~\cite{Hoare74}. Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
    233 
    234 First, here is a simple example of internal scheduling :
     232In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronization. With monitors, this capability is generally achieved with internal or external scheduling as in~\cite{Hoare74}. With \textbf{scheduling} loosely defined as deciding which thread acquires the critical section next, \textbf{internal scheduling} means making the decision from inside the critical section (i.e., with access to the shared state), while \textbf{external scheduling} means making the decision when entering the critical section (i.e., without access to the shared state). Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
     233
     234First, here is a simple example of internal scheduling:
    235235
    236236\begin{cfacode}
     
    253253}
    254254\end{cfacode}
    255 There are two details to note here. First, the \code{signal} is a delayed operation, it only unblocks the waiting thread when it reaches the end of the critical section. This semantic is needed to respect mutual-exclusion, i.e., the signaller and signalled thread cannot be in the monitor simultaneously. The alternative is to return immediately after the call to \code{signal}, which is significantly more restrictive. Second, in \CFA, while it is common to store a \code{condition} as a field of the monitor, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, ensuring a basic ordering.
    256 
    257 An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to resume immediately after (unless some other thread waited on the same condition). This guarantee offers the benefit of not having to loop around waits to recheck that a condition is met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency.
     255There are two details to note here. First, \code{signal} is a delayed operation; it only unblocks the waiting thread when it reaches the end of the critical section. This semantics is needed to respect mutual-exclusion, i.e., the signaller and signalled thread cannot be in the monitor simultaneously. The alternative is to return immediately after the call to \code{signal}, which is significantly more restrictive. Second, in \CFA, while it is common to store a \code{condition} as a field of the monitor, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, ensuring a basic ordering.
     256
     257An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to be the next thread to acquire the monitor (unless some other thread waited on the same condition). This guarantee offers the benefit of not having to loop around waits to recheck that a condition is met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency.
    258258
    259259% ======================================================================
     
    262262% ======================================================================
    263263% ======================================================================
    264 It is easier to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling is done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use the implicit condition variable as parameters and explicitly names the monitors (A and B) associated with the condition. Note that in \CFA, condition variables are tied to a \emph{group} of monitors on first use (called branding), which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors. The example below shows the simple case of having two threads (one for each column) and a single monitor A.
     264It is easy to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling is done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use the implicit condition variable as parameters and explicitly name the monitors (A and B) associated with the condition. Note that in \CFA, condition variables are tied to a \emph{group} of monitors on first use (called branding), which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors. The example below shows the simple case of having two threads (one for each column) and a single monitor A.
    265265
    266266\begin{multicols}{2}
     
    299299\noindent This version uses \gls{bulk-acq} (denoted using the {\sf\&} symbol), but the presence of multiple monitors does not add a particularly new meaning. Synchronization happens between the two threads in exactly the same way and order. The only difference is that mutual exclusion covers a group of monitors. On the implementation side, handling multiple monitors does add a degree of complexity as the next few examples demonstrate.
    300300
    301 While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well-known deadlock problem is the Nested Monitor Problem~\cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem :
     301While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well-known deadlock problem is the Nested Monitor Problem~\cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem:
    302302\begin{multicols}{2}
    303303\begin{pseudo}
     
    351351% ======================================================================
    352352
    353 A larger example is presented to show complex issues for \gls{bulk-acq} and all the implementation options are analyzed. Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code to implement the pseudo-code in listing \ref{lst:int-bulk-pseudo}. For the purpose of translating the given pseudo-code into \CFA-code, any method of introducing a monitor is acceptable, e.g., \code{mutex} parameters, global variables, pointer parameters, or using locals with the \code{mutex}-statement.
     353A larger example is presented to show complex issues for \gls{bulk-acq} and its implementation options are analyzed. Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code to implement the pseudo-code in listing \ref{lst:int-bulk-pseudo}. For the purpose of translating the given pseudo-code into \CFA-code, any method of introducing a monitor is acceptable, e.g., \code{mutex} parameters, global variables, pointer parameters, or using locals with the \code{mutex} statement.
    354354
    355355\begin{figure}[!t]
     
    446446\end{figure}
    447447
    448 The complexity begins at code sections 4 and 8 in listing \ref{lst:int-bulk-pseudo}, which are where the existing semantics of internal scheduling needs to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired and is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should ``release \code{A & B}'' (listing \ref{lst:int-bulk-pseudo} line \ref{line:releaseFirst}), it must actually transfer ownership of monitor \code{B} to the waiting thread. This ownership transfer is required in order to prevent barging into \code{B} by another thread, since both the signalling and signalled threads still need monitor \code{A}. There are three options.
     448The complexity begins at code sections 4 and 8 in listing \ref{lst:int-bulk-pseudo}, which are where the existing semantics of internal scheduling needs to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired, which is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should ``release \code{A & B}'' (listing \ref{lst:int-bulk-pseudo} line \ref{line:releaseFirst}), it must actually transfer ownership of monitor \code{B} to the waiting thread. This ownership transfer is required in order to prevent barging into \code{B} by another thread, since both the signalling and signalled threads still need monitor \code{A}. There are three options:
    449449
    450450\subsubsection{Delaying Signals}
    451 The obvious solution to solve the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that that moment is when the last lock is no longer needed because this semantics fits most closely to the behaviour of single-monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from multiple objects to a single group of objects, effectively making the existing single-monitor semantic viable by simply changing monitors to monitor groups. This solution releases the monitors once every monitor in a group can be released. However, since some monitors are never released (i.e., the monitor of a thread), this interpretation means a group might never be released. A more interesting interpretation is to transfer the group until it can be disbanded, which means the group is not passed further and a thread can retain its locks.
    452 
    453 However, listing \ref{lst:int-secret} shows this solution can become much more complicated depending on what is executed while secretly holding B at line \ref{line:secret}, while avoiding the need to transfer ownership of a subset of the condition monitors. Listing \ref{lst:dependency} shows a slightly different example where a third thread is waiting on monitor \code{A}, using a different condition variable. Because the third thread is signalled when secretly holding \code{B}, the goal  becomes unreachable. Depending on the order of signals (listing \ref{lst:dependency} line \ref{line:signal-ab} and \ref{line:signal-a}) two cases can happen :
     451The obvious solution to the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that that moment is when the last lock is no longer needed, because this semantics fits most closely to the behaviour of single-monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from multiple objects to a single group of objects, effectively making the existing single-monitor semantic viable by simply changing monitors to monitor groups. This solution releases the monitors once every monitor in a group can be released. However, since some monitors are never released (e.g., the monitor of a thread), this interpretation means a group might never be released. A more interesting interpretation is to transfer the group until all its monitors are released, which means the group is not passed further and a thread can retain its locks.
     452
     453However, listing \ref{lst:int-secret} shows this solution can become much more complicated depending on what is executed while secretly holding B at line \ref{line:secret}, while avoiding the need to transfer ownership of a subset of the condition monitors. Listing \ref{lst:dependency} shows a slightly different example where a third thread is waiting on monitor \code{A}, using a different condition variable. Because the third thread is signalled when secretly holding \code{B}, the goal  becomes unreachable. Depending on the order of signals (listing \ref{lst:dependency} line \ref{line:signal-ab} and \ref{line:signal-a}) two cases can happen:
    454454
    455455\paragraph{Case 1: thread $\alpha$ goes first.} In this case, the problem is that monitor \code{A} needs to be passed to thread $\beta$ when thread $\alpha$ is done with it.
     
    459459Note that ordering is not determined by a race condition but by whether signalled threads are enqueued in FIFO or FILO order. However, regardless of the answer, users can move line \ref{line:signal-a} before line \ref{line:signal-ab} and get the reverse effect for listing \ref{lst:dependency}.
    460460
    461 In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means knowing when to dispand a group becomes complex and inefficient (see next section) and therefore effectively precludes this approach.
     461In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means knowing when to release a group becomes complex and inefficient (see next section) and therefore effectively precludes this approach.
    462462
    463463\subsubsection{Dependency graphs}
     
    501501\end{figure}
    502502
    503 In listing \ref{lst:int-bulk-pseudo}, there is a solution that satisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases \code{A & B} and then the waiter transfers back ownership of \code{A} back to the signaller when it releases it, then the problem is solved (\code{B} is no longer in use at this point). Dynamically finding the correct order is therefore the second possible solution. The problem is effectively resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and it seems to increase in a manner close to polynomial. This complexity explosion can be seen in listing \ref{lst:explosion}, which is just a direct extension to three monitors, requires at least three ownership transfer and has multiple solutions. Furthermore, the presence of multiple solutions for ownership transfer can cause deadlock problems if a specific solution is not consistently picked; In the same way that multiple lock acquiring order can cause deadlocks.
     503In listing \ref{lst:int-bulk-pseudo}, there is a solution that satisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases \code{A & B} and then the waiter transfers back ownership of \code{A} back to the signaller when it releases it, then the problem is solved (\code{B} is no longer in use at this point). Dynamically finding the correct order is therefore the second possible solution. The problem is effectively resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and has a super-linear complexity. This complexity can be seen in listing \ref{lst:explosion}, which is just a direct extension to three monitors, requires at least three ownership transfer and has multiple solutions. Furthermore, the presence of multiple solutions for ownership transfer can cause deadlock problems if a specific solution is not consistently picked; In the same way that multiple lock acquiring order can cause deadlocks.
    504504\begin{figure}
    505505\begin{multicols}{2}
     
    530530\end{figure}
    531531
    532 Given the three threads example in listing \ref{lst:dependency}, figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows the dependency of that statement (e.g., $\alpha1$ must happen before $\alpha2$). The extra challenge is that this dependency graph is effectively post-mortem, but the runtime system needs to be able to build and solve these graphs as the dependency unfolds. Resolving dependency graphs being a complex and expensive endeavour, this solution is not the preferred one.
     532Given the three threads example in listing \ref{lst:dependency}, figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows the dependency of that statement (e.g., $\alpha1$ must happen before $\alpha2$). The extra challenge is that this dependency graph is effectively post-mortem, but the runtime system needs to be able to build and solve these graphs as the dependencies unfold. Resolving dependency graphs being a complex and expensive endeavour, this solution is not the preferred one.
    533533
    534534\subsubsection{Partial Signalling} \label{partial-sig}
    535 Finally, the solution that is chosen for \CFA is to use partial signalling. Again using listing \ref{lst:int-bulk-pseudo}, the partial signalling solution transfers ownership of monitor \code{B} at lines \ref{line:signal1} to the waiter but does not wake the waiting thread since it is still using monitor \code{A}. Only when it reaches line \ref{line:lastRelease} does it actually wake up the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions, passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency graph solving algorithms, which is why it was chosen. Furthermore, after being fully implemented, this solution does not appear to have any significant downsides.
    536 
    537 Using partial signalling, listing \ref{lst:dependency} can be solved easily :
     535Finally, the solution that is chosen for \CFA is to use partial signalling. Again using listing \ref{lst:int-bulk-pseudo}, the partial signalling solution transfers ownership of monitor \code{B} at lines \ref{line:signal1} to the waiter but does not wake the waiting thread since it is still using monitor \code{A}. Only when it reaches line \ref{line:lastRelease} does it actually wake up the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency graph solving algorithms, which is why it was chosen. Furthermore, after being fully implemented, this solution does not appear to have any significant downsides.
     536
     537Using partial signalling, listing \ref{lst:dependency} can be solved easily:
    538538\begin{itemize}
    539539        \item When thread $\gamma$ reaches line \ref{line:release-ab} it transfers monitor \code{B} to thread $\alpha$ and continues to hold monitor \code{A}.
     
    652652An important note is that, until now, signalling a monitor was a delayed operation. The ownership of the monitor is transferred only when the monitor would have otherwise been released, not at the point of the \code{signal} statement. However, in some cases, it may be more convenient for users to immediately transfer ownership to the thread that is waiting for cooperation, which is achieved using the \code{signal_block} routine.
    653653
    654 The example in table \ref{tbl:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits, this behaviour requires additional synchronization when a two-way handshake is needed. To avoid this explicit synchronization, the \code{condition} type offers the \code{signal_block} routine, which handles the two-way handshake as shown in the example. This feature removes the need for a second condition variables and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the frond end and the back end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call.
     654The example in table \ref{tbl:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits; this behaviour requires additional synchronization when a two-way handshake is needed. To avoid this explicit synchronization, the \code{condition} type offers the \code{signal_block} routine, which handles the two-way handshake as shown in the example. This feature removes the need for a second condition variables and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the front end and the back end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call.
    655655
    656656% ======================================================================
     
    659659% ======================================================================
    660660% ======================================================================
    661 An alternative to internal scheduling is external scheduling, e.g., in \uC.
    662 \begin{center}
     661An alternative to internal scheduling is external scheduling (see Table~\ref{tbl:sched}).
     662\begin{table}
    663663\begin{tabular}{|c|c|c|}
    664664Internal Scheduling & External Scheduling & Go\\
     
    720720\end{gocode}
    721721\end{tabular}
    722 \end{center}
    723 This method is more constrained and explicit, which helps users reduce the non-deterministic nature of concurrency. Indeed, as the following examples demonstrates, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. External scheduling can generally be done either in terms of control flow (e.g., Ada with \code{accept}, \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). Of course, both of these paradigms have their own strengths and weaknesses, but for this project control-flow semantics was chosen to stay consistent with the rest of the languages semantics. Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multiple-monitor routines. The previous example shows a simple use \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s.
     722\caption{Different forms of scheduling.}
     723\label{tbl:sched}
     724\end{table}
     725This method is more constrained and explicit, which helps users reduce the non-deterministic nature of concurrency. Indeed, as the following examples demonstrate, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. External scheduling can generally be done either in terms of control flow (e.g., Ada with \code{accept}, \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). Of course, both of these paradigms have their own strengths and weaknesses, but for this project, control-flow semantics was chosen to stay consistent with the rest of the languages semantics. Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multiple-monitor routines. The previous example shows a simple use \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s.
    724726
    725727For the \code{P} member above using internal scheduling, the call to \code{wait} only guarantees that \code{V} is the last routine to access the monitor, allowing a third routine, say \code{isInUse()}, acquire mutual exclusion several times while routine \code{P} is waiting. On the other hand, external scheduling guarantees that while routine \code{P} is waiting, no other routine than \code{V} can acquire the monitor.
     
    761763\end{tabular}
    762764\end{center}
    763 For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. Indeed, monitors are often expressed as an entry queue and some acceptor queue as in the following figure:
    764 
    765 \begin{figure}[H]
    766 \begin{center}
    767 {\resizebox{0.4\textwidth}{!}{\input{monitor}}}
    768 \end{center}
    769 \label{fig:monitor}
     765For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. Indeed, monitors are often expressed as an entry queue and some acceptor queue as in Figure~\ref{fig:ClassicalMonitor}.
     766
     767\begin{figure}
     768\centering
     769\subfloat[Classical Monitor] {
     770\label{fig:ClassicalMonitor}
     771{\resizebox{0.45\textwidth}{!}{\input{monitor}}}
     772}% subfloat
     773\qquad
     774\subfloat[\Gls{bulk-acq} Monitor] {
     775\label{fig:BulkMonitor}
     776{\resizebox{0.45\textwidth}{!}{\input{ext_monitor}}}
     777}% subfloat
     778\caption{External Scheduling Monitor}
    770779\end{figure}
    771780
    772 There are other alternatives to these pictures, but in the case of this picture, implementing a fast accept check is relatively easy. Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (e.g., 128) of mutex members. This approach requires a unique dense ordering of routines with an upper-bound and that ordering must be consistent across translation units. For OO languages these constraints are common, since objects only offer adding member routines consistently across translation units via inheritance. However, in \CFA users can extend objects with mutex routines that are only visible in certain translation unit. This means that establishing a program-wide dense-ordering among mutex routines can only be done in the program linking phase, and still could have issues when using dynamically shared objects.
    773 
    774 The alternative is to alter the implementation like this:
    775 \begin{center}
    776 {\resizebox{0.4\textwidth}{!}{\input{ext_monitor}}}
    777 \end{center}
     781There are other alternatives to these pictures, but in the case of the left picture, implementing a fast accept check is relatively easy. Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (e.g., 128) of mutex members. This approach requires a unique dense ordering of routines with an upper-bound and that ordering must be consistent across translation units. For OO languages these constraints are common, since objects only offer adding member routines consistently across translation units via inheritance. However, in \CFA users can extend objects with mutex routines that are only visible in certain translation unit. This means that establishing a program-wide dense-ordering among mutex routines can only be done in the program linking phase, and still could have issues when using dynamically shared objects.
     782
     783The alternative is to alter the implementation as in Figure~\ref{fig:BulkMonitor}.
    778784Here, the mutex routine called is associated with a thread on the entry queue while a list of acceptable routines is kept separate. Generating a mask dynamically means that the storage for the mask information can vary between calls to \code{waitfor}, allowing for more flexibility and extensions. Storing an array of accepted function pointers replaces the single instruction bitmask comparison with dereferencing a pointer followed by a linear search. Furthermore, supporting nested external scheduling (e.g., listing \ref{lst:nest-ext}) may now require additional searches for the \code{waitfor} statement to check if a routine is already queued.
    779785
     
    793799\end{figure}
    794800
    795 Note that in the second picture, tasks need to always keep track of the monitors associated with mutex routines, and the routine mask needs to have both a function pointer and a set of monitors, as is discussed in the next section. These details are omitted from the picture for the sake of simplicity.
     801Note that in the right picture, tasks need to always keep track of the monitors associated with mutex routines, and the routine mask needs to have both a function pointer and a set of monitors, as is discussed in the next section. These details are omitted from the picture for the sake of simplicity.
    796802
    797803At this point, a decision must be made between flexibility and performance. Many design decisions in \CFA achieve both flexibility and performance, for example polymorphic routines add significant flexibility but inlining them means the optimizer can easily remove any runtime cost. Here, however, the cost of flexibility cannot be trivially removed. In the end, the most flexible approach has been chosen since it allows users to write programs that would otherwise be  hard to write. This decision is based on the assumption that writing fast but inflexible locks is closer to a solved problem than writing locks that are as flexible as external scheduling in \CFA.
     
    838844\end{cfacode}
    839845
    840 Note that the set of monitors passed to the \code{waitfor} statement must be entirely contained in the set of monitors already acquired in the routine. \code{waitfor} used in any other context is Undefined Behaviour.
    841 
    842 An important behaviour to note is when a set of monitors only match partially :
     846Note that the set of monitors passed to the \code{waitfor} statement must be entirely contained in the set of monitors already acquired in the routine. \code{waitfor} used in any other context is undefined behaviour.
     847
     848An important behaviour to note is when a set of monitors only match partially:
    843849
    844850\begin{cfacode}
     
    862868}
    863869\end{cfacode}
    864 While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets does not wake-up the waiting thread. It is also important to note that in the case of external scheduling the order of parameters is irrelevant; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are indistinguishable waiting condition.
     870While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets does not wakeup the waiting thread. It is also important to note that in the case of external scheduling the order of parameters is irrelevant; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are indistinguishable waiting condition.
    865871
    866872% ======================================================================
     
    870876% ======================================================================
    871877
    872 Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted because the compiler validates at compile time the validity of the function type and the parameters used with the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the waitfor statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading but overloading is possible.
     878Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted because the compiler validates at compile time the validity of the function type and the parameters used with the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the waitfor statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading, but overloading is possible.
    873879\begin{figure}
    874880\begin{cfacode}[caption={Various correct and incorrect uses of the waitfor statement},label={lst:waitfor}]
     
    899905        waitfor(f4, a1);     //Incorrect : f4 ambiguous
    900906
    901         waitfor(f2, a1, b2); //Undefined Behaviour : b2 not mutex
     907        waitfor(f2, a1, b2); //Undefined behaviour : b2 not mutex
    902908}
    903909\end{cfacode}
    904910\end{figure}
    905911
    906 Finally, for added flexibility, \CFA supports constructing a complex \code{waitfor} statement using the \code{or}, \code{timeout} and \code{else}. Indeed, multiple \code{waitfor} clauses can be chained together using \code{or}; this chain forms a single statement that uses baton pass to any function that fits one of the function+monitor set passed in. To enable users to tell which accepted function executed, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement, which is executed after the clause is triggered. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, which checks for a matching function call already arrived and otherwise continues. Any and all of these clauses can be preceded by a \code{when} condition to dynamically toggle the accept clauses on or off based on some current state. Listing \ref{lst:waitfor2}, demonstrates several complex masks and some incorrect ones.
     912Finally, for added flexibility, \CFA supports constructing a complex \code{waitfor} statement using the \code{or}, \code{timeout} and \code{else}. Indeed, multiple \code{waitfor} clauses can be chained together using \code{or}; this chain forms a single statement that uses baton pass to any function that fits one of the function+monitor set passed in. To enable users to tell which accepted function executed, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement, which is executed after the clause is triggered. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, which checks for a matching function call already arrived and otherwise continues. Any and all of these clauses can be preceded by a \code{when} condition to dynamically toggle the accept clauses on or off based on some current state. Listing \ref{lst:waitfor2} demonstrates several complex masks and some incorrect ones.
    907913
    908914\begin{figure}
     
    972978% ======================================================================
    973979% ======================================================================
    974 An interesting use for the \code{waitfor} statement is destructor semantics. Indeed, the \code{waitfor} statement can accept any \code{mutex} routine, which includes the destructor (see section \ref{data}). However, with the semantics discussed until now, waiting for the destructor does not make any sense since using an object after its destructor is called is undefined behaviour. The simplest approach is to disallow \code{waitfor} on a destructor. However, a more expressive approach is to flip ordering of execution when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current \code{mutex} routine, similarly to how a condition is signalled.
     980An interesting use for the \code{waitfor} statement is destructor semantics. Indeed, the \code{waitfor} statement can accept any \code{mutex} routine, which includes the destructor (see section \ref{data}). However, with the semantics discussed until now, waiting for the destructor does not make any sense, since using an object after its destructor is called is undefined behaviour. The simplest approach is to disallow \code{waitfor} on a destructor. However, a more expressive approach is to flip ordering of execution when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current \code{mutex} routine, similarly to how a condition is signalled.
    975981\begin{figure}
    976982\begin{cfacode}[caption={Example of an executor which executes action in series until the destructor is called.},label={lst:dtor-order}]
  • doc/proposals/concurrency/text/frontpgs.tex

    r326cd2b r9cdfb4d0  
    3939        \vspace*{2.0cm}
    4040
    41         Waterloo, Ontario, Canada, 2017 \\
     41        Waterloo, Ontario, Canada, 2018 \\
    4242
    4343        \vspace*{1.0cm}
    4444
    45         \copyright\ Thierry Delisle 2017 \\
     45        \copyright\ Thierry Delisle 2018 \\
    4646        \end{center}
    4747\end{titlepage}
     
    154154% \newpage
    155155
     156% L I S T   O F   T A B L E S
     157% -----------------------------
     158\addcontentsline{toc}{chapter}{List of Acronyms}
     159\printglossary[type=\acronymtype,title={List of Acronyms}]
     160\cleardoublepage
     161\phantomsection         % allows hyperref to link to the correct page
     162
    156163% Change page numbering back to Arabic numerals
    157164\pagenumbering{arabic}
  • doc/proposals/concurrency/text/future.tex

    r326cd2b r9cdfb4d0  
    1111
    1212\subsection{Performance} \label{futur:perf}
    13 This thesis presents a first implementation of the \CFA runtime. Therefore, there is still significant work to improve performance. Many of the data structures and algorithms may change in the future to more efficient versions. For example, the number of monitors in a single \gls{bulk-acq} is only bound by the stack size, this is probably unnecessarily generous. It may be possible that limiting the number helps increase performance. However, it is not obvious that the benefit would be significant.
     13This thesis presents a first implementation of the \CFA concurrency runtime. Therefore, there is still significant work to improve performance. Many of the data structures and algorithms may change in the future to more efficient versions. For example, the number of monitors in a single \gls{bulk-acq} is only bound by the stack size, this is probably unnecessarily generous. It may be possible that limiting the number helps increase performance. However, it is not obvious that the benefit would be significant.
    1414
    1515\subsection{Flexible Scheduling} \label{futur:sched}
    1616An important part of concurrency is scheduling. Different scheduling algorithms can affect performance (both in terms of average and variation). However, no single scheduler is optimal for all workloads and therefore there is value in being able to change the scheduler for given programs. One solution is to offer various tweaking options to users, allowing the scheduler to be adjusted to the requirements of the workload. However, in order to be truly flexible, it would be interesting to allow users to add arbitrary data and arbitrary scheduling algorithms. For example, a web server could attach Type-of-Service information to threads and have a ``ToS aware'' scheduling algorithm tailored to this specific web server. This path of flexible schedulers will be explored for \CFA.
    1717
    18 \subsection{Non-Blocking IO} \label{futur:nbio}
    19 While most of the parallelism tools are aimed at data parallelism and control-flow parallelism, many modern workloads are not bound on computation but on IO operations, a common case being web servers and XaaS (anything as a service). These types of workloads often require significant engineering around amortizing costs of blocking IO operations. At its core, Non-Blocking IO is an operating system level feature that allows queuing IO operations (e.g., network operations) and registering for notifications instead of waiting for requests to complete. In this context, the role of the language makes Non-Blocking IO easily available and with low overhead. The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises, which can be seen in frameworks like Node.js~\cite{NodeJs} for JavaScript, Spring MVC~\cite{SpringMVC} for Java and Django~\cite{Django} for Python. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear.
     18\subsection{Non-Blocking I/O} \label{futur:nbio}
     19While most of the parallelism tools are aimed at data parallelism and control-flow parallelism, many modern workloads are not bound on computation but on IO operations, a common case being web servers and XaaS (anything as a service). These types of workloads often require significant engineering around amortizing costs of blocking IO operations. At its core, non-blocking I/O is an operating system level feature that allows queuing IO operations (e.g., network operations) and registering for notifications instead of waiting for requests to complete. In this context, the role of the language makes Non-Blocking IO easily available and with low overhead. The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises, which can be seen in frameworks like Node.js~\cite{NodeJs} for JavaScript, Spring MVC~\cite{SpringMVC} for Java and Django~\cite{Django} for Python. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear.
    2020
    2121\subsection{Other Concurrency Tools} \label{futur:tools}
  • doc/proposals/concurrency/text/internals.tex

    r326cd2b r9cdfb4d0  
    33There are several challenges specific to \CFA when implementing concurrency. These challenges are a direct result of \gls{bulk-acq} and loose object definitions. These two constraints are the root cause of most design decisions in the implementation. Furthermore, to avoid contention from dynamically allocating memory in a concurrent environment, the internal-scheduling design is (almost) entirely free of mallocs. This approach avoids the chicken and egg problem~\cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal means that memory management is a constant concern in the design of the system.
    44
    5 The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues and all queues are designed with intrusive nodes, where each node has pre-allocated link fields for chaining, to avoid the need for memory allocation. Since several concurrency operations can use an unbound amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient.The only way to use a variable amount of memory without requiring memory allocation is to pre-allocate large buffers of memory eagerly and store the information in these buffers. Conveniently, the call stack fits that description and is easy to use, which is why it is used heavily in the implementation of internal scheduling, particularly variable-length arrays. Since stack allocation is based on scopes, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable-length array. The threads and the condition both have a fixed amount of memory, while mute routines and blocking calls allow for an unbound amount, within the stack size.
     5The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues and all queues are designed with intrusive nodes, where each node has pre-allocated link fields for chaining, to avoid the need for memory allocation. Since several concurrency operations can use an unbound amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient.The only way to use a variable amount of memory without requiring memory allocation is to pre-allocate large buffers of memory eagerly and store the information in these buffers. Conveniently, the call stack fits that description and is easy to use, which is why it is used heavily in the implementation of internal scheduling, particularly variable-length arrays. Since stack allocation is based on scopes, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable-length array. The threads and the condition both have a fixed amount of memory, while \code{mutex} routines and blocking calls allow for an unbound amount, within the stack size.
    66
    77Note that since the major contributions of this thesis are extending monitor semantics to \gls{bulk-acq} and loose object definitions, any challenges that are not resulting of these characteristics of \CFA are considered as solved problems and therefore not discussed.
     
    1313% ======================================================================
    1414
    15 The first step towards the monitor implementation is simple mute routines. In the single monitor case, mutual-exclusion is done using the entry/exit procedure in listing \ref{lst:entry1}. The entry/exit procedures do not have to be extended to support multiple monitors. Indeed it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlock~\cite{Havender68}. In \CFA, ordering of monitor acquisition relies on memory ordering. This approach is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is Undefined Behaviour. When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values. This array persists for the entire duration of the mutual-exclusion and its ordering reused extensively.
     15The first step towards the monitor implementation is simple \code{mutex} routines. In the single monitor case, mutual-exclusion is done using the entry/exit procedure in listing \ref{lst:entry1}. The entry/exit procedures do not have to be extended to support multiple monitors. Indeed it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlock~\cite{Havender68}. In \CFA, ordering of monitor acquisition relies on memory ordering. This approach is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is undefined behaviour. When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values. This array persists for the entire duration of the mutual-exclusion and its ordering reused extensively.
    1616\begin{figure}
    1717\begin{multicols}{2}
     
    3939\end{figure}
    4040
    41 \subsection{ Details: Interaction with polymorphism}
     41\subsection{Details: Interaction with polymorphism}
    4242Depending on the choice of semantics for when monitor locks are acquired, interaction between monitors and \CFA's concept of polymorphism can be more complex to support. However, it is shown that entry-point locking solves most of the issues.
    4343
    44 First of all, interaction between \code{otype} polymorphism and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. It is important to present the difference between the two acquiring options : \glspl{callsite-locking} and entry-point locking, i.e., acquiring the monitors before making a mutex routine-call or as the first operation of the mutex routine-call. For example:
     44First of all, interaction between \code{otype} polymorphism (see Section~\ref{s:ParametricPolymorphism}) and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. It is important to present the difference between the two acquiring options: \glspl{callsite-locking} and entry-point locking, i.e., acquiring the monitors before making a mutex routine-call or as the first operation of the mutex routine-call. For example:
    4545\begin{table}[H]
    4646\begin{center}
     
    109109\end{cfacode}
    110110
    111 Both entry point and \gls{callsite-locking} are feasible implementations. The current \CFA implementation uses entry-point locking because it requires less work when using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. It is harder to use \gls{raii} for call-site locking, as it does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e.: the function body. For example, the monitor call can appear in the middle of an expression. Furthermore, entry-point locking requires less code generation since any useful routine multiple times, but there is only one entry point for many call sites.
     111Both entry point and \gls{callsite-locking} are feasible implementations. The current \CFA implementation uses entry-point locking because it requires less work when using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. It is harder to use \gls{raii} for call-site locking, as it does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e., the function body. For example, the monitor call can appear in the middle of an expression. Furthermore, entry-point locking requires less code generation since any useful routine is called multiple times but there is only one entry point for many call sites.
    112112
    113113% ======================================================================
     
    127127\end{figure}
    128128
     129\subsection{Processors}
     130Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically \texttt{pthread}s in the current implementation of \CFA. Indeed, any parallelism must go through operating-system libraries. However, \glspl{uthread} are still the main source of concurrency, processors are simply the underlying source of parallelism. Indeed, processor \glspl{kthread} simply fetch a \gls{uthread} from the scheduler and run it; they are effectively executers for user-threads. The main benefit of this approach is that it offers a well-defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics.
     131
     132\subsection{Stack Management}
     133One of the challenges of this system is to reduce the footprint as much as possible. Specifically, all \texttt{pthread}s created also have a stack created with them, which should be used as much as possible. Normally, coroutines also create their own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the \gls{kthread} stack, effectively stealing the processor stack. The exception to this rule is the Main Processor, i.e., the initial \gls{kthread} that is given to any program. In order to respect C user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor, which can grow very large.
     134
    129135\subsection{Context Switching}
    130 As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading, because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that the instruction pointer can be left untouched since the context-switch is always inside the same function. Threads, however, do not context-switch between each other directly. They context-switch to the scheduler. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen. Obviously, this doubles the context-switch cost because threads must context-switch to an intermediate stack. The alternative 1-step context-switch uses the stack of the ``from'' thread to schedule and then context-switches directly to the ``to'' thread. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). Additionally, for users in need for optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch (akin to the Microsoft \code{SwitchToFiber}~\cite{switchToWindows} routine). This option is not currently present in \CFA but the changes required to add it are strictly additive.
    131 
    132 \subsection{Processors}
    133 Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically pthreads in the current implementation of \CFA. Indeed, any parallelism must go through operating-system libraries. However, \glspl{uthread} are still the main source of concurrency, processors are simply the underlying source of parallelism. Indeed, processor \glspl{kthread} simply fetch a \gls{uthread} from the scheduler and run it; they are effectively executers for user-threads. The main benefit of this approach is that it offers a well-defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics.
    134 
    135 \subsection{Stack Management}
    136 One of the challenges of this system is to reduce the footprint as much as possible. Specifically, all pthreads created also have a stack created with them, which should be used as much as possible. Normally, coroutines also create there own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the \gls{kthread} stack, effectively stealing the processor stack. The exception to this rule is the Main Processor, i.e. the initial \gls{kthread} that is given to any program. In order to respect C user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor, which can grow very large.
     136As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading, because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that the instruction pointer can be left untouched since the context-switch is always inside the same function. Threads, however, do not context-switch between each other directly. They context-switch to the scheduler. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen. Obviously, this doubles the context-switch cost because threads must context-switch to an intermediate stack. The alternative 1-step context-switch uses the stack of the ``from'' thread to schedule and then context-switches directly to the ``to'' thread. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). Additionally, for users in need for optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch (akin to the Microsoft \code{SwitchToFiber}~\cite{switchToWindows} routine). This option is not currently present in \CFA, but the changes required to add it are strictly additive.
    137137
    138138\subsection{Preemption} \label{preemption}
    139139Finally, an important aspect for any complete threading system is preemption. As mentioned in chapter \ref{basics}, preemption introduces an extra degree of uncertainty, which enables users to have multiple threads interleave transparently, rather than having to cooperate among threads for proper scheduling and CPU distribution. Indeed, preemption is desirable because it adds a degree of isolation among threads. In a fully cooperative system, any thread that runs a long loop can starve other threads, while in a preemptive system, starvation can still occur but it does not rely on every thread having to yield or block on a regular basis, which reduces significantly a programmer burden. Obviously, preemption is not optimal for every workload. However any preemptive system can become a cooperative system by making the time slices extremely large. Therefore, \CFA uses a preemptive threading system.
    140140
    141 Preemption in \CFA is based on kernel timers, which are used to run a discrete-event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption, it inserts the time in a sorted order and sets a kernel timer for the closest one, effectively stepping through preemption events on each signal sent by the timer. These timers use the Linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread. This results in an implementation problem, because when delivering signals to a process, the kernel can deliver the signal to any kernel thread for which the signal is not blocked, i.e. :
     141Preemption in \CFA\footnote{Note that the implementation of preemption is strongly tied with the underlying threading system. For this reason, only the Linux implementation is cover, \CFA does not run on Windows at the time of writting} is based on kernel timers, which are used to run a discrete-event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption, it inserts the time in a sorted order and sets a kernel timer for the closest one, effectively stepping through preemption events on each signal sent by the timer. These timers use the Linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread. This results in an implementation problem, because when delivering signals to a process, the kernel can deliver the signal to any kernel thread for which the signal is not blocked, i.e.:
    142142\begin{quote}
    143143A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked. If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which to deliver the signal.
    144144SIGNAL(7) - Linux Programmer's Manual
    145145\end{quote}
    146 For the sake of simplicity and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every kernel thread except one. Now because of how involuntary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread.
    147 
    148 Involuntary context-switching is done by sending signal {\tt SIGUSER1} to the corresponding proces\-sor and having the thread yield from inside the signal handler. This approach effectively context-switches away from the signal handler back to the kernel and the signal-handler frame is eventually unwound when the thread is scheduled again. As a result, a signal-handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal-handlers save and restore signal masks because user-thread migration can cause a signal mask to migrate from one kernel thread to another. This behaviour is only a problem if all kernel threads, among which a user thread can migrate, differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions.}. However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel-alarm thread can cause deadlocks. For this reason, the alarm thread is in a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). This unblocking is also done using {\tt SIGALRM}, but sent through the \code{pthread_sigqueue}. Indeed, \code{sigwait} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel.
     146For the sake of simplicity, and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every kernel thread except one.
     147
     148Now because of how involuntary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread. Hence, involuntary context-switching is done by sending signal {\tt SIGUSR1} to the corresponding proces\-sor and having the thread yield from inside the signal handler. This approach effectively context-switches away from the signal handler back to the kernel and the signal handler frame is eventually unwound when the thread is scheduled again. As a result, a signal handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal handlers save and restore signal masks because user-thread migration can cause a signal mask to migrate from one kernel thread to another. This behaviour is only a problem if all kernel threads, among which a user thread can migrate, differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions.}. However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel-alarm thread can cause deadlocks. For this reason, the alarm thread is in a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). This unblocking is also done using {\tt SIGALRM}, but sent through the \code{pthread_sigqueue}. Indeed, \code{sigwait} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel.
    149149
    150150\subsection{Scheduler}
     
    156156% ======================================================================
    157157% ======================================================================
    158 The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:monitor} for convenience) :
     158The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:ClassicalMonitor} for convenience):
    159159
    160160\begin{figure}[H]
     
    177177\end{figure}
    178178
    179 This picture and the proper entry and leave algorithms (see listing \ref{lst:entry2}) is the fundamental implementation of internal scheduling. Note that when a thread is moved from the condition to the AS-stack, it is conceptually split the thread into N pieces, where N is the number of monitors specified in the parameter list. The thread is woken up when all the pieces have popped from the AS-stacks and made active. In this picture, the threads are split into halves but this is only because there are two monitors. For a specific signalling operation every monitor needs a piece of thread on its AS-stack.
     179This picture and the proper entry and leave algorithms (see listing \ref{lst:entry2}) is the fundamental implementation of internal scheduling. Note that when a thread is moved from the condition to the AS-stack, it is conceptually split into N pieces, where N is the number of monitors specified in the parameter list. The thread is woken up when all the pieces have popped from the AS-stacks and made active. In this picture, the threads are split into halves but this is only because there are two monitors. For a specific signalling operation every monitor needs a piece of thread on its AS-stack.
    180180
    181181\begin{figure}[b]
     
    210210\end{figure}
    211211
    212 Some important things to notice about the exit routine. The solution discussed in \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe as well as preventing any potential barging. The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the call stack of the \code{wait} and \code{signal_block} routines.
     212The solution discussed in \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe as well as preventing any potential barging. The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the call stack of the \code{wait} and \code{signal_block} routines.
    213213
    214214\begin{figure}[H]
     
    220220\end{figure}
    221221
    222 Figure \ref{fig:structs} shows a high-level representation of these data structures. The main idea behind them is that, a thread cannot contain an arbitrary number of intrusive stacks for linking onto monitor. The \code{condition node} is the data structure that is queued onto a condition variable and, when signalled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. Once all the criteria have been popped from their respective AS-stacks, the thread is woken up, which is what is shown in listing \ref{lst:entry2}.
     222Figure \ref{fig:structs} shows a high-level representation of these data structures. The main idea behind them is that, a thread cannot contain an arbitrary number of intrusive ``next'' pointers for linking onto monitors. The \code{condition node} is the data structure that is queued onto a condition variable and, when signalled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. Once all the criteria have been popped from their respective AS-stacks, the thread is woken up, which is what is shown in listing \ref{lst:entry2}.
    223223
    224224% ======================================================================
     
    227227% ======================================================================
    228228% ======================================================================
    229 Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. For internal scheduling, these queues are part of condition variables, which are still unique for a given scheduling operation (e.g., no signal statement uses multiple conditions). However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired. These monitors being the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. This requires an algorithm to choose which monitor holds the relevant queue. It is also important that said algorithm be independent of the order in which users list parameters. The proposed algorithm is to fall back on monitor lock ordering (sorting by address) and specify that the monitor that is acquired first is the one with the relevant waiting queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint.
    230 
    231 This algorithm choice has two consequences :
     229Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. For internal scheduling, these queues are part of condition variables, which are still unique for a given scheduling operation (i.e., no signal statement uses multiple conditions). However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired. These monitors being the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. This requires an algorithm to choose which monitor holds the relevant queue. It is also important that said algorithm be independent of the order in which users list parameters. The proposed algorithm is to fall back on monitor lock ordering (sorting by address) and specify that the monitor that is acquired first is the one with the relevant waiting queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint.
     230
     231This algorithm choice has two consequences:
    232232\begin{itemize}
    233233        \item The queue of the monitor with the lowest address is no longer a true FIFO queue because threads can be moved to the front of the queue. These queues need to contain a set of monitors for each of the waiting threads. Therefore, another thread whose set contains the same lowest address monitor but different lower priority monitors may arrive first but enter the critical section after a thread with the correct pairing.
    234234        \item The queue of the lowest priority monitor is both required and potentially unused. Indeed, since it is not known at compile time which monitor is the monitor which has the lowest address, every monitor needs to have the correct queues even though it is possible that some queues go unused for the entire duration of the program, for example if a monitor is only used in a specific pair.
    235235\end{itemize}
    236 Therefore, the following modifications need to be made to support external scheduling :
     236Therefore, the following modifications need to be made to support external scheduling:
    237237\begin{itemize}
    238         \item The threads waiting on the entry-queue need to keep track of which routine it is trying to enter, and using which set of monitors. The \code{mutex} routine already has all the required information on its stack so the thread only needs to keep a pointer to that information.
     238        \item The threads waiting on the entry queue need to keep track of which routine they are trying to enter, and using which set of monitors. The \code{mutex} routine already has all the required information on its stack, so the thread only needs to keep a pointer to that information.
    239239        \item The monitors need to keep a mask of acceptable routines. This mask contains for each acceptable routine, a routine pointer and an array of monitors to go with it. It also needs storage to keep track of which routine was accepted. Since this information is not specific to any monitor, the monitors actually contain a pointer to an integer on the stack of the waiting thread. Note that if a thread has acquired two monitors but executes a \code{waitfor} with only one monitor as a parameter, setting the mask of acceptable routines to both monitors will not cause any problems since the extra monitor will not change ownership regardless. This becomes relevant when \code{when} clauses affect the number of monitors passed to a \code{waitfor} statement.
    240         \item The entry/exit routine need to be updated as shown in listing \ref{lst:entry3}.
     240        \item The entry/exit routines need to be updated as shown in listing \ref{lst:entry3}.
    241241\end{itemize}
    242242
    243243\subsection{External Scheduling - Destructors}
    244 Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine. This routine is needed because of the storage requirements of the call order inversion. Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. For regular \code{waitfor} statements, the call stack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later. The waitfor semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor}
     244Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine. This routine is needed because of the storage requirements of the call order inversion. Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. For regular \code{waitfor} statements, the call stack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later. The \code{waitfor} semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor}
    245245
    246246\begin{figure}
  • doc/proposals/concurrency/text/intro.tex

    r326cd2b r9cdfb4d0  
    22\chapter{Introduction}
    33% ======================================================================
    4 
    5 This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, call \CFA. Furthermore, the proposed \acrshort{api} doubles as an early definition of the \CFA language and library. This thesis also comes with an implementation of the concurrency library for \CFA as well as all the required language features added to the source-to-source translator.
     4This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, called \CFA. Furthermore, the proposed \acrshort{api} doubles as an early definition of the \CFA language and library. This thesis also provides an implementation of the concurrency library for \CFA as well as all the required language features added to the source-to-source translator.
    65
    76There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the programmer. While these two concepts are often combined, they are in fact distinct, requiring different tools~\cite{Buhr05a}. Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization.
     7
     8In the context of this thesis, a \textbf{thread} is a fundamental unit of execution that runs a sequence of code, generally on a program stack. Having multiple simultaneous threads gives rise to concurrency and generally requires some kind of locking mechanism to ensure proper execution. Correspondingly, \textbf{concurrency} is defined as the concepts and challenges that occur when multiple independent (sharing memory, timing dependencies, etc.) concurrent threads are introduced. Accordingly, \textbf{locking} (and by extension locks) are defined as a mechanism that prevents the progress of certain threads in order to avoid problems due to concurrency. Finally, in this thesis \textbf{parallelism} is distinct from concurrency and is defined as running multiple threads simultaneously. More precisely, parallelism implies \emph{actual} simultaneous execution as opposed to concurrency which only requires \emph{apparent} simultaneous execution. As such, parallelism is only observable in the differences in performance or, more generally, differences in timing.
  • doc/proposals/concurrency/text/parallelism.tex

    r326cd2b r9cdfb4d0  
    77% #       #     # #     # #     # ####### ####### ####### ####### ###  #####  #     #
    88\chapter{Parallelism}
    9 Historically, computer performance was about processor speeds and instructions count. However, with heat dissipation being a direct consequence of speed increase, parallelism has become the new source for increased performance~\cite{Sutter05, Sutter05b}. In this decade, it is not longer reasonable to create a high-performance application without caring about parallelism. Indeed, parallelism is an important aspect of performance and more specifically throughput and hardware utilization. The lowest-level approach of parallelism is to use \glspl{kthread} in combination with semantics like \code{fork}, \code{join}, etc. However, since these have significant costs and limitations, \glspl{kthread} are now mostly used as an implementation tool rather than a user oriented one. There are several alternatives to solve these issues that all have strengths and weaknesses. While there are many variations of the presented paradigms, most of these variations do not actually change the guarantees or the semantics, they simply move costs in order to achieve better performance for certain workloads.
     9Historically, computer performance was about processor speeds and instruction counts. However, with heat dissipation being a direct consequence of speed increase, parallelism has become the new source for increased performance~\cite{Sutter05, Sutter05b}. In this decade, it is no longer reasonable to create a high-performance application without caring about parallelism. Indeed, parallelism is an important aspect of performance and more specifically throughput and hardware utilization. The lowest-level approach of parallelism is to use \glspl{kthread} in combination with semantics like \code{fork}, \code{join}, etc. However, since these have significant costs and limitations, \glspl{kthread} are now mostly used as an implementation tool rather than a user oriented one. There are several alternatives to solve these issues that all have strengths and weaknesses. While there are many variations of the presented paradigms, most of these variations do not actually change the guarantees or the semantics, they simply move costs in order to achieve better performance for certain workloads.
    1010
    1111\section{Paradigms}
    1212\subsection{User-Level Threads}
    13 A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provides but can be used on a much larger scale. This approach is the most powerful solution as it allows all the features of multithreading, while removing several of the more expensive costs of kernel threads. The downside is that almost none of the low-level threading problems are hidden; users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong guarantees but the parallelism toolkit offers very little to reduce complexity in itself.
     13A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provides but can be used on a much larger scale. This approach is the most powerful solution as it allows all the features of multithreading, while removing several of the more expensive costs of kernel threads. The downside is that almost none of the low-level threading problems are hidden; users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong guarantees, but the parallelism toolkit offers very little to reduce complexity in itself.
    1414
    1515Examples of languages that support \glspl{uthread} are Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
    1616
    1717\subsection{Fibers : User-Level Threads Without Preemption} \label{fibers}
    18 A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantic differences with \glspl{uthread}. The significant difference between \glspl{uthread} and \glspl{fiber} is the lack of \gls{preemption} in the latter. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.
     18A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantic differences with \glspl{uthread}. The significant difference between \glspl{uthread} and \glspl{fiber} is the lack of \gls{preemption} in the latter. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths, but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.
    1919
    2020An example of a language that uses fibers is Go~\cite{Go}
     
    2626
    2727\subsection{Paradigm Performance}
    28 While the choice between the three paradigms listed above may have significant performance implication, it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantees that the \gls{pool}-based system has the best performance thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilization, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large enough the paradigm choice is largely amortized by the actual work done.
     28While the choice between the three paradigms listed above may have significant performance implications, it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantees equivalent performance across paradigms and that the \gls{pool}-based system has the best efficiency thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilization, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large, enough the paradigm choice is largely amortized by the actual work done.
    2929
    3030\section{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel}
    31 A \gls{cfacluster} is a group of \gls{kthread} executed in isolation. \Glspl{uthread} are scheduled on the \glspl{kthread} of a given \gls{cfacluster}, allowing organization between \glspl{uthread} and \glspl{kthread}. It is important that \glspl{kthread} belonging to a same \glspl{cfacluster} have homogeneous settings, otherwise migrating a \gls{uthread} from one \gls{kthread} to the other can cause issues. A \gls{cfacluster} also offers a plugable scheduler that can optimize the workload generated by the \glspl{uthread}.
     31A \gls{cfacluster} is a group of \glspl{kthread} executed in isolation. \Glspl{uthread} are scheduled on the \glspl{kthread} of a given \gls{cfacluster}, allowing organization between \glspl{uthread} and \glspl{kthread}. It is important that \glspl{kthread} belonging to a same \glspl{cfacluster} have homogeneous settings, otherwise migrating a \gls{uthread} from one \gls{kthread} to the other can cause issues. A \gls{cfacluster} also offers a pluggable scheduler that can optimize the workload generated by the \glspl{uthread}.
    3232
    33 \Glspl{cfacluster} have not been fully implemented in the context of this thesis, currently \CFA only supports one \gls{cfacluster}, the initial one.
     33\Glspl{cfacluster} have not been fully implemented in the context of this thesis. Currently \CFA only supports one \gls{cfacluster}, the initial one.
    3434
    3535\subsection{Future Work: Machine Setup}\label{machine}
    36 While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, a system using \acrshort{numa} configurations may benefit from users being able to tie clusters and\/or kernel threads to certain CPU cores. OS support for CPU affinity is now common~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx} which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
     36While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, a system using \acrshort{numa} configurations may benefit from users being able to tie clusters and/or kernel threads to certain CPU cores. OS support for CPU affinity is now common~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
    3737
    3838\subsection{Paradigms}\label{cfaparadigms}
  • doc/proposals/concurrency/text/results.tex

    r326cd2b r9cdfb4d0  
    3838
    3939\section{Micro Benchmarks}
    40 All benchmarks are run using the same harness to produce the results, seen as the \code{BENCH()} macro in the following examples. This macro uses the following logic to benchmark the code :
     40All benchmarks are run using the same harness to produce the results, seen as the \code{BENCH()} macro in the following examples. This macro uses the following logic to benchmark the code:
    4141\begin{pseudo}
    42 #define BENCH(run, result)
    43         before = gettime();
    44         run;
    45         after  = gettime();
     42#define BENCH(run, result) \
     43        before = gettime(); \
     44        run; \
     45        after  = gettime(); \
    4646        result = (after - before) / N;
    4747\end{pseudo}
     
    4949
    5050\subsection{Context-Switching}
    51 The first interesting benchmark is to measure how long context-switches take. The simplest approach to do this is to yield on a thread, which executes a 2-step context switch. In order to make the comparison fair, coroutines also execute a 2-step context-switch (\gls{uthread} to \gls{kthread} then \gls{kthread} to \gls{uthread}), which is a resume/suspend cycle instead of a yield. Listing \ref{lst:ctx-switch} shows the code for coroutines and threads with the results in table \ref{tab:ctx-switch}. All omitted tests are functionally identical to one of these tests.
     51The first interesting benchmark is to measure how long context-switches take. The simplest approach to do this is to yield on a thread, which executes a 2-step context switch. Yielding causes the thread to context-switch to the scheduler and back, more precisely: from the \gls{uthread} to the \gls{kthread} then from the \gls{kthread} back to the same \gls{uthread} (or a different one in the general case). In order to make the comparison fair, coroutines also execute a 2-step context-switch by resuming another coroutine which does nothing but suspending in a tight loop, which is a resume/suspend cycle instead of a yield. Listing \ref{lst:ctx-switch} shows the code for coroutines and threads with the results in table \ref{tab:ctx-switch}. All omitted tests are functionally identical to one of these tests. The difference between coroutines and threads can be attributed to the cost of scheduling.
    5252\begin{figure}
    5353\begin{multicols}{2}
     
    115115
    116116\subsection{Mutual-Exclusion}
    117 The next interesting benchmark is to measure the overhead to enter/leave a critical-section. For monitors, the simplest approach is to measure how long it takes to enter and leave a monitor routine. Listing \ref{lst:mutex} shows the code for \CFA. To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a pthread mutex lock is also measured. The results can be shown in table \ref{tab:mutex}.
     117The next interesting benchmark is to measure the overhead to enter/leave a critical-section. For monitors, the simplest approach is to measure how long it takes to enter and leave a monitor routine. Listing \ref{lst:mutex} shows the code for \CFA. To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a \code{pthread_mutex} lock is also measured. The results can be shown in table \ref{tab:mutex}.
    118118
    119119\begin{figure}
     
    199199\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
    200200\hline
     201Pthreads Condition Variable                     & 5902.5        & 6093.29       & 714.78 \\
    201202\uC \code{signal}                                       & 322           & 323   & 3.36   \\
    202203\CFA \code{signal}, 1 \code{monitor}    & 352.5 & 353.11        & 3.66   \\
     
    265266
    266267\subsection{Object Creation}
    267 Finally, the last benchmark measures the cost of creation for concurrent objects. Listing \ref{lst:creation} shows the code for pthreads and \CFA threads, with results shown in table \ref{tab:creation}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests. The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine, the creation cost is very low.
    268 
    269 \begin{figure}
    270 \begin{center}
    271 pthread
     268Finally, the last benchmark measures the cost of creation for concurrent objects. Listing \ref{lst:creation} shows the code for \texttt{pthread}s and \CFA threads, with results shown in table \ref{tab:creation}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests. The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine, the creation cost is very low.
     269
     270\begin{figure}
     271\begin{center}
     272\texttt{pthread}
    272273\begin{ccode}
    273274int main() {
     
    306307\end{cfacode}
    307308\end{center}
    308 \begin{cfacode}[caption={Benchmark code for pthreads and \CFA to measure object creation},label={lst:creation}]
     309\begin{cfacode}[caption={Benchmark code for \texttt{pthread}s and \CFA to measure object creation},label={lst:creation}]
    309310\end{cfacode}
    310311\end{figure}
  • doc/proposals/concurrency/text/together.tex

    r326cd2b r9cdfb4d0  
    77
    88\section{Threads As Monitors}
    9 As it was subtly alluded in section \ref{threads}, \code{thread}s in \CFA are in fact monitors, which means that all monitor features are available when using threads. For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine :
     9As it was subtly alluded in section \ref{threads}, \code{thread}s in \CFA are in fact monitors, which means that all monitor features are available when using threads. For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine:
    1010\begin{figure}[H]
    1111\begin{cfacode}[caption={Toy simulator using \code{thread}s and \code{monitor}s.},label={lst:engine-v1}]
     
    3838\end{cfacode}
    3939\end{figure}
    40 One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner :
     40One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner:
    4141\begin{figure}[H]
    4242\begin{cfacode}[caption={Same toy simulator with proper termination condition.},label={lst:engine-v2}]
  • doc/proposals/concurrency/thesis.tex

    r326cd2b r9cdfb4d0  
    2323\usepackage{calc}
    2424\usepackage{xspace}
     25\usepackage[labelformat=simple]{subfig}
     26\renewcommand{\thesubfigure}{(\alph{subfigure})}
    2527\usepackage{graphicx}
    2628\usepackage{tabularx}
     
    3436\usepackage[usenames]{color}
    3537\usepackage[pagewise]{lineno}
     38\renewcommand{\linenumberfont}{\scriptsize\sffamily}
    3639\usepackage{fancyhdr}
    3740\usepackage{float}
    38 \renewcommand{\linenumberfont}{\scriptsize\sffamily}
    3941\usepackage{siunitx}
    4042\sisetup{ binary-units=true }
    4143\input{style}                                                   % bespoke macros used in the document
     44\usepackage{url}
    4245\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
    4346\usepackage{breakurl}
     47\urlstyle{rm}
    4448
    4549\usepackage{tikz}
    4650\def\checkmark{\tikz\fill[scale=0.4](0,.35) -- (.25,0) -- (1,.7) -- (.25,.15) -- cycle;}
    47 
    48 \renewcommand{\UrlFont}{\small\sf}
    4951
    5052\setlength{\topmargin}{-0.45in}                         % move running title into header
     
    123125\input{future}
    124126
    125 \clearpage
    126 \printglossary[type=\acronymtype]
    127 \printglossary
    128127
    129128\clearpage
     129
     130% B I B L I O G R A P H Y
     131% -----------------------------
     132\addcontentsline{toc}{chapter}{Bibliography}
    130133\bibliographystyle{plain}
    131 \bibliography{cw92,distSharedMem,lfp92,mlw92,parallel,parallelIO,partheory,pl,pldi92,ps,realtime,techreportsPAB,visual,local}
     134\bibliography{pl,local}
     135\cleardoublepage
     136\phantomsection         % allows hyperref to link to the correct page
     137
     138% G L O S S A R Y
     139% -----------------------------
     140\addcontentsline{toc}{chapter}{Glossary}
     141\printglossary
     142\cleardoublepage
     143\phantomsection         % allows hyperref to link to the correct page
    132144
    133145
  • doc/proposals/concurrency/version

    r326cd2b r9cdfb4d0  
    1 0.11.299
     10.11.403
  • src/CodeGen/FixMain.cc

    r326cd2b r9cdfb4d0  
    2323
    2424#include "Common/SemanticError.h"  // for SemanticError
     25#include "CodeGen/GenType.h"       // for GenType
    2526#include "SynTree/Declaration.h"   // for FunctionDecl, operator<<
    2627#include "SynTree/Type.h"          // for FunctionType
     
    2930        bool FixMain::replace_main = false;
    3031        std::unique_ptr<FunctionDecl> FixMain::main_signature = nullptr;
     32
     33        template<typename container>
     34        std::string genTypeAt(const container& p, size_t idx) {
     35                return genType((*std::next(p.begin(), idx))->get_type(), "");
     36        }
    3137
    3238        void FixMain::registerMain(FunctionDecl* functionDecl)
     
    4349
    4450                        os << main_signature->get_scopedMangleName() << "(";
    45                         switch(main_signature->get_functionType()->get_parameters().size()) {
    46                                 case 3: os << "argc, argv, envp"; break;
    47                                 case 2: os << "argc, argv"; break;
     51                        const auto& params = main_signature->get_functionType()->get_parameters();
     52                        switch(params.size()) {
     53                                case 3: os << "(" << genTypeAt(params, 0) << ")argc, (" << genTypeAt(params, 1) << ")argv, (" << genTypeAt(params, 2) << ")envp"; break;
     54                                case 2: os << "(" << genTypeAt(params, 0) << ")argc, (" << genTypeAt(params, 1) << ")argv"; break;
    4855                                case 0: break;
    4956                                default : assert(false);
  • src/CodeTools/TrackLoc.cc

    r326cd2b r9cdfb4d0  
    6464                                }
    6565                                else {
    66                                         assertf( false, "Top level node has no CodeLocation %s", name.c_str() );
     66                                        assertf( false, "Top level node has no CodeLocation %s", toString( node ).c_str() );
    6767                                }
    6868                        }
  • src/Common/PassVisitor.h

    r326cd2b r9cdfb4d0  
    1919#include "SynTree/Expression.h"
    2020#include "SynTree/Constant.h"
    21 #include "SynTree/TypeSubstitution.h"
     21
     22class TypeSubstitution;
    2223
    2324#include "PassVisitor.proto.h"
     
    403404};
    404405
     406#include "SynTree/TypeSubstitution.h"
    405407#include "PassVisitor.impl.h"
  • src/Common/PassVisitor.impl.h

    r326cd2b r9cdfb4d0  
    6262
    6363template< typename pass_type >
    64 static inline void acceptAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& visitor ) {
     64inline void acceptAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& visitor ) {
    6565        DeclList_t* beforeDecls = visitor.get_beforeDecls();
    6666        DeclList_t* afterDecls  = visitor.get_afterDecls();
     
    9090
    9191template< typename pass_type >
    92 static inline void mutateAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& mutator ) {
     92inline void mutateAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& mutator ) {
    9393        DeclList_t* beforeDecls = mutator.get_beforeDecls();
    9494        DeclList_t* afterDecls  = mutator.get_afterDecls();
  • src/Concurrency/Keywords.cc

    r326cd2b r9cdfb4d0  
    257257        // Generic keyword implementation
    258258        //=============================================================================================
     259        void fixupGenerics(FunctionType * func, StructDecl * decl) {
     260                cloneAll(decl->parameters, func->forall);
     261                for ( TypeDecl * td : func->forall ) {
     262                        strict_dynamic_cast<StructInstType*>(
     263                                func->parameters.front()->get_type()->stripReferences()
     264                        )->parameters.push_back(
     265                                new TypeExpr( new TypeInstType( noQualifiers, td->name, td ) )
     266                        );
     267                }
     268        }
     269
    259270        void ConcurrentSueKeyword::postvisit(StructDecl * decl) {
    260271                if( decl->name == type_name && decl->body ) {
     
    301312                );
    302313
    303                 get_type->get_parameters().push_back( this_decl );
     314                get_type->get_parameters().push_back( this_decl->clone() );
    304315                get_type->get_returnVals().push_back(
    305316                        new ObjectDecl(
     
    318329                        )
    319330                );
     331                fixupGenerics(get_type, decl);
    320332
    321333                FunctionDecl * get_decl = new FunctionDecl(
     
    343355                                nullptr
    344356                        );
    345                 }
     357                        fixupGenerics(main_type, decl);
     358                }
     359
     360                delete this_decl;
    346361
    347362                declsToAddBefore.push_back( forward );
     
    377392                                        new MemberExpr(
    378393                                                field,
    379                                                 UntypedExpr::createDeref( new VariableExpr( func->get_functionType()->get_parameters().front() ) )
     394                                                new CastExpr(
     395                                                        new VariableExpr( func->get_functionType()->get_parameters().front() ),
     396                                                        func->get_functionType()->get_parameters().front()->get_type()->stripReferences()->clone()
     397                                                )
    380398                                        )
    381399                                )
  • src/ControlStruct/LabelFixer.cc

    r326cd2b r9cdfb4d0  
    4444
    4545        void LabelFixer::postvisit( FunctionDecl * functionDecl ) {
    46                 MLEMutator mlemut( resolveJumps(), generator );
     46                PassVisitor<MLEMutator> mlemut( resolveJumps(), generator );
    4747                functionDecl->acceptMutator( mlemut );
    4848        }
  • src/ControlStruct/MLEMutator.cc

    r326cd2b r9cdfb4d0  
    4646        void MLEMutator::fixBlock( std::list< Statement * > &kids ) {
    4747                for ( std::list< Statement * >::iterator k = kids.begin(); k != kids.end(); k++ ) {
    48                         *k = (*k)->acceptMutator(*this);
     48                        *k = (*k)->acceptMutator(*visitor);
    4949
    5050                        if ( ! get_breakLabel().empty() ) {
     
    5757        }
    5858
    59         CompoundStmt* MLEMutator::mutate( CompoundStmt *cmpndStmt ) {
    60                 bool labeledBlock = !(cmpndStmt->get_labels().empty());
     59        void MLEMutator::premutate( CompoundStmt *cmpndStmt ) {
     60                visit_children = false;
     61                bool labeledBlock = !(cmpndStmt->labels.empty());
    6162                if ( labeledBlock ) {
    6263                        Label brkLabel = generator->newLabel("blockBreak", cmpndStmt);
     
    6566
    6667                // a child statement may set the break label - if they do, attach it to the next statement
    67                 std::list< Statement * > &kids = cmpndStmt->get_kids();
     68                std::list< Statement * > &kids = cmpndStmt->kids;
    6869                fixBlock( kids );
    6970
     
    7576                        enclosingControlStructures.pop_back();
    7677                } // if
    77 
    78                 return cmpndStmt;
    79         }
    80 
    81         template< typename LoopClass >
    82         Statement *MLEMutator::handleLoopStmt( LoopClass *loopStmt ) {
    83                 // remember this as the most recent enclosing loop, then mutate the body of the loop -- this will determine
    84                 // whether brkLabel and contLabel are used with branch statements and will recursively do the same to nested
    85                 // loops
    86                 Label brkLabel = generator->newLabel("loopBreak", loopStmt);
    87                 Label contLabel = generator->newLabel("loopContinue", loopStmt);
    88                 enclosingControlStructures.push_back( Entry( loopStmt, brkLabel, contLabel ) );
    89                 loopStmt->set_body ( loopStmt->get_body()->acceptMutator( *this ) );
    90 
    91                 assert( ! enclosingControlStructures.empty() );
    92                 Entry &e = enclosingControlStructures.back();
    93                 // sanity check that the enclosing loops have been popped correctly
    94                 assert ( e == loopStmt );
    95 
    96                 // this will take the necessary steps to add definitions of the previous two labels, if they are used.
    97                 loopStmt->set_body( mutateLoop( loopStmt->get_body(), e ) );
    98                 enclosingControlStructures.pop_back();
    99 
    100                 return loopStmt;
    101         }
    102 
    103         Statement *MLEMutator::mutate( CaseStmt *caseStmt ) {
    104                 caseStmt->set_condition( maybeMutate( caseStmt->get_condition(), *this ) );
    105                 fixBlock( caseStmt->get_statements() );
    106 
    107                 return caseStmt;
    108         }
    109 
    110         template< typename IfClass >
    111         Statement *MLEMutator::handleIfStmt( IfClass *ifStmt ) {
    112                 // generate a label for breaking out of a labeled if
    113                 bool labeledBlock = !(ifStmt->get_labels().empty());
    114                 if ( labeledBlock ) {
    115                         Label brkLabel = generator->newLabel("blockBreak", ifStmt);
    116                         enclosingControlStructures.push_back( Entry( ifStmt, brkLabel ) );
    117                 } // if
    118 
    119                 Parent::mutate( ifStmt );
    120 
    121                 if ( labeledBlock ) {
    122                         if ( ! enclosingControlStructures.back().useBreakExit().empty() ) {
    123                                 set_breakLabel( enclosingControlStructures.back().useBreakExit() );
    124                         } // if
    125                         enclosingControlStructures.pop_back();
    126                 } // if
    127                 return ifStmt;
    128         }
    129 
    130         template< typename SwitchClass >
    131         Statement *MLEMutator::handleSwitchStmt( SwitchClass *switchStmt ) {
    132                 // generate a label for breaking out of a labeled switch
    133                 Label brkLabel = generator->newLabel("switchBreak", switchStmt);
    134                 enclosingControlStructures.push_back( Entry(switchStmt, brkLabel) );
    135                 mutateAll( switchStmt->get_statements(), *this );
    136 
    137                 Entry &e = enclosingControlStructures.back();
    138                 assert ( e == switchStmt );
    139 
    140                 // only generate break label if labeled break is used
    141                 if ( e.isBreakUsed() ) {
    142                         // for the purposes of keeping switch statements uniform (i.e. all statements that are direct children of a
    143                         // switch should be CastStmts), append the exit label + break to the last case statement; create a default
    144                         // case if there are no cases
    145                         std::list< Statement * > &statements = switchStmt->get_statements();
    146                         if ( statements.empty() ) {
    147                                 statements.push_back( CaseStmt::makeDefault() );
    148                         } // if
    149 
    150                         if ( CaseStmt * c = dynamic_cast< CaseStmt * >( statements.back() ) ) {
    151                                 Statement * stmt = new BranchStmt( Label("brkLabel"), BranchStmt::Break );
    152                                 stmt->labels.push_back( brkLabel );
    153                                 c->get_statements().push_back( stmt );
    154                         } else assert(0); // as of this point, all statements of a switch are still CaseStmts
    155                 } // if
    156 
    157                 assert ( enclosingControlStructures.back() == switchStmt );
    158                 enclosingControlStructures.pop_back();
    159                 return switchStmt;
    160         }
     78        }
     79
    16180
    16281        void addUnused( Statement * stmt, const Label & originalTarget ) {
     
    17998
    18099
    181         Statement *MLEMutator::mutate( BranchStmt *branchStmt ) throw ( SemanticError ) {
    182                 std::string originalTarget = branchStmt->get_originalTarget();
     100        Statement *MLEMutator::postmutate( BranchStmt *branchStmt ) throw ( SemanticError ) {
     101                std::string originalTarget = branchStmt->originalTarget;
    183102
    184103                std::list< Entry >::reverse_iterator targetEntry;
     
    215134                // branch error checks, get the appropriate label name and create a goto
    216135                Label exitLabel;
    217                 switch ( branchStmt->get_type() ) {
     136                switch ( branchStmt->type ) {
    218137                  case BranchStmt::Break:
    219138                                assert( targetEntry->useBreakExit() != "");
     
    229148
    230149                // add unused attribute to label to silence warnings
    231                 addUnused( targetEntry->get_controlStructure(), branchStmt->get_originalTarget() );
     150                addUnused( targetEntry->get_controlStructure(), branchStmt->originalTarget );
    232151
    233152                // transform break/continue statements into goto to simplify later handling of branches
     
    260179        }
    261180
    262         Statement *MLEMutator::mutate( WhileStmt *whileStmt ) {
    263                 return handleLoopStmt( whileStmt );
    264         }
    265 
    266         Statement *MLEMutator::mutate( ForStmt *forStmt ) {
    267                 return handleLoopStmt( forStmt );
    268         }
    269 
    270         Statement *MLEMutator::mutate( IfStmt *ifStmt ) {
    271                 return handleIfStmt( ifStmt );
    272         }
    273 
    274         Statement *MLEMutator::mutate( SwitchStmt *switchStmt ) {
    275                 return handleSwitchStmt( switchStmt );
     181        template< typename LoopClass >
     182        void MLEMutator::prehandleLoopStmt( LoopClass * loopStmt ) {
     183                // remember this as the most recent enclosing loop, then mutate the body of the loop -- this will determine
     184                // whether brkLabel and contLabel are used with branch statements and will recursively do the same to nested
     185                // loops
     186                Label brkLabel = generator->newLabel("loopBreak", loopStmt);
     187                Label contLabel = generator->newLabel("loopContinue", loopStmt);
     188                enclosingControlStructures.push_back( Entry( loopStmt, brkLabel, contLabel ) );
     189        }
     190
     191        template< typename LoopClass >
     192        Statement * MLEMutator::posthandleLoopStmt( LoopClass * loopStmt ) {
     193                assert( ! enclosingControlStructures.empty() );
     194                Entry &e = enclosingControlStructures.back();
     195                // sanity check that the enclosing loops have been popped correctly
     196                assert ( e == loopStmt );
     197
     198                // this will take the necessary steps to add definitions of the previous two labels, if they are used.
     199                loopStmt->set_body( mutateLoop( loopStmt->get_body(), e ) );
     200                enclosingControlStructures.pop_back();
     201                return loopStmt;
     202        }
     203
     204        void MLEMutator::premutate( WhileStmt * whileStmt ) {
     205                return prehandleLoopStmt( whileStmt );
     206        }
     207
     208        void MLEMutator::premutate( ForStmt * forStmt ) {
     209                return prehandleLoopStmt( forStmt );
     210        }
     211
     212        Statement * MLEMutator::postmutate( WhileStmt * whileStmt ) {
     213                return posthandleLoopStmt( whileStmt );
     214        }
     215
     216        Statement * MLEMutator::postmutate( ForStmt * forStmt ) {
     217                return posthandleLoopStmt( forStmt );
     218        }
     219
     220        void MLEMutator::premutate( IfStmt * ifStmt ) {
     221                // generate a label for breaking out of a labeled if
     222                bool labeledBlock = !(ifStmt->get_labels().empty());
     223                if ( labeledBlock ) {
     224                        Label brkLabel = generator->newLabel("blockBreak", ifStmt);
     225                        enclosingControlStructures.push_back( Entry( ifStmt, brkLabel ) );
     226                } // if
     227        }
     228
     229        Statement * MLEMutator::postmutate( IfStmt * ifStmt ) {
     230                bool labeledBlock = !(ifStmt->get_labels().empty());
     231                if ( labeledBlock ) {
     232                        if ( ! enclosingControlStructures.back().useBreakExit().empty() ) {
     233                                set_breakLabel( enclosingControlStructures.back().useBreakExit() );
     234                        } // if
     235                        enclosingControlStructures.pop_back();
     236                } // if
     237                return ifStmt;
     238        }
     239
     240        void MLEMutator::premutate( CaseStmt *caseStmt ) {
     241                visit_children = false;
     242                caseStmt->condition = maybeMutate( caseStmt->condition, *visitor );
     243                fixBlock( caseStmt->stmts );
     244        }
     245
     246        void MLEMutator::premutate( SwitchStmt *switchStmt ) {
     247                // generate a label for breaking out of a labeled switch
     248                Label brkLabel = generator->newLabel("switchBreak", switchStmt);
     249                enclosingControlStructures.push_back( Entry(switchStmt, brkLabel) );
     250        }
     251
     252        Statement * MLEMutator::postmutate( SwitchStmt * switchStmt ) {
     253                Entry &e = enclosingControlStructures.back();
     254                assert ( e == switchStmt );
     255
     256                // only generate break label if labeled break is used
     257                if ( e.isBreakUsed() ) {
     258                        // for the purposes of keeping switch statements uniform (i.e. all statements that are direct children of a
     259                        // switch should be CastStmts), append the exit label + break to the last case statement; create a default
     260                        // case if there are no cases
     261                        std::list< Statement * > &statements = switchStmt->statements;
     262                        if ( statements.empty() ) {
     263                                statements.push_back( CaseStmt::makeDefault() );
     264                        } // if
     265
     266                        if ( CaseStmt * c = dynamic_cast< CaseStmt * >( statements.back() ) ) {
     267                                Statement * stmt = new BranchStmt( Label("brkLabel"), BranchStmt::Break );
     268                                stmt->labels.push_back( e.useBreakExit() );
     269                                c->stmts.push_back( stmt );
     270                        } else assert(0); // as of this point, all statements of a switch are still CaseStmts
     271                } // if
     272
     273                assert ( enclosingControlStructures.back() == switchStmt );
     274                enclosingControlStructures.pop_back();
     275                return switchStmt;
    276276        }
    277277} // namespace ControlStruct
  • src/ControlStruct/MLEMutator.h

    r326cd2b r9cdfb4d0  
    2020#include <string>                  // for string
    2121
     22#include "Common/PassVisitor.h"
    2223#include "Common/SemanticError.h"  // for SemanticError
    2324#include "SynTree/Label.h"         // for Label
     
    2627
    2728namespace ControlStruct {
    28 class LabelGenerator;
     29        class LabelGenerator;
    2930
    30         class MLEMutator : public Mutator {
     31        class MLEMutator : public WithVisitorRef<MLEMutator>, public WithShortCircuiting {
    3132                class Entry;
    3233
    33                 typedef Mutator Parent;
    3434          public:
    3535                MLEMutator( std::map<Label, Statement *> *t, LabelGenerator *gen = 0 ) : targetTable( t ), breakLabel(std::string("")), generator( gen ) {}
    3636                ~MLEMutator();
    3737
    38                 virtual CompoundStmt *mutate( CompoundStmt *cmpndStmt ) override;
    39                 virtual Statement *mutate( WhileStmt *whileStmt ) override;
    40                 virtual Statement *mutate( ForStmt *forStmt ) override;
    41                 virtual Statement *mutate( BranchStmt *branchStmt ) throw ( SemanticError ) override;
    42                 virtual Statement *mutate( CaseStmt *caseStmt ) override;
    43                 virtual Statement *mutate( IfStmt *ifStmt ) override;
    44                 virtual Statement *mutate( SwitchStmt *switchStmt ) override;
     38                void premutate( CompoundStmt *cmpndStmt );
     39                Statement * postmutate( BranchStmt *branchStmt ) throw ( SemanticError );
     40                void premutate( WhileStmt *whileStmt );
     41                Statement * postmutate( WhileStmt *whileStmt );
     42                void premutate( ForStmt *forStmt );
     43                Statement * postmutate( ForStmt *forStmt );
     44                void premutate( CaseStmt *caseStmt );
     45                void premutate( IfStmt *ifStmt );
     46                Statement * postmutate( IfStmt *ifStmt );
     47                void premutate( SwitchStmt *switchStmt );
     48                Statement * postmutate( SwitchStmt *switchStmt );
    4549
    4650                Statement *mutateLoop( Statement *bodyLoop, Entry &e );
     
    5458                                loop( _loop ), breakExit( _breakExit ), contExit( _contExit ), breakUsed(false), contUsed(false) {}
    5559
    56                         bool operator==( const Statement *stmt ) { return ( loop == stmt ); }
    57                         bool operator!=( const Statement *stmt ) { return ( loop != stmt ); }
     60                        bool operator==( const Statement *stmt ) { return loop == stmt; }
     61                        bool operator!=( const Statement *stmt ) { return loop != stmt; }
    5862
    59                         bool operator==( const Entry &other ) { return ( loop == other.get_controlStructure() ); }
     63                        bool operator==( const Entry &other ) { return loop == other.get_controlStructure(); }
    6064
    6165                        Statement *get_controlStructure() const { return loop; }
     
    7882
    7983                template< typename LoopClass >
    80                 Statement *handleLoopStmt( LoopClass *loopStmt );
     84                void prehandleLoopStmt( LoopClass * loopStmt );
    8185
    82                 template< typename IfClass >
    83                 Statement *handleIfStmt( IfClass *switchStmt );
    84 
    85                 template< typename SwitchClass >
    86                 Statement *handleSwitchStmt( SwitchClass *switchStmt );
     86                template< typename LoopClass >
     87                Statement * posthandleLoopStmt( LoopClass * loopStmt );
    8788
    8889                void fixBlock( std::list< Statement * > &kids );
  • src/GenPoly/Box.cc

    r326cd2b r9cdfb4d0  
    302302        Expression *makeOp( const std::string &name, Expression *arg ) {
    303303                UntypedExpr *expr = new UntypedExpr( new NameExpr( name ) );
    304                 expr->get_args().push_back( arg );
     304                expr->args.push_back( arg );
    305305                return expr;
    306306        }
     
    309309        Expression *makeOp( const std::string &name, Expression *lhs, Expression *rhs ) {
    310310                UntypedExpr *expr = new UntypedExpr( new NameExpr( name ) );
    311                 expr->get_args().push_back( lhs );
    312                 expr->get_args().push_back( rhs );
     311                expr->args.push_back( lhs );
     312                expr->args.push_back( rhs );
    313313                return expr;
    314314        }
     
    316316        /// Returns the dereference of a local pointer variable
    317317        Expression *derefVar( ObjectDecl *var ) {
    318                 return makeOp( "*?", new VariableExpr( var ) );
     318                return UntypedExpr::createDeref( new VariableExpr( var ) );
    319319        }
    320320
     
    831831                                if ( ! isPolyType( arg->get_type() ) ) {
    832832                                        UntypedExpr *deref = new UntypedExpr( new NameExpr( "*?" ) );
    833                                         deref->get_args().push_back( new CastExpr( new VariableExpr( param ), new PointerType( Type::Qualifiers(), arg->get_type()->clone() ) ) );
    834                                         deref->set_result( arg->get_type()->clone() );
     833                                        deref->args.push_back( new CastExpr( new VariableExpr( param ), new PointerType( Type::Qualifiers(), arg->get_type()->clone() ) ) );
     834                                        deref->result = arg->get_type()->clone();
     835                                        deref->result->set_lvalue( true );
    835836                                        return deref;
    836837                                } // if
  • src/GenPoly/Lvalue.cc

    r326cd2b r9cdfb4d0  
    4747                        if ( SymTab::dereferenceOperator ) {
    4848                                VariableExpr * deref = new VariableExpr( SymTab::dereferenceOperator );
    49                                 deref->set_result( new PointerType( Type::Qualifiers(), deref->get_result() ) );
    50                                 Type * base = InitTweak::getPointerBase( arg->get_result() );
    51                                 assertf( base, "expected pointer type in dereference (type was %s)", toString( arg->get_result() ).c_str() );
     49                                deref->result = new PointerType( Type::Qualifiers(), deref->result );
     50                                Type * base = InitTweak::getPointerBase( arg->result );
     51                                assertf( base, "expected pointer type in dereference (type was %s)", toString( arg->result ).c_str() );
    5252                                ApplicationExpr * ret = new ApplicationExpr( deref, { arg } );
    53                                 delete ret->get_result();
    54                                 ret->set_result( base->clone() );
    55                                 ret->get_result()->set_lvalue( true );
     53                                delete ret->result;
     54                                ret->result = base->clone();
     55                                ret->result->set_lvalue( true );
    5656                                return ret;
    5757                        } else {
     
    308308                                        int diff = depth1-depth2;
    309309                                        if ( diff == 0 ) {
     310                                                // conversion between references of the same depth
    310311                                                assertf( depth1 == depth2, "non-intrinsic reference with cast of reference to reference not yet supported: %d %d %s", depth1, depth2, toString( castExpr ).c_str() );
    311312                                                PRINT( std::cerr << castExpr << std::endl; )
    312313                                                return castExpr;
    313314                                        } else if ( diff < 0 ) {
    314                                                 Expression * ret = castExpr->get_arg();
     315                                                // conversion from reference to reference with less depth (e.g. int && -> int &): add dereferences
     316                                                Expression * ret = castExpr->arg;
    315317                                                for ( int i = 0; i < diff; ++i ) {
    316318                                                        ret = mkDeref( ret );
    317319                                                }
    318                                                 ret->set_env( castExpr->get_env() );
    319                                                 delete ret->get_result();
    320                                                 ret->set_result( castExpr->get_result() );
    321                                                 castExpr->set_env( nullptr );
    322                                                 castExpr->set_arg( nullptr );
    323                                                 castExpr->set_result( nullptr );
     320                                                ret->env = castExpr->env;
     321                                                delete ret->result;
     322                                                ret->result = castExpr->result;
     323                                                ret->result->set_lvalue( true ); // ensure result is lvalue
     324                                                castExpr->env = nullptr;
     325                                                castExpr->arg = nullptr;
     326                                                castExpr->result = nullptr;
    324327                                                delete castExpr;
    325328                                                return ret;
    326329                                        } else if ( diff > 0 ) {
    327                                                 Expression * ret = castExpr->get_arg();
     330                                                // conversion from reference to reference with more depth (e.g. int & -> int &&): add address-of
     331                                                Expression * ret = castExpr->arg;
    328332                                                for ( int i = 0; i < diff; ++i ) {
    329333                                                        ret = new AddressExpr( ret );
    330334                                                }
    331                                                 ret->set_env( castExpr->get_env() );
    332                                                 delete ret->get_result();
    333                                                 ret->set_result( castExpr->get_result() );
    334                                                 castExpr->set_env( nullptr );
    335                                                 castExpr->set_arg( nullptr );
    336                                                 castExpr->set_result( nullptr );
     335                                                ret->env = castExpr->env;
     336                                                delete ret->result;
     337                                                ret->result = castExpr->result;
     338                                                castExpr->env = nullptr;
     339                                                castExpr->arg = nullptr;
     340                                                castExpr->result = nullptr;
    337341                                                delete castExpr;
    338342                                                return ret;
     
    342346                                        PRINT( std::cerr << castExpr << std::endl; )
    343347                                        return castExpr;
    344                                 } else if ( castExpr->get_arg()->get_result()->get_lvalue() ) {
     348                                } else if ( castExpr->arg->result->get_lvalue() ) {
    345349                                        // conversion from lvalue to reference
    346350                                        // xxx - keep cast, but turn into pointer cast??
     
    348352                                        PRINT(
    349353                                                std::cerr << "convert lvalue to reference -- &" << std::endl;
    350                                                 std::cerr << castExpr->get_arg() << std::endl;
     354                                                std::cerr << castExpr->arg << std::endl;
    351355                                        )
    352                                         AddressExpr * ret = new AddressExpr( castExpr->get_arg() );
    353                                         if ( refType->get_base()->get_qualifiers() != castExpr->get_arg()->get_result()->get_qualifiers() ) {
     356                                        AddressExpr * ret = new AddressExpr( castExpr->arg );
     357                                        if ( refType->base->get_qualifiers() != castExpr->arg->result->get_qualifiers() ) {
    354358                                                // must keep cast if cast-to type is different from the actual type
    355                                                 castExpr->set_arg( ret );
     359                                                castExpr->arg = ret;
    356360                                                return castExpr;
    357361                                        }
    358                                         ret->set_env( castExpr->get_env() );
    359                                         delete ret->get_result();
    360                                         ret->set_result( castExpr->get_result() );
    361                                         castExpr->set_env( nullptr );
    362                                         castExpr->set_arg( nullptr );
    363                                         castExpr->set_result( nullptr );
     362                                        ret->env = castExpr->env;
     363                                        delete ret->result;
     364                                        ret->result = castExpr->result;
     365                                        castExpr->env = nullptr;
     366                                        castExpr->arg = nullptr;
     367                                        castExpr->result = nullptr;
    364368                                        delete castExpr;
    365369                                        return ret;
     
    368372                                }
    369373                                assertf( false, "Only conversions to reference from lvalue are currently supported: %s", toString( castExpr ).c_str() );
    370                         } else if ( ReferenceType * refType = dynamic_cast< ReferenceType * >( castExpr->get_arg()->get_result() ) ) {
     374                        } else if ( ReferenceType * refType = dynamic_cast< ReferenceType * >( castExpr->arg->result ) ) {
    371375                                (void)refType;
    372376                                // conversion from reference to rvalue
     
    375379                                        std::cerr << "was = " << castExpr << std::endl;
    376380                                )
    377                                 Expression * ret = castExpr->get_arg();
    378                                 TypeSubstitution * env = castExpr->get_env();
     381                                Expression * ret = castExpr->arg;
     382                                TypeSubstitution * env = castExpr->env;
    379383                                castExpr->set_env( nullptr );
    380384                                if ( ! isIntrinsicReference( ret ) ) {
     
    382386                                        ret = mkDeref( ret );
    383387                                }
    384                                 if ( ResolvExpr::typesCompatibleIgnoreQualifiers( castExpr->get_result(), castExpr->get_arg()->get_result()->stripReferences(), SymTab::Indexer() ) ) {
     388                                if ( ResolvExpr::typesCompatibleIgnoreQualifiers( castExpr->result, castExpr->arg->result->stripReferences(), SymTab::Indexer() ) ) {
    385389                                        // can remove cast if types are compatible, changing expression type to value type
    386                                         ret->set_result( castExpr->get_result()->clone() );
    387                                         castExpr->set_arg( nullptr );
     390                                        ret->result = castExpr->result->clone();
     391                                        ret->result->set_lvalue( true );  // ensure result is lvalue
     392                                        castExpr->arg = nullptr;
    388393                                        delete castExpr;
    389394                                } else {
    390395                                        // must keep cast if types are different
    391                                         castExpr->set_arg( ret );
     396                                        castExpr->arg = ret;
    392397                                        ret = castExpr;
    393398                                }
  • src/GenPoly/ScrubTyVars.cc

    r326cd2b r9cdfb4d0  
    2525
    2626namespace GenPoly {
    27         Type * ScrubTyVars::mutate( TypeInstType *typeInst ) {
     27        Type * ScrubTyVars::postmutate( TypeInstType * typeInst ) {
    2828                if ( ! tyVars ) {
    2929                        if ( typeInst->get_isFtype() ) {
     
    3131                                return new PointerType( Type::Qualifiers(), new FunctionType( Type::Qualifiers(), true ) );
    3232                        } else {
    33                                 PointerType *ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) );
     33                                PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) );
    3434                                delete typeInst;
    3535                                return ret;
     
    3737                }
    3838
    39                 TyVarMap::const_iterator tyVar = tyVars->find( typeInst->get_name() );
     39                TyVarMap::const_iterator tyVar = tyVars->find( typeInst->name );
    4040                if ( tyVar != tyVars->end() ) {
    4141                        switch ( tyVar->second.kind ) {
     
    4343                          case TypeDecl::Ttype:
    4444                                {
    45                                         PointerType *ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) );
     45                                        PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) );
    4646                                        delete typeInst;
    4747                                        return ret;
     
    5555        }
    5656
    57         Type * ScrubTyVars::mutateAggregateType( Type *ty ) {
     57        Type * ScrubTyVars::mutateAggregateType( Type * ty ) {
    5858                if ( shouldScrub( ty ) ) {
    59                         PointerType *ret = new PointerType( Type::Qualifiers(), new VoidType( ty->get_qualifiers() ) );
     59                        PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( ty->get_qualifiers() ) );
    6060                        delete ty;
    6161                        return ret;
     
    6464        }
    6565
    66         Type * ScrubTyVars::mutate( StructInstType *structInst ) {
     66        Type * ScrubTyVars::postmutate( StructInstType * structInst ) {
    6767                return mutateAggregateType( structInst );
    6868        }
    6969
    70         Type * ScrubTyVars::mutate( UnionInstType *unionInst ) {
     70        Type * ScrubTyVars::postmutate( UnionInstType * unionInst ) {
    7171                return mutateAggregateType( unionInst );
    7272        }
    7373
    74         Expression * ScrubTyVars::mutate( SizeofExpr *szeof ) {
     74        void ScrubTyVars::primeBaseScrub( Type * type ) {
     75                // need to determine whether type needs to be scrubbed to determine whether
     76                // automatic recursion is necessary
     77                if ( Type * t = shouldScrub( type ) ) {
     78                        visit_children = false;
     79                        GuardValue( dynType );
     80                        dynType = t;
     81                }
     82        }
     83
     84        Expression * ScrubTyVars::postmutate( SizeofExpr * szeof ) {
    7585                // sizeof( T ) => _sizeof_T parameter, which is the size of T
    76                 if ( Type *dynType = shouldScrub( szeof->get_type() ) ) {
     86                if ( dynType ) {
    7787                        Expression *expr = new NameExpr( sizeofName( mangleType( dynType ) ) );
    7888                        return expr;
    79                 } else {
    80                         return Mutator::mutate( szeof );
    8189                } // if
     90                return szeof;
    8291        }
    8392
    84         Expression * ScrubTyVars::mutate( AlignofExpr *algnof ) {
     93        Expression * ScrubTyVars::postmutate( AlignofExpr * algnof ) {
    8594                // alignof( T ) => _alignof_T parameter, which is the alignment of T
    86                 if ( Type *dynType = shouldScrub( algnof->get_type() ) ) {
     95                if ( dynType ) {
    8796                        Expression *expr = new NameExpr( alignofName( mangleType( dynType ) ) );
    8897                        return expr;
    89                 } else {
    90                         return Mutator::mutate( algnof );
    9198                } // if
     99                return algnof;
    92100        }
    93101
    94         Type * ScrubTyVars::mutate( PointerType *pointer ) {
    95 //              // special case of shouldScrub that takes all TypeInstType pointer bases, even if they're not dynamic
    96 //              Type *base = pointer->get_base();
    97 //              Type *dynType = 0;
    98 //              if ( dynamicOnly ) {
    99 //                      if ( TypeInstType *typeInst = dynamic_cast< TypeInstType* >( base ) ) {
    100 //                              if ( tyVars.find( typeInst->get_name() ) != tyVars.end() ) { dynType = typeInst; }
    101 //                      } else {
    102 //                              dynType = isDynType( base, tyVars );
    103 //                      }
    104 //              } else {
    105 //                      dynType = isPolyType( base, tyVars );
    106 //              }
    107 //              if ( dynType ) {
    108                 if ( Type *dynType = shouldScrub( pointer->get_base() ) ) {
    109                         Type *ret = dynType->acceptMutator( *this );
     102        Type * ScrubTyVars::postmutate( PointerType * pointer ) {
     103                if ( dynType ) {
     104                        Type * ret = dynType->acceptMutator( *visitor );
    110105                        ret->get_qualifiers() |= pointer->get_qualifiers();
    111                         pointer->set_base( 0 );
     106                        pointer->base = nullptr;
    112107                        delete pointer;
    113108                        return ret;
    114109                }
    115                 return Mutator::mutate( pointer );
     110                return pointer;
    116111        }
    117112} // namespace GenPoly
  • src/GenPoly/ScrubTyVars.h

    r326cd2b r9cdfb4d0  
    1818#include <cassert>            // for assert
    1919
     20#include "Common/PassVisitor.h"
    2021#include "GenPoly.h"          // for TyVarMap, isPolyType, isDynType
    2122#include "SynTree/Mutator.h"  // for Mutator
     
    2728
    2829namespace GenPoly {
    29         class ScrubTyVars : public Mutator {
     30        struct ScrubTyVars : public WithVisitorRef<ScrubTyVars>, public WithShortCircuiting, public WithGuards {
    3031                /// Whether to scrub all type variables from the provided map, dynamic type variables from the provided map, or all type variables
    3132                enum ScrubMode { FromMap, DynamicFromMap, All };
     
    5152                static SynTreeClass *scrubAll( SynTreeClass *target );
    5253
    53                 virtual Type* mutate( TypeInstType *typeInst );
    54                 virtual Type* mutate( StructInstType *structInst );
    55                 virtual Type* mutate( UnionInstType *unionInst );
    56                 virtual Expression* mutate( SizeofExpr *szeof );
    57                 virtual Expression* mutate( AlignofExpr *algnof );
    58                 virtual Type* mutate( PointerType *pointer );
     54                /// determine if children should be visited based on whether base type should be scrubbed.
     55                void primeBaseScrub( Type * );
     56
     57                void premutate( TypeInstType * ) { visit_children = false; }
     58                void premutate( StructInstType * ) { visit_children = false; }
     59                void premutate( UnionInstType * ) { visit_children = false; }
     60                void premutate( SizeofExpr * szeof ) { primeBaseScrub( szeof->type ); }
     61                void premutate( AlignofExpr * algnof ) { primeBaseScrub( algnof->type ); }
     62                void premutate( PointerType * pointer ) { primeBaseScrub( pointer->base ); }
     63
     64                Type * postmutate( TypeInstType * typeInst );
     65                Type * postmutate( StructInstType * structInst );
     66                Type * postmutate( UnionInstType * unionInst );
     67                Expression * postmutate( SizeofExpr * szeof );
     68                Expression * postmutate( AlignofExpr * algnof );
     69                Type * postmutate( PointerType * pointer );
    5970
    6071          private:
     
    7586                const TyVarMap *tyVars;  ///< Type variables to scrub
    7687                ScrubMode mode;          ///< which type variables to scrub? [FromMap]
     88
     89                Type * dynType = nullptr; ///< result of shouldScrub
    7790        };
    7891
    7992        template< typename SynTreeClass >
    8093        SynTreeClass * ScrubTyVars::scrub( SynTreeClass *target, const TyVarMap &tyVars ) {
    81                 ScrubTyVars scrubber( tyVars );
     94                PassVisitor<ScrubTyVars> scrubber( tyVars );
    8295                return static_cast< SynTreeClass * >( target->acceptMutator( scrubber ) );
    8396        }
     
    8598        template< typename SynTreeClass >
    8699        SynTreeClass * ScrubTyVars::scrubDynamic( SynTreeClass *target, const TyVarMap &tyVars ) {
    87                 ScrubTyVars scrubber( tyVars, ScrubTyVars::DynamicFromMap );
     100                PassVisitor<ScrubTyVars> scrubber( tyVars, ScrubTyVars::DynamicFromMap );
    88101                return static_cast< SynTreeClass * >( target->acceptMutator( scrubber ) );
    89102        }
     
    91104        template< typename SynTreeClass >
    92105        SynTreeClass * ScrubTyVars::scrubAll( SynTreeClass *target ) {
    93                 ScrubTyVars scrubber;
     106                PassVisitor<ScrubTyVars> scrubber;
    94107                return static_cast< SynTreeClass * >( target->acceptMutator( scrubber ) );
    95108        }
  • src/InitTweak/FixInit.cc

    r326cd2b r9cdfb4d0  
    55// file "LICENCE" distributed with Cforall.
    66//
    7 // FixInit.h --
     7// FixInit.cc --
    88//
    99// Author           : Rob Schluntz
     
    197197                };
    198198
    199                 struct GenStructMemberCalls final : public WithGuards, public WithShortCircuiting, public WithIndexer {
     199                struct GenStructMemberCalls final : public WithGuards, public WithShortCircuiting, public WithIndexer, public WithVisitorRef<GenStructMemberCalls> {
    200200                        /// generate default/copy ctor and dtor calls for user-defined struct ctor/dtors
    201201                        /// for any member that is missing a corresponding ctor/dtor call.
     
    203203                        static void generate( std::list< Declaration * > & translationUnit );
    204204
    205                         void previsit( FunctionDecl * funcDecl );
    206                         void postvisit( FunctionDecl * funcDecl );
    207 
    208                         void previsit( MemberExpr * memberExpr );
    209                         void previsit( ApplicationExpr * appExpr );
     205                        void premutate( FunctionDecl * funcDecl );
     206                        DeclarationWithType * postmutate( FunctionDecl * funcDecl );
     207
     208                        void premutate( MemberExpr * memberExpr );
     209                        void premutate( ApplicationExpr * appExpr );
     210
     211                        /// Note: this post mutate used to be in a separate visitor. If this pass breaks, one place to examine is whether it is
     212                        /// okay for this part of the recursion to occur alongside the rest.
     213                        Expression * postmutate( UntypedExpr * expr );
    210214
    211215                        SemanticError errors;
     
    220224                        bool isCtor = false; // true if current function is a constructor
    221225                        StructDecl * structDecl = nullptr;
    222                 };
    223 
    224                 // very simple resolver-like mutator class - used to
    225                 // resolve UntypedExprs that are found within newly
    226                 // generated constructor/destructor calls
    227                 class MutatingResolver final : public Mutator {
    228                   public:
    229                         MutatingResolver( SymTab::Indexer & indexer ) : indexer( indexer ) {}
    230 
    231                         using Mutator::mutate;
    232                         virtual DeclarationWithType* mutate( ObjectDecl *objectDecl ) override;
    233                         virtual Expression* mutate( UntypedExpr *untypedExpr ) override;
    234 
    235                   private:
    236                         SymTab::Indexer & indexer;
    237226                };
    238227
     
    315304                void GenStructMemberCalls::generate( std::list< Declaration * > & translationUnit ) {
    316305                        PassVisitor<GenStructMemberCalls> warner;
    317                         acceptAll( translationUnit, warner );
     306                        mutateAll( translationUnit, warner );
    318307                }
    319308
     
    365354                        // arrays are not copy constructed, so this should always be an ExprStmt
    366355                        ImplicitCtorDtorStmt * stmt = genCtorDtor( fname, var, cpArg );
    367                         ExprStmt * exprStmt = strict_dynamic_cast< ExprStmt * >( stmt->get_callStmt() );
     356                        assertf( stmt, "ResolveCopyCtors: genCtorDtor returned nullptr: %s / %s / %s", fname.c_str(), toString( var ).c_str(), toString( cpArg ).c_str() );
     357                        ExprStmt * exprStmt = strict_dynamic_cast< ExprStmt * >( stmt->callStmt );
    368358                        Expression * resolved = exprStmt->expr;
    369359                        exprStmt->expr = nullptr; // take ownership of expr
     
    382372                        } // if
    383373                        delete stmt;
     374                        if ( TupleAssignExpr * assign = dynamic_cast< TupleAssignExpr * >( resolved ) ) {
     375                                // fix newly generated StmtExpr
     376                                postvisit( assign->stmtExpr );
     377                        }
    384378                        return resolved;
    385379                }
     
    475469                                static UniqueName retNamer("_tmp_stmtexpr_ret");
    476470
    477                                 // create variable that will hold the result of the stmt expr
    478471                                result = result->clone();
    479472                                env->apply( result );
     473                                if ( ! InitTweak::isConstructable( result ) ) {
     474                                        delete result;
     475                                        return;
     476                                }
     477
     478                                // create variable that will hold the result of the stmt expr
    480479                                ObjectDecl * ret = ObjectDecl::newObject( retNamer.newName(), result, nullptr );
    481                                 ret->get_type()->set_const( false );
    482                                 stmtExpr->get_returnDecls().push_front( ret );
     480                                ret->type->set_const( false );
     481                                stmtExpr->returnDecls.push_front( ret );
    483482
    484483                                // must have a non-empty body, otherwise it wouldn't have a result
    485                                 CompoundStmt * body = stmtExpr->get_statements();
     484                                CompoundStmt * body = stmtExpr->statements;
    486485                                assert( ! body->get_kids().empty() );
    487486                                // must be an ExprStmt, otherwise it wouldn't have a result
    488487                                ExprStmt * last = strict_dynamic_cast< ExprStmt * >( body->get_kids().back() );
    489                                 last->set_expr( makeCtorDtor( "?{}", ret, last->get_expr() ) );
    490 
    491                                 stmtExpr->get_dtors().push_front( makeCtorDtor( "^?{}", ret ) );
     488                                last->expr = makeCtorDtor( "?{}", ret, last->get_expr() );
     489
     490                                stmtExpr->dtors.push_front( makeCtorDtor( "^?{}", ret ) );
    492491                        } // if
    493492                }
     
    590589                        // to the outer context, rather than inside of the statement expression.
    591590                        visit_children = false;
    592                         std::list< Statement * > & stmts = stmtExpr->get_statements()->get_kids();
     591                        std::list< Statement * > & stmts = stmtExpr->statements->get_kids();
    593592                        for ( Statement *& stmt : stmts ) {
    594593                                stmt = stmt->acceptMutator( *visitor );
    595594                        } // for
    596                         assert( stmtExpr->get_result() );
    597                         Type * result = stmtExpr->get_result();
     595                        assert( stmtExpr->result );
     596                        Type * result = stmtExpr->result;
    598597                        if ( ! result->isVoid() ) {
    599                                 for ( ObjectDecl * obj : stmtExpr->get_returnDecls() ) {
     598                                for ( ObjectDecl * obj : stmtExpr->returnDecls ) {
    600599                                        stmtsToAddBefore.push_back( new DeclStmt( obj ) );
    601600                                } // for
    602601                                // add destructors after current statement
    603                                 for ( Expression * dtor : stmtExpr->get_dtors() ) {
     602                                for ( Expression * dtor : stmtExpr->dtors ) {
    604603                                        stmtsToAddAfter.push_back( new ExprStmt( dtor ) );
    605604                                } // for
    606605                                // must have a non-empty body, otherwise it wouldn't have a result
    607606                                assert( ! stmts.empty() );
    608                                 assert( ! stmtExpr->get_returnDecls().empty() );
    609                                 stmts.push_back( new ExprStmt( new VariableExpr( stmtExpr->get_returnDecls().front() ) ) );
    610                                 stmtExpr->get_returnDecls().clear();
    611                                 stmtExpr->get_dtors().clear();
    612                         }
    613                         assert( stmtExpr->get_returnDecls().empty() );
    614                         assert( stmtExpr->get_dtors().empty() );
     607                                assertf( ! stmtExpr->returnDecls.empty() || stmtExpr->dtors.empty(), "StmtExpr returns non-void, but no return decls: %s", toString( stmtExpr ).c_str() );
     608                                // if there is a return decl, add a use as the last statement; will not have return decl on non-constructable returns
     609                                if ( ! stmtExpr->returnDecls.empty() ) {
     610                                        stmts.push_back( new ExprStmt( new VariableExpr( stmtExpr->returnDecls.front() ) ) );
     611                                }
     612                                stmtExpr->returnDecls.clear();
     613                                stmtExpr->dtors.clear();
     614                        }
     615                        assert( stmtExpr->returnDecls.empty() );
     616                        assert( stmtExpr->dtors.empty() );
    615617                }
    616618
     
    937939                }
    938940
    939                 void GenStructMemberCalls::previsit( FunctionDecl * funcDecl ) {
     941                void GenStructMemberCalls::premutate( FunctionDecl * funcDecl ) {
    940942                        GuardValue( function );
    941943                        GuardValue( unhandled );
     
    971973                }
    972974
    973                 void GenStructMemberCalls::postvisit( FunctionDecl * funcDecl ) {
     975                DeclarationWithType * GenStructMemberCalls::postmutate( FunctionDecl * funcDecl ) {
    974976                        // remove the unhandled objects from usedUninit, because a call is inserted
    975977                        // to handle them - only objects that are later constructed are used uninitialized.
     
    10251027                                                Statement * callStmt = stmt.front();
    10261028
    1027                                                 MutatingResolver resolver( indexer );
    10281029                                                try {
    1029                                                         callStmt->acceptMutator( resolver );
     1030                                                        callStmt->acceptMutator( *visitor );
    10301031                                                        if ( isCtor ) {
    10311032                                                                function->get_statements()->push_front( callStmt );
     
    10431044                                throw errors;
    10441045                        }
     1046                        return funcDecl;
    10451047                }
    10461048
     
    10681070                }
    10691071
    1070                 void GenStructMemberCalls::previsit( ApplicationExpr * appExpr ) {
     1072                void GenStructMemberCalls::premutate( ApplicationExpr * appExpr ) {
    10711073                        if ( ! checkWarnings( function ) ) {
    10721074                                visit_children = false;
     
    10931095                }
    10941096
    1095                 void GenStructMemberCalls::previsit( MemberExpr * memberExpr ) {
     1097                void GenStructMemberCalls::premutate( MemberExpr * memberExpr ) {
    10961098                        if ( ! checkWarnings( function ) || ! isCtor ) {
    10971099                                visit_children = false;
     
    11211123                }
    11221124
    1123                 DeclarationWithType * MutatingResolver::mutate( ObjectDecl * objectDecl ) {
    1124                         // add object to the indexer assumes that there will be no name collisions
    1125                         // in generated code. If this changes, add mutate methods for entities with
    1126                         // scope and call {enter,leave}Scope explicitly.
    1127                         indexer.addId( objectDecl );
    1128                         return objectDecl;
    1129                 }
    1130 
    1131                 Expression * MutatingResolver::mutate( UntypedExpr * untypedExpr ) {
     1125                Expression * GenStructMemberCalls::postmutate( UntypedExpr * untypedExpr ) {
    11321126                        Expression * newExpr = untypedExpr;
    11331127                        ResolvExpr::findVoidExpression( newExpr, indexer );
  • src/InitTweak/GenInit.cc

    r326cd2b r9cdfb4d0  
    3030#include "InitTweak.h"             // for isConstExpr, InitExpander, checkIn...
    3131#include "Parser/LinkageSpec.h"    // for isOverridable, C
     32#include "ResolvExpr/Resolver.h"
    3233#include "SymTab/Autogen.h"        // for genImplicitCall, SizeType
    3334#include "SymTab/Mangler.h"        // for Mangler
     
    4041#include "SynTree/Type.h"          // for Type, ArrayType, Type::Qualifiers
    4142#include "SynTree/Visitor.h"       // for acceptAll, maybeAccept
     43#include "Tuples/Tuples.h"         // for maybeImpure
    4244
    4345namespace InitTweak {
     
    8991        };
    9092
    91         struct HoistArrayDimension final : public WithDeclsToAdd, public WithShortCircuiting, public WithGuards {
     93        struct HoistArrayDimension final : public WithDeclsToAdd, public WithShortCircuiting, public WithGuards, public WithIndexer {
    9294                /// hoist dimension from array types in object declaration so that it uses a single
    9395                /// const variable of type size_t, so that side effecting array dimensions are only
     
    104106                void premutate( FunctionType * ) { visit_children = false; }
    105107
     108                // need this so that enumerators are added to the indexer, due to premutate(AggregateDecl *)
     109                void premutate( EnumDecl * ) {}
     110
    106111                void hoist( Type * type );
    107112
     
    135140                                if ( varExpr->var == retVal ) return;
    136141                        }
    137                         stmtsToAddBefore.push_back( genCtorDtor( "?{}", retVal, returnStmt->get_expr() ) );
     142                        Statement * stmt = genCtorDtor( "?{}", retVal, returnStmt->expr );
     143                        assertf( stmt, "ReturnFixer: genCtorDtor returned nullptr: %s / %s", toString( retVal ).c_str(), toString( returnStmt->expr ).c_str() );
     144                        stmtsToAddBefore.push_back( stmt );
    138145
    139146                        // return the retVal object
     
    178185                        if ( ! arrayType->get_dimension() ) return; // xxx - recursive call to hoist?
    179186
    180                         // don't need to hoist dimension if it's a constexpr - only need to if there's potential for side effects.
    181                         if ( isConstExpr( arrayType->get_dimension() ) ) return;
     187                        // need to resolve array dimensions in order to accurately determine if constexpr
     188                        ResolvExpr::findSingleExpression( arrayType->dimension, SymTab::SizeType->clone(), indexer );
     189                        // array is variable-length when the dimension is not constexpr
     190                        arrayType->isVarLen = ! isConstExpr( arrayType->dimension );
     191                        // don't need to hoist dimension if it's definitely pure - only need to if there's potential for side effects.
     192                        if ( ! Tuples::maybeImpure( arrayType->dimension ) ) return;
    182193
    183194                        ObjectDecl * arrayDimension = new ObjectDecl( dimensionName.newName(), storageClasses, LinkageSpec::C, 0, SymTab::SizeType->clone(), new SingleInit( arrayType->get_dimension() ) );
     
    194205        void HoistArrayDimension::premutate( FunctionDecl * ) {
    195206                GuardValue( inFunction );
     207                inFunction = true;
    196208        }
    197209
     
    220232                Type * type = objDecl->get_type();
    221233                while ( ArrayType * at = dynamic_cast< ArrayType * >( type ) ) {
     234                        // must always construct VLAs with an initializer, since this is an error in C
     235                        if ( at->isVarLen && objDecl->init ) return true;
    222236                        type = at->get_base();
    223237                }
  • src/InitTweak/InitTweak.cc

    r326cd2b r9cdfb4d0  
    564564                void previsit( ConstantExpr * ) {}
    565565
     566                void previsit( VariableExpr * varExpr ) {
     567                        visit_children = false;
     568
     569                        if ( EnumInstType * inst = dynamic_cast< EnumInstType * >( varExpr->result ) ) {
     570                                long long int value;
     571                                if ( inst->baseEnum->valueOf( varExpr->var, value ) ) {
     572                                        // enumerators are const expr
     573                                        return;
     574                                }
     575                        }
     576                        isConstExpr = false;
     577                }
     578
    566579                bool isConstExpr = true;
    567580        };
  • src/Parser/DeclarationNode.cc

    r326cd2b r9cdfb4d0  
    343343        DeclarationNode * newnode = new DeclarationNode;
    344344        newnode->type = new TypeData( kind == OperKinds::PointTo ? TypeData::Pointer : TypeData::Reference );
     345        if ( kind == OperKinds::And ) {
     346                // T && is parsed as 'And' operator rather than two references => add a second reference type
     347                TypeData * td = new TypeData( TypeData::Reference );
     348                td->base = newnode->type;
     349                newnode->type = td;
     350        }
    345351        if ( qualifiers ) {
    346352                return newnode->addQualifiers( qualifiers );
  • src/Parser/parser.yy

    r326cd2b r9cdfb4d0  
    1010// Created On       : Sat Sep  1 20:22:55 2001
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Nov 27 17:23:35 2017
    13 // Update Count     : 2992
     12// Last Modified On : Thu Dec 21 11:32:56 2017
     13// Update Count     : 2996
    1414//
    1515
     
    317317%type<decl> cfa_typedef_declaration cfa_variable_declaration cfa_variable_specifier
    318318
    319 %type<decl> c_declaration
     319%type<decl> c_declaration static_assert
    320320%type<decl> KR_function_declarator KR_function_no_ptr KR_function_ptr KR_function_array
    321321%type<decl> KR_declaration_list KR_declaration_list_opt
     
    835835        | exception_statement
    836836        | asm_statement
     837        ;
    837838
    838839labeled_statement:
     
    12821283        c_declaration pop ';'
    12831284        | cfa_declaration pop ';'                                                       // CFA
    1284         | STATICASSERT '(' constant_expression ',' string_literal ')' ';' // C11
     1285        | static_assert
     1286        ;
     1287
     1288static_assert:
     1289        STATICASSERT '(' constant_expression ',' string_literal ')' ';' // C11
    12851290                { throw SemanticError("Static assert is currently unimplemented."); $$ = nullptr; }     // FIX ME
    1286         ;
    12871291
    12881292// C declaration syntax is notoriously confusing and error prone. Cforall provides its own type, variable and function
     
    18901894                        $$ = distAttr( $2, $3 );
    18911895                }
     1896        | static_assert
    18921897        ;
    18931898
  • src/ResolvExpr/CastCost.cc

    r326cd2b r9cdfb4d0  
    3131
    3232namespace ResolvExpr {
    33         class CastCost : public ConversionCost {
     33        struct CastCost : public ConversionCost {
    3434          public:
    35                 CastCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env );
     35                CastCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env, CostFunction costFunc );
    3636
    37                 virtual void visit( BasicType *basicType );
    38                 virtual void visit( PointerType *pointerType );
     37                using ConversionCost::previsit;
     38                using ConversionCost::postvisit;
     39                void postvisit( BasicType * basicType );
     40                void postvisit( PointerType * pointerType );
    3941        };
    4042
     
    5254                                // all typedefs should be gone by this point
    5355                                TypeDecl *type = strict_dynamic_cast< TypeDecl* >( namedType );
    54                                 if ( type->get_base() ) {
    55                                         return castCost( src, type->get_base(), indexer, env ) + Cost::safe;
     56                                if ( type->base ) {
     57                                        return castCost( src, type->base, indexer, env ) + Cost::safe;
    5658                                } // if
    5759                        } // if
     
    7476                } else if ( ReferenceType * refType = dynamic_cast< ReferenceType * > ( dest ) ) {
    7577                        PRINT( std::cerr << "conversionCost: dest is reference" << std::endl; )
    76                         return convertToReferenceCost( src, refType, indexer, env, [](Type * t1, Type * t2, const TypeEnvironment & env, const SymTab::Indexer & indexer) {
     78                        return convertToReferenceCost( src, refType, indexer, env, [](Type * t1, Type * t2, const SymTab::Indexer & indexer, const TypeEnvironment & env ) {
    7779                                return ptrsCastable( t1, t2, env, indexer );
    7880                        });
    7981                } else {
    80                         CastCost converter( dest, indexer, env );
     82                        PassVisitor<CastCost> converter( dest, indexer, env, castCost );
    8183                        src->accept( converter );
    82                         if ( converter.get_cost() == Cost::infinity ) {
     84                        if ( converter.pass.get_cost() == Cost::infinity ) {
    8385                                return Cost::infinity;
    8486                        } else {
    8587                                // xxx - why are we adding cost 0 here?
    86                                 return converter.get_cost() + Cost::zero;
     88                                return converter.pass.get_cost() + Cost::zero;
    8789                        } // if
    8890                } // if
    8991        }
    9092
    91         CastCost::CastCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env )
    92                 : ConversionCost( dest, indexer, env ) {
     93        CastCost::CastCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env, CostFunction costFunc )
     94                : ConversionCost( dest, indexer, env, costFunc ) {
    9395        }
    9496
    95         void CastCost::visit( BasicType *basicType ) {
     97        void CastCost::postvisit( BasicType *basicType ) {
    9698                PointerType *destAsPointer = dynamic_cast< PointerType* >( dest );
    9799                if ( destAsPointer && basicType->isInteger() ) {
     
    103105        }
    104106
    105         void CastCost::visit( PointerType *pointerType ) {
     107        void CastCost::postvisit( PointerType *pointerType ) {
    106108                if ( PointerType *destAsPtr = dynamic_cast< PointerType* >( dest ) ) {
    107                         if ( pointerType->get_qualifiers() <= destAsPtr->get_qualifiers() && typesCompatibleIgnoreQualifiers( pointerType->get_base(), destAsPtr->get_base(), indexer, env ) ) {
     109                        if ( pointerType->get_qualifiers() <= destAsPtr->get_qualifiers() && typesCompatibleIgnoreQualifiers( pointerType->base, destAsPtr->base, indexer, env ) ) {
    108110                                cost = Cost::safe;
    109111                        } else {
    110112                                TypeEnvironment newEnv( env );
    111                                 newEnv.add( pointerType->get_forall() );
    112                                 newEnv.add( pointerType->get_base()->get_forall() );
    113                                 int castResult = ptrsCastable( pointerType->get_base(), destAsPtr->get_base(), newEnv, indexer );
     113                                newEnv.add( pointerType->forall );
     114                                newEnv.add( pointerType->base->forall );
     115                                int castResult = ptrsCastable( pointerType->base, destAsPtr->base, newEnv, indexer );
    114116                                if ( castResult > 0 ) {
    115117                                        cost = Cost::safe;
  • src/ResolvExpr/CommonType.cc

    r326cd2b r9cdfb4d0  
    9191                        // special case where one type has a reference depth of 1 larger than the other
    9292                        if ( diff > 0 || diff < 0 ) {
     93                                // std::cerr << "reference depth diff: " << diff << std::endl;
    9394                                Type * result = nullptr;
    94                                 if ( ReferenceType * ref1 = dynamic_cast< ReferenceType * >( type1 ) ) {
     95                                ReferenceType * ref1 = dynamic_cast< ReferenceType * >( type1 );
     96                                ReferenceType * ref2 = dynamic_cast< ReferenceType * >( type2 );
     97                                if ( diff > 0 ) {
     98                                        // deeper on the left
     99                                        assert( ref1 );
     100                                        result = handleReference( ref1->base, type2, widenFirst, widenSecond, indexer, env, openVars );
     101                                } else {
     102                                        // deeper on the right
     103                                        assert( ref2 );
     104                                        result = handleReference( type1, ref2->base, widenFirst, widenSecond, indexer, env, openVars );
     105                                }
     106                                if ( result && ref1 ) {
    95107                                        // formal is reference, so result should be reference
    96                                         result = handleReference( ref1->base, type2, widenFirst, widenSecond, indexer, env, openVars );
    97                                         if ( result ) result = new ReferenceType( ref1->get_qualifiers(), result );
    98                                 } else {
    99                                         // formal is value, so result should be value
    100                                         ReferenceType * ref2 = strict_dynamic_cast< ReferenceType * > ( type2 );
    101                                         result = handleReference( type1, ref2->base, widenFirst, widenSecond, indexer, env, openVars );
     108                                        // std::cerr << "formal is reference; result should be reference" << std::endl;
     109                                        result = new ReferenceType( ref1->get_qualifiers(), result );
    102110                                }
    103111                                // std::cerr << "common type of reference [" << type1 << "] and [" << type2 << "] is [" << result << "]" << std::endl;
     
    180188        }
    181189
    182         void CommonType::visit( __attribute((unused)) VoidType *voidType ) {}
     190        void CommonType::visit( VoidType * ) {}
    183191
    184192        void CommonType::visit( BasicType *basicType ) {
     
    246254        }
    247255
    248         void CommonType::visit( __attribute((unused)) ArrayType *arrayType ) {}
     256        void CommonType::visit( ArrayType * ) {}
    249257
    250258        void CommonType::visit( ReferenceType *refType ) {
     
    283291        }
    284292
    285         void CommonType::visit( __attribute((unused)) FunctionType *functionType ) {}
    286         void CommonType::visit( __attribute((unused)) StructInstType *aggregateUseType ) {}
    287         void CommonType::visit( __attribute((unused)) UnionInstType *aggregateUseType ) {}
     293        void CommonType::visit( FunctionType * ) {}
     294        void CommonType::visit( StructInstType * ) {}
     295        void CommonType::visit( UnionInstType * ) {}
    288296
    289297        void CommonType::visit( EnumInstType *enumInstType ) {
     
    296304        }
    297305
    298         void CommonType::visit( __attribute((unused)) TraitInstType *aggregateUseType ) {
     306        void CommonType::visit( TraitInstType * ) {
    299307        }
    300308
     
    321329        }
    322330
    323         void CommonType::visit( __attribute((unused)) TupleType *tupleType ) {}
    324         void CommonType::visit( __attribute((unused)) VarArgsType *varArgsType ) {}
     331        void CommonType::visit( TupleType * ) {}
     332        void CommonType::visit( VarArgsType * ) {}
    325333
    326334        void CommonType::visit( ZeroType *zeroType ) {
  • src/ResolvExpr/ConversionCost.cc

    r326cd2b r9cdfb4d0  
    4444                        EqvClass eqvClass;
    4545                        NamedTypeDecl *namedType;
    46                         PRINT( std::cerr << "type inst " << destAsTypeInst->get_name(); )
    47                         if ( env.lookup( destAsTypeInst->get_name(), eqvClass ) ) {
     46                        PRINT( std::cerr << "type inst " << destAsTypeInst->name; )
     47                        if ( env.lookup( destAsTypeInst->name, eqvClass ) ) {
    4848                                if ( eqvClass.type ) {
    4949                                        return conversionCost( src, eqvClass.type, indexer, env );
     
    5151                                        return Cost::infinity;
    5252                                }
    53                         } else if ( ( namedType = indexer.lookupType( destAsTypeInst->get_name() ) ) ) {
     53                        } else if ( ( namedType = indexer.lookupType( destAsTypeInst->name ) ) ) {
    5454                                PRINT( std::cerr << " found" << std::endl; )
    5555                                TypeDecl *type = dynamic_cast< TypeDecl* >( namedType );
    5656                                // all typedefs should be gone by this point
    5757                                assert( type );
    58                                 if ( type->get_base() ) {
    59                                         return conversionCost( src, type->get_base(), indexer, env ) + Cost::safe;
     58                                if ( type->base ) {
     59                                        return conversionCost( src, type->base, indexer, env ) + Cost::safe;
    6060                                } // if
    6161                        } // if
     
    7777                } else if ( ReferenceType * refType = dynamic_cast< ReferenceType * > ( dest ) ) {
    7878                        PRINT( std::cerr << "conversionCost: dest is reference" << std::endl; )
    79                         return convertToReferenceCost( src, refType, indexer, env, [](Type * t1, Type * t2, const TypeEnvironment & env, const SymTab::Indexer &){
     79                        return convertToReferenceCost( src, refType, indexer, env, [](Type * t1, Type * t2, const SymTab::Indexer &, const TypeEnvironment & env ){
    8080                                return ptrsAssignable( t1, t2, env );
    8181                        });
    8282                } else {
    83                         ConversionCost converter( dest, indexer, env );
     83                        PassVisitor<ConversionCost> converter( dest, indexer, env, conversionCost );
    8484                        src->accept( converter );
    85                         if ( converter.get_cost() == Cost::infinity ) {
     85                        if ( converter.pass.get_cost() == Cost::infinity ) {
    8686                                return Cost::infinity;
    8787                        } else {
    88                                 return converter.get_cost() + Cost::zero;
     88                                return converter.pass.get_cost() + Cost::zero;
    8989                        } // if
    9090                } // if
     
    9292
    9393        Cost convertToReferenceCost( Type * src, Type * dest, int diff, const SymTab::Indexer & indexer, const TypeEnvironment & env, PtrsFunction func ) {
    94                 PRINT( std::cerr << "convert to reference cost... diff " << diff << std::endl; )
     94                PRINT( std::cerr << "convert to reference cost... diff " << diff << " " << src << " / " << dest << std::endl; )
    9595                if ( diff > 0 ) {
    9696                        // TODO: document this
    97                         Cost cost = convertToReferenceCost( strict_dynamic_cast< ReferenceType * >( src )->get_base(), dest, diff-1, indexer, env, func );
     97                        Cost cost = convertToReferenceCost( strict_dynamic_cast< ReferenceType * >( src )->base, dest, diff-1, indexer, env, func );
    9898                        cost.incReference();
    9999                        return cost;
    100100                } else if ( diff < -1 ) {
    101101                        // TODO: document this
    102                         Cost cost = convertToReferenceCost( src, strict_dynamic_cast< ReferenceType * >( dest )->get_base(), diff+1, indexer, env, func );
     102                        Cost cost = convertToReferenceCost( src, strict_dynamic_cast< ReferenceType * >( dest )->base, diff+1, indexer, env, func );
    103103                        cost.incReference();
    104104                        return cost;
     
    108108                        if ( srcAsRef && destAsRef ) { // pointer-like conversions between references
    109109                                PRINT( std::cerr << "converting between references" << std::endl; )
    110                                 if ( srcAsRef->get_base()->get_qualifiers() <= destAsRef->get_base()->get_qualifiers() && typesCompatibleIgnoreQualifiers( srcAsRef->get_base(), destAsRef->get_base(), indexer, env ) ) {
    111                                         return Cost::safe;
     110                                Type::Qualifiers tq1 = srcAsRef->base->get_qualifiers();
     111                                Type::Qualifiers tq2 = destAsRef->base->get_qualifiers();
     112                                if ( tq1 <= tq2 && typesCompatibleIgnoreQualifiers( srcAsRef->base, destAsRef->base, indexer, env ) ) {
     113                                        PRINT( std::cerr << " :: compatible and good qualifiers" << std::endl; )
     114                                        if ( tq1 == tq2 ) {
     115                                                // types are the same
     116                                                return Cost::zero;
     117                                        } else {
     118                                                // types are the same, except otherPointer has more qualifiers
     119                                                return Cost::safe;
     120                                        }
    112121                                } else {  // xxx - this discards reference qualifiers from consideration -- reducing qualifiers is a safe conversion; is this right?
    113                                         int assignResult = func( srcAsRef->get_base(), destAsRef->get_base(), env, indexer );
     122                                        int assignResult = func( srcAsRef->base, destAsRef->base, indexer, env );
    114123                                        PRINT( std::cerr << "comparing references: " << assignResult << " " << srcAsRef << " " << destAsRef << std::endl; )
    115124                                        if ( assignResult > 0 ) {
     
    121130                        } else {
    122131                                PRINT( std::cerr << "reference to rvalue conversion" << std::endl; )
    123                                 ConversionCost converter( dest, indexer, env );
     132                                PassVisitor<ConversionCost> converter( dest, indexer, env, conversionCost );
    124133                                src->accept( converter );
    125                                 return converter.get_cost();
     134                                return converter.pass.get_cost();
    126135                        } // if
    127136                } else {
     
    129138                        assert( diff == -1 && destAsRef );
    130139                        PRINT( std::cerr << "dest is: " << dest << " / src is: " << src << std::endl; )
    131                         if ( typesCompatibleIgnoreQualifiers( src, destAsRef->get_base(), indexer, env ) ) {
     140                        if ( typesCompatibleIgnoreQualifiers( src, destAsRef->base, indexer, env ) ) {
    132141                                PRINT( std::cerr << "converting compatible base type" << std::endl; )
    133142                                if ( src->get_lvalue() ) {
     
    137146                                        )
    138147                                        // lvalue-to-reference conversion:  cv lvalue T => cv T &
    139                                         if ( src->get_qualifiers() == destAsRef->get_base()->get_qualifiers() ) {
     148                                        if ( src->get_qualifiers() == destAsRef->base->get_qualifiers() ) {
    140149                                                return Cost::reference; // cost needs to be non-zero to add cast
    141                                         } if ( src->get_qualifiers() < destAsRef->get_base()->get_qualifiers() ) {
     150                                        } if ( src->get_qualifiers() < destAsRef->base->get_qualifiers() ) {
    142151                                                return Cost::safe; // cost needs to be higher than previous cast to differentiate adding qualifiers vs. keeping same
    143152                                        } else {
    144153                                                return Cost::unsafe;
    145154                                        } // if
    146                                 } else if ( destAsRef->get_base()->get_const() ) {
     155                                } else if ( destAsRef->base->get_const() ) {
    147156                                        PRINT( std::cerr << "rvalue to const ref conversion" << std::endl; )
    148157                                        // rvalue-to-const-reference conversion: T => const T &
     
    164173        }
    165174
    166         ConversionCost::ConversionCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env )
    167                 : dest( dest ), indexer( indexer ), cost( Cost::infinity ), env( env ) {
     175        ConversionCost::ConversionCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env, CostFunction costFunc )
     176                : dest( dest ), indexer( indexer ), cost( Cost::infinity ), env( env ), costFunc( costFunc ) {
    168177        }
    169178
     
    248257        };
    249258
    250         void ConversionCost::visit( __attribute((unused)) VoidType *voidType ) {
     259        void ConversionCost::postvisit( VoidType * ) {
    251260                cost = Cost::infinity;
    252261        }
    253262
    254         void ConversionCost::visit(BasicType *basicType) {
     263        void ConversionCost::postvisit(BasicType *basicType) {
    255264                if ( BasicType *destAsBasic = dynamic_cast< BasicType* >( dest ) ) {
    256265                        int tableResult = costMatrix[ basicType->get_kind() ][ destAsBasic->get_kind() ];
     
    269278        }
    270279
    271         void ConversionCost::visit( PointerType * pointerType ) {
     280        void ConversionCost::postvisit( PointerType * pointerType ) {
    272281                if ( PointerType *destAsPtr = dynamic_cast< PointerType* >( dest ) ) {
    273                         PRINT( std::cerr << pointerType << " ===> " << destAsPtr; )
    274                         Type::Qualifiers tq1 = pointerType->get_base()->get_qualifiers();
    275                         Type::Qualifiers tq2 = destAsPtr->get_base()->get_qualifiers();
    276                         if ( tq1 <= tq2 && typesCompatibleIgnoreQualifiers( pointerType->get_base(), destAsPtr->get_base(), indexer, env ) ) {
     282                        PRINT( std::cerr << pointerType << " ===> " << destAsPtr << std::endl; )
     283                        Type::Qualifiers tq1 = pointerType->base->get_qualifiers();
     284                        Type::Qualifiers tq2 = destAsPtr->base->get_qualifiers();
     285                        if ( tq1 <= tq2 && typesCompatibleIgnoreQualifiers( pointerType->base, destAsPtr->base, indexer, env ) ) {
     286                                PRINT( std::cerr << " :: compatible and good qualifiers" << std::endl; )
    277287                                if ( tq1 == tq2 ) {
    278288                                        // types are the same
     
    280290                                } else {
    281291                                        // types are the same, except otherPointer has more qualifiers
    282                                         PRINT( std::cerr << " :: compatible and good qualifiers" << std::endl; )
    283292                                        cost = Cost::safe;
    284293                                }
    285                         } else {  // xxx - this discards qualifiers from consideration -- reducing qualifiers is a safe conversion; is this right?
     294                        } else {
    286295                                int assignResult = ptrsAssignable( pointerType->base, destAsPtr->base, env );
    287296                                PRINT( std::cerr << " :: " << assignResult << std::endl; )
    288                                 if ( assignResult > 0 && pointerType->get_base()->get_qualifiers() <= destAsPtr->get_qualifiers() ) {
    289                                         cost = Cost::safe;
     297                                if ( assignResult > 0 && tq1 <= tq2 ) {
     298                                        // xxx - want the case where qualifiers are added to be more expensive than the case where qualifiers are the same. Is 1 safe vs. 2 safe correct?
     299                                        if ( tq1 == tq2 ) {
     300                                                cost = Cost::safe;
     301                                        } else if ( tq1 < tq2 ) {
     302                                                cost = Cost::safe+Cost::safe;
     303                                        }
    290304                                } else if ( assignResult < 0 ) {
    291305                                        cost = Cost::unsafe;
     
    298312        }
    299313
    300         void ConversionCost::visit( ArrayType * ) {}
    301 
    302         void ConversionCost::visit( ReferenceType * refType ) {
     314        void ConversionCost::postvisit( ArrayType * ) {}
     315
     316        void ConversionCost::postvisit( ReferenceType * refType ) {
    303317                // Note: dest can never be a reference, since it would have been caught in an earlier check
    304318                assert( ! dynamic_cast< ReferenceType * >( dest ) );
     
    306320                // recursively compute conversion cost from T1 to T2.
    307321                // cv can be safely dropped because of 'implicit dereference' behavior.
    308                 refType->base->accept( *this );
     322                cost = costFunc( refType->base, dest, indexer, env );
    309323                if ( refType->base->get_qualifiers() == dest->get_qualifiers() ) {
    310324                        cost.incReference();  // prefer exact qualifiers
     
    317331        }
    318332
    319         void ConversionCost::visit( FunctionType * ) {}
    320 
    321         void ConversionCost::visit( StructInstType * inst ) {
     333        void ConversionCost::postvisit( FunctionType * ) {}
     334
     335        void ConversionCost::postvisit( StructInstType * inst ) {
    322336                if ( StructInstType *destAsInst = dynamic_cast< StructInstType* >( dest ) ) {
    323337                        if ( inst->name == destAsInst->name ) {
     
    327341        }
    328342
    329         void ConversionCost::visit( UnionInstType * inst ) {
     343        void ConversionCost::postvisit( UnionInstType * inst ) {
    330344                if ( UnionInstType *destAsInst = dynamic_cast< UnionInstType* >( dest ) ) {
    331345                        if ( inst->name == destAsInst->name ) {
     
    335349        }
    336350
    337         void ConversionCost::visit( EnumInstType * ) {
     351        void ConversionCost::postvisit( EnumInstType * ) {
    338352                static Type::Qualifiers q;
    339353                static BasicType integer( q, BasicType::SignedInt );
    340                 integer.accept( *this );  // safe if dest >= int
     354                cost = costFunc( &integer, dest, indexer, env );  // safe if dest >= int
    341355                if ( cost < Cost::unsafe ) {
    342356                        cost.incSafe();
     
    344358        }
    345359
    346         void ConversionCost::visit( TraitInstType * ) {}
    347 
    348         void ConversionCost::visit( TypeInstType *inst ) {
     360        void ConversionCost::postvisit( TraitInstType * ) {}
     361
     362        void ConversionCost::postvisit( TypeInstType *inst ) {
    349363                EqvClass eqvClass;
    350364                NamedTypeDecl *namedType;
    351                 if ( env.lookup( inst->get_name(), eqvClass ) ) {
    352                         cost = conversionCost( eqvClass.type, dest, indexer, env );
     365                if ( env.lookup( inst->name, eqvClass ) ) {
     366                        cost = costFunc( eqvClass.type, dest, indexer, env );
    353367                } else if ( TypeInstType *destAsInst = dynamic_cast< TypeInstType* >( dest ) ) {
    354                         if ( inst->get_name() == destAsInst->get_name() ) {
     368                        if ( inst->name == destAsInst->name ) {
    355369                                cost = Cost::zero;
    356370                        }
    357                 } else if ( ( namedType = indexer.lookupType( inst->get_name() ) ) ) {
     371                } else if ( ( namedType = indexer.lookupType( inst->name ) ) ) {
    358372                        TypeDecl *type = dynamic_cast< TypeDecl* >( namedType );
    359373                        // all typedefs should be gone by this point
    360374                        assert( type );
    361                         if ( type->get_base() ) {
    362                                 cost = conversionCost( type->get_base(), dest, indexer, env ) + Cost::safe;
    363                         } // if
    364                 } // if
    365         }
    366 
    367         void ConversionCost::visit( TupleType * tupleType ) {
     375                        if ( type->base ) {
     376                                cost = costFunc( type->base, dest, indexer, env ) + Cost::safe;
     377                        } // if
     378                } // if
     379        }
     380
     381        void ConversionCost::postvisit( TupleType * tupleType ) {
    368382                Cost c = Cost::zero;
    369383                if ( TupleType * destAsTuple = dynamic_cast< TupleType * >( dest ) ) {
    370                         std::list< Type * >::const_iterator srcIt = tupleType->get_types().begin();
    371                         std::list< Type * >::const_iterator destIt = destAsTuple->get_types().begin();
    372                         while ( srcIt != tupleType->get_types().end() && destIt != destAsTuple->get_types().end() ) {
    373                                 Cost newCost = conversionCost( *srcIt++, *destIt++, indexer, env );
     384                        std::list< Type * >::const_iterator srcIt = tupleType->types.begin();
     385                        std::list< Type * >::const_iterator destIt = destAsTuple->types.begin();
     386                        while ( srcIt != tupleType->types.end() && destIt != destAsTuple->types.end() ) {
     387                                Cost newCost = costFunc( *srcIt++, *destIt++, indexer, env );
    374388                                if ( newCost == Cost::infinity ) {
    375389                                        return;
     
    377391                                c += newCost;
    378392                        } // while
    379                         if ( destIt != destAsTuple->get_types().end() ) {
     393                        if ( destIt != destAsTuple->types.end() ) {
    380394                                cost = Cost::infinity;
    381395                        } else {
     
    385399        }
    386400
    387         void ConversionCost::visit( VarArgsType * ) {
     401        void ConversionCost::postvisit( VarArgsType * ) {
    388402                if ( dynamic_cast< VarArgsType* >( dest ) ) {
    389403                        cost = Cost::zero;
     
    391405        }
    392406
    393         void ConversionCost::visit( ZeroType * ) {
     407        void ConversionCost::postvisit( ZeroType * ) {
    394408                if ( dynamic_cast< ZeroType * >( dest ) ) {
    395409                        cost = Cost::zero;
     
    408422        }
    409423
    410         void ConversionCost::visit( OneType * ) {
     424        void ConversionCost::postvisit( OneType * ) {
    411425                if ( dynamic_cast< OneType * >( dest ) ) {
    412426                        cost = Cost::zero;
  • src/ResolvExpr/ConversionCost.h

    r326cd2b r9cdfb4d0  
    1919
    2020#include "Cost.h"             // for Cost
     21
     22#include "Common/PassVisitor.h"
    2123#include "SynTree/Visitor.h"  // for Visitor
    2224#include "SynTree/SynTree.h"  // for Visitor Nodes
     
    2931        class TypeEnvironment;
    3032
    31         class ConversionCost : public Visitor {
     33        typedef std::function<Cost(Type *, Type *, const SymTab::Indexer &, const TypeEnvironment &)> CostFunction;
     34        struct ConversionCost : public WithShortCircuiting {
    3235          public:
    33                 ConversionCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env );
     36                ConversionCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env, CostFunction );
    3437
    3538                Cost get_cost() const { return cost; }
    3639
    37                 virtual void visit(VoidType *voidType);
    38                 virtual void visit(BasicType *basicType);
    39                 virtual void visit(PointerType *pointerType);
    40                 virtual void visit(ArrayType *arrayType);
    41                 virtual void visit(ReferenceType *refType);
    42                 virtual void visit(FunctionType *functionType);
    43                 virtual void visit(StructInstType *aggregateUseType);
    44                 virtual void visit(UnionInstType *aggregateUseType);
    45                 virtual void visit(EnumInstType *aggregateUseType);
    46                 virtual void visit(TraitInstType *aggregateUseType);
    47                 virtual void visit(TypeInstType *aggregateUseType);
    48                 virtual void visit(TupleType *tupleType);
    49                 virtual void visit(VarArgsType *varArgsType);
    50                 virtual void visit(ZeroType *zeroType);
    51                 virtual void visit(OneType *oneType);
     40                void previsit( BaseSyntaxNode * ) { visit_children = false; }
     41
     42                void postvisit( VoidType * voidType );
     43                void postvisit( BasicType * basicType );
     44                void postvisit( PointerType * pointerType );
     45                void postvisit( ArrayType * arrayType );
     46                void postvisit( ReferenceType * refType );
     47                void postvisit( FunctionType * functionType );
     48                void postvisit( StructInstType * aggregateUseType );
     49                void postvisit( UnionInstType * aggregateUseType );
     50                void postvisit( EnumInstType * aggregateUseType );
     51                void postvisit( TraitInstType * aggregateUseType );
     52                void postvisit( TypeInstType * aggregateUseType );
     53                void postvisit( TupleType * tupleType );
     54                void postvisit( VarArgsType * varArgsType );
     55                void postvisit( ZeroType * zeroType );
     56                void postvisit( OneType * oneType );
    5257          protected:
    5358                Type *dest;
     
    5560                Cost cost;
    5661                const TypeEnvironment &env;
     62                CostFunction costFunc;
    5763        };
    5864
    59         typedef std::function<int(Type *, Type *, const TypeEnvironment &, const SymTab::Indexer &)> PtrsFunction;
     65        typedef std::function<int(Type *, Type *, const SymTab::Indexer &, const TypeEnvironment &)> PtrsFunction;
    6066        Cost convertToReferenceCost( Type * src, ReferenceType * dest, const SymTab::Indexer & indexer, const TypeEnvironment & env, PtrsFunction func );
    6167} // namespace ResolvExpr
  • src/ResolvExpr/CurrentObject.cc

    r326cd2b r9cdfb4d0  
    3838
    3939namespace ResolvExpr {
    40         long long int getConstValue( ConstantExpr * constExpr ) {
    41                 if ( BasicType * basicType = dynamic_cast< BasicType * >( constExpr->get_result() ) ) {
    42                         if ( basicType->isInteger() ) {
    43                                 return constExpr->get_constant()->get_ival();
    44                         } else {
    45                                 assertf( false, "Non-integer constant expression in getConstValue %s", toString( constExpr ).c_str() ); // xxx - might be semantic error
    46                         }
    47                 } else if ( dynamic_cast< OneType * >( constExpr->get_result() ) ) {
    48                         return 1;
    49                 } else if ( dynamic_cast< ZeroType * >( constExpr->get_result() ) ) {
    50                         return 0;
    51                 } else {
    52                         assertf( false, "unhandled type on getConstValue %s", toString( constExpr->get_result() ).c_str() ); // xxx - might be semantic error
    53                 }
    54         }
    55 
    5640        template< typename AggrInst >
    5741        TypeSubstitution makeGenericSubstitution( AggrInst * inst ) {
     
    141125                        base = at->get_base();
    142126                        memberIter = createMemberIterator( base );
     127                        if ( at->isVarLen ) throw SemanticError( "VLA initialization does not support @=", at );
    143128                        setSize( at->get_dimension() );
    144129                }
     
    151136                void setSize( Expression * expr ) {
    152137                        if ( ConstantExpr * constExpr = dynamic_cast< ConstantExpr * >( expr ) ) {
    153                                 size = getConstValue( constExpr );
    154                                 PRINT( std::cerr << "array type with size: " << size << std::endl; )
     138                                try {
     139                                        size = constExpr->intValue();
     140                                        PRINT( std::cerr << "array type with size: " << size << std::endl; )
     141                                } catch ( SemanticError & ) {
     142                                        throw SemanticError( "Constant expression of non-integral type in array dimension: ", expr );
     143                                }
    155144                        }       else if ( CastExpr * castExpr = dynamic_cast< CastExpr * >( expr ) ) {
    156145                                setSize( castExpr->get_arg() ); // xxx - need to perform the conversion specified by the cast
     146                        } else if ( VariableExpr * varExpr = dynamic_cast< VariableExpr * >( expr ) ) {
     147                                if ( EnumInstType * inst = dynamic_cast< EnumInstType * > ( varExpr->result ) ) {
     148                                        long long int value;
     149                                        if ( inst->baseEnum->valueOf( varExpr->var, value ) ) {
     150                                                size = value;
     151                                        }
     152                                }
    157153                        } else {
    158154                                assertf( false, "unhandled expression in setSize: %s", toString( expr ).c_str() ); // xxx - if not a constant expression, it's not simple to determine how long the array actually is, which is necessary for initialization to be done correctly -- fix this
     
    164160                        // need to permit integer-constant-expressions, including: integer constants, enumeration constants, character constants, sizeof expressions, _Alignof expressions, cast expressions
    165161                        if ( ConstantExpr * constExpr = dynamic_cast< ConstantExpr * >( expr ) ) {
    166                                 index = getConstValue( constExpr );
     162                                try {
     163                                        index = constExpr->intValue();
     164                                } catch( SemanticError & ) {
     165                                        throw SemanticError( "Constant expression of non-integral type in array designator: ", expr );
     166                                }
    167167                        } else if ( CastExpr * castExpr = dynamic_cast< CastExpr * >( expr ) ) {
    168168                                setPosition( castExpr->get_arg() );
    169169                        } else if ( VariableExpr * varExpr = dynamic_cast< VariableExpr * >( expr ) ) {
    170                                 assertf( dynamic_cast<EnumInstType *> ( varExpr->get_result() ), "ArrayIterator given variable that isn't an enum constant : %s", toString( expr ).c_str() );
    171                                 index = 0; // xxx - get actual value of enum constant
     170                                EnumInstType * inst = dynamic_cast<EnumInstType *>( varExpr->get_result() );
     171                                assertf( inst, "ArrayIterator given variable that isn't an enum constant : %s", toString( expr ).c_str() );
     172                                long long int value;
     173                                if ( inst->baseEnum->valueOf( varExpr->var, value ) ) {
     174                                        index = value;
     175                                }
    172176                        } else if ( dynamic_cast< SizeofExpr * >( expr ) || dynamic_cast< AlignofExpr * >( expr ) ) {
    173177                                index = 0; // xxx - get actual sizeof/alignof value?
  • src/ResolvExpr/FindOpenVars.cc

    r326cd2b r9cdfb4d0  
    1919#include <map>                    // for map<>::mapped_type
    2020
     21#include "Common/PassVisitor.h"
    2122#include "SynTree/Declaration.h"  // for TypeDecl, DeclarationWithType (ptr ...
    2223#include "SynTree/Type.h"         // for Type, Type::ForallList, ArrayType
    23 #include "SynTree/Visitor.h"      // for Visitor
    2424
    2525namespace ResolvExpr {
    26         class FindOpenVars : public Visitor {
    27           public:
     26        struct FindOpenVars : public WithGuards {
    2827                FindOpenVars( OpenVarSet &openVars, OpenVarSet &closedVars, AssertionSet &needAssertions, AssertionSet &haveAssertions, bool firstIsOpen );
    2928
    30           private:
    31                 virtual void visit(PointerType *pointerType);
    32                 virtual void visit(ArrayType *arrayType);
    33                 virtual void visit(FunctionType *functionType);
    34                 virtual void visit(TupleType *tupleType);
     29                void previsit( PointerType * pointerType );
     30                void previsit( ArrayType * arrayType );
     31                void previsit( FunctionType * functionType );
     32                void previsit( TupleType * tupleType );
    3533
    3634                void common_action( Type *type );
     
    4240
    4341        void findOpenVars( Type *type, OpenVarSet &openVars, OpenVarSet &closedVars, AssertionSet &needAssertions, AssertionSet &haveAssertions, bool firstIsOpen ) {
    44                 FindOpenVars finder( openVars, closedVars, needAssertions, haveAssertions, firstIsOpen );
     42                PassVisitor<FindOpenVars> finder( openVars, closedVars, needAssertions, haveAssertions, firstIsOpen );
    4543                type->accept( finder );
    4644        }
     
    7068                        } // for
    7169                } // if
    72 ///   std::cout << "type is ";
    73 ///   type->print( std::cout );
    74 ///   std::cout << std::endl << "need is" << std::endl;
    75 ///   printAssertionSet( needAssertions, std::cout );
    76 ///   std::cout << std::endl << "have is" << std::endl;
    77 ///   printAssertionSet( haveAssertions, std::cout );
     70///   std::cerr << "type is ";
     71///   type->print( std::cerr );
     72///   std::cerr << std::endl << "need is" << std::endl;
     73///   printAssertionSet( needAssertions, std::cerr );
     74///   std::cerr << std::endl << "have is" << std::endl;
     75///   printAssertionSet( haveAssertions, std::cerr );
    7876        }
    7977
    80         void FindOpenVars::visit(PointerType *pointerType) {
     78        void FindOpenVars::previsit(PointerType *pointerType) {
    8179                common_action( pointerType );
    82                 Visitor::visit( pointerType );
    8380        }
    8481
    85         void FindOpenVars::visit(ArrayType *arrayType) {
     82        void FindOpenVars::previsit(ArrayType *arrayType) {
    8683                common_action( arrayType );
    87                 Visitor::visit( arrayType );
    8884        }
    8985
    90         void FindOpenVars::visit(FunctionType *functionType) {
     86        void FindOpenVars::previsit(FunctionType *functionType) {
    9187                common_action( functionType );
    9288                nextIsOpen = ! nextIsOpen;
    93                 Visitor::visit( functionType );
    94                 nextIsOpen = ! nextIsOpen;
     89                GuardAction( [this](){ nextIsOpen = ! nextIsOpen; } );
    9590        }
    9691
    97         void FindOpenVars::visit(TupleType *tupleType) {
     92        void FindOpenVars::previsit(TupleType *tupleType) {
    9893                common_action( tupleType );
    99                 Visitor::visit( tupleType );
    10094        }
    10195} // namespace ResolvExpr
  • src/ResolvExpr/Occurs.cc

    r326cd2b r9cdfb4d0  
    55// file "LICENCE" distributed with Cforall.
    66//
    7 // Occurs.cc -- 
     7// Occurs.cc --
    88//
    99// Author           : Richard C. Bilson
     
    1717#include <string>             // for string
    1818
     19#include "Common/PassVisitor.h"
    1920#include "SynTree/Type.h"     // for TypeInstType, Type
    20 #include "SynTree/Visitor.h"  // for Visitor
    2121#include "TypeEnvironment.h"  // for EqvClass, TypeEnvironment
    2222
    2323namespace ResolvExpr {
    24         class Occurs : public Visitor {
    25           public:
     24        struct Occurs : public WithVisitorRef<Occurs> {
    2625                Occurs( std::string varName, const TypeEnvironment &env );
    27                 bool get_result() const { return result; }
    28                 virtual void visit( TypeInstType *typeInst );
    29           private:
     26                void previsit( TypeInstType * typeInst );
     27
    3028                bool result;
    3129                std::set< std::string > eqvVars;
    32                 const TypeEnvironment &env;
     30                const TypeEnvironment &tenv;
    3331        };
    3432
    3533        bool occurs( Type *type, std::string varName, const TypeEnvironment &env ) {
    36                 Occurs occur( varName, env );
     34                PassVisitor<Occurs> occur( varName, env );
    3735                type->accept( occur );
    38                 return occur.get_result();
     36                return occur.pass.result;
    3937        }
    4038
    41         Occurs::Occurs( std::string varName, const TypeEnvironment &env ) : result( false ), env( env ) {
     39        Occurs::Occurs( std::string varName, const TypeEnvironment & env ) : result( false ), tenv( env ) {
    4240                EqvClass eqvClass;
    43                 if ( env.lookup( varName, eqvClass ) ) {
     41                if ( tenv.lookup( varName, eqvClass ) ) {
    4442                        eqvVars = eqvClass.vars;
    4543                } else {
     
    4846        }
    4947
    50         void Occurs::visit( TypeInstType *typeInst ) {
     48        void Occurs::previsit( TypeInstType * typeInst ) {
    5149                EqvClass eqvClass;
    52 ///   std::cout << "searching for vars: ";
    53 ///   std::copy( eqvVars.begin(), eqvVars.end(), std::ostream_iterator< std::string >( std::cout, " " ) );
    54 ///   std::cout << std::endl;
     50///   std::cerr << "searching for vars: ";
     51///   std::copy( eqvVars.begin(), eqvVars.end(), std::ostream_iterator< std::string >( std::cerr, " " ) );
     52///   std::cerr << std::endl;
    5553                if ( eqvVars.find( typeInst->get_name() ) != eqvVars.end() ) {
    5654                        result = true;
    57                 } else if ( env.lookup( typeInst->get_name(), eqvClass ) ) {
     55                } else if ( tenv.lookup( typeInst->get_name(), eqvClass ) ) {
    5856                        if ( eqvClass.type ) {
    59 ///       std::cout << typeInst->get_name() << " is bound to";
    60 ///       eqvClass.type->print( std::cout );
    61 ///       std::cout << std::endl;
    62                                 eqvClass.type->accept( *this );
     57///       std::cerr << typeInst->get_name() << " is bound to";
     58///       eqvClass.type->print( std::cerr );
     59///       std::cerr << std::endl;
     60                                eqvClass.type->accept( *visitor );
    6361                        } // if
    6462                } // if
  • src/ResolvExpr/PolyCost.cc

    r326cd2b r9cdfb4d0  
    1414//
    1515
     16#include "Common/PassVisitor.h"
    1617#include "SymTab/Indexer.h"   // for Indexer
    1718#include "SynTree/Type.h"     // for TypeInstType, Type
    18 #include "SynTree/Visitor.h"  // for Visitor
    1919#include "TypeEnvironment.h"  // for EqvClass, TypeEnvironment
    2020
    2121namespace ResolvExpr {
    22         class PolyCost : public Visitor {
    23           public:
     22        struct PolyCost {
    2423                PolyCost( const TypeEnvironment &env, const SymTab::Indexer &indexer );
    25                 int get_result() const { return result; }
    26           private:
    27                 virtual void visit(TypeInstType *aggregateUseType);
     24
     25                void previsit( TypeInstType * aggregateUseType );
    2826                int result;
    29                 const TypeEnvironment &env;
     27                const TypeEnvironment &tenv;
    3028                const SymTab::Indexer &indexer;
    3129        };
    3230
    3331        int polyCost( Type *type, const TypeEnvironment & env, const SymTab::Indexer &indexer ) {
    34                 PolyCost coster( env, indexer );
     32                PassVisitor<PolyCost> coster( env, indexer );
    3533                type->accept( coster );
    36                 return coster.get_result();
     34                return coster.pass.result;
    3735        }
    3836
    39         PolyCost::PolyCost( const TypeEnvironment & env, const SymTab::Indexer & indexer ) : result( 0 ), env( env ), indexer( indexer ) {
     37        PolyCost::PolyCost( const TypeEnvironment & env, const SymTab::Indexer & indexer ) : result( 0 ), tenv( env ), indexer( indexer ) {
    4038        }
    4139
    42         void PolyCost::visit(TypeInstType * typeInst) {
     40        void PolyCost::previsit(TypeInstType * typeInst) {
    4341                EqvClass eqvClass;
    44                 if ( env.lookup( typeInst->name, eqvClass ) ) {
     42                if ( tenv.lookup( typeInst->name, eqvClass ) ) {
    4543                        if ( eqvClass.type ) {
    4644                                if ( TypeInstType * otherTypeInst = dynamic_cast< TypeInstType* >( eqvClass.type ) ) {
  • src/ResolvExpr/PtrsAssignable.cc

    r326cd2b r9cdfb4d0  
    6767        PtrsAssignable::PtrsAssignable( Type *dest, const TypeEnvironment &env ) : dest( dest ), result( 0 ), env( env ) {}
    6868
    69         void PtrsAssignable::visit( __attribute((unused)) VoidType *voidType ) {
     69        void PtrsAssignable::visit( VoidType * ) {
    7070                // T * = void * is disallowed - this is a change from C, where any
    7171                // void * can be assigned or passed to a non-void pointer without a cast.
  • src/ResolvExpr/Resolver.cc

    r326cd2b r9cdfb4d0  
    155155                } // if
    156156                #endif
    157                 assertf( finder.get_alternatives().size() == 1, "findSingleExpression: must have exactly one alternative at the end." );
     157                assertf( finder.get_alternatives().size() == 1, "findSingleExpression: must have exactly one alternative at the end: (%zd) %s", finder.get_alternatives().size(), toString( untyped ).c_str() );
    158158                Alternative &choice = finder.get_alternatives().front();
    159159                Expression *newExpr = choice.expr->clone();
     
    171171
    172172        namespace {
     173                /// resolve `untyped` to the expression whose type satisfies `pred` with the lowest cost; kindStr is used for providing better error messages
     174                template<typename Pred>
     175                void findKindExpression(Expression *& untyped, const SymTab::Indexer & indexer, const std::string & kindStr, Pred pred) {
     176                        TypeEnvironment env;
     177                        AlternativeFinder finder( indexer, env );
     178                        finder.findWithAdjustment( untyped );
     179
     180                        AltList candidates;
     181                        for ( Alternative & alt : finder.get_alternatives() ) {
     182                                if ( pred( alt.expr->result ) ) {
     183                                        candidates.push_back( std::move( alt ) );
     184                                }
     185                        }
     186
     187                        // choose the lowest cost expression among the candidates
     188                        AltList winners;
     189                        findMinCost( candidates.begin(), candidates.end(), back_inserter( winners ) );
     190                        if ( winners.size() == 0 ) {
     191                                throw SemanticError( "No reasonable alternatives for " + kindStr + " expression: ", untyped );
     192                        } else if ( winners.size() != 1 ) {
     193                                std::ostringstream stream;
     194                                stream << "Cannot choose between " << winners.size() << " alternatives for " + kindStr +  " expression\n";
     195                                untyped->print( stream );
     196                                stream << "Alternatives are:\n";
     197                                printAlts( winners, stream, 1 );
     198                                throw SemanticError( stream.str() );
     199                        }
     200
     201                        // there is one unambiguous interpretation - move the expression into the with statement
     202                        Alternative & alt = winners.front();
     203                        finishExpr( alt.expr, alt.env, untyped->env );
     204                        delete untyped;
     205                        untyped = alt.expr;
     206                        alt.expr = nullptr;
     207                }
     208
    173209                bool isIntegralType( Type *type ) {
    174210                        if ( dynamic_cast< EnumInstType * >( type ) ) {
     
    184220
    185221                void findIntegralExpression( Expression *& untyped, const SymTab::Indexer &indexer ) {
    186                         TypeEnvironment env;
    187                         AlternativeFinder finder( indexer, env );
    188                         finder.find( untyped );
    189 #if 0
    190                         if ( finder.get_alternatives().size() != 1 ) {
    191                                 std::cout << "untyped expr is ";
    192                                 untyped->print( std::cout );
    193                                 std::cout << std::endl << "alternatives are:";
    194                                 for ( std::list< Alternative >::const_iterator i = finder.get_alternatives().begin(); i != finder.get_alternatives().end(); ++i ) {
    195                                         i->print( std::cout );
    196                                 } // for
    197                         } // if
    198 #endif
    199                         Expression *newExpr = 0;
    200                         const TypeEnvironment *newEnv = 0;
    201                         for ( AltList::const_iterator i = finder.get_alternatives().begin(); i != finder.get_alternatives().end(); ++i ) {
    202                                 if ( i->expr->get_result()->size() == 1 && isIntegralType( i->expr->get_result() ) ) {
    203                                         if ( newExpr ) {
    204                                                 throw SemanticError( "Too many interpretations for case control expression", untyped );
    205                                         } else {
    206                                                 newExpr = i->expr->clone();
    207                                                 newEnv = &i->env;
    208                                         } // if
    209                                 } // if
    210                         } // for
    211                         if ( ! newExpr ) {
    212                                 throw SemanticError( "No interpretations for case control expression", untyped );
    213                         } // if
    214                         finishExpr( newExpr, *newEnv, untyped->env );
    215                         delete untyped;
    216                         untyped = newExpr;
    217                 }
    218 
     222                        findKindExpression( untyped, indexer, "condition", isIntegralType );
     223                }
    219224        }
    220225
     
    311316
    312317        void Resolver::previsit( IfStmt *ifStmt ) {
    313                 findSingleExpression( ifStmt->condition, indexer );
     318                findIntegralExpression( ifStmt->condition, indexer );
    314319        }
    315320
    316321        void Resolver::previsit( WhileStmt *whileStmt ) {
    317                 findSingleExpression( whileStmt->condition, indexer );
     322                findIntegralExpression( whileStmt->condition, indexer );
    318323        }
    319324
    320325        void Resolver::previsit( ForStmt *forStmt ) {
    321326                if ( forStmt->condition ) {
    322                         findSingleExpression( forStmt->condition, indexer );
     327                        findIntegralExpression( forStmt->condition, indexer );
    323328                } // if
    324329
     
    579584        }
    580585
     586
    581587        void Resolver::previsit( WithStmt * withStmt ) {
    582588                for ( Expression *& expr : withStmt->exprs )  {
    583                         TypeEnvironment env;
    584                         AlternativeFinder finder( indexer, env );
    585                         finder.findWithAdjustment( expr );
    586 
    587589                        // only struct- and union-typed expressions are viable candidates
    588                         AltList candidates;
    589                         for ( Alternative & alt : finder.get_alternatives() ) {
    590                                 if ( isStructOrUnion( alt.expr->result ) ) {
    591                                         candidates.push_back( std::move( alt ) );
    592                                 }
    593                         }
    594 
    595                         // choose the lowest cost expression among the candidates
    596                         AltList winners;
    597                         findMinCost( candidates.begin(), candidates.end(), back_inserter( winners ) );
    598                         if ( winners.size() == 0 ) {
    599                                 throw SemanticError( "No reasonable alternatives for with statement expression: ", expr );
    600                         } else if ( winners.size() != 1 ) {
    601                                 std::ostringstream stream;
    602                                 stream << "Cannot choose between " << winners.size() << " alternatives for with statement expression\n";
    603                                 expr->print( stream );
    604                                 stream << "Alternatives are:\n";
    605                                 printAlts( winners, stream, 1 );
    606                                 throw SemanticError( stream.str() );
    607                         }
    608 
    609                         // there is one unambiguous interpretation - move the expression into the with statement
    610                         Alternative & alt = winners.front();
    611                         finishExpr( alt.expr, alt.env, expr->env );
    612                         delete expr;
    613                         expr = alt.expr;
    614                         alt.expr = nullptr;
     590                        findKindExpression( expr, indexer, "with statement", isStructOrUnion );
    615591
    616592                        // if with expression might be impure, create a temporary so that it is evaluated once
  • src/ResolvExpr/Resolver.h

    r326cd2b r9cdfb4d0  
    3333        void findVoidExpression( Expression *& untyped, const SymTab::Indexer &indexer );
    3434        void findSingleExpression( Expression *& untyped, const SymTab::Indexer &indexer );
     35        void findSingleExpression( Expression *& untyped, Type * type, const SymTab::Indexer &indexer );
    3536        void resolveCtorInit( ConstructorInit * ctorInit, const SymTab::Indexer & indexer );
    3637        void resolveStmtExpr( StmtExpr * stmtExpr, const SymTab::Indexer & indexer );
  • src/SymTab/Indexer.cc

    r326cd2b r9cdfb4d0  
    572572        }
    573573
     574        void Indexer::addMembers( AggregateDecl * aggr, Expression * expr ) {
     575                for ( Declaration * decl : aggr->members ) {
     576                        if ( DeclarationWithType * dwt = dynamic_cast< DeclarationWithType * >( decl ) ) {
     577                                addId( dwt, expr );
     578                                if ( dwt->name == "" ) {
     579                                        Type * t = dwt->get_type()->stripReferences();
     580                                        if ( dynamic_cast< StructInstType * >( t ) || dynamic_cast< UnionInstType * >( t ) ) {
     581                                                Expression * base = expr->clone();
     582                                                ResolvExpr::referenceToRvalueConversion( base );
     583                                                addMembers( t->getAggr(), new MemberExpr( dwt, base ) );
     584                                        }
     585                                }
     586                        }
     587                }
     588        }
     589
    574590        void Indexer::addWith( WithStmt * stmt ) {
    575591                for ( Expression * expr : stmt->exprs ) {
     
    578594                                assertf( aggr, "WithStmt expr has non-aggregate type: %s", toString( expr->result ).c_str() );
    579595
    580                                 for ( Declaration * decl : aggr->members ) {
    581                                         if ( DeclarationWithType * dwt = dynamic_cast< DeclarationWithType * >( decl ) ) {
    582                                                 addId( dwt, expr );
    583                                         }
    584                                 }
     596                                addMembers( aggr, expr );
    585597                        }
    586598                }
  • src/SymTab/Indexer.h

    r326cd2b r9cdfb4d0  
    8686                void addWith( WithStmt * );
    8787
     88                /// adds all of the members of the Aggregate (addWith helper)
     89                void addMembers( AggregateDecl * aggr, Expression * expr );
     90
    8891                /// convenience function for adding a list of Ids to the indexer
    8992                void addIds( const std::list< DeclarationWithType * > & decls );
  • src/SymTab/Validate.cc

    r326cd2b r9cdfb4d0  
    9494                template< typename AggDecl > void handleAggregate( AggDecl *aggregateDecl );
    9595
    96                 bool inStruct = false;
     96                AggregateDecl * parentAggr = nullptr;
    9797        };
    9898
     
    303303        template< typename AggDecl >
    304304        void HoistStruct::handleAggregate( AggDecl *aggregateDecl ) {
    305                 if ( inStruct ) {
     305                if ( parentAggr ) {
    306306                        // Add elements in stack order corresponding to nesting structure.
    307307                        declsToAddBefore.push_front( aggregateDecl );
    308308                } else {
    309                         GuardValue( inStruct );
    310                         inStruct = true;
     309                        GuardValue( parentAggr );
     310                        parentAggr = aggregateDecl;
    311311                } // if
    312312                // Always remove the hoisted aggregate from the inner structure.
     
    754754                                throw SemanticError( "Cannot redefine typedef: " + tyDecl->name );
    755755                        }
    756                         // cannot redefine VLA typedefs
     756                        // Cannot redefine VLA typedefs. Note: this is slightly incorrect, because our notion of VLAs
     757                        // at this point in the translator is imprecise. In particular, this will disallow redefining typedefs
     758                        // with arrays whose dimension is an enumerator or a cast of a constant/enumerator. The effort required
     759                        // to fix this corner case likely outweighs the utility of allowing it.
    757760                        if ( isVariableLength( t1 ) || isVariableLength( t2 ) ) {
    758761                                throw SemanticError( "Cannot redefine typedef: " + tyDecl->name );
  • src/SynTree/AggregateDecl.cc

    r326cd2b r9cdfb4d0  
    8686std::string TraitDecl::typeString() const { return "trait"; }
    8787
     88namespace {
     89        long long int getConstValue( Expression * expr ) {
     90                if ( CastExpr * castExpr = dynamic_cast< CastExpr * > ( expr ) ) {
     91                        return getConstValue( castExpr->arg );
     92                } else if ( ConstantExpr * constExpr = dynamic_cast< ConstantExpr * >( expr ) ) {
     93                        return constExpr->intValue();
     94                // can be -1, +1, etc.
     95                // } else if ( UntypedExpr * untypedExpr = dynamic_cast< UntypedExpr * >( expr ) ) {
     96                //      if ( untypedExpr-> )
     97                } else {
     98                        assertf( false, "Unhandled expression type in getConstValue for enumerators: %s", toString( expr ).c_str() );
     99                }
     100        }
     101}
     102
     103bool EnumDecl::valueOf( Declaration * enumerator, long long int & value ) {
     104        if ( enumValues.empty() ) {
     105                long long int currentValue = 0;
     106                for ( Declaration * member : members ) {
     107                        ObjectDecl * field = strict_dynamic_cast< ObjectDecl * >( member );
     108                        if ( field->init ) {
     109                                SingleInit * init = strict_dynamic_cast< SingleInit * >( field->init );
     110                                currentValue = getConstValue( init->value );
     111                        }
     112                        assertf( enumValues.count( field->name ) == 0, "Enum %s has multiple members with the name %s", name.c_str(), field->name.c_str() );
     113                        enumValues[ field->name ] = currentValue;
     114                        ++currentValue;
     115                }
     116        }
     117        if ( enumValues.count( enumerator->name ) ) {
     118                value = enumValues[ enumerator->name ];
     119                return true;
     120        }
     121        return false;
     122}
     123
    88124// Local Variables: //
    89125// tab-width: 4 //
  • src/SynTree/Declaration.h

    r326cd2b r9cdfb4d0  
    319319        EnumDecl( const EnumDecl &other ) : Parent( other ) {}
    320320
     321        bool valueOf( Declaration * enumerator, long long int & value );
     322
    321323        virtual EnumDecl *clone() const override { return new EnumDecl( *this ); }
    322324        virtual void accept( Visitor &v ) override { v.visit( this ); }
    323325        virtual Declaration *acceptMutator( Mutator &m )  override { return m.mutate( this ); }
    324326  private:
     327        std::map< std::string, long long int > enumValues;
    325328        virtual std::string typeString() const override;
    326329};
  • src/SynTree/Expression.cc

    r326cd2b r9cdfb4d0  
    8383}
    8484
     85long long int ConstantExpr::intValue() const {
     86        if ( BasicType * basicType = dynamic_cast< BasicType * >( result ) ) {
     87                if ( basicType->isInteger() ) {
     88                        return get_constant()->get_ival();
     89                }
     90        } else if ( dynamic_cast< OneType * >( result ) ) {
     91                return 1;
     92        } else if ( dynamic_cast< ZeroType * >( result ) ) {
     93                return 0;
     94        }
     95        throw SemanticError( "Constant expression of non-integral type ", this );
     96}
     97
    8598VariableExpr::VariableExpr( DeclarationWithType *_var ) : Expression(), var( _var ) {
    8699        assert( var );
     
    95108        //      assert( inst->baseEnum );
    96109        //      EnumDecl * decl = inst->baseEnum;
    97         //      for ( Declaration * member : decl->members ) {
    98         //              if ( member == _var ) {
    99         //                      type->set_lvalue( false );
    100         //              }
     110        //      long long int value;
     111        //      if ( decl->valueOf( var, value ) ) {
     112        //              type->set_lvalue( false );
    101113        //      }
    102114        // }
     
    403415                } else {
    404416                        // references have been removed, in which case dereference returns an lvalue of the base type.
    405                         ret->get_result()->set_lvalue( true );
     417                        ret->result->set_lvalue( true );
    406418                }
    407419        }
     
    589601        if ( ! body.empty() ) {
    590602                if ( ExprStmt * exprStmt = dynamic_cast< ExprStmt * >( body.back() ) ) {
    591                         set_result( maybeClone( exprStmt->get_expr()->get_result() ) );
     603                        result = maybeClone( exprStmt->expr->result );
    592604                }
    593605        }
    594606        // ensure that StmtExpr has a result type
    595607        if ( ! result ) {
    596                 set_result( new VoidType( Type::Qualifiers() ) );
     608                result = new VoidType( Type::Qualifiers() );
    597609        }
    598610}
  • src/SynTree/Expression.h

    r326cd2b r9cdfb4d0  
    295295
    296296        Constant * get_constant() { return & constant; }
     297        const Constant * get_constant() const { return & constant; }
    297298        void set_constant( const Constant & newValue ) { constant = newValue; }
     299
     300        long long int intValue() const;
    298301
    299302        virtual ConstantExpr * clone() const { return new ConstantExpr( * this ); }
  • src/SynTree/Label.h

    r326cd2b r9cdfb4d0  
    3333        std::list< Attribute * >& get_attributes() { return attributes; }
    3434
    35         operator std::string() { return name; }
     35        operator std::string() const { return name; }
    3636        bool empty() { return name.empty(); }
    3737  private:
  • src/SynTree/Type.h

    r326cd2b r9cdfb4d0  
    356356        bool isTtype() const;
    357357
     358        bool isUnprototyped() const { return isVarArgs && parameters.size() == 0; }
     359
    358360        virtual FunctionType *clone() const override { return new FunctionType( *this ); }
    359361        virtual void accept( Visitor & v ) override { v.visit( this ); }
  • src/SynTree/TypeSubstitution.cc

    r326cd2b r9cdfb4d0  
    107107
    108108void TypeSubstitution::normalize() {
     109        PassVisitor<Substituter> sub( *this, true );
    109110        do {
    110                 subCount = 0;
    111                 freeOnly = true;
     111                sub.pass.subCount = 0;
     112                sub.pass.freeOnly = true;
    112113                for ( TypeEnvType::iterator i = typeEnv.begin(); i != typeEnv.end(); ++i ) {
    113                         i->second = i->second->acceptMutator( *this );
     114                        i->second = i->second->acceptMutator( sub );
    114115                }
    115         } while ( subCount );
    116 }
    117 
    118 Type * TypeSubstitution::mutate( TypeInstType *inst ) {
    119         BoundVarsType::const_iterator bound = boundVars.find( inst->get_name() );
     116        } while ( sub.pass.subCount );
     117}
     118
     119Type * TypeSubstitution::Substituter::postmutate( TypeInstType *inst ) {
     120        BoundVarsType::const_iterator bound = boundVars.find( inst->name );
    120121        if ( bound != boundVars.end() ) return inst;
    121122
    122         TypeEnvType::const_iterator i = typeEnv.find( inst->get_name() );
    123         if ( i == typeEnv.end() ) {
     123        TypeEnvType::const_iterator i = sub.typeEnv.find( inst->get_name() );
     124        if ( i == sub.typeEnv.end() ) {
    124125                return inst;
    125126        } else {
    126 ///         std::cout << "found " << inst->get_name() << ", replacing with ";
    127 ///         i->second->print( std::cout );
    128 ///         std::cout << std::endl;
     127///         std::cerr << "found " << inst->get_name() << ", replacing with ";
     128///         i->second->print( std::cerr );
     129///         std::cerr << std::endl;
    129130                subCount++;
    130                 Type *newtype = i->second->clone();
     131                Type * newtype = i->second->clone();
    131132                newtype->get_qualifiers() |= inst->get_qualifiers();
    132133                delete inst;
     
    135136}
    136137
    137 Expression * TypeSubstitution::mutate( NameExpr *nameExpr ) {
    138         VarEnvType::const_iterator i = varEnv.find( nameExpr->get_name() );
    139         if ( i == varEnv.end() ) {
     138Expression * TypeSubstitution::Substituter::postmutate( NameExpr * nameExpr ) {
     139        VarEnvType::const_iterator i = sub.varEnv.find( nameExpr->name );
     140        if ( i == sub.varEnv.end() ) {
    140141                return nameExpr;
    141142        } else {
     
    146147}
    147148
    148 template< typename TypeClass >
    149 Type *TypeSubstitution::handleType( TypeClass *type ) {
    150         ValueGuard<BoundVarsType> oldBoundVars( boundVars );
     149void TypeSubstitution::Substituter::premutate( Type * type ) {
     150        GuardValue( boundVars );
    151151        // bind type variables from forall-qualifiers
    152152        if ( freeOnly ) {
    153                 for ( Type::ForallList::const_iterator tyvar = type->get_forall().begin(); tyvar != type->get_forall().end(); ++tyvar ) {
    154                         boundVars.insert( (*tyvar )->get_name() );
     153                for ( Type::ForallList::const_iterator tyvar = type->forall.begin(); tyvar != type->forall.end(); ++tyvar ) {
     154                        boundVars.insert( (*tyvar)->name );
    155155                } // for
    156156        } // if
    157         Type *ret = Mutator::mutate( type );
    158         return ret;
    159157}
    160158
    161159template< typename TypeClass >
    162 Type *TypeSubstitution::handleAggregateType( TypeClass *type ) {
    163         ValueGuard<BoundVarsType> oldBoundVars( boundVars );
     160void TypeSubstitution::Substituter::handleAggregateType( TypeClass * type ) {
     161        GuardValue( boundVars );
    164162        // bind type variables from forall-qualifiers
    165163        if ( freeOnly ) {
    166                 for ( Type::ForallList::const_iterator tyvar = type->get_forall().begin(); tyvar != type->get_forall().end(); ++tyvar ) {
    167                         boundVars.insert( (*tyvar )->get_name() );
     164                for ( Type::ForallList::const_iterator tyvar = type->forall.begin(); tyvar != type->forall.end(); ++tyvar ) {
     165                        boundVars.insert( (*tyvar)->name );
    168166                } // for
    169167                // bind type variables from generic type instantiations
    170168                std::list< TypeDecl* > *baseParameters = type->get_baseParameters();
    171                 if ( baseParameters && ! type->get_parameters().empty() ) {
     169                if ( baseParameters && ! type->parameters.empty() ) {
    172170                        for ( std::list< TypeDecl* >::const_iterator tyvar = baseParameters->begin(); tyvar != baseParameters->end(); ++tyvar ) {
    173                                 boundVars.insert( (*tyvar)->get_name() );
     171                                boundVars.insert( (*tyvar)->name );
    174172                        } // for
    175173                } // if
    176174        } // if
    177         Type *ret = Mutator::mutate( type );
    178         return ret;
    179 }
    180 
    181 Type * TypeSubstitution::mutate( VoidType *voidType ) {
    182         return handleType( voidType );
    183 }
    184 
    185 Type * TypeSubstitution::mutate( BasicType *basicType ) {
    186         return handleType( basicType );
    187 }
    188 
    189 Type * TypeSubstitution::mutate( PointerType *pointerType ) {
    190         return handleType( pointerType );
    191 }
    192 
    193 Type * TypeSubstitution::mutate( ArrayType *arrayType ) {
    194         return handleType( arrayType );
    195 }
    196 
    197 Type * TypeSubstitution::mutate( FunctionType *functionType ) {
    198         return handleType( functionType );
    199 }
    200 
    201 Type * TypeSubstitution::mutate( StructInstType *aggregateUseType ) {
    202         return handleAggregateType( aggregateUseType );
    203 }
    204 
    205 Type * TypeSubstitution::mutate( UnionInstType *aggregateUseType ) {
    206         return handleAggregateType( aggregateUseType );
    207 }
    208 
    209 Type * TypeSubstitution::mutate( EnumInstType *aggregateUseType ) {
    210         return handleType( aggregateUseType );
    211 }
    212 
    213 Type * TypeSubstitution::mutate( TraitInstType *aggregateUseType ) {
    214         return handleType( aggregateUseType );
    215 }
    216 
    217 Type * TypeSubstitution::mutate( TupleType *tupleType ) {
    218         return handleType( tupleType );
    219 }
    220 
    221 Type * TypeSubstitution::mutate( VarArgsType *varArgsType ) {
    222         return handleType( varArgsType );
    223 }
    224 
    225 Type * TypeSubstitution::mutate( ZeroType *zeroType ) {
    226         return handleType( zeroType );
    227 }
    228 
    229 Type * TypeSubstitution::mutate( OneType *oneType ) {
    230         return handleType( oneType );
     175}
     176
     177void TypeSubstitution::Substituter::premutate( StructInstType * aggregateUseType ) {
     178        handleAggregateType( aggregateUseType );
     179}
     180
     181void TypeSubstitution::Substituter::premutate( UnionInstType *aggregateUseType ) {
     182        handleAggregateType( aggregateUseType );
    231183}
    232184
  • src/SynTree/TypeSubstitution.h

    r326cd2b r9cdfb4d0  
    2727#include "SynTree/Declaration.h"   // for TypeDecl, Declaration (ptr only)
    2828#include "SynTree/Expression.h"    // for Expression (ptr only), NameExpr (p...
    29 #include "SynTree/Mutator.h"       // for Mutator
    3029#include "SynTree/Type.h"          // for Type, ArrayType (ptr only), BasicT...
    3130
    32 class TypeSubstitution : public Mutator {
    33         typedef Mutator Parent;
     31class TypeSubstitution {
    3432  public:
    3533        TypeSubstitution();
     
    6462        TypeSubstitution *clone() const { return new TypeSubstitution( *this ); }
    6563  private:
    66         virtual Type* mutate(TypeInstType *aggregateUseType);
    67         virtual Expression* mutate(NameExpr *nameExpr);
    6864
    69         /// Records type variable bindings from forall-statements
    70         template< typename TypeClass > Type *handleType( TypeClass *type );
    71         /// Records type variable bindings from forall-statements and instantiations of generic types
    72         template< typename TypeClass > Type *handleAggregateType( TypeClass *type );
    73 
    74         virtual Type* mutate(VoidType *basicType);
    75         virtual Type* mutate(BasicType *basicType);
    76         virtual Type* mutate(PointerType *pointerType);
    77         virtual Type* mutate(ArrayType *arrayType);
    78         virtual Type* mutate(FunctionType *functionType);
    79         virtual Type* mutate(StructInstType *aggregateUseType);