Changeset b6838214


Timestamp:  Jan 23, 2018, 5:46:43 PM
Author:     Alan Kennedy <afakenne@…>
Branches:   aaron-thesis, arm-eh, cleanup-dtors, deferred_resn, demangler, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, new-env, no_list, persistent-indexer, resolv-new, with_gc
Children:   258e6ad5
Parents:    b158d8f (diff), 15d248e (diff)
Note:       this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
Message:    add context switch for ARM
Files:      8 added, 13 deleted, 113 edited, 26 moved

Legend: in the diffs below, lines prefixed with '-' were removed, lines prefixed with '+' were added, unprefixed lines are unchanged context, and '…' marks elided unchanged lines.
  • doc/proposals/concurrency/.gitignore

    rb158d8f → rb6838214

      build/*.toc
      *.pdf
    + *.png
    + figures/*.tex

      examples
  • doc/proposals/concurrency/Makefile

    rb158d8f → rb6838214

      ## Define the appropriate configuration variables.

    - TeXLIB = .:./style:./text:./annex:./build:../../LaTeXmacros:../../LaTeXmacros/listings:../../LaTeXmacros/enumitem:~/bibliographies:
    + TeXLIB = .:./style:./text:./annex:./build:../../LaTeXmacros:../../LaTeXmacros/listings:../../LaTeXmacros/enumitem:~/bibliographies:/usr/local/bibliographies:
      LaTeX  = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=build -interaction=nonstopmode
      BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex -terse
    …
              build/*.tex     \
              build/*.toc     \
    +         build/*.lof     \
    +         build/*.lol     \
    +         build/*.lot     \
    +         figures/*.tex   \
    +         *.png           \


    …
              fig2dev -L pstex_t -p $@ $< > $@_t

    + figures/%.tex: build/%.pstex
    +         echo -n         "\documentclass[preview]{standalone}\n"         \
    +                         "\usepackage[T1]{fontenc}\n"                    \
    +                         "\usepackage[usenames]{color}\n"                \
    +                         "\usepackage{graphicx}\n"                       \
    +                         "\usepackage{listings}\n"                       \
    +                         "\usepackage{xspace}\n"                         \
    +                         "\input{style}\n"                               \
    +                         "\\\\begin{document}\n"                         \
    +                         "{\\\\resizebox{3\\\\textwidth}{!}{\input{${basename ${notdir $@}}.pstex_t}}}\n" \
    +                         "\end{document}" > $@
    +
    + %.png : build/%.pstex figures/%.tex
    +         echo ${basename $@}
    +         ${LaTeX} figures/${basename $@}.tex
    +         dvips build/${basename $@}.dvi -o build/${basename $@}.ps
    +         ps2pdf build/${basename $@}.ps
    +         convert -negate ${basename $@}.pdf $@
    +
    +

      # Local Variables: #
  • doc/proposals/concurrency/annex/glossary.tex

    rb158d8f → rb6838214

      \newacronym{tls}{TLS}{Thread Local Storage}
      \newacronym{api}{API}{Application Program Interface}
    - \newacronym{raii}{RAII}{Ressource Acquisition Is Initialization}
    + \newacronym{raii}{RAII}{Resource Acquisition Is Initialization}
      \newacronym{numa}{NUMA}{Non-Uniform Memory Access}
  • doc/proposals/concurrency/annex/local.bib

    rb158d8f → rb6838214


      @article{TBB,
    +         key     = {TBB},
              keywords        = {Intel, TBB},
              title   = {Intel Thread Building Blocks},
    …

      @manual{www-cfa,
    +         key     = {CFA},
              keywords        = {Cforall},
    -         title   = {Cforall Programmming Language},
    -         address = {https://plg.uwaterloo.ca/~cforall/}
    +         author  = {C$\forall$},
    +         title   = {C$\forall$ Programmming Language},
    +         note    = {\url{https://plg.uwaterloo.ca/~cforall}},
      }

    - @article{rob-thesis,
    + @mastersthesis{rob-thesis,
              keywords        = {Constructors, Destructors, Tuples},
              author  = {Rob Schluntz},
              title   = {Resource Management and Tuples in Cforall},
    -         year            = 2017
    +         year            = 2017,
    +         school  = {University of Waterloo},
    +         note    = {\url{https://uwspace.uwaterloo.ca/handle/10012/11830}},
      }

    …

      @article{BankTransfer,
    +         key     = {Bank Transfer},
              keywords        = {Bank Transfer},
              title   = {Bank Account Transfer Problem},
    …
      }

    - @book{Herlihy93,
    -         title={Transactional memory: Architectural support for lock-free data structures},
    -         author={Herlihy, Maurice and Moss, J Eliot B},
    -         volume={21},
    -         number={2},
    -         year={1993},
    -         publisher={ACM}
    + @article{Herlihy93,
    +         author  = {Herlihy, Maurice and Moss, J. Eliot B.},
    +         title   = {Transactional memory: architectural support for lock-free data structures},
    +         journal = {SIGARCH Comput. Archit. News},
    +         issue_date      = {May 1993},
    +         volume  = {21},
    +         number  = {2},
    +         month   = may,
    +         year    = {1993},
    +         pages   = {289--300},
    +         numpages        = {12},
    +         publisher       = {ACM},
    +         address = {New York, NY, USA},
      }

      @manual{affinityLinux,
    +         key     = {TBB},
              title           = "{Linux man page - sched\_setaffinity(2)}"
      }
    …
      }

    -
      @misc{NodeJs,
              title           = "{Node.js}",
  • doc/proposals/concurrency/figures/system.fig

    rb158d8f → rb6838214

      4 0 0 50 -1 0 11 0.0000 2 120 570 4125 7126 thread 4\001
      -6
    + 6 6975 4050 9525 7875
    + 2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
    +          7125 5400 7575 5400 7575 5850 7125 5850 7125 5400
    + 2 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5
    +          7125 4200 7575 4200 7575 4650 7125 4650 7125 4200
    + 2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
    +          7125 4800 7575 4800 7575 5250 7125 5250 7125 4800
    + 2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
    +          6975 4050 9525 4050 9525 7875 6975 7875 6975 4050
    + 3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
    +          7350 6900 7500 6975 7200 7050 7500 7125 7200 7200 7500 7275
    +          7200 7350 7500 7425 7200 7500 7350 7575
    +          0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
    +          -1.000 0.000
    + 3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
    +          7350 6000 7500 6075 7200 6150 7500 6225 7200 6300 7500 6375
    +          7200 6450 7500 6525 7200 6600 7350 6675
    +          0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
    +          -1.000 0.000
    + 4 0 0 50 -1 0 11 0.0000 2 120 945 7725 4500 Pthread stack\001
    + 4 0 0 50 -1 0 11 0.0000 2 150 1530 7725 5100 Pthread stack (stolen)\001
    + 4 0 0 50 -1 0 11 0.0000 2 120 540 7725 6375 Pthread\001
    + 4 0 0 50 -1 0 11 0.0000 2 150 1065 7725 7275 $\\CFA$ thread\001
    + 4 0 0 50 -1 0 11 0.0000 2 150 990 7725 5700 $\\CFA$ stack\001
    + -6
      1 2 0 1 0 7 50 -1 -1 0.000 1 3.1416 3150 5250 750 450 2400 4800 3900 5700
      2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
    …
      2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
               5550 3900 3825 5025
    - 2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
    -          7125 3525 7575 3525 7575 3975 7125 3975 7125 3525
    - 2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5
    -          6975 2175 9525 2175 9525 6000 6975 6000 6975 2175
      2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2
               900 6225 2400 5400
    …
      2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
               5100 975 5850 975 5850 2625 5100 2625 5100 975
    - 2 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5
    -          7125 2325 7575 2325 7575 2775 7125 2775 7125 2325
    - 2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
    -          7125 2925 7575 2925 7575 3375 7125 3375 7125 2925
      2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5
               525 7425 1275 7425 1275 9075 525 9075 525 7425
    …
      2 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5
               2025 2625 2775 2625 2775 975 2025 975 2025 2625
    - 3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10
    -          7350 5025 7500 5100 7200 5175 7500 5250 7200 5325 7500 5400
    -          7200 5475 7500 5550 7200 5625 7350 5700
    -          0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
    -          -1.000 0.000
    - 3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10
    -          7350 4125 7500 4200 7200 4275 7500 4350 7200 4425 7500 4500
    -          7200 4575 7500 4650 7200 4725 7350 4800
    -          0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
    -          -1.000 0.000
      4 0 0 50 -1 0 18 0.0000 2 30 225 4500 3150 ...\001
      4 0 0 50 -1 0 18 0.0000 2 30 225 3750 4500 ...\001
      4 0 0 50 -1 0 11 0.0000 2 120 705 2775 5325 Scheduler\001
    - 4 0 0 50 -1 0 11 0.0000 2 120 945 7725 2625 Pthread stack\001
    - 4 0 0 50 -1 0 11 0.0000 2 150 1530 7725 3225 Pthread stack (stolen)\001
    - 4 0 0 50 -1 0 11 0.0000 2 120 540 7725 4500 Pthread\001
    - 4 0 0 50 -1 0 11 0.0000 2 150 1065 7725 5400 $\\CFA$ thread\001
      4 0 0 50 -1 0 18 0.0000 2 30 225 4950 6600 ...\001
      4 0 0 50 -1 0 18 0.0000 2 30 225 4200 5850 ...\001
    - 4 0 0 50 -1 0 11 0.0000 2 150 990 7725 3825 $\\CFA$ stack\001
  • doc/proposals/concurrency/text/basics.tex

    rb158d8f → rb6838214

      At its core, concurrency is based on having multiple call-stacks and scheduling among threads of execution executing on these stacks. Concurrency without parallelism only requires having multiple call stacks (or contexts) for a single thread of execution.

    - Execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining. Execution with a single and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread perspective) across the stacks is called concurrency.
    -
    - Therefore, a minimal concurrency system can be achieved by creating coroutines, which instead of context-switching among each other, always ask an oracle where to context-switch next. While coroutines can execute on the caller?s stack-frame, stack-full coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model (aka non-preemptive scheduling). The oracle/scheduler can either be a stack-less or stack-full entity and correspondingly require one or two context-switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption.
    + Execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining. Execution with a single and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread's perspective) across the stacks is called concurrency.
    +
    + Therefore, a minimal concurrency system can be achieved by creating coroutines (see Section \ref{coroutine}), which instead of context-switching among each other, always ask an oracle where to context-switch next. While coroutines can execute on the caller's stack-frame, stack-full coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model (a.k.a., non-preemptive scheduling). The oracle/scheduler can either be a stack-less or stack-full entity and correspondingly require one or two context-switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption.

      A scheduler introduces order of execution uncertainty, while preemption introduces uncertainty about where context switches occur. Mutual exclusion and synchronization are ways of limiting non-determinism in a concurrent system. Now it is important to understand that uncertainty is desirable; uncertainty can be used by runtime systems to significantly increase performance and is often the basis of giving a user the illusion that tasks are running in parallel. Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows.

      \section{\protect\CFA's Thread Building Blocks}
    - One of the important features that are missing in C is threading. On modern architectures, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write efficient concurrent programs to take advantage of parallelism. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay.
    + One of the important features that are missing in C is threading\footnote{While the C11 standard defines a ``threads.h'' header, it is minimal and defined as optional. As such, library support for threading is far from widespread. At the time of writing the thesis, neither \texttt{gcc} nor \texttt{clang} support ``threads.h'' in their respective standard libraries.}. On modern architectures, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write efficient concurrent programs to take advantage of parallelism. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay.

      \section{Coroutines: A Stepping Stone}\label{coroutine}
    - While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. Coroutines need to deal with context switches and other context-management operations. Therefore, this proposal includes coroutines both as an intermediate step for the implementation of threads, and a first-class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolves around two features: independent call-stacks and \code{suspend}/\code{resume}.
    + While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. \textbf{Coroutine}s are generalized routines which have predefined points where execution is suspended and can be resumed at a later time. Therefore, they need to deal with context switches and other context-management operations. This proposal includes coroutines both as an intermediate step for the implementation of threads, and a first-class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolves around two features: independent call-stacks and \code{suspend}/\code{resume}.

      \begin{table}
    …
      \end{tabular}
      \end{center}
    - \caption{Different implementations of a Fibonacci sequence generator in C.},
    + \caption{Different implementations of a Fibonacci sequence generator in C.}
      \label{lst:fibonacci-c}
      \end{table}

    - A good example of a problem made easier with coroutines is generators, like the Fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Table \ref{lst:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approach suffer from strong coupling. The left and centre approaches require that the generator have knowledge of how the sequence is used, while the rightmost approach requires holding internal state between calls on behalf of the generator and makes it much harder to handle corner cases like the Fibonacci seed.
    + A good example of a problem made easier with coroutines is generators, e.g., generating the Fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Listing \ref{lst:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approach suffer from strong coupling. The left and centre approaches require that the generator have knowledge of how the sequence is used, while the rightmost approach requires holding internal state between calls on behalf of the generator and makes it much harder to handle corner cases like the Fibonacci seed.

      Listing \ref{lst:fibonacci-cfa} is an example of a solution to the Fibonacci problem using \CFA coroutines, where the coroutine stack holds sufficient state for the next generation. This solution has the advantage of having very strong decoupling between how the sequence is generated and how it is used. Indeed, this version is as easy to use as the \code{fibonacci_state} solution, while the implementation is very similar to the \code{fibonacci_func} example.
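
    Note: the \CFA listing referenced above lies outside the changed hunks, so it is not reproduced in this view. As context, a minimal sketch of the coroutine approach, assuming the \code{coroutine} keyword and the \code{suspend}/\code{resume} primitives named in the surrounding text (the \code{next} helper is a hypothetical illustration, not the thesis listing itself):

        coroutine Fibonacci { int fn; };         // fn: value produced by the last resume

        void main(Fibonacci & fib) {             // coroutine main, runs on fib's own stack
            int f1 = 0, f2 = 1;
            for (;;) {
                fib.fn = f1;                     // publish the next value
                suspend();                       // return control to the resumer
                int next = f1 + f2;              // local state is retained across suspends
                f1 = f2; f2 = next;
            }
        }

        int next(Fibonacci & fib) {
            resume(fib);                         // run main until its next suspend
            return fib.fn;
        }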
     
    …
      One important design challenge for implementing coroutines and threads (shown in section \ref{threads}) is that the runtime system needs to run code after the user-constructor runs to connect the fully constructed object into the system. In the case of coroutines, this challenge is simpler since there is no non-determinism from preemption or scheduling. However, the underlying challenge remains the same for coroutines and threads.

    - The runtime system needs to create the coroutine?s stack and more importantly prepare it for the first resumption. The timing of the creation is non-trivial since users both expect to have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. There are several solutions to this problem but the chosen option effectively forces the design of the coroutine.
    + The runtime system needs to create the coroutine's stack and, more importantly, prepare it for the first resumption. The timing of the creation is non-trivial since users expect both to have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. There are several solutions to this problem but the chosen option effectively forces the design of the coroutine.

      Furthermore, \CFA faces an extra challenge as polymorphic routines create invisible thunks when cast to non-polymorphic routines and these thunks have function scope. For example, the following code, while looking benign, can run into undefined behaviour because of thunks:
    …
      }
      \end{ccode}
    - The problem in this example is a storage management issue, the function pointer \code{_thunk0} is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes Undefined Behaviour; i.e., the stack-based thunk being destroyed before it can be used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that nested routine cannot be passed outside of the declaration scope. The case of coroutines and threads is simply an extension of this problem to multiple call stacks.
    + The problem in this example is a storage management issue, the function pointer \code{_thunk0} is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes undefined behaviour; i.e., the stack-based thunk being destroyed before it can be used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that nested routine cannot be passed outside of the declaration scope. The case of coroutines and threads is simply an extension of this problem to multiple call stacks.
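
    Note: as context for the nested-routine limitation mentioned above, a small C sketch (not part of this changeset) showing how a GCC nested routine escaping its declaration scope hits the same storage problem as the stack-based thunk:

        #include <stdio.h>

        typedef int (*adder_t)(int);

        adder_t make_adder(void) {
            int offset = 42;
            int add(int x) { return x + offset; }   // GCC nested routine; trampoline lives on the stack
            return add;                             // pointer outlives make_adder's stack frame
        }

        int main(void) {
            adder_t f = make_adder();
            printf("%d\n", f(1));   // undefined behaviour: trampoline and offset are gone
            return 0;
        }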

      \subsection{Alternative: Composition}
    …
      symmetric_coroutine<>::yield_type
      \end{cfacode}
    - Often, the canonical threading paradigm in languages is based on function pointers, pthread being one of the most well-known examples. The main problem of this approach is that the thread usage is limited to a generic handle that must otherwise be wrapped in a custom type. Since the custom type is simple to write in \CFA and solves several issues, added support for routine/lambda based coroutines adds very little.
    -
    - A variation of this would be to use a simple function pointer in the same way pthread does for threads :
    + Often, the canonical threading paradigm in languages is based on function pointers, \texttt{pthread} being one of the most well-known examples. The main problem of this approach is that the thread usage is limited to a generic handle that must otherwise be wrapped in a custom type. Since the custom type is simple to write in \CFA and solves several issues, added support for routine/lambda based coroutines adds very little.
    +
    + A variation of this would be to use a simple function pointer in the same way \texttt{pthread} does for threads:
      \begin{cfacode}
      void foo( coroutine_t cid, void* arg ) {
    …
      \subsection{Alternative: Trait-Based Coroutines}

    - Finally, the underlying approach, which is the one closest to \CFA idioms, is to use trait-based lazy coroutines. This approach defines a coroutine as anything that satisfies the trait \code{is_coroutine} and is used as a coroutine.
    + Finally, the underlying approach, which is the one closest to \CFA idioms, is to use trait-based lazy coroutines. This approach defines a coroutine as anything that satisfies the trait \code{is_coroutine} (as defined below) and is used as a coroutine.

      \begin{cfacode}
    …
      forall( dtype T | is_coroutine(T) ) void resume (T&);
      \end{cfacode}
    - This ensures an object is not a coroutine until \code{resume} is called on the object. Correspondingly, any object that is passed to \code{resume} is a coroutine since it must satisfy the \code{is_coroutine} trait to compile. The advantage of this approach is that users can easily create different types of coroutines, for example, changing the memory layout of a coroutine is trivial when implementing the \code{get_coroutine} routine. The \CFA keyword \code{coroutine} only has the effect of implementing the getter and forward declarations required for users to implement the main routine.
    + This ensures that an object is not a coroutine until \code{resume} is called on the object. Correspondingly, any object that is passed to \code{resume} is a coroutine since it must satisfy the \code{is_coroutine} trait to compile. The advantage of this approach is that users can easily create different types of coroutines, for example, changing the memory layout of a coroutine is trivial when implementing the \code{get_coroutine} routine. The \CFA keyword \code{coroutine} simply has the effect of implementing the getter and forward declarations required for users to implement the main routine.

      \begin{center}
    …
      \end{cfacode}

    - Obviously, for this thread implementation to be useful it must run some user code. Several other threading interfaces use a function-pointer representation as the interface of threads (for example \Csharp~\cite{Csharp} and Scala~\cite{Scala}). However, this proposal considers that statically tying a \code{main} routine to a thread supersedes this approach. Since the \code{main} routine is already a special routine in \CFA (where the program begins), it is a natural extension of the semantics using overloading to declare mains for different threads (the normal main being the main of the initial thread). As such the \code{main} routine of a thread can be defined as
    + Obviously, for this thread implementation to be useful it must run some user code. Several other threading interfaces use a function-pointer representation as the interface of threads (for example \Csharp~\cite{Csharp} and Scala~\cite{Scala}). However, this proposal considers that statically tying a \code{main} routine to a thread supersedes this approach. Since the \code{main} routine is already a special routine in \CFA (where the program begins), it is a natural extension of the semantics to use overloading to declare mains for different threads (the normal main being the main of the initial thread). As such the \code{main} routine of a thread can be defined as
      \begin{cfacode}
      thread foo {};
    …
      A consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \acrshort{api}.

    - Of course for threads to be useful, it must be possible to start and stop threads and wait for them to complete execution. While using an \acrshort{api} such as \code{fork} and \code{join} is relatively common in the literature, such an interface is unnecessary. Indeed, the simplest approach is to use \acrshort{raii} principles and have threads \code{fork} after the constructor has completed and \code{join} before the destructor runs.
    + Of course, for threads to be useful, it must be possible to start and stop threads and wait for them to complete execution. While using an \acrshort{api} such as \code{fork} and \code{join} is relatively common in the literature, such an interface is unnecessary. Indeed, the simplest approach is to use \acrshort{raii} principles and have threads \code{fork} after the constructor has completed and \code{join} before the destructor runs.
      \begin{cfacode}
      thread World;
    …
      \end{cfacode}

    - However, one of the drawbacks of this approach is that threads always form a lattice, i.e., they are always destroyed in the opposite order of construction because of block structure. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created.
    + However, one of the drawbacks of this approach is that threads always form a tree where nodes must always outlive their children, i.e., they are always destroyed in the opposite order of construction because of C scoping rules. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created.

      \begin{cfacode}
  • doc/proposals/concurrency/text/cforall.tex

    rb158d8f → rb6838214

      % ======================================================================
      \section{Operators}
    - Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation occur, e.g.:
    + Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation appear, e.g.:
      \begin{cfacode}
      int ++? (int op);                       //unary prefix increment
    …
      % ======================================================================
      \section{Constructors/Destructors}
    - Object lifetime is often a challenge in concurrency. \CFA uses the approach of giving concurrent meaning to object lifetime as a means of synchronization and/or mutual exclusion. Since \CFA relies heavily on the lifetime of objects, constructors and destructors is a core feature required for concurrency and parallelism. \CFA uses the following syntax for constructors and destructors :
    + Object lifetime is often a challenge in concurrency. \CFA uses the approach of giving concurrent meaning to object lifetime as a means of synchronization and/or mutual exclusion. Since \CFA relies heavily on the lifetime of objects, constructors and destructors is a core feature required for concurrency and parallelism. \CFA uses the following syntax for constructors and destructors:
      \begin{cfacode}
      struct S {
    …
      % ======================================================================
      \section{Parametric Polymorphism}
    - Routines in \CFA can also be reused for multiple types. This capability is done using the \code{forall} clause, which gives \CFA its name. \code{forall} clauses allow separately compiled routines to support generic usage over multiple types. For example, the following sum function works for any type that supports construction from 0 and addition :
    + \label{s:ParametricPolymorphism}
    + Routines in \CFA can also be reused for multiple types. This capability is done using the \code{forall} clauses, which allow separately compiled routines to support generic usage over multiple types. For example, the following sum function works for any type that supports construction from 0 and addition:
      \begin{cfacode}
      //constraint type, 0 and +
    …
      Since writing constraints on types can become cumbersome for more constrained functions, \CFA also has the concept of traits. Traits are named collection of constraints that can be used both instead and in addition to regular constraints:
      \begin{cfacode}
    - trait sumable( otype T ) {
    + trait summable( otype T ) {
              void ?{}(T *, zero_t);          //constructor from 0 literal
              T ?+?(T, T);                            //assortment of additions
    …
              T ?++(T *);
      };
    - forall( otype T | sumable(T) )  //use trait
    + forall( otype T | summable(T) ) //use trait
      T sum(T a[], size_t size);
      \end{cfacode}
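
    Note: for illustration (not part of this changeset), a hedged usage sketch of the trait, assuming \code{int} satisfies \code{summable} via its 0 literal and built-in addition:

        int a[5] = { 1, 2, 3, 4, 5 };
        int total = sum(a, 5);   // T bound to int; resolution requires int's 0 literal and ?+?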
     
    …
      % ======================================================================
      \section{with Clause/Statement}
    - Since \CFA lacks the concept of a receiver, certain functions end-up needing to repeat variable names often. To remove this inconvenience, \CFA provides the \code{with} statement, which opens an aggregate scope making its fields directly accessible (like Pascal).
    + Since \CFA lacks the concept of a receiver, certain functions end up needing to repeat variable names often. To remove this inconvenience, \CFA provides the \code{with} statement, which opens an aggregate scope making its fields directly accessible (like Pascal).
      \begin{cfacode}
      struct S { int i, j; };
  • doc/proposals/concurrency/text/concurrency.tex

    rb158d8f → rb6838214

      % ======================================================================
      % ======================================================================
    - Several tools can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared-state, some languages and libraries simply disallow mutable shared-state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely relate to networking concepts (channels~\cite{CSP,Go} for example). However, in languages that use routine calls as their core abstraction mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine calls). This distinction in turn means that, in order to be effective, programmers need to learn two sets of design patterns. While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
    + Several tools can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared state, some languages and libraries simply disallow mutable shared state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely relate to networking concepts (channels~\cite{CSP,Go} for example). However, in languages that use routine calls as their core abstraction mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine calls). This distinction in turn means that, in order to be effective, programmers need to learn two sets of design patterns. While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.

      Approaches based on shared memory are more closely related to non-concurrent paradigms since they often rely on basic constructs like routine calls and shared objects. At the lowest level, concurrent paradigms are implemented as atomic operations and locks. Many such mechanisms have been proposed, including semaphores~\cite{Dijkstra68b} and path expressions~\cite{Campbell74}. However, for productivity reasons it is desirable to have a higher-level construct be the core concurrency paradigm~\cite{HPP:Study}.
    …

      \subsection{Mutual-Exclusion}
    - As mentioned above, mutual-exclusion is the guarantee that only a fix number of threads can enter a critical section at once. However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use. Methods range from low-level locks, which are fast and flexible but require significant attention to be correct, to higher-level mutual-exclusion methods, which sacrifice some performance in order to improve ease of use. Ease of use comes by either guaranteeing some problems cannot occur (e.g., being deadlock free) or by offering a more explicit coupling between data and corresponding critical section. For example, the \CC \code{std::atomic<T>} offers an easy way to express mutual-exclusion on a restricted set of operations (e.g.: reading/writing large types atomically). Another challenge with low-level locks is composability. Locks have restricted composability because it takes careful organizing for multiple locks to be used while preventing deadlocks. Easing composability is another feature higher-level mutual-exclusion mechanisms often offer.
    + As mentioned above, mutual-exclusion is the guarantee that only a fix number of threads can enter a critical section at once. However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use. Methods range from low-level locks, which are fast and flexible but require significant attention to be correct, to higher-level concurrency techniques, which sacrifice some performance in order to improve ease of use. Ease of use comes by either guaranteeing some problems cannot occur (e.g., being deadlock free) or by offering a more explicit coupling between data and corresponding critical section. For example, the \CC \code{std::atomic<T>} offers an easy way to express mutual-exclusion on a restricted set of operations (e.g., reading/writing large types atomically). Another challenge with low-level locks is composability. Locks have restricted composability because it takes careful organizing for multiple locks to be used while preventing deadlocks. Easing composability is another feature higher-level mutual-exclusion mechanisms often offer.

      \subsection{Synchronization}
    - As for mutual-exclusion, low-level synchronization primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, e.g.: message passing, or offering a simpler solution to otherwise involved challenges. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronization happens within a critical section, where threads must acquire mutual-exclusion in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property is called barging. For example, where event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. The classic example is the thread that finishes using a resource and unblocks a thread waiting to use the resource, but the unblocked thread must compete again to acquire the resource. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use flag variables to detect barging threads are said to be using barging avoidance, while algorithms that baton-pass locks~\cite{Andrews89} between threads instead of releasing the locks are said to be using barging prevention.
    + As with mutual-exclusion, low-level synchronization primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanisms often simplify usage by adding either better coupling between synchronization and data (e.g., message passing) or offering a simpler solution to otherwise involved challenges. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronization happens within a critical section, where threads must acquire mutual-exclusion in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property is called \textbf{barging}. For example, where event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. The classic example is the thread that finishes using a resource and unblocks a thread waiting to use the resource, but the unblocked thread must compete to acquire the resource. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use flag variables to detect barging threads are said to be using barging avoidance, while algorithms that baton-pass locks~\cite{Andrews89} between threads instead of releasing the locks are said to be using barging prevention.
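
    Note: to make the avoidance technique concrete, a hedged C/pthreads sketch (not part of this changeset) of the classic flag-plus-recheck pattern that tolerates barging threads:

        #include <pthread.h>
        #include <stdbool.h>

        pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
        pthread_cond_t  c = PTHREAD_COND_INITIALIZER;
        bool ready = false;                  // flag variable used to detect barging

        void acquire_resource(void) {
            pthread_mutex_lock(&m);
            while (!ready)                   // re-check: a barging thread may have
                pthread_cond_wait(&c, &m);   // taken the resource after the signal
            ready = false;                   // claim the resource
            pthread_mutex_unlock(&m);
        }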

      % ======================================================================
    …
      % ======================================================================
      % ======================================================================
    - A monitor is a set of routines that ensure mutual exclusion when accessing shared state. This concept is generally associated with Object-Oriented Languages like Java~\cite{Java} or \uC~\cite{uC++book} but does not strictly require OO semantics. The only requirement is the ability to declare a handle to a shared object and a set of routines that act on it :
    + A \textbf{monitor} is a set of routines that ensure mutual-exclusion when accessing shared state. More precisely, a monitor is a programming technique that associates mutual-exclusion to routine scopes, as opposed to mutex locks, where mutual-exclusion is defined by lock/release calls independently of any scoping of the calling routine. This strong association eases readability and maintainability, at the cost of flexibility. Note that both monitors and mutex locks, require an abstract handle to identify them. This concept is generally associated with object-oriented languages like Java~\cite{Java} or \uC~\cite{uC++book} but does not strictly require OO semantics. The only requirement is the ability to declare a handle to a shared object and a set of routines that act on it:
      \begin{cfacode}
      typedef /*some monitor type*/ monitor;
    - int f(monitor& m);
    + int f(monitor & m);

      int main() {
    …
      % ======================================================================
      % ======================================================================
    - The above monitor example displays some of the intrinsic characteristics. First, it is necessary to use pass-by-reference over pass-by-value for monitor routines. This semantics is important, because at their core, monitors are implicit mutual-exclusion objects (locks), and these objects cannot be copied. Therefore, monitors are implicitly non-copy-able objects (\code{dtype}).
    -
    - Another aspect to consider is when a monitor acquires its mutual exclusion. For example, a monitor may need to be passed through multiple helper routines that do not acquire the monitor mutual-exclusion on entry. Pass through can occur for generic helper routines (\code{swap}, \code{sort}, etc.) or specific helper routines like the following to implement an atomic counter :
    + The above monitor example displays some of the intrinsic characteristics. First, it is necessary to use pass-by-reference over pass-by-value for monitor routines. This semantics is important, because at their core, monitors are implicit mutual-exclusion objects (locks), and these objects cannot be copied. Therefore, monitors are non-copy-able objects (\code{dtype}).
    +
    + Another aspect to consider is when a monitor acquires its mutual exclusion. For example, a monitor may need to be passed through multiple helper routines that do not acquire the monitor mutual-exclusion on entry. Passthrough can occur for generic helper routines (\code{swap}, \code{sort}, etc.) or specific helper routines like the following to implement an atomic counter:

      \begin{cfacode}
    …
      \end{tabular}
      \end{center}
    - Notice how the counter is used without any explicit synchronization and yet supports thread-safe semantics for both reading and writing, which is similar in usage to \CC \code{atomic} template.
    + Notice how the counter is used without any explicit synchronization and yet supports thread-safe semantics for both reading and writing, which is similar in usage to the \CC template \code{std::atomic}.

      Here, the constructor (\code{?\{\}}) uses the \code{nomutex} keyword to signify that it does not acquire the monitor mutual-exclusion when constructing. This semantics is because an object not yet con\-structed should never be shared and therefore does not require mutual exclusion. Furthermore, it allows the implementation greater freedom when it initializes the monitor locking. The prefix increment operator uses \code{mutex} to protect the incrementing process from race conditions. Finally, there is a conversion operator from \code{counter_t} to \code{size_t}. This conversion may or may not require the \code{mutex} keyword depending on whether or not reading a \code{size_t} is an atomic operation.
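
    Note: the counter listing itself sits in an unchanged (elided) region of this diff. A hedged sketch of its shape, following the description above (operator names follow \CFA conventions; details may differ from the actual thesis listing):

        monitor counter_t { int value; };

        void ?{}(counter_t & nomutex this) { this.value = 0; }       // constructor: object not yet shared, no lock
        size_t ++?(counter_t & mutex this) { return ++this.value; }  // prefix increment: acquires the monitor
        void ?{}(size_t * this, counter_t & mutex cnt) { *this = cnt.value; }  // conversion to size_t, read under mutex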
     
    …
      \end{figure}

    - Having both \code{mutex} and \code{nomutex} keywords is redundant based on the meaning of a routine having neither of these keywords. For example, it is reasonable that it should default to the safest option (\code{mutex}) when given a routine without qualifiers \code{void foo(counter_t & this)}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. On the other hand, \code{nomutex} is the ``normal'' parameter behaviour, it effectively states explicitly that ``this routine is not special''. Another alternative is making exactly one of these keywords mandatory, which provides the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor or not. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}.
    + Having both \code{mutex} and \code{nomutex} keywords can be redundant, depending on the meaning of a routine having neither of these keywords. For example, it is reasonable that it should default to the safest option (\code{mutex}) when given a routine without qualifiers \code{void foo(counter_t & this)}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. On the other hand, \code{nomutex} is the ``normal'' parameter behaviour, it effectively states explicitly that ``this routine is not special''. Another alternative is making exactly one of these keywords mandatory, which provides the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor or not. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}.

      The next semantic decision is to establish when \code{mutex} may be used as a type qualifier. Consider the following declarations:
      \begin{cfacode}
    - int f1(monitor& mutex m);
    + int f1(monitor & mutex m);
      int f2(const monitor & mutex m);
    - int f3(monitor** mutex m);
    - int f4(monitor* mutex m []);
    - int f5(graph(monitor*) & mutex m);
    + int f3(monitor ** mutex m);
    + int f4(monitor * mutex m []);
    + int f5(graph(monitor *) & mutex m);
      \end{cfacode}
      The problem is to identify which object(s) should be acquired. Furthermore, each object needs to be acquired only once. In the case of simple routines like \code{f1} and \code{f2} it is easy to identify an exhaustive list of objects to acquire on entry. Adding indirections (\code{f3}) still allows the compiler and programmer to identify which object is acquired. However, adding in arrays (\code{f4}) makes it much harder. Array lengths are not necessarily known in C, and even then, making sure objects are only acquired once becomes none-trivial. This problem can be extended to absurd limits like \code{f5}, which uses a graph of monitors. To make the issue tractable, this project imposes the requirement that a routine may only acquire one monitor per parameter and it must be the type of the parameter with at most one level of indirection (ignoring potential qualifiers). Also note that while routine \code{f3} can be supported, meaning that monitor \code{**m} is acquired, passing an array to this routine would be type-safe and yet result in undefined behaviour because only the first element of the array is acquired. However, this ambiguity is part of the C type-system with respects to arrays. For this reason, \code{mutex} is disallowed in the context where arrays may be passed:
      \begin{cfacode}
    - int f1(monitor& mutex m);   //Okay : recommended case
    - int f2(monitor* mutex m);   //Okay : could be an array but probably not
    + int f1(monitor & mutex m);    //Okay : recommended case
    + int f2(monitor * mutex m);    //Not Okay : Could be an array
      int f3(monitor mutex m []);  //Not Okay : Array of unknown length
    - int f4(monitor** mutex m);  //Not Okay : Could be an array
    - int f5(monitor* mutex m []); //Not Okay : Array of unknown length
    + int f4(monitor ** mutex m);   //Not Okay : Could be an array
    + int f5(monitor * mutex m []); //Not Okay : Array of unknown length
      \end{cfacode}
      Note that not all array functions are actually distinct in the type system. However, even if the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic.

    - Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to acquire mutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls.
    + Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to specify the object that acquires mutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls.
      \begin{cfacode}
      int f(MonitorA & mutex a, MonitorB & mutex b);
    …
      The \gls{multi-acq} monitor lock allows a monitor lock to be acquired by both \code{bar} or \code{baz} and acquired again in \code{foo}. In the calls to \code{bar} and \code{baz} the monitors are acquired in opposite order.

    - However, such use leads to the lock acquiring order problems. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle difference means that calling these routines concurrently may lead to deadlock and is therefore Undefined Behaviour. As shown~\cite{Lister77}, solving this problem requires:
    + However, such use leads to lock acquiring order problems. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle difference means that calling these routines concurrently may lead to deadlock and is therefore undefined behaviour. As shown~\cite{Lister77}, solving this problem requires:
      \begin{enumerate}
    -         \item Dynamically tracking of the monitor-call order.
    +         \item Dynamically tracking the monitor-call order.
              \item Implement rollback semantics.
      \end{enumerate}
    …
      \subsection{\code{mutex} statement} \label{mutex-stmt}

    - The call semantics discussed above have one software engineering issue, only a named routine can acquire the mutual-exclusion of a set of monitor. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem~\cite{2FTwoHardThings}. Table \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitor is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
    + The call semantics discussed above have one software engineering issue: only a routine can acquire the mutual-exclusion of a set of monitor. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem~\cite{2FTwoHardThings}. Table \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitor is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
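
    Note: the example table is elided from this view. A hedged sketch of the statement's shape, per the description above:

        monitor M {};

        void foo(M & m1, M & m2) {   // no mutex parameters: nothing acquired on entry
            // ...
            mutex(m1, m2) {          // acquire both monitors for this block only
                // critical section
            }
            // ...
        }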

      \begin{table}
    …
      \end{cfacode}

    - Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the \code{monitor} keyword. The monitor trait is :
    + Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the \code{monitor} keyword. The monitor trait is:
      \begin{cfacode}
      trait is_monitor(dtype T) {
    …
      };
      \end{cfacode}
    - Note that the destructor of a monitor must be a \code{mutex} routine to prevent deallocation while a thread is accessing the monitor. As with any object, calls to a monitor, using \code{mutex} or otherwise, is Undefined Behaviour after the destructor has run.
    + Note that the destructor of a monitor must be a \code{mutex} routine to prevent deallocation while a thread is accessing the monitor. As with any object, calls to a monitor, using \code{mutex} or otherwise, is undefined behaviour after the destructor has run.

      % ======================================================================
    …
      % ======================================================================
      % ======================================================================
    - In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronization. With monitors, this capability is generally achieved with internal or external scheduling as in~\cite{Hoare74}. Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
    -
    - First, here is a simple example of internal scheduling :
    + In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronization. With monitors, this capability is generally achieved with internal or external scheduling as in~\cite{Hoare74}. With \textbf{scheduling} loosely defined as deciding which thread acquires the critical section next, \textbf{internal scheduling} means making the decision from inside the critical section (i.e., with access to the shared state), while \textbf{external scheduling} means making the decision when entering the critical section (i.e., without access to the shared state). Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
    +
    + First, here is a simple example of internal scheduling:

      \begin{cfacode}
    …
      }
      \end{cfacode}
    - There are two details to note here. First, the \code{signal} is a delayed operation, it only unblocks the waiting thread when it reaches the end of the critical section. This semantic is needed to respect mutual-exclusion, i.e., the signaller and signalled thread cannot be in the monitor simultaneously. The alternative is to return immediately after the call to \code{signal}, which is significantly more restrictive. Second, in \CFA, while it is common to store a \code{condition} as a field of the monitor, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, ensuring a basic ordering.
    -
    - An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to resume immediately after (unless some other thread waited on the same condition). This guarantee offers the benefit of not having to loop around waits to recheck that a condition is met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency.
    + There are two details to note here. First, \code{signal} is a delayed operation; it only unblocks the waiting thread when it reaches the end of the critical section. This semantics is needed to respect mutual-exclusion, i.e., the signaller and signalled thread cannot be in the monitor simultaneously. The alternative is to return immediately after the call to \code{signal}, which is significantly more restrictive. Second, in \CFA, while it is common to store a \code{condition} as a field of the monitor, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, ensuring a basic ordering.
    +
    + An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to be the next thread to acquire the monitor (unless some other thread waited on the same condition). This guarantee offers the benefit of not having to loop around waits to recheck that a condition is met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency.
    258258
    259259% ======================================================================
     
    262262% ======================================================================
    263263% ======================================================================
    264 It is easier to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling is done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use the implicit condition variable as parameters and explicitly names the monitors (A and B) associated with the condition. Note that in \CFA, condition variables are tied to a \emph{group} of monitors on first use (called branding), which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors. The example below shows the simple case of having two threads (one for each column) and a single monitor A.
      264It is easy to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling are done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use the implicit condition variable as their parameter and explicitly name the monitors (A and B) associated with the condition. Note that in \CFA, condition variables are tied to a \emph{group} of monitors on first use (called branding), which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors. The example below shows the simple case of having two threads (one for each column) and a single monitor A.
    265265
    266266\begin{multicols}{2}
     
    299299\noindent This version uses \gls{bulk-acq} (denoted using the {\sf\&} symbol), but the presence of multiple monitors does not add a particularly new meaning. Synchronization happens between the two threads in exactly the same way and order. The only difference is that mutual exclusion covers a group of monitors. On the implementation side, handling multiple monitors does add a degree of complexity as the next few examples demonstrate.
    300300
    301 While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well-known deadlock problem is the Nested Monitor Problem~\cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem :
     301While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well-known deadlock problem is the Nested Monitor Problem~\cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem:
    302302\begin{multicols}{2}
    303303\begin{pseudo}
     
    351351% ======================================================================
    352352
    353 A larger example is presented to show complex issues for \gls{bulk-acq} and all the implementation options are analyzed. Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code to implement the pseudo-code in listing \ref{lst:int-bulk-pseudo}. For the purpose of translating the given pseudo-code into \CFA-code, any method of introducing a monitor is acceptable, e.g., \code{mutex} parameters, global variables, pointer parameters, or using locals with the \code{mutex}-statement.
      353A larger example is presented to show the complex issues raised by \gls{bulk-acq}, and its implementation options are analyzed. Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code to implement the pseudo-code in listing \ref{lst:int-bulk-pseudo}. For the purpose of translating the given pseudo-code into \CFA-code, any method of introducing a monitor is acceptable, e.g., \code{mutex} parameters, global variables, pointer parameters, or using locals with the \code{mutex} statement.
    354354
    355355\begin{figure}[!t]
     
    446446\end{figure}
    447447
    448 The complexity begins at code sections 4 and 8 in listing \ref{lst:int-bulk-pseudo}, which are where the existing semantics of internal scheduling needs to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired and is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should ``release \code{A & B}'' (listing \ref{lst:int-bulk-pseudo} line \ref{line:releaseFirst}), it must actually transfer ownership of monitor \code{B} to the waiting thread. This ownership transfer is required in order to prevent barging into \code{B} by another thread, since both the signalling and signalled threads still need monitor \code{A}. There are three options.
     448The complexity begins at code sections 4 and 8 in listing \ref{lst:int-bulk-pseudo}, which are where the existing semantics of internal scheduling needs to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired, which is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should ``release \code{A & B}'' (listing \ref{lst:int-bulk-pseudo} line \ref{line:releaseFirst}), it must actually transfer ownership of monitor \code{B} to the waiting thread. This ownership transfer is required in order to prevent barging into \code{B} by another thread, since both the signalling and signalled threads still need monitor \code{A}. There are three options:
    449449
    450450\subsubsection{Delaying Signals}
    451 The obvious solution to solve the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that that moment is when the last lock is no longer needed because this semantics fits most closely to the behaviour of single-monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from multiple objects to a single group of objects, effectively making the existing single-monitor semantic viable by simply changing monitors to monitor groups. This solution releases the monitors once every monitor in a group can be released. However, since some monitors are never released (i.e., the monitor of a thread), this interpretation means a group might never be released. A more interesting interpretation is to transfer the group until it can be disbanded, which means the group is not passed further and a thread can retain its locks.
    452 
    453 However, listing \ref{lst:int-secret} shows this solution can become much more complicated depending on what is executed while secretly holding B at line \ref{line:secret}, while avoiding the need to transfer ownership of a subset of the condition monitors. Listing \ref{lst:dependency} shows a slightly different example where a third thread is waiting on monitor \code{A}, using a different condition variable. Because the third thread is signalled when secretly holding \code{B}, the goal  becomes unreachable. Depending on the order of signals (listing \ref{lst:dependency} line \ref{line:signal-ab} and \ref{line:signal-a}) two cases can happen :
      451The obvious solution to the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that this moment is when the last lock is no longer needed, because this semantics most closely fits the behaviour of single-monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from multiple objects to a single group of objects, effectively making the existing single-monitor semantics viable by simply changing monitors to monitor groups. This solution releases the monitors once every monitor in a group can be released. However, since some monitors are never released (e.g., the monitor of a thread), this interpretation means a group might never be released. A more interesting interpretation is to transfer the group until all its monitors are released, which means the group is not passed further and a thread can retain its locks.
     452
      453However, listing \ref{lst:int-secret} shows this solution can become much more complicated depending on what is executed while secretly holding \code{B} at line \ref{line:secret}, while avoiding the need to transfer ownership of a subset of the condition monitors. Listing \ref{lst:dependency} shows a slightly different example where a third thread is waiting on monitor \code{A}, using a different condition variable. Because the third thread is signalled when secretly holding \code{B}, the goal becomes unreachable. Depending on the order of signals (listing \ref{lst:dependency} lines \ref{line:signal-ab} and \ref{line:signal-a}) two cases can happen:
    454454
    455455\paragraph{Case 1: thread $\alpha$ goes first.} In this case, the problem is that monitor \code{A} needs to be passed to thread $\beta$ when thread $\alpha$ is done with it.
     
    459459Note that ordering is not determined by a race condition but by whether signalled threads are enqueued in FIFO or FILO order. However, regardless of the answer, users can move line \ref{line:signal-a} before line \ref{line:signal-ab} and get the reverse effect for listing \ref{lst:dependency}.
    460460
    461 In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means knowing when to dispand a group becomes complex and inefficient (see next section) and therefore effectively precludes this approach.
     461In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means knowing when to release a group becomes complex and inefficient (see next section) and therefore effectively precludes this approach.
    462462
     463463\subsubsection{Dependency Graphs}
     
    501501\end{figure}
    502502
    503 In listing \ref{lst:int-bulk-pseudo}, there is a solution that satisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases \code{A & B} and then the waiter transfers back ownership of \code{A} back to the signaller when it releases it, then the problem is solved (\code{B} is no longer in use at this point). Dynamically finding the correct order is therefore the second possible solution. The problem is effectively resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and it seems to increase in a manner close to polynomial. This complexity explosion can be seen in listing \ref{lst:explosion}, which is just a direct extension to three monitors, requires at least three ownership transfer and has multiple solutions. Furthermore, the presence of multiple solutions for ownership transfer can cause deadlock problems if a specific solution is not consistently picked; In the same way that multiple lock acquiring order can cause deadlocks.
      503In listing \ref{lst:int-bulk-pseudo}, there is a solution that satisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases \code{A & B}, and the waiter then transfers ownership of \code{A} back to the signaller when it releases it, then the problem is solved (\code{B} is no longer in use at this point). Dynamically finding the correct order is therefore the second possible solution. The problem is effectively resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and has a super-linear complexity. This complexity can be seen in listing \ref{lst:explosion}, which, while being just a direct extension to three monitors, requires at least three ownership transfers and has multiple solutions. Furthermore, the presence of multiple solutions for ownership transfer can cause deadlock problems if a specific solution is not consistently picked, in the same way that multiple lock-acquisition orders can cause deadlocks.
    504504\begin{figure}
    505505\begin{multicols}{2}
     
    530530\end{figure}
    531531
    532 Given the three threads example in listing \ref{lst:dependency}, figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows the dependency of that statement (e.g., $\alpha1$ must happen before $\alpha2$). The extra challenge is that this dependency graph is effectively post-mortem, but the runtime system needs to be able to build and solve these graphs as the dependency unfolds. Resolving dependency graphs being a complex and expensive endeavour, this solution is not the preferred one.
      532Given the three-thread example in listing \ref{lst:dependency}, figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows represent the dependencies of that statement (e.g., $\alpha1$ must happen before $\alpha2$). The extra challenge is that this dependency graph is effectively post-mortem, but the runtime system needs to be able to build and solve these graphs as the dependencies unfold. Since resolving dependency graphs is a complex and expensive endeavour, this solution is not the preferred one.
    533533
    534534\subsubsection{Partial Signalling} \label{partial-sig}
    535 Finally, the solution that is chosen for \CFA is to use partial signalling. Again using listing \ref{lst:int-bulk-pseudo}, the partial signalling solution transfers ownership of monitor \code{B} at lines \ref{line:signal1} to the waiter but does not wake the waiting thread since it is still using monitor \code{A}. Only when it reaches line \ref{line:lastRelease} does it actually wake up the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions, passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency graph solving algorithms, which is why it was chosen. Furthermore, after being fully implemented, this solution does not appear to have any significant downsides.
    536 
    537 Using partial signalling, listing \ref{lst:dependency} can be solved easily :
      535Finally, the solution that is chosen for \CFA is to use partial signalling. Again using listing \ref{lst:int-bulk-pseudo}, the partial signalling solution transfers ownership of monitor \code{B} at line \ref{line:signal1} to the waiter but does not wake the waiting thread since it is still using monitor \code{A}. Only when it reaches line \ref{line:lastRelease} does it actually wake up the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency-graph solving algorithm, which is why it was chosen. Furthermore, after being fully implemented, this solution does not appear to have any significant downsides.
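The release path under partial signalling can be sketched as follows; the names (\code{monitor_desc}, \code{signal_stack}, \code{wait_count}) are hypothetical and serve only to illustrate the two actions:
\begin{cfacode}
void leave( monitor_desc * this ) {
	//action 1: pass the monitor to a thread signalled on it, if any
	thread_desc * heir = pop_head( this->signal_stack );
	if( heir ) {
		this->owner = heir;
		//action 2: wake the heir only once it owns its entire monitor set
		if( --heir->wait_count == 0 ) schedule( heir );
	} else {
		this->owner = 0;        //no waiter: release the monitor normally
		unlock( this->lock );
	}
}
\end{cfacode}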
     536
     537Using partial signalling, listing \ref{lst:dependency} can be solved easily:
    538538\begin{itemize}
    539539        \item When thread $\gamma$ reaches line \ref{line:release-ab} it transfers monitor \code{B} to thread $\alpha$ and continues to hold monitor \code{A}.
     
    652652An important note is that, until now, signalling a monitor was a delayed operation. The ownership of the monitor is transferred only when the monitor would have otherwise been released, not at the point of the \code{signal} statement. However, in some cases, it may be more convenient for users to immediately transfer ownership to the thread that is waiting for cooperation, which is achieved using the \code{signal_block} routine.
    653653
    654 The example in table \ref{tbl:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits, this behaviour requires additional synchronization when a two-way handshake is needed. To avoid this explicit synchronization, the \code{condition} type offers the \code{signal_block} routine, which handles the two-way handshake as shown in the example. This feature removes the need for a second condition variables and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the frond end and the back end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call.
      654The example in table \ref{tbl:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits; this behaviour requires additional synchronization when a two-way handshake is needed. To avoid this explicit synchronization, the \code{condition} type offers the \code{signal_block} routine, which handles the two-way handshake as shown in the example. This feature removes the need for a second condition variable and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the front end and the back end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call.
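As an illustration, the following sketch uses \code{signal_block} for a hypothetical one-shot exchange, where the signaller needs a value written by the waiter before continuing (assume the waiter always arrives first); with a plain \code{signal}, \code{left} would read \code{reply} before \code{right} runs:
\begin{cfacode}
monitor Exchanger {
	condition c;
	int offer, reply;
};
int left( Exchanger & mutex m, int v ) {
	m.offer = v;
	signal_block( m.c );    //waiter runs immediately, inside the monitor
	return m.reply;         //reply is already set when control returns
}
int right( Exchanger & mutex m, int v ) {
	wait( m.c );
	int got = m.offer;
	m.reply = v;            //second half of the two-way handshake
	return got;
}
\end{cfacode}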
    655655
    656656% ======================================================================
     
    659659% ======================================================================
    660660% ======================================================================
    661 An alternative to internal scheduling is external scheduling, e.g., in \uC.
    662 \begin{center}
     661An alternative to internal scheduling is external scheduling (see Table~\ref{tbl:sched}).
     662\begin{table}
    663663\begin{tabular}{|c|c|c|}
    664664Internal Scheduling & External Scheduling & Go\\
     
    720720\end{gocode}
    721721\end{tabular}
    722 \end{center}
    723 This method is more constrained and explicit, which helps users reduce the non-deterministic nature of concurrency. Indeed, as the following examples demonstrates, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. External scheduling can generally be done either in terms of control flow (e.g., Ada with \code{accept}, \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). Of course, both of these paradigms have their own strengths and weaknesses, but for this project control-flow semantics was chosen to stay consistent with the rest of the languages semantics. Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multiple-monitor routines. The previous example shows a simple use \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s.
     722\caption{Different forms of scheduling.}
     723\label{tbl:sched}
     724\end{table}
      725This method is more constrained and explicit, which helps users reduce the non-deterministic nature of concurrency. Indeed, as the following examples demonstrate, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. External scheduling can generally be done either in terms of control flow (e.g., Ada with \code{accept}, \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). Of course, both of these paradigms have their own strengths and weaknesses, but for this project, control-flow semantics was chosen to stay consistent with the rest of the language's semantics. Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multiple-monitor routines. The previous example shows a simple use of \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s.
    724726
     725727For the \code{P} member above using internal scheduling, the call to \code{wait} only guarantees that \code{V} is the last routine to access the monitor, allowing a third routine, say \code{isInUse()}, to acquire mutual exclusion several times while routine \code{P} is waiting. On the other hand, external scheduling guarantees that while routine \code{P} is waiting, no routine other than \code{V} can acquire the monitor.
     
    761763\end{tabular}
    762764\end{center}
    763 For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. Indeed, monitors are often expressed as an entry queue and some acceptor queue as in the following figure:
    764 
    765 \begin{figure}[H]
    766 \begin{center}
    767 {\resizebox{0.4\textwidth}{!}{\input{monitor}}}
    768 \end{center}
    769 \label{fig:monitor}
     765For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. Indeed, monitors are often expressed as an entry queue and some acceptor queue as in Figure~\ref{fig:ClassicalMonitor}.
     766
     767\begin{figure}
     768\centering
     769\subfloat[Classical Monitor] {
     770\label{fig:ClassicalMonitor}
     771{\resizebox{0.45\textwidth}{!}{\input{monitor}}}
     772}% subfloat
     773\qquad
     774\subfloat[\Gls{bulk-acq} Monitor] {
     775\label{fig:BulkMonitor}
     776{\resizebox{0.45\textwidth}{!}{\input{ext_monitor}}}
     777}% subfloat
     778\caption{External Scheduling Monitor}
    770779\end{figure}
    771780
    772 There are other alternatives to these pictures, but in the case of this picture, implementing a fast accept check is relatively easy. Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (e.g., 128) of mutex members. This approach requires a unique dense ordering of routines with an upper-bound and that ordering must be consistent across translation units. For OO languages these constraints are common, since objects only offer adding member routines consistently across translation units via inheritance. However, in \CFA users can extend objects with mutex routines that are only visible in certain translation unit. This means that establishing a program-wide dense-ordering among mutex routines can only be done in the program linking phase, and still could have issues when using dynamically shared objects.
    773 
    774 The alternative is to alter the implementation like this:
    775 \begin{center}
    776 {\resizebox{0.4\textwidth}{!}{\input{ext_monitor}}}
    777 \end{center}
      781There are other alternatives to these pictures, but in the case of the left picture, implementing a fast accept check is relatively easy. Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (e.g., 128) of mutex members. This approach requires a unique dense ordering of routines with an upper-bound, and that ordering must be consistent across translation units. For OO languages these constraints are common, since objects only allow member routines to be added consistently across translation units via inheritance. However, in \CFA users can extend objects with mutex routines that are only visible in certain translation units. This means that establishing a program-wide dense-ordering among mutex routines can only be done in the program linking phase, and still could have issues when using dynamically shared objects.
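A sketch of this fast path, with hypothetical names; each mutex member has a dense program-wide index, and the acceptor queue is summarized as a bitmask updated on every \code{waitfor}:
\begin{cfacode}
typedef struct {
	unsigned long long bits[2];     //fixed upper bound of 128 mutex members
} accept_mask_t;

//tested on monitor entry; a single load, shift and mask
static inline int accepts( accept_mask_t * mask, unsigned routine_id ) {
	return ( mask->bits[routine_id / 64] >> (routine_id % 64) ) & 1;
}
\end{cfacode}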
     782
     783The alternative is to alter the implementation as in Figure~\ref{fig:BulkMonitor}.
    778784Here, the mutex routine called is associated with a thread on the entry queue while a list of acceptable routines is kept separate. Generating a mask dynamically means that the storage for the mask information can vary between calls to \code{waitfor}, allowing for more flexibility and extensions. Storing an array of accepted function pointers replaces the single instruction bitmask comparison with dereferencing a pointer followed by a linear search. Furthermore, supporting nested external scheduling (e.g., listing \ref{lst:nest-ext}) may now require additional searches for the \code{waitfor} statement to check if a routine is already queued.
    779785
     
    793799\end{figure}
    794800
    795 Note that in the second picture, tasks need to always keep track of the monitors associated with mutex routines, and the routine mask needs to have both a function pointer and a set of monitors, as is discussed in the next section. These details are omitted from the picture for the sake of simplicity.
     801Note that in the right picture, tasks need to always keep track of the monitors associated with mutex routines, and the routine mask needs to have both a function pointer and a set of monitors, as is discussed in the next section. These details are omitted from the picture for the sake of simplicity.
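A corresponding sketch of the dynamic mask, again with hypothetical names; each accepted clause pairs a function pointer with the monitor set it needs, and the accept check becomes a linear search:
\begin{cfacode}
struct monitor_desc;    //monitor descriptor (hypothetical)

typedef struct {
	void (*func)(void);     //accepted mutex routine
	struct monitor_desc ** monitors;        //monitor set for the routine
	short count;
} accept_clause_t;

static int mask_accepts( accept_clause_t * clauses, int n, void (*f)(void) ) {
	for( int i = 0; i < n; i++ )    //linear search replaces the
		if( clauses[i].func == f ) return 1;    //one-instruction check
	return 0;
}
\end{cfacode}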
    796802
     797803At this point, a decision must be made between flexibility and performance. Many design decisions in \CFA achieve both flexibility and performance; for example, polymorphic routines add significant flexibility, but inlining them means the optimizer can easily remove any runtime cost. Here, however, the cost of flexibility cannot be trivially removed. In the end, the most flexible approach has been chosen since it allows users to write programs that would otherwise be hard to write. This decision is based on the assumption that writing fast but inflexible locks is closer to a solved problem than writing locks that are as flexible as external scheduling in \CFA.
     
    838844\end{cfacode}
    839845
    840 Note that the set of monitors passed to the \code{waitfor} statement must be entirely contained in the set of monitors already acquired in the routine. \code{waitfor} used in any other context is Undefined Behaviour.
    841 
    842 An important behaviour to note is when a set of monitors only match partially :
     846Note that the set of monitors passed to the \code{waitfor} statement must be entirely contained in the set of monitors already acquired in the routine. \code{waitfor} used in any other context is undefined behaviour.
     847
      848An important behaviour to note is when a set of monitors only matches partially:
    843849
    844850\begin{cfacode}
     
    862868}
    863869\end{cfacode}
    864 While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets does not wake-up the waiting thread. It is also important to note that in the case of external scheduling the order of parameters is irrelevant; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are indistinguishable waiting condition.
      870While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets does not wake up the waiting thread. It is also important to note that in the case of external scheduling the order of parameters is irrelevant; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are indistinguishable waiting conditions.
    865871
    866872% ======================================================================
     
    870876% ======================================================================
    871877
    872 Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted because the compiler validates at compile time the validity of the function type and the parameters used with the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the waitfor statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading but overloading is possible.
      878Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted because the compiler checks, at compile time, the validity of the function type and the parameters used with the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the \code{waitfor} statement, indicating which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading, but overloading is possible.
    873879\begin{figure}
    874880\begin{cfacode}[caption={Various correct and incorrect uses of the waitfor statement},label={lst:waitfor}]
     
    899905        waitfor(f4, a1);     //Incorrect : f4 ambiguous
    900906
    901         waitfor(f2, a1, b2); //Undefined Behaviour : b2 not mutex
     907        waitfor(f2, a1, b2); //Undefined behaviour : b2 not mutex
    902908}
    903909\end{cfacode}
    904910\end{figure}
    905911
    906 Finally, for added flexibility, \CFA supports constructing a complex \code{waitfor} statement using the \code{or}, \code{timeout} and \code{else}. Indeed, multiple \code{waitfor} clauses can be chained together using \code{or}; this chain forms a single statement that uses baton pass to any function that fits one of the function+monitor set passed in. To enable users to tell which accepted function executed, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement, which is executed after the clause is triggered. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, which checks for a matching function call already arrived and otherwise continues. Any and all of these clauses can be preceded by a \code{when} condition to dynamically toggle the accept clauses on or off based on some current state. Listing \ref{lst:waitfor2}, demonstrates several complex masks and some incorrect ones.
      912Finally, for added flexibility, \CFA supports constructing a complex \code{waitfor} statement using the \code{or}, \code{timeout} and \code{else} clauses. Indeed, multiple \code{waitfor} clauses can be chained together using \code{or}; this chain forms a single statement that uses baton passing to any function that fits one of the function+monitor sets passed in. To enable users to tell which accepted function executed, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement, which is executed after the clause is triggered. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, which checks whether a matching function call has already arrived and otherwise continues. Any and all of these clauses can be preceded by a \code{when} condition to dynamically toggle the accept clauses on or off based on some current state. Listing \ref{lst:waitfor2} demonstrates several complex masks and some incorrect ones.
    907913
    908914\begin{figure}
     
    972978% ======================================================================
    973979% ======================================================================
    974 An interesting use for the \code{waitfor} statement is destructor semantics. Indeed, the \code{waitfor} statement can accept any \code{mutex} routine, which includes the destructor (see section \ref{data}). However, with the semantics discussed until now, waiting for the destructor does not make any sense since using an object after its destructor is called is undefined behaviour. The simplest approach is to disallow \code{waitfor} on a destructor. However, a more expressive approach is to flip ordering of execution when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current \code{mutex} routine, similarly to how a condition is signalled.
     980An interesting use for the \code{waitfor} statement is destructor semantics. Indeed, the \code{waitfor} statement can accept any \code{mutex} routine, which includes the destructor (see section \ref{data}). However, with the semantics discussed until now, waiting for the destructor does not make any sense, since using an object after its destructor is called is undefined behaviour. The simplest approach is to disallow \code{waitfor} on a destructor. However, a more expressive approach is to flip ordering of execution when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current \code{mutex} routine, similarly to how a condition is signalled.
    975981\begin{figure}
    976982\begin{cfacode}[caption={Example of an executor which executes action in series until the destructor is called.},label={lst:dtor-order}]
  • doc/proposals/concurrency/text/frontpgs.tex

    rb158d8f rb6838214  
    3939        \vspace*{2.0cm}
    4040
    41         Waterloo, Ontario, Canada, 2017 \\
     41        Waterloo, Ontario, Canada, 2018 \\
    4242
    4343        \vspace*{1.0cm}
    4444
    45         \copyright\ Thierry Delisle 2017 \\
     45        \copyright\ Thierry Delisle 2018 \\
    4646        \end{center}
    4747\end{titlepage}
     
    154154% \newpage
    155155
     156% L I S T   O F   T A B L E S
     157% -----------------------------
     158\addcontentsline{toc}{chapter}{List of Acronyms}
     159\printglossary[type=\acronymtype,title={List of Acronyms}]
     160\cleardoublepage
     161\phantomsection         % allows hyperref to link to the correct page
     162
    156163% Change page numbering back to Arabic numerals
    157164\pagenumbering{arabic}
  • doc/proposals/concurrency/text/future.tex

    rb158d8f rb6838214  
    1111
    1212\subsection{Performance} \label{futur:perf}
    13 This thesis presents a first implementation of the \CFA runtime. Therefore, there is still significant work to improve performance. Many of the data structures and algorithms may change in the future to more efficient versions. For example, the number of monitors in a single \gls{bulk-acq} is only bound by the stack size, this is probably unnecessarily generous. It may be possible that limiting the number helps increase performance. However, it is not obvious that the benefit would be significant.
      13This thesis presents a first implementation of the \CFA concurrency runtime. Therefore, there is still significant work to improve performance. Many of the data structures and algorithms may change in the future to more efficient versions. For example, the number of monitors in a single \gls{bulk-acq} is only bound by the stack size, which is probably unnecessarily generous. It may be possible that limiting the number helps increase performance. However, it is not obvious that the benefit would be significant.
    1414
    1515\subsection{Flexible Scheduling} \label{futur:sched}
    1616An important part of concurrency is scheduling. Different scheduling algorithms can affect performance (both in terms of average and variation). However, no single scheduler is optimal for all workloads and therefore there is value in being able to change the scheduler for given programs. One solution is to offer various tweaking options to users, allowing the scheduler to be adjusted to the requirements of the workload. However, in order to be truly flexible, it would be interesting to allow users to add arbitrary data and arbitrary scheduling algorithms. For example, a web server could attach Type-of-Service information to threads and have a ``ToS aware'' scheduling algorithm tailored to this specific web server. This path of flexible schedulers will be explored for \CFA.
    1717
    18 \subsection{Non-Blocking IO} \label{futur:nbio}
    19 While most of the parallelism tools are aimed at data parallelism and control-flow parallelism, many modern workloads are not bound on computation but on IO operations, a common case being web servers and XaaS (anything as a service). These types of workloads often require significant engineering around amortizing costs of blocking IO operations. At its core, Non-Blocking IO is an operating system level feature that allows queuing IO operations (e.g., network operations) and registering for notifications instead of waiting for requests to complete. In this context, the role of the language makes Non-Blocking IO easily available and with low overhead. The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises, which can be seen in frameworks like Node.js~\cite{NodeJs} for JavaScript, Spring MVC~\cite{SpringMVC} for Java and Django~\cite{Django} for Python. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear.
     18\subsection{Non-Blocking I/O} \label{futur:nbio}
      19While most of the parallelism tools are aimed at data parallelism and control-flow parallelism, many modern workloads are not bound by computation but by I/O operations, a common case being web servers and XaaS (anything as a service). These types of workloads often require significant engineering around amortizing the costs of blocking I/O operations. At its core, non-blocking I/O is an operating-system-level feature that allows queuing I/O operations (e.g., network operations) and registering for notifications instead of waiting for requests to complete. In this context, the role of the language is to make non-blocking I/O easily available with low overhead. The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises, which can be seen in frameworks like Node.js~\cite{NodeJs} for JavaScript, Spring MVC~\cite{SpringMVC} for Java and Django~\cite{Django} for Python. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear.
    2020
    2121\subsection{Other Concurrency Tools} \label{futur:tools}
  • doc/proposals/concurrency/text/internals.tex

    rb158d8f rb6838214  
    33There are several challenges specific to \CFA when implementing concurrency. These challenges are a direct result of \gls{bulk-acq} and loose object definitions. These two constraints are the root cause of most design decisions in the implementation. Furthermore, to avoid contention from dynamically allocating memory in a concurrent environment, the internal-scheduling design is (almost) entirely free of mallocs. This approach avoids the chicken and egg problem~\cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal means that memory management is a constant concern in the design of the system.
    44
    5 The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues and all queues are designed with intrusive nodes, where each node has pre-allocated link fields for chaining, to avoid the need for memory allocation. Since several concurrency operations can use an unbound amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient.The only way to use a variable amount of memory without requiring memory allocation is to pre-allocate large buffers of memory eagerly and store the information in these buffers. Conveniently, the call stack fits that description and is easy to use, which is why it is used heavily in the implementation of internal scheduling, particularly variable-length arrays. Since stack allocation is based on scopes, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable-length array. The threads and the condition both have a fixed amount of memory, while mute routines and blocking calls allow for an unbound amount, within the stack size.
      5The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues and all queues are designed with intrusive nodes, where each node has pre-allocated link fields for chaining, to avoid the need for memory allocation. Since several concurrency operations can use an unbound amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient. The only way to use a variable amount of memory without requiring memory allocation is to pre-allocate large buffers of memory eagerly and store the information in these buffers. Conveniently, the call stack fits that description and is easy to use, which is why it is used heavily in the implementation of internal scheduling, particularly variable-length arrays. Since stack allocation is based on scopes, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable-length array. The threads and the condition both have a fixed amount of memory, while \code{mutex} routines and blocking calls allow for an unbound amount, within the stack size.
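A minimal sketch of such an intrusive queue follows, with hypothetical field and type names; the link field lives inside the thread descriptor, so parking a thread never allocates:
\begin{cfacode}
struct thread_desc {
	struct thread_desc * next;      //pre-allocated link field for chaining
	//...rest of the thread state
};

struct intrusive_queue {
	struct thread_desc * head;
	struct thread_desc ** tail;     //points at the last link field
};

void append( struct intrusive_queue * q, struct thread_desc * t ) {
	t->next = 0;
	*q->tail = t;           //no allocation: reuse the intrusive field
	q->tail = &t->next;
}
\end{cfacode}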
    66
     77Note that since the major contributions of this thesis are extending monitor semantics to \gls{bulk-acq} and loose object definitions, any challenges that do not result from these characteristics of \CFA are considered solved problems and therefore not discussed.
     
    1313% ======================================================================
    1414
    15 The first step towards the monitor implementation is simple mute routines. In the single monitor case, mutual-exclusion is done using the entry/exit procedure in listing \ref{lst:entry1}. The entry/exit procedures do not have to be extended to support multiple monitors. Indeed it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlock~\cite{Havender68}. In \CFA, ordering of monitor acquisition relies on memory ordering. This approach is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is Undefined Behaviour. When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values. This array persists for the entire duration of the mutual-exclusion and its ordering reused extensively.
      15The first step towards the monitor implementation is simple \code{mutex} routines. In the single monitor case, mutual-exclusion is done using the entry/exit procedure in listing \ref{lst:entry1}. The entry/exit procedures do not have to be extended to support multiple monitors. Indeed it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlock~\cite{Havender68}. In \CFA, ordering of monitor acquisition relies on memory ordering. This approach is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is undefined behaviour. When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values. This array persists for the entire duration of the mutual-exclusion and its ordering is reused extensively.
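The aggregation and sorting step can be sketched as follows (routine names hypothetical); comparing addresses yields a total, program-wide acquisition order:
\begin{cfacode}
void lock_all( monitor_desc * monitors[], int count ) {
	//insertion sort on pointer values: distinct objects have distinct,
	//non-overlapping layouts, so this order is total and consistent
	for( int i = 1; i < count; i++ ) {
		monitor_desc * m = monitors[i];
		int j = i;
		for( ; j > 0 && monitors[j-1] > m; j-- )
			monitors[j] = monitors[j-1];
		monitors[j] = m;
	}
	for( int i = 0; i < count; i++ )
		enter( monitors[i] );   //acquire one-by-one in sorted order
}
\end{cfacode}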
    1616\begin{figure}
    1717\begin{multicols}{2}
     
    3939\end{figure}
    4040
    41 \subsection{ Details: Interaction with polymorphism}
     41\subsection{Details: Interaction with polymorphism}
    4242Depending on the choice of semantics for when monitor locks are acquired, interaction between monitors and \CFA's concept of polymorphism can be more complex to support. However, it is shown that entry-point locking solves most of the issues.
    4343
    44 First of all, interaction between \code{otype} polymorphism and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. It is important to present the difference between the two acquiring options : \glspl{callsite-locking} and entry-point locking, i.e., acquiring the monitors before making a mutex routine-call or as the first operation of the mutex routine-call. For example:
     44First of all, interaction between \code{otype} polymorphism (see Section~\ref{s:ParametricPolymorphism}) and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. It is important to present the difference between the two acquiring options: \glspl{callsite-locking} and entry-point locking, i.e., acquiring the monitors before making a mutex routine-call or as the first operation of the mutex routine-call. For example:
    4545\begin{table}[H]
    4646\begin{center}
     
    109109\end{cfacode}
    110110
    111 Both entry point and \gls{callsite-locking} are feasible implementations. The current \CFA implementation uses entry-point locking because it requires less work when using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. It is harder to use \gls{raii} for call-site locking, as it does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e.: the function body. For example, the monitor call can appear in the middle of an expression. Furthermore, entry-point locking requires less code generation since any useful routine multiple times, but there is only one entry point for many call sites.
     111Both entry point and \gls{callsite-locking} are feasible implementations. The current \CFA implementation uses entry-point locking because it requires less work when using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. It is harder to use \gls{raii} for call-site locking, as it does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e., the function body. For example, the monitor call can appear in the middle of an expression. Furthermore, entry-point locking requires less code generation since any useful routine is called multiple times but there is only one entry point for many call sites.
    112112
    113113% ======================================================================
     
    127127\end{figure}
    128128
     129\subsection{Processors}
      130Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically \texttt{pthread}s in the current implementation of \CFA. Indeed, any parallelism must go through operating-system libraries. However, \glspl{uthread} are still the main source of concurrency; processors are simply the underlying source of parallelism. Indeed, processor \glspl{kthread} simply fetch a \gls{uthread} from the scheduler and run it; they are effectively executors for user threads. The main benefit of this approach is that it offers a well-defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics.
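A minimal sketch of the processor main loop follows, with hypothetical names:
\begin{cfacode}
void * processor_main( void * arg ) {
	processor * this = (processor *)arg;
	while( ! this->terminated ) {
		//fetch a ready user thread from the scheduler, if any
		thread_desc * dst = next_thread( this->cluster );
		if( dst ) run_thread( this, dst );      //execute it until it
		                                        //blocks or yields
	}
	return 0;
}
\end{cfacode}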
     131
     132\subsection{Stack Management}
      133One of the challenges of this system is to reduce the footprint as much as possible. Specifically, all \texttt{pthread}s created also have a stack created with them, which should be used as much as possible. Normally, coroutines also create their own stack to run on; however, in the case of the coroutines used for processors, these coroutines run directly on the \gls{kthread} stack, effectively stealing the processor stack. The exception to this rule is the Main Processor, i.e., the initial \gls{kthread} that is given to any program. In order to respect C user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor, because the main stack can grow very large.
     134
    129135\subsection{Context Switching}
    130 As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading, because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that the instruction pointer can be left untouched since the context-switch is always inside the same function. Threads, however, do not context-switch between each other directly. They context-switch to the scheduler. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen. Obviously, this doubles the context-switch cost because threads must context-switch to an intermediate stack. The alternative 1-step context-switch uses the stack of the ``from'' thread to schedule and then context-switches directly to the ``to'' thread. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). Additionally, for users in need for optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch (akin to the Microsoft \code{SwitchToFiber}~\cite{switchToWindows} routine). This option is not currently present in \CFA but the changes required to add it are strictly additive.
    131 
    132 \subsection{Processors}
    133 Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically pthreads in the current implementation of \CFA. Indeed, any parallelism must go through operating-system libraries. However, \glspl{uthread} are still the main source of concurrency, processors are simply the underlying source of parallelism. Indeed, processor \glspl{kthread} simply fetch a \gls{uthread} from the scheduler and run it; they are effectively executers for user-threads. The main benefit of this approach is that it offers a well-defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics.
    134 
    135 \subsection{Stack Management}
    136 One of the challenges of this system is to reduce the footprint as much as possible. Specifically, all pthreads created also have a stack created with them, which should be used as much as possible. Normally, coroutines also create there own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the \gls{kthread} stack, effectively stealing the processor stack. The exception to this rule is the Main Processor, i.e. the initial \gls{kthread} that is given to any program. In order to respect C user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor, which can grow very large.
      136As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading, because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that the instruction pointer can be left untouched since the context-switch is always inside the same function. Threads, however, do not context-switch between each other directly. They context-switch to the scheduler. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen. Obviously, this doubles the context-switch cost because threads must context-switch to an intermediate stack. The alternative 1-step context-switch uses the stack of the ``from'' thread to schedule and then context-switches directly to the ``to'' thread. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). Additionally, for users in need of optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch (akin to the Microsoft \code{SwitchToFiber}~\cite{switchToWindows} routine). This option is not currently present in \CFA, but the changes required to add it are strictly additive.
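The 2-step switch can be sketched as follows, assuming a \code{CtxSwitch} routine that saves the callee-saved registers and swaps the stack pointers (surrounding names hypothetical):
\begin{cfacode}
void block_current( thread_desc * thrd ) {
	//step 1: from the user thread to the processor (scheduler) stack
	CtxSwitch( &thrd->context, &this_processor->runner_context );
}

void scheduler_loop( processor * this ) {
	for( ;; ) {
		thread_desc * next = next_thread( this->cluster );
		//step 2: from the scheduler stack to the chosen user thread
		CtxSwitch( &this->runner_context, &next->context );
	}
}
\end{cfacode}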
    137137
    138138\subsection{Preemption} \label{preemption}
    139139Finally, an important aspect for any complete threading system is preemption. As mentioned in chapter \ref{basics}, preemption introduces an extra degree of uncertainty, which enables users to have multiple threads interleave transparently, rather than having to cooperate among threads for proper scheduling and CPU distribution. Indeed, preemption is desirable because it adds a degree of isolation among threads. In a fully cooperative system, any thread that runs a long loop can starve other threads, while in a preemptive system, starvation can still occur but it does not rely on every thread having to yield or block on a regular basis, which significantly reduces a programmer's burden. Obviously, preemption is not optimal for every workload. However, any preemptive system can become a cooperative system by making the time slices extremely large. Therefore, \CFA uses a preemptive threading system.
    140140
    141 Preemption in \CFA is based on kernel timers, which are used to run a discrete-event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption, it inserts the time in a sorted order and sets a kernel timer for the closest one, effectively stepping through preemption events on each signal sent by the timer. These timers use the Linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread. This results in an implementation problem, because when delivering signals to a process, the kernel can deliver the signal to any kernel thread for which the signal is not blocked, i.e. :
     141Preemption in \CFA\footnote{Note that the implementation of preemption is strongly tied to the underlying threading system. For this reason, only the Linux implementation is covered; \CFA does not run on Windows at the time of writing.} is based on kernel timers, which are used to run a discrete-event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption, it inserts the time in a sorted order and sets a kernel timer for the closest one, effectively stepping through preemption events on each signal sent by the timer. These timers use the Linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread. This results in an implementation problem, because when delivering signals to a process, the kernel can deliver the signal to any kernel thread for which the signal is not blocked, i.e.:
    142142\begin{quote}
    143143A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked. If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which to deliver the signal.
    144144SIGNAL(7) - Linux Programmer's Manual
    145145\end{quote}
    146 For the sake of simplicity and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every kernel thread except one. Now because of how involuntary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread.
    147 
    148 Involuntary context-switching is done by sending signal {\tt SIGUSER1} to the corresponding proces\-sor and having the thread yield from inside the signal handler. This approach effectively context-switches away from the signal handler back to the kernel and the signal-handler frame is eventually unwound when the thread is scheduled again. As a result, a signal-handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal-handlers save and restore signal masks because user-thread migration can cause a signal mask to migrate from one kernel thread to another. This behaviour is only a problem if all kernel threads, among which a user thread can migrate, differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions.}. However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel-alarm thread can cause deadlocks. For this reason, the alarm thread is in a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). This unblocking is also done using {\tt SIGALRM}, but sent through the \code{pthread_sigqueue}. Indeed, \code{sigwait} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel.
     146For the sake of simplicity, and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every kernel thread except one.
     147
     148Now because of how involuntary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread. Hence, involuntary context-switching is done by sending signal {\tt SIGUSR1} to the corresponding proces\-sor and having the thread yield from inside the signal handler. This approach effectively context-switches away from the signal handler back to the kernel and the signal handler frame is eventually unwound when the thread is scheduled again. As a result, a signal handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal handlers save and restore signal masks because user-thread migration can cause a signal mask to migrate from one kernel thread to another. This behaviour is only a problem if all kernel threads, among which a user thread can migrate, differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions.}. However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel-alarm thread can cause deadlocks. For this reason, the alarm thread is in a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). This unblocking is also done using {\tt SIGALRM}, but sent through the \code{pthread_sigqueue}. Indeed, \code{sigwait} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel.
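The signal plumbing described above can be sketched as follows; the POSIX calls are real, but the surrounding routine names are assumptions:
\begin{ccode}
static sigset_t alarm_mask;

// every processor kernel thread blocks SIGALRM ...
void block_alarm( void ) {
	sigemptyset( &alarm_mask );
	sigaddset( &alarm_mask, SIGALRM );
	pthread_sigmask( SIG_BLOCK, &alarm_mask, NULL );
}

// ... while one dedicated kernel thread consumes it synchronously
void * alarm_thread( void * unused ) {
	siginfo_t info;
	for ( ;; ) {
		sigwaitinfo( &alarm_mask, &info );
		// info.si_code distinguishes a timer expiry from a
		// pthread_sigqueue wake-up (SI_QUEUE)
		handle_alarm( &info );   // assumed handler
	}
}
\end{ccode}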
    149149
    150150\subsection{Scheduler}
     
    156156% ======================================================================
    157157% ======================================================================
    158 The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:monitor} for convenience) :
     158The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:ClassicalMonitor} for convenience):
    159159
    160160\begin{figure}[H]
     
    177177\end{figure}
    178178
    179 This picture and the proper entry and leave algorithms (see listing \ref{lst:entry2}) is the fundamental implementation of internal scheduling. Note that when a thread is moved from the condition to the AS-stack, it is conceptually split the thread into N pieces, where N is the number of monitors specified in the parameter list. The thread is woken up when all the pieces have popped from the AS-stacks and made active. In this picture, the threads are split into halves but this is only because there are two monitors. For a specific signalling operation every monitor needs a piece of thread on its AS-stack.
     179This picture and the proper entry and leave algorithms (see listing \ref{lst:entry2}) are the fundamental implementation of internal scheduling. Note that when a thread is moved from the condition to the AS-stack, it is conceptually split into N pieces, where N is the number of monitors specified in the parameter list. The thread is woken up when all the pieces have been popped from the AS-stacks and made active. In this picture, the threads are split into halves, but this is only because there are two monitors. For a specific signalling operation, every monitor needs a piece of thread on its AS-stack.
    180180
    181181\begin{figure}[b]
     
    210210\end{figure}
    211211
    212 Some important things to notice about the exit routine. The solution discussed in \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe as well as preventing any potential barging. The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the call stack of the \code{wait} and \code{signal_block} routines.
     212The solution discussed in \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe and also prevents any potential barging. The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the call stack of the \code{wait} and \code{signal_block} routines.
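In condensed form, the ownership transfer on monitor exit can be sketched as below; the field and helper names are assumptions, and listing \ref{lst:entry2} remains the authoritative version:
\begin{ccode}
void leave( monitor_desc * this ) {
	// pop the next piece of thread waiting on this monitor, if any
	condition_criterion * crit = pop( &this->as_stack );
	if ( crit ) {
		this->owner = crit->owner->waiting_thread;  // unconditional transfer
		crit->ready = true;
		if ( all_criteria_ready( crit->owner ) )    // last monitor handed over?
			unblock( crit->owner->waiting_thread );
	} else {
		this->owner = pop_head( &this->entry_queue ); // or release the monitor
	}
}
\end{ccode}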
    213213
    214214\begin{figure}[H]
     
    220220\end{figure}
    221221
    222 Figure \ref{fig:structs} shows a high-level representation of these data structures. The main idea behind them is that, a thread cannot contain an arbitrary number of intrusive stacks for linking onto monitor. The \code{condition node} is the data structure that is queued onto a condition variable and, when signalled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. Once all the criteria have been popped from their respective AS-stacks, the thread is woken up, which is what is shown in listing \ref{lst:entry2}.
     222Figure \ref{fig:structs} shows a high-level representation of these data structures. The main idea behind them is that a thread cannot contain an arbitrary number of intrusive ``next'' pointers for linking onto monitors. The \code{condition node} is the data structure that is queued onto a condition variable and, when signalled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. Once all the criteria have been popped from their respective AS-stacks, the thread is woken up, which is what is shown in listing \ref{lst:entry2}.
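A rough C rendition of the two structures is given below; the field names and exact layout are assumptions based on figure \ref{fig:structs}:
\begin{ccode}
struct condition_criterion {
	struct monitor_desc * target;           // monitor this criterion waits on
	struct condition_node * owner;          // node this criterion belongs to
	bool ready;                             // popped from the target's AS-stack?
};

struct condition_node {
	struct thread_desc * waiting_thread;    // thread blocked on the condition
	struct condition_criterion * criteria;  // one criterion per monitor
	unsigned short count;                   // number of criteria
};
\end{ccode}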
    223223
    224224% ======================================================================
     
    227227% ======================================================================
    228228% ======================================================================
    229 Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. For internal scheduling, these queues are part of condition variables, which are still unique for a given scheduling operation (e.g., no signal statement uses multiple conditions). However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired. These monitors being the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. This requires an algorithm to choose which monitor holds the relevant queue. It is also important that said algorithm be independent of the order in which users list parameters. The proposed algorithm is to fall back on monitor lock ordering (sorting by address) and specify that the monitor that is acquired first is the one with the relevant waiting queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint.
    230 
    231 This algorithm choice has two consequences :
     229Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. For internal scheduling, these queues are part of condition variables, which are still unique for a given scheduling operation (i.e., no signal statement uses multiple conditions). However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired; these monitors are the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. This requires an algorithm to choose which monitor holds the relevant queue. It is also important that said algorithm be independent of the order in which users list parameters. The proposed algorithm is to fall back on monitor lock ordering (sorting by address) and specify that the monitor that is acquired first is the one with the relevant waiting queue. This assumes that the lock-acquiring order is static for the lifetime of all concerned objects, but that is a reasonable constraint.
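The address-based ordering itself is straightforward; a minimal sketch (the \code{monitor_desc} type and helper names are assumed) is:
\begin{ccode}
#include <stdint.h>
#include <stdlib.h>

static int monitor_cmp( const void * a, const void * b ) {
	uintptr_t x = (uintptr_t)*(struct monitor_desc * const *)a;
	uintptr_t y = (uintptr_t)*(struct monitor_desc * const *)b;
	return (x > y) - (x < y);
}

static void sort_monitors( struct monitor_desc * monitors[], size_t count ) {
	qsort( monitors, count, sizeof(struct monitor_desc *), monitor_cmp );
	// monitors[0] now holds the relevant waiting queue
}
\end{ccode}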
     230
     231This algorithm choice has two consequences:
    232232\begin{itemize}
    233233        \item The queue of the monitor with the lowest address is no longer a true FIFO queue because threads can be moved to the front of the queue. These queues need to contain a set of monitors for each of the waiting threads. Therefore, another thread whose set contains the same lowest address monitor but different lower priority monitors may arrive first but enter the critical section after a thread with the correct pairing.
    234234        \item The queue of the lowest priority monitor is both required and potentially unused. Indeed, since it is not known at compile time which monitor is the monitor which has the lowest address, every monitor needs to have the correct queues even though it is possible that some queues go unused for the entire duration of the program, for example if a monitor is only used in a specific pair.
    235235\end{itemize}
    236 Therefore, the following modifications need to be made to support external scheduling :
     236Therefore, the following modifications need to be made to support external scheduling:
    237237\begin{itemize}
    238         \item The threads waiting on the entry-queue need to keep track of which routine it is trying to enter, and using which set of monitors. The \code{mutex} routine already has all the required information on its stack so the thread only needs to keep a pointer to that information.
     238        \item The threads waiting on the entry queue need to keep track of which routine they are trying to enter, and which set of monitors they are using. The \code{mutex} routine already has all the required information on its stack, so the thread only needs to keep a pointer to that information.
    239239        \item The monitors need to keep a mask of acceptable routines (a sketch of this mask follows the list). This mask contains, for each acceptable routine, a routine pointer and an array of monitors to go with it. It also needs storage to keep track of which routine was accepted. Since this information is not specific to any monitor, the monitors actually contain a pointer to an integer on the stack of the waiting thread. Note that if a thread has acquired two monitors but executes a \code{waitfor} with only one monitor as a parameter, setting the mask of acceptable routines to both monitors will not cause any problems since the extra monitor will not change ownership regardless. This becomes relevant when \code{when} clauses affect the number of monitors passed to a \code{waitfor} statement.
    240         \item The entry/exit routine need to be updated as shown in listing \ref{lst:entry3}.
     240        \item The entry/exit routines need to be updated as shown in listing \ref{lst:entry3}.
    241241\end{itemize}
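The mask mentioned in the second item can be sketched as follows; all names and field choices are assumptions for illustration:
\begin{ccode}
struct acceptable_fn {
	void (*func)(void);                // accepted routine, type-erased
	struct monitor_desc ** monitors;   // monitor set that goes with it
	unsigned short count;
};

struct waitfor_mask {
	struct acceptable_fn * accepted;   // array of acceptable routines
	unsigned short size;
	short * accepted_index;            // which routine was accepted; points
	                                   // into the waiting thread's stack
};
\end{ccode}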
    242242
    243243\subsection{External Scheduling - Destructors}
    244 Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine. This routine is needed because of the storage requirements of the call order inversion. Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. For regular \code{waitfor} statements, the call stack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later. The waitfor semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor}
     244Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine. This routine is needed because of the storage requirements of the call order inversion. Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. For regular \code{waitfor} statements, the call stack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later. The \code{waitfor} semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor}.
    245245
    246246\begin{figure}
  • doc/proposals/concurrency/text/intro.tex

    rb158d8f rb6838214  
    22\chapter{Introduction}
    33% ======================================================================
    4 
    5 This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, call \CFA. Furthermore, the proposed \acrshort{api} doubles as an early definition of the \CFA language and library. This thesis also comes with an implementation of the concurrency library for \CFA as well as all the required language features added to the source-to-source translator.
     4This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock, but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task-based, message-passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, called \CFA. Furthermore, the proposed \acrshort{api} doubles as an early definition of the \CFA language and library. This thesis also provides an implementation of the concurrency library for \CFA as well as all the required language features added to the source-to-source translator.
    65
    76There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the programmer. While these two concepts are often combined, they are in fact distinct, requiring different tools~\cite{Buhr05a}. Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization.
     7
     8In the context of this thesis, a \textbf{thread} is a fundamental unit of execution that runs a sequence of code, generally on a program stack. Having multiple simultaneous threads gives rise to concurrency and generally requires some kind of locking mechanism to ensure proper execution. Correspondingly, \textbf{concurrency} is defined as the concepts and challenges that occur when multiple independent (sharing memory, timing dependencies, etc.) concurrent threads are introduced. Accordingly, \textbf{locking} (and by extension locks) are defined as a mechanism that prevents the progress of certain threads in order to avoid problems due to concurrency. Finally, in this thesis \textbf{parallelism} is distinct from concurrency and is defined as running multiple threads simultaneously. More precisely, parallelism implies \emph{actual} simultaneous execution as opposed to concurrency which only requires \emph{apparent} simultaneous execution. As such, parallelism is only observable in the differences in performance or, more generally, differences in timing.
  • doc/proposals/concurrency/text/parallelism.tex

    rb158d8f rb6838214  
    77% #       #     # #     # #     # ####### ####### ####### ####### ###  #####  #     #
    88\chapter{Parallelism}
    9 Historically, computer performance was about processor speeds and instructions count. However, with heat dissipation being a direct consequence of speed increase, parallelism has become the new source for increased performance~\cite{Sutter05, Sutter05b}. In this decade, it is not longer reasonable to create a high-performance application without caring about parallelism. Indeed, parallelism is an important aspect of performance and more specifically throughput and hardware utilization. The lowest-level approach of parallelism is to use \glspl{kthread} in combination with semantics like \code{fork}, \code{join}, etc. However, since these have significant costs and limitations, \glspl{kthread} are now mostly used as an implementation tool rather than a user oriented one. There are several alternatives to solve these issues that all have strengths and weaknesses. While there are many variations of the presented paradigms, most of these variations do not actually change the guarantees or the semantics, they simply move costs in order to achieve better performance for certain workloads.
     9Historically, computer performance was about processor speeds and instruction counts. However, with heat dissipation being a direct consequence of speed increase, parallelism has become the new source for increased performance~\cite{Sutter05, Sutter05b}. In this decade, it is no longer reasonable to create a high-performance application without caring about parallelism. Indeed, parallelism is an important aspect of performance and more specifically throughput and hardware utilization. The lowest-level approach to parallelism is to use \glspl{kthread} in combination with semantics like \code{fork}, \code{join}, etc. However, since these have significant costs and limitations, \glspl{kthread} are now mostly used as an implementation tool rather than a user-oriented one. There are several alternatives to solve these issues that all have strengths and weaknesses. While there are many variations of the presented paradigms, most of these variations do not actually change the guarantees or the semantics; they simply move costs in order to achieve better performance for certain workloads.
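For contrast, the lowest-level approach amounts to raw \code{pthread} usage, as in this minimal sketch:
\begin{ccode}
#include <pthread.h>
#include <stdio.h>

void * work( void * arg ) {
	printf( "worker %zu\n", (size_t)arg );   // the unit of work
	return NULL;
}

int main( void ) {
	pthread_t t;
	pthread_create( &t, NULL, work, (void *)1 );  // fork
	pthread_join( t, NULL );                      // join
}
\end{ccode}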
    1010
    1111\section{Paradigms}
    1212\subsection{User-Level Threads}
    13 A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provides but can be used on a much larger scale. This approach is the most powerful solution as it allows all the features of multithreading, while removing several of the more expensive costs of kernel threads. The downside is that almost none of the low-level threading problems are hidden; users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong guarantees but the parallelism toolkit offers very little to reduce complexity in itself.
     13A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provides but can be used on a much larger scale. This approach is the most powerful solution as it allows all the features of multithreading, while removing several of the more expensive costs of kernel threads. The downside is that almost none of the low-level threading problems are hidden; users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong guarantees, but the parallelism toolkit offers very little to reduce complexity in itself.
    1414
    1515Examples of languages that support \glspl{uthread} are Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
    1616
    1717\subsection{Fibers : User-Level Threads Without Preemption} \label{fibers}
    18 A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantic differences with \glspl{uthread}. The significant difference between \glspl{uthread} and \glspl{fiber} is the lack of \gls{preemption} in the latter. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.
     18A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantic differences with \glspl{uthread}. The significant difference between \glspl{uthread} and \glspl{fiber} is the lack of \gls{preemption} in the latter. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths, but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.
    1919
    2020An example of a language that uses fibers is Go~\cite{Go}.
     
    2626
    2727\subsection{Paradigm Performance}
    28 While the choice between the three paradigms listed above may have significant performance implication, it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantees that the \gls{pool}-based system has the best performance thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilization, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large enough the paradigm choice is largely amortized by the actual work done.
     28While the choice between the three paradigms listed above may have significant performance implications, it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance, but it all strongly depends on the workload. Having a large number of mostly independent units of work to execute almost guarantees equivalent performance across paradigms, with the \gls{pool}-based system achieving the best efficiency thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilization, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large enough, the paradigm choice is largely amortized by the actual work done.
    2929
    3030\section{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel}
    31 A \gls{cfacluster} is a group of \gls{kthread} executed in isolation. \Glspl{uthread} are scheduled on the \glspl{kthread} of a given \gls{cfacluster}, allowing organization between \glspl{uthread} and \glspl{kthread}. It is important that \glspl{kthread} belonging to a same \glspl{cfacluster} have homogeneous settings, otherwise migrating a \gls{uthread} from one \gls{kthread} to the other can cause issues. A \gls{cfacluster} also offers a plugable scheduler that can optimize the workload generated by the \glspl{uthread}.
     31A \gls{cfacluster} is a group of \glspl{kthread} executed in isolation. \Glspl{uthread} are scheduled on the \glspl{kthread} of a given \gls{cfacluster}, allowing organization between \glspl{uthread} and \glspl{kthread}. It is important that \glspl{kthread} belonging to the same \gls{cfacluster} have homogeneous settings, otherwise migrating a \gls{uthread} from one \gls{kthread} to another can cause issues. A \gls{cfacluster} also offers a pluggable scheduler that can optimize the workload generated by the \glspl{uthread}.
    3232
    33 \Glspl{cfacluster} have not been fully implemented in the context of this thesis, currently \CFA only supports one \gls{cfacluster}, the initial one.
     33\Glspl{cfacluster} have not been fully implemented in the context of this thesis. Currently \CFA only supports one \gls{cfacluster}, the initial one.
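For illustration, parallelism on the initial cluster is requested simply by declaring \code{processor} objects; this sketch assumes default construction attaches the processor to the current cluster:
\begin{cfacode}
void compute() {
	processor p[2];  // RAII: add two kernel threads for this block
	// user threads created here may run in parallel on up to 3 kthreads
}                        // p[0], p[1] destroyed: extra parallelism ends
\end{cfacode}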
    3434
    3535\subsection{Future Work: Machine Setup}\label{machine}
    36 While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, a system using \acrshort{numa} configurations may benefit from users being able to tie clusters and\/or kernel threads to certain CPU cores. OS support for CPU affinity is now common~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx} which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
     36While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, a system using \acrshort{numa} configurations may benefit from users being able to tie clusters and/or kernel threads to certain CPU cores. OS support for CPU affinity is now common~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
    3737
    3838\subsection{Paradigms}\label{cfaparadigms}
  • doc/proposals/concurrency/text/results.tex

    rb158d8f rb6838214  
    3838
    3939\section{Micro Benchmarks}
    40 All benchmarks are run using the same harness to produce the results, seen as the \code{BENCH()} macro in the following examples. This macro uses the following logic to benchmark the code :
     40All benchmarks are run using the same harness to produce the results, seen as the \code{BENCH()} macro in the following examples. This macro uses the following logic to benchmark the code:
    4141\begin{pseudo}
    42 #define BENCH(run, result)
    43         before = gettime();
    44         run;
    45         after  = gettime();
     42#define BENCH(run, result) \
     43        before = gettime(); \
     44        run; \
     45        after  = gettime(); \
    4646        result = (after - before) / N;
    4747\end{pseudo}
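As a usage sketch, measuring the cost of a loop of \code{yield}s might look as follows (the declarations of \code{before}, \code{after}, \code{result} and \code{N} are assumed to be part of the harness):
\begin{ccode}
BENCH(
	for ( size_t i = 0; i < N; i++ ) { yield(); },  // code under test
	result                                          // average time per iteration
)
printf( "%llu\n", result );
\end{ccode}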
     
    4949
    5050\subsection{Context-Switching}
    51 The first interesting benchmark is to measure how long context-switches take. The simplest approach to do this is to yield on a thread, which executes a 2-step context switch. In order to make the comparison fair, coroutines also execute a 2-step context-switch (\gls{uthread} to \gls{kthread} then \gls{kthread} to \gls{uthread}), which is a resume/suspend cycle instead of a yield. Listing \ref{lst:ctx-switch} shows the code for coroutines and threads with the results in table \ref{tab:ctx-switch}. All omitted tests are functionally identical to one of these tests.
     51The first interesting benchmark is to measure how long context-switches take. The simplest approach to do this is to yield on a thread, which executes a 2-step context switch. Yielding causes the thread to context-switch to the scheduler and back, more precisely: from the \gls{uthread} to the \gls{kthread} and then from the \gls{kthread} back to the same \gls{uthread} (or a different one in the general case). In order to make the comparison fair, coroutines also execute a 2-step context-switch, by resuming another coroutine that does nothing but suspend in a tight loop; this is a resume/suspend cycle instead of a yield. Listing \ref{lst:ctx-switch} shows the code for coroutines and threads with the results in table \ref{tab:ctx-switch}. All omitted tests are functionally identical to one of these tests. The difference between coroutines and threads can be attributed to the cost of scheduling.
    5252\begin{figure}
    5353\begin{multicols}{2}
     
    115115
    116116\subsection{Mutual-Exclusion}
    117 The next interesting benchmark is to measure the overhead to enter/leave a critical-section. For monitors, the simplest approach is to measure how long it takes to enter and leave a monitor routine. Listing \ref{lst:mutex} shows the code for \CFA. To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a pthread mutex lock is also measured. The results can be shown in table \ref{tab:mutex}.
     117The next interesting benchmark is to measure the overhead of entering/leaving a critical section. For monitors, the simplest approach is to measure how long it takes to enter and leave a monitor routine. Listing \ref{lst:mutex} shows the code for \CFA. To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a \code{pthread_mutex} lock are also measured. The results are shown in table \ref{tab:mutex}.
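The \code{pthread_mutex} baseline is, in essence, the following sketch (reusing the harness variables assumed above):
\begin{ccode}
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
BENCH(
	for ( size_t i = 0; i < N; i++ ) {
		pthread_mutex_lock( &lock );    // enter the critical section
		pthread_mutex_unlock( &lock );  // leave the critical section
	},
	result
)
\end{ccode}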
    118118
    119119\begin{figure}
     
    199199\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
    200200\hline
     201Pthreads Condition Variable                     & 5902.5        & 6093.29       & 714.78 \\
    201202\uC \code{signal}                                       & 322           & 323   & 3.36   \\
    202203\CFA \code{signal}, 1 \code{monitor}    & 352.5 & 353.11        & 3.66   \\
     
    265266
    266267\subsection{Object Creation}
    267 Finally, the last benchmark measures the cost of creation for concurrent objects. Listing \ref{lst:creation} shows the code for pthreads and \CFA threads, with results shown in table \ref{tab:creation}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests. The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine, the creation cost is very low.
    268 
    269 \begin{figure}
    270 \begin{center}
    271 pthread
     268Finally, the last benchmark measures the cost of creation for concurrent objects. Listing \ref{lst:creation} shows the code for \texttt{pthread}s and \CFA threads, with results shown in table \ref{tab:creation}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests. The only note here is that the call stacks of \CFA coroutines are lazily created; therefore, without priming the coroutine, the creation cost is very low.
     269
     270\begin{figure}
     271\begin{center}
     272\texttt{pthread}
    272273\begin{ccode}
    273274int main() {
     
    306307\end{cfacode}
    307308\end{center}
    308 \begin{cfacode}[caption={Benchmark code for pthreads and \CFA to measure object creation},label={lst:creation}]
     309\begin{cfacode}[caption={Benchmark code for \texttt{pthread}s and \CFA to measure object creation},label={lst:creation}]
    309310\end{cfacode}
    310311\end{figure}
  • doc/proposals/concurrency/text/together.tex

    rb158d8f rb6838214  
    77
    88\section{Threads As Monitors}
    9 As it was subtly alluded in section \ref{threads}, \code{thread}s in \CFA are in fact monitors, which means that all monitor features are available when using threads. For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine :
     9As was subtly alluded to in section \ref{threads}, \code{thread}s in \CFA are in fact monitors, which means that all monitor features are available when using threads. For example, here is a very simple two-thread pipeline that could be used for a simulator of a game engine:
    1010\begin{figure}[H]
    1111\begin{cfacode}[caption={Toy simulator using \code{thread}s and \code{monitor}s.},label={lst:engine-v1}]
     
    3838\end{cfacode}
    3939\end{figure}
    40 One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner :
     40One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner:
    4141\begin{figure}[H]
    4242\begin{cfacode}[caption={Same toy simulator with proper termination condition.},label={lst:engine-v2}]
  • doc/proposals/concurrency/thesis.tex

    rb158d8f rb6838214  
    2323\usepackage{calc}
    2424\usepackage{xspace}
     25\usepackage[labelformat=simple]{subfig}
     26\renewcommand{\thesubfigure}{(\alph{subfigure})}
    2527\usepackage{graphicx}
    2628\usepackage{tabularx}
     
    3436\usepackage[usenames]{color}
    3537\usepackage[pagewise]{lineno}
     38\renewcommand{\linenumberfont}{\scriptsize\sffamily}
    3639\usepackage{fancyhdr}
    3740\usepackage{float}
    38 \renewcommand{\linenumberfont}{\scriptsize\sffamily}
    3941\usepackage{siunitx}
    4042\sisetup{ binary-units=true }
    4143\input{style}                                                   % bespoke macros used in the document
     44\usepackage{url}
    4245\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
    4346\usepackage{breakurl}
     47\urlstyle{rm}
    4448
    4549\usepackage{tikz}
    4650\def\checkmark{\tikz\fill[scale=0.4](0,.35) -- (.25,0) -- (1,.7) -- (.25,.15) -- cycle;}
    47 
    48 \renewcommand{\UrlFont}{\small\sf}
    4951
    5052\setlength{\topmargin}{-0.45in}                         % move running title into header
     
    123125\input{future}
    124126
    125 \clearpage
    126 \printglossary[type=\acronymtype]
    127 \printglossary
    128127
    129128\clearpage
     129
     130% B I B L I O G R A P H Y
     131% -----------------------------
     132\addcontentsline{toc}{chapter}{Bibliography}
    130133\bibliographystyle{plain}
    131 \bibliography{cw92,distSharedMem,lfp92,mlw92,parallel,parallelIO,partheory,pl,pldi92,ps,realtime,techreportsPAB,visual,local}
     134\bibliography{pl,local}
     135\cleardoublepage
     136\phantomsection         % allows hyperref to link to the correct page
     137
     138% G L O S S A R Y
     139% -----------------------------
     140\addcontentsline{toc}{chapter}{Glossary}
     141\printglossary
     142\cleardoublepage
     143\phantomsection         % allows hyperref to link to the correct page
    132144
    133145
  • doc/proposals/concurrency/version

    rb158d8f rb6838214  
    1 0.11.299
     10.11.403
  • src/CodeGen/FixMain.cc

    rb158d8f rb6838214  
    2323
    2424#include "Common/SemanticError.h"  // for SemanticError
     25#include "CodeGen/GenType.h"       // for GenType
    2526#include "SynTree/Declaration.h"   // for FunctionDecl, operator<<
    2627#include "SynTree/Type.h"          // for FunctionType
     
    2930        bool FixMain::replace_main = false;
    3031        std::unique_ptr<FunctionDecl> FixMain::main_signature = nullptr;
     32
     33        template<typename container>
     34        std::string genTypeAt(const container& p, size_t idx) {
     35                return genType((*std::next(p.begin(), idx))->get_type(), "");
     36        }
    3137
    3238        void FixMain::registerMain(FunctionDecl* functionDecl)
     
    4349
    4450                        os << main_signature->get_scopedMangleName() << "(";
    45                         switch(main_signature->get_functionType()->get_parameters().size()) {
    46                                 case 3: os << "argc, argv, envp"; break;
    47                                 case 2: os << "argc, argv"; break;
     51                        const auto& params = main_signature->get_functionType()->get_parameters();
     52                        switch(params.size()) {
     53                                case 3: os << "(" << genTypeAt(params, 0) << ")argc, (" << genTypeAt(params, 1) << ")argv, (" << genTypeAt(params, 2) << ")envp"; break;
     54                                case 2: os << "(" << genTypeAt(params, 0) << ")argc, (" << genTypeAt(params, 1) << ")argv"; break;
    4855                                case 0: break;
    4956                                default : assert(false);
  • src/CodeGen/GenType.cc

    rb158d8f rb6838214  
    2626
    2727namespace CodeGen {
    28         class GenType : public Visitor {
    29           public:
     28        struct GenType : public WithVisitorRef<GenType>, public WithShortCircuiting {
    3029                GenType( const std::string &typeString, bool pretty = false, bool genC = false, bool lineMarks = false );
    3130                std::string get_typeString() const { return typeString; }
    3231                void set_typeString( const std::string &newValue ) { typeString = newValue; }
    3332
    34                 virtual void visit( FunctionType *funcType );
    35                 virtual void visit( VoidType *voidType );
    36                 virtual void visit( BasicType *basicType );
    37                 virtual void visit( PointerType *pointerType );
    38                 virtual void visit( ArrayType *arrayType );
    39                 virtual void visit( ReferenceType *refType );
    40                 virtual void visit( StructInstType *structInst );
    41                 virtual void visit( UnionInstType *unionInst );
    42                 virtual void visit( EnumInstType *enumInst );
    43                 virtual void visit( TypeInstType *typeInst );
    44                 virtual void visit( TupleType * tupleType );
    45                 virtual void visit( VarArgsType *varArgsType );
    46                 virtual void visit( ZeroType *zeroType );
    47                 virtual void visit( OneType *oneType );
     33                void previsit( BaseSyntaxNode * );
     34                void postvisit( BaseSyntaxNode * );
     35
     36                void postvisit( FunctionType * funcType );
     37                void postvisit( VoidType * voidType );
     38                void postvisit( BasicType * basicType );
     39                void postvisit( PointerType * pointerType );
     40                void postvisit( ArrayType * arrayType );
     41                void postvisit( ReferenceType * refType );
     42                void postvisit( StructInstType * structInst );
     43                void postvisit( UnionInstType * unionInst );
     44                void postvisit( EnumInstType * enumInst );
     45                void postvisit( TypeInstType * typeInst );
     46                void postvisit( TupleType  * tupleType );
     47                void postvisit( VarArgsType * varArgsType );
     48                void postvisit( ZeroType * zeroType );
     49                void postvisit( OneType * oneType );
    4850
    4951          private:
     
    5961
    6062        std::string genType( Type *type, const std::string &baseString, bool pretty, bool genC , bool lineMarks ) {
    61                 GenType gt( baseString, pretty, genC, lineMarks );
     63                PassVisitor<GenType> gt( baseString, pretty, genC, lineMarks );
    6264                std::ostringstream os;
    6365
     
    6870
    6971                type->accept( gt );
    70                 return os.str() + gt.get_typeString();
     72                return os.str() + gt.pass.get_typeString();
    7173        }
    7274
     
    7779        GenType::GenType( const std::string &typeString, bool pretty, bool genC, bool lineMarks ) : typeString( typeString ), pretty( pretty ), genC( genC ), lineMarks( lineMarks ) {}
    7880
    79         void GenType::visit( VoidType *voidType ) {
     81        // *** BaseSyntaxNode
     82        void GenType::previsit( BaseSyntaxNode * ) {
     83                // turn off automatic recursion for all nodes, to allow each visitor to
     84                // precisely control the order in which its children are visited.
     85                visit_children = false;
     86        }
     87
     88        void GenType::postvisit( BaseSyntaxNode * node ) {
     89                std::stringstream ss;
     90                node->print( ss );
     91                assertf( false, "Unhandled node reached in GenType: %s", ss.str().c_str() );
     92        }
     93
     94        void GenType::postvisit( VoidType * voidType ) {
    8095                typeString = "void " + typeString;
    8196                handleQualifiers( voidType );
    8297        }
    8398
    84         void GenType::visit( BasicType *basicType ) {
    85                 BasicType::Kind kind = basicType->get_kind();
     99        void GenType::postvisit( BasicType * basicType ) {
     100                BasicType::Kind kind = basicType->kind;
    86101                assert( 0 <= kind && kind < BasicType::NUMBER_OF_BASIC_TYPES );
    87102                typeString = std::string( BasicType::typeNames[kind] ) + " " + typeString;
     
    89104        }
    90105
    91         void GenType::genArray( const Type::Qualifiers &qualifiers, Type *base, Expression *dimension, bool isVarLen, bool isStatic ) {
     106        void GenType::genArray( const Type::Qualifiers & qualifiers, Type * base, Expression *dimension, bool isVarLen, bool isStatic ) {
    92107                std::ostringstream os;
    93108                if ( typeString != "" ) {
     
    126141                typeString = os.str();
    127142
    128                 base->accept( *this );
    129         }
    130 
    131         void GenType::visit( PointerType *pointerType ) {
    132                 assert( pointerType->get_base() != 0);
    133                 if ( pointerType->get_isStatic() || pointerType->get_isVarLen() || pointerType->get_dimension() ) {
    134                         genArray( pointerType->get_qualifiers(), pointerType->get_base(), pointerType->get_dimension(), pointerType->get_isVarLen(), pointerType->get_isStatic() );
     143                base->accept( *visitor );
     144        }
     145
     146        void GenType::postvisit( PointerType * pointerType ) {
     147                assert( pointerType->base != 0);
     148                if ( pointerType->get_isStatic() || pointerType->get_isVarLen() || pointerType->dimension ) {
     149                        genArray( pointerType->get_qualifiers(), pointerType->base, pointerType->dimension, pointerType->get_isVarLen(), pointerType->get_isStatic() );
    135150                } else {
    136151                        handleQualifiers( pointerType );
     
    140155                                typeString = "*" + typeString;
    141156                        } // if
    142                         pointerType->get_base()->accept( *this );
    143                 } // if
    144         }
    145 
    146         void GenType::visit( ArrayType *arrayType ) {
    147                 genArray( arrayType->get_qualifiers(), arrayType->get_base(), arrayType->get_dimension(), arrayType->get_isVarLen(), arrayType->get_isStatic() );
    148         }
    149 
    150         void GenType::visit( ReferenceType *refType ) {
    151                 assert( refType->get_base() != 0);
     157                        pointerType->base->accept( *visitor );
     158                } // if
     159        }
     160
     161        void GenType::postvisit( ArrayType * arrayType ) {
     162                genArray( arrayType->get_qualifiers(), arrayType->base, arrayType->dimension, arrayType->get_isVarLen(), arrayType->get_isStatic() );
     163        }
     164
     165        void GenType::postvisit( ReferenceType * refType ) {
     166                assert( refType->base != 0);
    152167                assertf( ! genC, "Reference types should not reach code generation." );
    153168                handleQualifiers( refType );
    154169                typeString = "&" + typeString;
    155                 refType->get_base()->accept( *this );
    156         }
    157 
    158         void GenType::visit( FunctionType *funcType ) {
     170                refType->base->accept( *visitor );
     171        }
     172
     173        void GenType::postvisit( FunctionType * funcType ) {
    159174                std::ostringstream os;
    160175
     
    169184                /************* parameters ***************/
    170185
    171                 const std::list<DeclarationWithType *> &pars = funcType->get_parameters();
     186                const std::list<DeclarationWithType *> &pars = funcType->parameters;
    172187
    173188                if ( pars.empty() ) {
     
    191206                typeString = os.str();
    192207
    193                 if ( funcType->get_returnVals().size() == 0 ) {
     208                if ( funcType->returnVals.size() == 0 ) {
    194209                        typeString = "void " + typeString;
    195210                } else {
    196                         funcType->get_returnVals().front()->get_type()->accept( *this );
     211                        funcType->returnVals.front()->get_type()->accept( *visitor );
    197212                } // if
    198213
    199214                // add forall
    200                 if( ! funcType->get_forall().empty() && ! genC ) {
     215                if( ! funcType->forall.empty() && ! genC ) {
    201216                        // assertf( ! genC, "Aggregate type parameters should not reach code generation." );
    202217                        std::ostringstream os;
    203218                        PassVisitor<CodeGenerator> cg( os, pretty, genC, lineMarks );
    204219                        os << "forall(";
    205                         cg.pass.genCommaList( funcType->get_forall().begin(), funcType->get_forall().end() );
     220                        cg.pass.genCommaList( funcType->forall.begin(), funcType->forall.end() );
    206221                        os << ")" << std::endl;
    207222                        typeString = os.str() + typeString;
     
    221236        }
    222237
    223         void GenType::visit( StructInstType *structInst )  {
    224                 typeString = structInst->get_name() + handleGeneric( structInst ) + " " + typeString;
     238        void GenType::postvisit( StructInstType * structInst )  {
     239                typeString = structInst->name + handleGeneric( structInst ) + " " + typeString;
    225240                if ( genC ) typeString = "struct " + typeString;
    226241                handleQualifiers( structInst );
    227242        }
    228243
    229         void GenType::visit( UnionInstType *unionInst ) {
    230                 typeString = unionInst->get_name() + handleGeneric( unionInst ) + " " + typeString;
     244        void GenType::postvisit( UnionInstType * unionInst ) {
     245                typeString = unionInst->name + handleGeneric( unionInst ) + " " + typeString;
    231246                if ( genC ) typeString = "union " + typeString;
    232247                handleQualifiers( unionInst );
    233248        }
    234249
    235         void GenType::visit( EnumInstType *enumInst ) {
    236                 typeString = enumInst->get_name() + " " + typeString;
     250        void GenType::postvisit( EnumInstType * enumInst ) {
     251                typeString = enumInst->name + " " + typeString;
    237252                if ( genC ) typeString = "enum " + typeString;
    238253                handleQualifiers( enumInst );
    239254        }
    240255
    241         void GenType::visit( TypeInstType *typeInst ) {
    242                 typeString = typeInst->get_name() + " " + typeString;
     256        void GenType::postvisit( TypeInstType * typeInst ) {
     257                typeString = typeInst->name + " " + typeString;
    243258                handleQualifiers( typeInst );
    244259        }
    245260
    246         void GenType::visit( TupleType * tupleType ) {
     261        void GenType::postvisit( TupleType * tupleType ) {
    247262                assertf( ! genC, "Tuple types should not reach code generation." );
    248263                unsigned int i = 0;
     
    257272        }
    258273
    259         void GenType::visit( VarArgsType *varArgsType ) {
     274        void GenType::postvisit( VarArgsType * varArgsType ) {
    260275                typeString = "__builtin_va_list " + typeString;
    261276                handleQualifiers( varArgsType );
    262277        }
    263278
    264         void GenType::visit( ZeroType *zeroType ) {
     279        void GenType::postvisit( ZeroType * zeroType ) {
    265280                // ideally these wouldn't hit codegen at all, but should be safe to make them ints
    266281                typeString = (pretty ? "zero_t " : "long int ") + typeString;
     
    268283        }
    269284
    270         void GenType::visit( OneType *oneType ) {
     285        void GenType::postvisit( OneType * oneType ) {
    271286                // ideally these wouldn't hit codegen at all, but should be safe to make them ints
    272287                typeString = (pretty ? "one_t " : "long int ") + typeString;
     
    274289        }
    275290
    276         void GenType::handleQualifiers( Type *type ) {
     291        void GenType::handleQualifiers( Type * type ) {
    277292                if ( type->get_const() ) {
    278293                        typeString = "const " + typeString;
  • src/CodeTools/DeclStats.cc

    rb158d8f rb6838214  
    2323#include <utility>                 // for pair, make_pair
    2424
     25#include "Common/PassVisitor.h"
    2526#include "Common/SemanticError.h"  // for SemanticError
    2627#include "Common/VectorMap.h"      // for VectorMap
     
    3536namespace CodeTools {
    3637
    37         class DeclStats : public Visitor {
     38        struct DeclStats : public WithShortCircuiting {
    3839                template<typename T>
    3940                static void sum(T& a, const T& b) { a += b; }
     
    6162
    6263                struct ArgPackStats {
    63                         VectorMap<unsigned> n;                 ///< Count of decls with each number of elements
    64                         VectorMap<unsigned> n_basic;           ///< Count of decls with each number of basic type elements
    65                         VectorMap<unsigned> n_poly;            ///< Count of decls with each number of polymorphic elements
    66                         std::map<unsigned, unsigned> p_basic;  ///< Count of decls with each percentage of basic type elements
    67                         std::map<unsigned, unsigned> p_poly;   ///< Count of decls with each percentage of polymorphic elements
    68                         VectorMap<unsigned> n_types;           ///< Count of decls with each number of distinct types in the pack
     64                        VectorMap<unsigned> n;                   ///< Count of decls with each number of elements
     65                        VectorMap<unsigned> n_basic;             ///< Count of decls with each number of basic type elements
     66                        VectorMap<unsigned> n_generic;           ///< Count of decls with each number of generic type elements
     67                        VectorMap<unsigned> n_poly;              ///< Count of decls with each number of polymorphic elements
     68                        VectorMap<unsigned> n_compound;          ///< Count of decls with each number of non-generic compound types
     69                        std::map<unsigned, unsigned> p_basic;    ///< Count of decls with each percentage of basic type elements
     70                        std::map<unsigned, unsigned> p_generic;  ///< Count of decls with each percentage of generic type elements
     71                        std::map<unsigned, unsigned> p_poly;     ///< Count of decls with each percentage of polymorphic elements
     72                        std::map<unsigned, unsigned> p_compound; ///< Count of decls with each percentage of non-generic compound type elements
     73                        VectorMap<unsigned> n_types;             ///< Count of decls with each number of distinct types in the pack
    6974                        /// Count of decls with each percentage of new types in lists.
    7075                        /// Types used in the parameter list that recur in the return list are not considered to be new.
     
    7479                                sum(n, o.n);
    7580                                sum(n_basic, o.n_basic);
     81                                sum(n_generic, o.n_generic);
    7682                                sum(n_poly, o.n_poly);
     83                                sum(n_compound, o.n_compound);
    7784                                sum(p_basic, o.p_basic);
     85                                sum(p_generic, o.p_generic);
    7886                                sum(p_poly, o.p_poly);
     87                                sum(p_compound, o.p_compound);
    7988                                sum(n_types, o.n_types);
    8089                                sum(p_new, o.p_new);
     
    8897                        /// Count of declarations with each number of assertion parameters
    8998                        VectorMap<unsigned> n_type_params;
     99                        /// Count of generic types with each number of type parameters
     100                        VectorMap<unsigned> n_generic_params;
     101                        /// Count of maximum nesting depth of types
     102                        VectorMap<unsigned> n_generic_nesting;
    90103                        /// Count of declarations with each name
    91104                        std::unordered_map<std::string, unsigned> by_name;
    92105                        /// Count of uses of each basic type
    93106                        std::unordered_map<std::string, unsigned> basic_type_names;
    94                         /// Count of uses of each non-basic type
     107                        /// Count of uses of each generic type name (includes "*", "[]", "(*)", "[,]")
     108                        std::unordered_map<std::string, unsigned> generic_type_names;
     109                        /// Count of uses of each non-generic aggregate type
    95110                        std::unordered_map<std::string, unsigned> compound_type_names;
    96111                        /// Count of decls using each basic type
    97112                        std::unordered_map<std::string, unsigned> basic_type_decls;
     113                        /// Count of decls using each generic type (includes "*", "[]", "(*)", "[,]")
     114                        std::unordered_map<std::string, unsigned> generic_type_decls;
    98115                        /// Count of decls using each compound type
    99116                        std::unordered_map<std::string, unsigned> compound_type_decls;
     
    110127                        ArgPackStats assn_returns;
    111128
    112                         Stats() : n_decls(0), n_type_params(), by_name(), basic_type_names(), compound_type_names(), basic_type_decls(), compound_type_decls(), params(), returns(), n_assns(), assn_params(), assn_returns() {}
     129                        Stats() : n_decls(0), n_type_params(), n_generic_params(), n_generic_nesting(),
     130                                by_name(), basic_type_names(), generic_type_names(), compound_type_names(),
     131                                basic_type_decls(), generic_type_decls(), compound_type_decls(), params(),
     132                                returns(), n_assns(), assn_params(), assn_returns() {}
    113133
    114134                public:
     
    116136                                sum( n_decls, o.n_decls );
    117137                                sum( n_type_params, o.n_type_params );
     138                                sum( n_generic_params, o.n_generic_params );
     139                                sum( n_generic_nesting, o.n_generic_nesting );
    118140                                sum( by_name, o.by_name );
    119141                                sum( basic_type_names, o.basic_type_names );
     142                                sum( generic_type_names, o.generic_type_names );
    120143                                sum( compound_type_names, o.compound_type_names );
    121144                                sum( basic_type_decls, o.basic_type_decls );
     145                                sum( generic_type_decls, o.generic_type_decls );
    122146                                sum( compound_type_decls, o.compound_type_decls );
    123147                                sum( params, o.params );
     
    131155                };
    132156
    133                 Stats for_linkage[LinkageSpec::NoOfSpecs];   ///< Stores separate stats per linkage
     157                /// number of counting bins for linkages
     158                static const unsigned n_named_specs = 8;
     159                /// map from each of the 16 linkage-spec values to one of the counting bins
     160                static const unsigned ind_for_linkage[16];
     161
     162                Stats for_linkage[n_named_specs];            ///< Stores separate stats per linkage
    134163                std::unordered_set<std::string> seen_names;  ///< Stores manglenames already seen to avoid double-counting
    135164                Stats total;
     
    137166                std::map<std::pair<unsigned, unsigned>, unsigned> exprs_by_fanout_at_depth;
    138167
     168                void countType( const std::string& name, unsigned& n, std::unordered_map<std::string,
     169                                unsigned>& names, std::unordered_map<std::string, unsigned>& decls,
     170                                std::unordered_set<std::string>& elSeen ) {
     171                        ++n;
     172                        ++names[ name ];
     173                        if ( elSeen.insert( name ).second ) { ++decls[ name ]; }
     174                }
     175
     176                void update_max( unsigned& max, unsigned crnt ) {
     177                        if ( crnt > max ) max = crnt;
     178                }
     179
     180                void analyzeSubtype( Type* ty, Stats& stats, std::unordered_set<std::string>& elSeen,
     181                                unsigned& n_poly, bool& seen_poly, unsigned& max_depth, unsigned depth ) {
     182                        unsigned x;
     183                        analyzeType( ty, stats, elSeen, x, x, n_poly, x, seen_poly, max_depth, depth + 1 );
     184                }
     185
     186                void analyzeSubtypes( std::list<DeclarationWithType*>& tys, Stats& stats,
     187                                std::unordered_set<std::string>& elSeen, unsigned& n_poly, bool& seen_poly,
     188                                unsigned& max_depth, unsigned depth, unsigned& n_subs ) {
     189                        for ( DeclarationWithType* dwt : tys ) {
     190                                Type* ty = dwt->get_type();
     191                                n_subs += (unsigned)( dynamic_cast<VoidType*>(ty) != nullptr );
     192                                analyzeSubtype( ty, stats, elSeen, n_poly, seen_poly, max_depth, depth );
     193                        }
     194                }
     195
     196                void analyzeSubtypes( std::list<Expression*>& tys, Stats& stats,
     197                                std::unordered_set<std::string>& elSeen, unsigned& n_poly, bool& seen_poly,
     198                                unsigned& max_depth, unsigned depth ) {
     199                        for ( Expression* expr : tys ) {
     200                                TypeExpr* texpr = dynamic_cast<TypeExpr*>(expr);
     201                                if ( ! texpr ) continue;
     202                                Type* ty = texpr->get_type();
     203                                analyzeSubtype( ty, stats, elSeen, n_poly, seen_poly, max_depth, depth );
     204                        }
     205                }
     206
     207                void analyzeSubtypes( std::list<Type*>& tys, Stats& stats,
     208                                std::unordered_set<std::string>& elSeen, unsigned& n_poly, bool& seen_poly,
     209                                unsigned& max_depth, unsigned depth ) {
     210                        for ( Type* ty : tys ) {
     211                                analyzeSubtype( ty, stats, elSeen, n_poly, seen_poly, max_depth, depth );
     212                        }
     213                }
     214
     215                void analyzeType( Type* ty, Stats& stats, std::unordered_set<std::string>& elSeen,
     216                                unsigned& n_basic, unsigned& n_generic, unsigned& n_poly, unsigned& n_agg,
     217                                bool& seen_poly, unsigned& max_depth, unsigned depth = 0 ) {
     218                        if ( BasicType* bt = dynamic_cast<BasicType*>(ty) ) {
     219                                std::string name = BasicType::typeNames[ bt->get_kind() ];
     220                                countType( name, n_basic, stats.basic_type_names, stats.basic_type_decls, elSeen );
     221                                update_max( max_depth, depth );
     222                        } else if ( PointerType* pt = dynamic_cast<PointerType*>(ty) ) {
     223                                std::string name = "*";
     224                                countType(
     225                                        name, n_generic, stats.generic_type_names, stats.generic_type_decls, elSeen);
     226                                analyzeSubtype(
     227                                        pt->get_base(), stats, elSeen, n_poly, seen_poly, max_depth, depth );
     228                                ++stats.n_generic_params.at( 1 );
     229                        } else if ( ArrayType* at = dynamic_cast<ArrayType*>(ty) ) {
     230                                std::string name = "[]";
     231                                countType(
     232                                        name, n_generic, stats.generic_type_names, stats.generic_type_decls, elSeen);
     233                                analyzeSubtype(
     234                                        at->get_base(), stats, elSeen, n_poly, seen_poly, max_depth, depth );
     235                                ++stats.n_generic_params.at( 1 );
     236                        } else if ( ReferenceType* rt = dynamic_cast<ReferenceType*>(ty) ) {
     237                                std::string name = "&";
     238                                countType(
     239                                        name, n_generic, stats.generic_type_names, stats.generic_type_decls, elSeen);
     240                                analyzeSubtype(
     241                                        rt->get_base(), stats, elSeen, n_poly, seen_poly, max_depth, depth );
     242                                ++stats.n_generic_params.at( 1 );
     243                        } else if ( FunctionType* ft = dynamic_cast<FunctionType*>(ty) ) {
     244                                std::string name = "(*)";
     245                                countType(
     246                                        name, n_generic, stats.generic_type_names, stats.generic_type_decls, elSeen);
     247                                unsigned n_subs = 0;
     248                                analyzeSubtypes(
     249                                        ft->get_returnVals(), stats, elSeen, n_poly, seen_poly, max_depth, depth,
     250                                        n_subs );
     251                                analyzeSubtypes(
     252                                        ft->get_parameters(), stats, elSeen, n_poly, seen_poly, max_depth, depth,
     253                                        n_subs );
     254                                ++stats.n_generic_params.at( n_subs );
     255                        } else if ( TypeInstType* vt = dynamic_cast<TypeInstType*>(ty) ) {
     256                                if ( ! seen_poly ) {
     257                                        ++n_poly;
     258                                        seen_poly = true;
     259                                }
     260                                countType(
     261                                        vt->get_name(), n_agg, stats.compound_type_names, stats.compound_type_decls,
     262                                        elSeen );
     263                                update_max( max_depth, depth );
     264                        } else if ( ReferenceToType* st = dynamic_cast<ReferenceToType*>(ty) ) {
     265                                std::list<Expression*>& params = st->get_parameters();
     266                                if ( params.empty() ) {
     267                                        countType(
     268                                                st->get_name(), n_agg, stats.compound_type_names,
     269                                                stats.compound_type_decls, elSeen );
     270                                        update_max( max_depth, depth );
     271                                } else {
     272                                        countType(
     273                                                st->get_name(), n_generic, stats.generic_type_names,
     274                                                stats.generic_type_decls, elSeen);
     275                                        analyzeSubtypes( params, stats, elSeen, n_poly, seen_poly, max_depth, depth );
     276                                        ++stats.n_generic_params.at( params.size() );
     277                                }
     278                        } else if ( TupleType* tt = dynamic_cast<TupleType*>(ty) ) {
     279                                std::string name = "[,]";
     280                                countType(
     281                                        name, n_generic, stats.generic_type_names, stats.generic_type_decls, elSeen);
     282                                analyzeSubtypes(
     283                                        tt->get_types(), stats, elSeen, n_poly, seen_poly, max_depth, depth );
     284                                ++stats.n_generic_params.at( tt->size() );
     285                        } else if ( dynamic_cast<VarArgsType*>(ty) ) {
     286                                std::string name = "...";
     287                                countType(
     288                                        name, n_agg, stats.compound_type_names, stats.compound_type_decls, elSeen );
     289                                update_max( max_depth, depth );
     290                        } else if ( dynamic_cast<ZeroType*>(ty) ) {
     291                                std::string name = "0";
     292                                countType( name, n_basic, stats.basic_type_names, stats.basic_type_decls, elSeen );
     293                                update_max( max_depth, depth );
     294                        } else if ( dynamic_cast<OneType*>(ty) ) {
     295                                std::string name = "1";
     296                                countType( name, n_basic, stats.basic_type_names, stats.basic_type_decls, elSeen );
     297                                update_max( max_depth, depth );
     298                        }
     299                }
     300
    139301                /// Update arg pack stats based on a declaration list
    140                 void analyze( Stats& stats, std::unordered_set<std::string>& seen, ArgPackStats& pstats, std::list<DeclarationWithType*>& decls ) {
     302                void analyze( Stats& stats, std::unordered_set<std::string>& seen,
     303                                std::unordered_set<std::string>& elSeen, ArgPackStats& pstats,
     304                                std::list<DeclarationWithType*>& decls ) {
    141305                        std::unordered_set<std::string> types;
    142                         unsigned n = 0;        ///< number of args/returns
    143                         unsigned n_basic = 0;  ///< number of basic types
    144                         unsigned n_poly = 0;   ///< number of polymorphic types
    145                         unsigned n_new = 0;    ///< number of new types
     306                        unsigned n = 0;                 ///< number of args/returns
     307                        unsigned n_basic = 0;           ///< number of basic types
     308                        unsigned n_generic = 0;         ///< number of generic types (includes "*", "&", "[]", "(*)", "[,]")
     309                        unsigned n_poly = 0;            ///< number of polymorphic types
     310                        unsigned n_agg = 0;             ///< number of non-generic aggregate types
     311                        unsigned n_new = 0;             ///< number of new types
     312
    146313                        for ( auto decl : decls ) {
    147314                                Type* dt = decl->get_type();
     
    152319                                dt->print( ss );
    153320                                types.insert( ss.str() );
    154                                 bool this_new = seen.insert( ss.str() ).second;
    155                                 if ( this_new ) { ++n_new; }
    156 
    157                                 if ( dynamic_cast<BasicType*>( dt ) ) {
    158                                         ++n_basic;
    159                                         ++stats.basic_type_names[ ss.str() ];
    160                                         if ( this_new ) {
    161                                                 ++stats.basic_type_decls[ ss.str() ];
    162                                         }
    163                                 } else if ( GenPoly::hasPolyBase( dt ) ) {
    164                                         ++n_poly;
    165                                 } else {
    166                                         ++stats.compound_type_names[ ss.str() ];
    167                                         if ( this_new ) {
    168                                                 ++stats.compound_type_decls[ ss.str() ];
    169                                         }
    170                                 }
    171                         }
     321                                if ( seen.insert( ss.str() ).second ) { ++n_new; }
     322
     323                                bool seen_poly = false;
     324                                unsigned max_depth = 0;
     325                                analyzeType(
     326                                        dt, stats, elSeen, n_basic, n_generic, n_poly, n_agg, seen_poly, max_depth );
     327                                ++stats.n_generic_nesting.at( max_depth );
     328                        }
     329
    172330                        ++pstats.n.at( n );
    173331                        ++pstats.n_basic.at( n_basic );
     332                        ++pstats.n_generic.at( n_generic );
    174333                        ++pstats.n_poly.at( n_poly );
     334                        ++pstats.n_compound.at( n_agg );
    175335                        if ( n > 0 ) {
    176336                                ++pstats.p_basic[ n_basic*100/n ];
     337                                ++pstats.p_generic[ n_generic*100/n ];
    177338                                ++pstats.p_poly[ n_poly*100/n ];
    178                                 ++pstats.p_new[ n_new*100/n ];
     339                                ++pstats.p_compound[ n_agg*100/n ];
     340                                if ( n > 1 ) ++pstats.p_new[ (n_new-1)*100/(n-1) ];
    179341                        }
    180342                        ++pstats.n_types.at( types.size() );
     
    183345                void analyzeFunc( FunctionType* fnTy, Stats& stats, ArgPackStats& params, ArgPackStats& returns ) {
    184346                        std::unordered_set<std::string> seen;
    185                         analyze( stats, seen, params, fnTy->get_parameters() );
    186                         analyze( stats, seen, returns, fnTy->get_returnVals() );
     347                        std::unordered_set<std::string> elSeen;
     348                        analyze( stats, seen, elSeen, params, fnTy->get_parameters() );
     349                        analyze( stats, seen, elSeen, returns, fnTy->get_returnVals() );
    187350                }
    188351
     
    200363
    201364        public:
    202                 using Visitor::visit;
    203 
    204                 virtual void visit( FunctionDecl *decl ) {
     365                void previsit( FunctionDecl *decl ) {
    205366                        // skip if already seen declaration for this function
    206                         const std::string& mangleName = decl->get_mangleName().empty() ? decl->get_name() : decl->get_mangleName();
    207                         if ( ! seen_names.insert( mangleName ).second ) {
    208                                 maybeAccept( decl->get_statements(), *this );
    209                                 return;
    210                         }
    211 
    212                         Stats& stats = for_linkage[ decl->get_linkage() ];
    213 
    214                         ++stats.n_decls;
    215                         FunctionType* fnTy = decl->get_functionType();
    216                         const Type::ForallList& forall = fnTy->get_forall();
    217                         ++stats.n_type_params.at( forall.size() );
    218                         unsigned n_assns = 0;
    219                         for ( TypeDecl* fdecl : forall ) {
    220                                 n_assns += fdecl->get_assertions().size();
    221                                 for ( DeclarationWithType* assn : fdecl->get_assertions() ) {
    222                                         FunctionType *assnTy = 0;
    223                                         if ( ObjectDecl *assnObj = dynamic_cast<ObjectDecl*>(assn) ) {
    224                                                 if ( PointerType *ptrTy = dynamic_cast<PointerType*>(assnObj->get_type()) ) {
    225                                                         assnTy = dynamic_cast<FunctionType*>(ptrTy->get_base());
    226                                                 } else assnTy = dynamic_cast<FunctionType*>(assnObj->get_type());
    227                                         } else if ( FunctionDecl *assnDecl = dynamic_cast<FunctionDecl*>(assn) ) {
    228                                                 assnTy = assnDecl->get_functionType();
     367                        const std::string& mangleName = decl->get_mangleName().empty() ? decl->name : decl->get_mangleName();
     368                        if ( seen_names.insert( mangleName ).second ) {
     369                                Stats& stats = for_linkage[ ind_for_linkage[ decl->linkage ] ];
     370
     371                                ++stats.n_decls;
     372                                FunctionType* fnTy = decl->type;
     373                                const Type::ForallList& forall = fnTy->forall;
     374                                ++stats.n_type_params.at( forall.size() );
     375                                unsigned n_assns = 0;
     376                                for ( TypeDecl* fdecl : forall ) {
     377                                        n_assns += fdecl->assertions.size();
     378                                        for ( DeclarationWithType* assn : fdecl->assertions ) {
     379                                                FunctionType *assnTy = nullptr;
     380                                                if ( ObjectDecl *assnObj = dynamic_cast<ObjectDecl*>(assn) ) {
     381                                                        if ( PointerType *ptrTy = dynamic_cast<PointerType*>(assnObj->get_type()) ) {
     382                                                                assnTy = dynamic_cast<FunctionType*>(ptrTy->base);
     383                                                        } else assnTy = dynamic_cast<FunctionType*>(assnObj->get_type());
     384                                                } else if ( FunctionDecl *assnDecl = dynamic_cast<FunctionDecl*>(assn) ) {
     385                                                        assnTy = assnDecl->type;
     386                                                }
     387                                                if ( assnTy ) analyzeFunc( assnTy, stats, stats.assn_params, stats.assn_returns );
    229388                                        }
    230                                         if ( assnTy ) analyzeFunc( assnTy, stats, stats.assn_params, stats.assn_returns );
    231                                 }
    232                         }
    233                         ++stats.n_assns[ n_assns ];
    234 
    235                         ++stats.by_name[ decl->get_name() ];
    236 
    237                         analyzeFunc( fnTy, stats, stats.params, stats.returns );
    238 
    239                         // analyze expressions in decl statements
    240                         maybeAccept( decl->get_statements(), *this );
    241                 }
    242 
    243                 virtual void visit( UntypedExpr *expr ) {
     389                                }
     390                                ++stats.n_assns[ n_assns ];
     391                                ++stats.by_name[ decl->name ];
     392                                analyzeFunc( fnTy, stats, stats.params, stats.returns );
     393                        }
     394                }
     395
     396                void previsit( UntypedExpr *expr ) {
     397                        visit_children = false;
    244398                        analyzeExpr( expr, 0 );
    245399                }
     
    272426                template<typename F>
    273427                void printAllHisto( const std::string& name, F extract ) {
    274                         VectorMap<unsigned> histos[LinkageSpec::NoOfSpecs];
     428                        VectorMap<unsigned> histos[n_named_specs];
    275429                        VectorMap<unsigned> thisto;
    276430
    277431                        for ( const auto& entry : extract(total) ) { ++thisto.at( entry.second ); }
    278432
    279                         for ( unsigned i = 0; i < LinkageSpec::NoOfSpecs; ++i ) {
     433                        for ( unsigned i = 0; i < n_named_specs; ++i ) {
    280434                                // can't be a higher count in one of the sub-histograms than the total
    281435                                histos[i].reserve( thisto.size() );
     
    295449                template<typename F>
    296450                void printAllSparseHisto( const std::string& name, F extract ) {
    297                         std::map<unsigned, unsigned> histos[LinkageSpec::NoOfSpecs];
     451                        std::map<unsigned, unsigned> histos[n_named_specs];
    298452                        std::map<unsigned, unsigned> thisto;
    299453
    300454                        for ( const auto& entry : extract(total) ) { ++thisto[ entry.second ]; }
    301455
    302                         for ( unsigned i = 0; i < LinkageSpec::NoOfSpecs; ++i ) {
     456                        for ( unsigned i = 0; i < n_named_specs; ++i ) {
    303457                                for ( const auto& entry : extract(for_linkage[i]) ) { ++histos[i][entry.second]; }
    304458                        }
     
    307461                                const auto& key = entry.first;
    308462                                std::cout << "\"" << name << "\"," << key;
    309                                 for ( unsigned i = 0; i < LinkageSpec::NoOfSpecs; ++i ) {
     463                                for ( unsigned i = 0; i < n_named_specs; ++i ) {
    310464                                        auto it = histos[i].find( key );
    311465                                        if ( it == histos[i].end() ) std::cout << ",0";
     
    319473                void printAllPack( const std::string& name, F extract ) {
    320474                        printAllMap("n_basic_" + name, [&extract](const Stats& stats) { return extract(stats).n_basic; });
     475                        printAllMap("n_generic_" + name, [&extract](const Stats& stats) { return extract(stats).n_generic; });
    321476                        printAllMap("n_poly_" + name, [&extract](const Stats& stats) { return extract(stats).n_poly; });
     477                        printAllMap("n_compound_" + name, [&extract](const Stats& stats) { return extract(stats).n_compound; });
    322478                        printAllMap("n_" + name, [&extract](const Stats& stats) { return extract(stats).n; });
    323479                        printAllMap("%_basic_" + name, [&extract](const Stats& stats) { return extract(stats).p_basic; });
     480                        printAllMap("%_generic_" + name, [&extract](const Stats& stats) { return extract(stats).p_generic; });
    324481                        printAllMap("%_poly_" + name, [&extract](const Stats& stats) { return extract(stats).p_poly; });
     482                        printAllMap("%_compound_" + name, [&extract](const Stats& stats) { return extract(stats).p_compound; });
    325483                        printAllMap("n_distinct_types_" + name, [&extract](const Stats& stats) { return extract(stats).n_types; });
    326484                        printAllMap("%_new_types_in_" + name, [&extract](const Stats& stats) { return extract(stats).p_new; });
     
    342500                        }
    343501
    344                         std::cout << ",,\"intrinsic\",\"Cforall\",\"C\",\"autogen\",\"builtin\",\"TOTAL\"" << std::endl;
     502                        std::cout << ",,\"intrinsic\",\"Cforall\",\"C\",\"autogen\",\"compiler\",\"builtinCFA\",\"builtinC\",\"other\",\"TOTAL\"" << std::endl;
    345503
    346504                        printAllMap("n_type_params", [](const Stats& stats) { return stats.n_type_params; });
     505                        printAllMap("n_generic_params", [](const Stats& stats) { return stats.n_generic_params; });
     506                        printAllMap("n_generic_nesting", [](const Stats& stats) { return stats.n_generic_nesting; });
    347507                        printAll("n_decls", [](const Stats& stats) { return stats.n_decls; });
    348508                        printAll("unique_names", [](const Stats& stats) { return stats.by_name.size(); });
     
    351511                        printAllSparseHisto("basic_type_uses", [](const Stats& stats) { return stats.basic_type_names; });
    352512                        printAllSparseHisto("decls_using_basic_type", [](const Stats& stats) { return stats.basic_type_decls; });
     513                        printAll("generic_type_names", [](const Stats& stats) { return stats.generic_type_names.size(); });
     514                        printAllSparseHisto("generic_type_uses", [](const Stats& stats) { return stats.generic_type_names; });
     515                        printAllSparseHisto("decls_using_generic_type", [](const Stats& stats) { return stats.generic_type_decls; });
    353516                        printAll("compound_type_names", [](const Stats& stats) { return stats.compound_type_names.size(); });
    354517                        printAllSparseHisto("compound_type_uses", [](const Stats& stats) { return stats.compound_type_names; });
     
    365528        };
    366529
     530        const unsigned DeclStats::ind_for_linkage[]
     531                = { 7, 7, 2, 1,   7, 7, 7, 3,   4, 7, 6, 5,   7, 7, 7, 0 };
     532
    367533        void printDeclStats( std::list< Declaration * > &translationUnit ) {
    368                 DeclStats stats;
     534                PassVisitor<DeclStats> stats;
    369535                acceptAll( translationUnit, stats );
    370                 stats.print();
     536                stats.pass.print();
    371537        }
    372538
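
The reworked DeclStats pass classifies each type it encounters as basic, generic, polymorphic, or non-generic compound, and separates uses from declarations: a per-declaration seen-set (elSeen) stops a type that occurs several times in one signature from being counted as several declarations. A stand-alone sketch of the countType bookkeeping above, with plain strings standing in for CFA types:

    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <unordered_set>

    // Count one use of a type: bump the per-use counter unconditionally, but
    // bump the per-declaration counter only the first time the name appears
    // within the current declaration.
    static void countType( const std::string & name, unsigned & n,
                    std::unordered_map<std::string, unsigned> & names,
                    std::unordered_map<std::string, unsigned> & decls,
                    std::unordered_set<std::string> & elSeen ) {
            ++n;
            ++names[ name ];
            if ( elSeen.insert( name ).second ) ++decls[ name ];
    }

    int main() {
            std::unordered_map<std::string, unsigned> uses, declCounts;
            std::unordered_set<std::string> seenInDecl;
            unsigned n = 0;
            // one declaration mentioning "int" twice and "*" once
            countType( "int", n, uses, declCounts, seenInDecl );
            countType( "*",   n, uses, declCounts, seenInDecl );
            countType( "int", n, uses, declCounts, seenInDecl );
            std::cout << "uses of int: " << uses["int"]                    // 2
                      << ", decls using int: " << declCounts["int"] << '\n'; // 1
    }

The new ind_for_linkage table plays a simpler role: it folds the 16 possible linkage-spec values down to the 8 named columns of the widened CSV header (intrinsic, Cforall, C, autogen, compiler, builtinCFA, builtinC, other).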
  • src/CodeTools/TrackLoc.cc

    rb158d8f rb6838214  
    6464                                }
    6565                                else {
    66                                         assertf( false, "Top level node has no CodeLocation %s", name.c_str() );
     66                                        assertf( false, "Top level node has no CodeLocation %s", toString( node ).c_str() );
    6767                                }
    6868                        }
  • src/Common/PassVisitor.h

    rb158d8f rb6838214  
    1919#include "SynTree/Expression.h"
    2020#include "SynTree/Constant.h"
    21 #include "SynTree/TypeSubstitution.h"
     21
     22class TypeSubstitution;
    2223
    2324#include "PassVisitor.proto.h"
     
    118119        virtual void visit( StmtExpr *  stmtExpr ) override final;
    119120        virtual void visit( UniqueExpr *  uniqueExpr ) override final;
     121        virtual void visit( UntypedInitExpr *  initExpr ) override final;
     122        virtual void visit( InitExpr *  initExpr ) override final;
    120123
    121124        virtual void visit( VoidType * basicType ) override final;
     
    210213        virtual Expression * mutate( StmtExpr *  stmtExpr ) override final;
    211214        virtual Expression * mutate( UniqueExpr *  uniqueExpr ) override final;
     215        virtual Expression * mutate( UntypedInitExpr *  initExpr ) override final;
     216        virtual Expression * mutate( InitExpr *  initExpr ) override final;
    212217
    213218        virtual Type * mutate( VoidType * basicType ) override final;
     
    403408};
    404409
     410#include "SynTree/TypeSubstitution.h"
    405411#include "PassVisitor.impl.h"
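
PassVisitor.h now forward-declares TypeSubstitution and pushes the full include down next to PassVisitor.impl.h. A forward declaration is sufficient wherever the header refers to the type only through pointers or references, and deferring the real include breaks the cycle between the two headers. The general shape, compressed into a single-file sketch (Gadget and Widget are invented names):

    #include <iostream>

    // Forward declaration: enough for pointers and function signatures.
    class Gadget;

    struct Widget {
            Gadget * companion = nullptr;
            void attach( Gadget * g );
    };

    // The full definition appears later, standing in for the deferred #include.
    class Gadget {
      public:
            int id = 42;
    };

    // Only here, with the complete type in scope, is a Gadget dereferenced.
    void Widget::attach( Gadget * g ) { companion = g; }

    int main() {
            Gadget g;
            Widget w;
            w.attach( &g );
            std::cout << w.companion->id << '\n'; // 42
    }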
  • src/Common/PassVisitor.impl.h

    rb158d8f rb6838214  
    6262
    6363template< typename pass_type >
    64 static inline void acceptAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& visitor ) {
     64inline void acceptAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& visitor ) {
    6565        DeclList_t* beforeDecls = visitor.get_beforeDecls();
    6666        DeclList_t* afterDecls  = visitor.get_afterDecls();
     
    9090
    9191template< typename pass_type >
    92 static inline void mutateAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& mutator ) {
     92inline void mutateAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& mutator ) {
    9393        DeclList_t* beforeDecls = mutator.get_beforeDecls();
    9494        DeclList_t* afterDecls  = mutator.get_afterDecls();
     
    18531853}
    18541854
     1855//--------------------------------------------------------------------------
     1856// UntypedInitExpr
     1857template< typename pass_type >
     1858void PassVisitor< pass_type >::visit( UntypedInitExpr * node ) {
     1859        VISIT_START( node );
     1860
     1861        indexerScopedAccept( node->result, *this );
     1862        maybeAccept_impl   ( node->expr  , *this );
     1863        // not currently visiting initAlts, but this doesn't matter since this node is only used in the resolver.
     1864
     1865        VISIT_END( node );
     1866}
     1867
     1868template< typename pass_type >
     1869Expression * PassVisitor< pass_type >::mutate( UntypedInitExpr * node ) {
     1870        MUTATE_START( node );
     1871
     1872        indexerScopedMutate( node->env   , *this );
     1873        indexerScopedMutate( node->result, *this );
     1874        maybeMutate_impl   ( node->expr  , *this );
     1875        // not currently visiting initAlts, but this doesn't matter since this node is only used in the resolver.
     1876
     1877        MUTATE_END( Expression, node );
     1878}
     1879
     1880//--------------------------------------------------------------------------
     1881// InitExpr
     1882template< typename pass_type >
     1883void PassVisitor< pass_type >::visit( InitExpr * node ) {
     1884        VISIT_START( node );
     1885
     1886        indexerScopedAccept( node->result, *this );
     1887        maybeAccept_impl   ( node->expr  , *this );
     1888        maybeAccept_impl   ( node->designation, *this );
     1889
     1890        VISIT_END( node );
     1891}
     1892
     1893template< typename pass_type >
     1894Expression * PassVisitor< pass_type >::mutate( InitExpr * node ) {
     1895        MUTATE_START( node );
     1896
     1897        indexerScopedMutate( node->env   , *this );
     1898        indexerScopedMutate( node->result, *this );
     1899        maybeMutate_impl   ( node->expr  , *this );
     1900        maybeMutate_impl   ( node->designation, *this );
     1901
     1902        MUTATE_END( Expression, node );
     1903}
     1904
    18551905template< typename pass_type >
    18561906void PassVisitor< pass_type >::visit( VoidType * node ) {
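
Dropping static from the acceptAll/mutateAll templates changes their linkage: at namespace scope, static gives a function internal linkage, so every translation unit including this header would carry its own private instantiation; without it, identical instantiations are merged at link time. A small illustration of the difference (twice is an invented example, not CFA code):

    #include <iostream>

    // With the default external linkage for templates, every translation unit
    // that instantiates twice<int> refers to one merged definition.
    template< typename T >
    inline T twice( T x ) { return x + x; }

    // A static template compiles too, but each including translation unit
    // gets its own internal-linkage copy:
    //     template< typename T >
    //     static T twice_private( T x ) { return x + x; }
    // Taking &twice_private<int> in two different .cc files yields two
    // distinct addresses, while &twice<int> is the same everywhere.

    int main() {
            std::cout << twice( 21 ) << '\n'; // 42
    }

The new UntypedInitExpr/InitExpr visit and mutate methods are the usual per-node boilerplate the framework requires; as the comments note, initAlts is deliberately skipped because those nodes exist only inside the resolver.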
  • src/Concurrency/Keywords.cc

    rb158d8f rb6838214  
    257257        // Generic keyword implementation
    258258        //=============================================================================================
     259        void fixupGenerics(FunctionType * func, StructDecl * decl) {
     260                cloneAll(decl->parameters, func->forall);
     261                for ( TypeDecl * td : func->forall ) {
     262                        strict_dynamic_cast<StructInstType*>(
     263                                func->parameters.front()->get_type()->stripReferences()
     264                        )->parameters.push_back(
     265                                new TypeExpr( new TypeInstType( noQualifiers, td->name, td ) )
     266                        );
     267                }
     268        }
     269
    259270        void ConcurrentSueKeyword::postvisit(StructDecl * decl) {
    260271                if( decl->name == type_name && decl->body ) {
     
    301312                );
    302313
    303                 get_type->get_parameters().push_back( this_decl );
     314                get_type->get_parameters().push_back( this_decl->clone() );
    304315                get_type->get_returnVals().push_back(
    305316                        new ObjectDecl(
     
    318329                        )
    319330                );
     331                fixupGenerics(get_type, decl);
    320332
    321333                FunctionDecl * get_decl = new FunctionDecl(
     
    343355                                nullptr
    344356                        );
    345                 }
     357                        fixupGenerics(main_type, decl);
     358                }
     359
     360                delete this_decl;
    346361
    347362                declsToAddBefore.push_back( forward );
     
    377392                                        new MemberExpr(
    378393                                                field,
    379                                                 UntypedExpr::createDeref( new VariableExpr( func->get_functionType()->get_parameters().front() ) )
     394                                                new CastExpr(
     395                                                        new VariableExpr( func->get_functionType()->get_parameters().front() ),
     396                                                        func->get_functionType()->get_parameters().front()->get_type()->stripReferences()->clone()
     397                                                )
    380398                                        )
    381399                                )
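
fixupGenerics is what lets the routines generated for a concurrency keyword (the get_... accessor and the forward-declared main) work on generic monitor-like structs: the struct's type parameters are cloned into the routine's forall list, and each parameter is forwarded as a type argument on the leading this parameter. A rough sketch of that copy-and-forward step over invented, much-simplified stand-in types (std::string stands in for TypeDecl/TypeExpr):

    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical miniature AST: a generic struct and a function type.
    struct StructDecl   { std::vector<std::string> parameters; }; // type params, e.g. {"T"}
    struct StructInst   { std::vector<std::string> parameters; }; // type args applied so far
    struct FunctionType {
            std::vector<std::string> forall;  // the function's own type parameters
            StructInst * firstParam;          // type of the leading 'this' parameter
    };

    // Clone the struct's type parameters onto the function, then forward each
    // one as a type argument of the 'this' parameter -- the shape of fixupGenerics.
    void fixupGenerics( FunctionType * func, StructDecl * decl ) {
            func->forall = decl->parameters;                      // cloneAll(...)
            for ( const std::string & td : func->forall )
                    func->firstParam->parameters.push_back( td ); // new TypeExpr(new TypeInstType(...))
    }

    int main() {
            StructDecl monitorLike;  monitorLike.parameters = { "T" };
            StructInst thisType;
            FunctionType getter;  getter.firstParam = &thisType;
            fixupGenerics( &getter, &monitorLike );
            std::cout << "forall " << getter.forall[0]
                      << ", this: S(" << thisType.parameters[0] << ")\n"; // forall T, this: S(T)
    }

The related switch from this_decl to this_decl->clone() (with a delete at the end) avoids sharing one declaration node between the two generated function types.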
  • src/Concurrency/Waitfor.cc

    rb158d8f rb6838214  
    507507                        new ListInit({
    508508                                new SingleInit( new AddressExpr( new VariableExpr( index ) ) ),
    509                                 new SingleInit( new VariableExpr( acceptables ) ),
    510                                 new SingleInit( new ConstantExpr( Constant::from_ulong( count ) ) )
     509                                new ListInit({
     510                                        new SingleInit( new VariableExpr( acceptables ) ),
     511                                        new SingleInit( new ConstantExpr( Constant::from_ulong( count ) ) )
     512                                })
    511513                        })
    512514                );
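
The Waitfor fix wraps the last two initializers in their own ListInit, so the brace structure of the generated initializer matches a target whose second member is itself an aggregate. The same distinction in plain C++ (the struct shapes are invented for illustration, not the real runtime types):

    #include <cstdio>

    // Hypothetical shapes standing in for the runtime's waitfor descriptors.
    struct AcceptList { void * acceptables; unsigned long count; };
    struct WaitInfo   { int *  index;       AcceptList  accepted; };

    int main() {
            int index = 0;
            int acceptables[4] = {};
            // Nested braces mirror the nested ListInit built by the pass; the
            // old flat form { &index, acceptables, 4 } assumed a flat layout.
            WaitInfo info = { &index, { acceptables, 4 } };
            std::printf( "count = %lu\n", info.accepted.count ); // count = 4
    }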
  • src/ControlStruct/ExceptTranslate.cc

    rb158d8f rb6838214  
    316316                                VarExprReplacer::DeclMap mapping;
    317317                                mapping[ handler_decl ] = local_except;
    318                                 VarExprReplacer mapper( mapping );
    319                                 handler->get_body()->accept( mapper );
     318                                VarExprReplacer::replace( handler->body, mapping );
    320319                        }
    321320
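
ExceptTranslate now calls a static VarExprReplacer::replace entry point instead of constructing the visitor and invoking accept by hand. A sketch of that convenience-wrapper pattern with invented stand-in types:

    #include <iostream>
    #include <map>
    #include <string>

    // Hypothetical stand-ins for the declaration map and a statement body.
    using DeclMap = std::map<std::string, std::string>;
    struct Body { std::string var; };

    struct VarExprReplacer {
            const DeclMap & mapping;
            explicit VarExprReplacer( const DeclMap & m ) : mapping( m ) {}

            void visit( Body & b ) {  // the traversal, much simplified
                    auto it = mapping.find( b.var );
                    if ( it != mapping.end() ) b.var = it->second;
            }

            // Convenience entry point: build the visitor and run it in one call,
            // so callers no longer spell out construct-then-accept.
            static void replace( Body & b, const DeclMap & m ) {
                    VarExprReplacer mapper( m );
                    mapper.visit( b );
            }
    };

    int main() {
            DeclMap mapping = { { "handler_decl", "local_except" } };
            Body body{ "handler_decl" };
            VarExprReplacer::replace( body, mapping );
            std::cout << body.var << '\n'; // local_except
    }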
  • src/ControlStruct/LabelFixer.cc

    rb158d8f rb6838214  
    4444
    4545        void LabelFixer::postvisit( FunctionDecl * functionDecl ) {
    46                 MLEMutator mlemut( resolveJumps(), generator );
     46                PassVisitor<MLEMutator> mlemut( resolveJumps(), generator );
    4747                functionDecl->acceptMutator( mlemut );
    4848        }
  • src/ControlStruct/MLEMutator.cc

    rb158d8f rb6838214  
    4646        void MLEMutator::fixBlock( std::list< Statement * > &kids ) {
    4747                for ( std::list< Statement * >::iterator k = kids.begin(); k != kids.end(); k++ ) {
    48                         *k = (*k)->acceptMutator(*this);
     48                        *k = (*k)->acceptMutator(*visitor);
    4949
    5050                        if ( ! get_breakLabel().empty() ) {
     
    5757        }
    5858
    59         CompoundStmt* MLEMutator::mutate( CompoundStmt *cmpndStmt ) {
    60                 bool labeledBlock = !(cmpndStmt->get_labels().empty());
     59        void MLEMutator::premutate( CompoundStmt *cmpndStmt ) {
     60                visit_children = false;
     61                bool labeledBlock = !(cmpndStmt->labels.empty());
    6162                if ( labeledBlock ) {
    6263                        Label brkLabel = generator->newLabel("blockBreak", cmpndStmt);
     
    6566
    6667                // a child statement may set the break label - if they do, attach it to the next statement
    67                 std::list< Statement * > &kids = cmpndStmt->get_kids();
     68                std::list< Statement * > &kids = cmpndStmt->kids;
    6869                fixBlock( kids );
    6970
     
    7576                        enclosingControlStructures.pop_back();
    7677                } // if
    77 
    78                 return cmpndStmt;
    79         }
    80 
    81         template< typename LoopClass >
    82         Statement *MLEMutator::handleLoopStmt( LoopClass *loopStmt ) {
    83                 // remember this as the most recent enclosing loop, then mutate the body of the loop -- this will determine
    84                 // whether brkLabel and contLabel are used with branch statements and will recursively do the same to nested
    85                 // loops
    86                 Label brkLabel = generator->newLabel("loopBreak", loopStmt);
    87                 Label contLabel = generator->newLabel("loopContinue", loopStmt);
    88                 enclosingControlStructures.push_back( Entry( loopStmt, brkLabel, contLabel ) );
    89                 loopStmt->set_body ( loopStmt->get_body()->acceptMutator( *this ) );
    90 
    91                 assert( ! enclosingControlStructures.empty() );
    92                 Entry &e = enclosingControlStructures.back();
    93                 // sanity check that the enclosing loops have been popped correctly
    94                 assert ( e == loopStmt );
    95 
    96                 // this will take the necessary steps to add definitions of the previous two labels, if they are used.
    97                 loopStmt->set_body( mutateLoop( loopStmt->get_body(), e ) );
    98                 enclosingControlStructures.pop_back();
    99 
    100                 return loopStmt;
    101         }
    102 
    103         Statement *MLEMutator::mutate( CaseStmt *caseStmt ) {
    104                 caseStmt->set_condition( maybeMutate( caseStmt->get_condition(), *this ) );
    105                 fixBlock( caseStmt->get_statements() );
    106 
    107                 return caseStmt;
    108         }
    109 
    110         template< typename IfClass >
    111         Statement *MLEMutator::handleIfStmt( IfClass *ifStmt ) {
    112                 // generate a label for breaking out of a labeled if
    113                 bool labeledBlock = !(ifStmt->get_labels().empty());
    114                 if ( labeledBlock ) {
    115                         Label brkLabel = generator->newLabel("blockBreak", ifStmt);
    116                         enclosingControlStructures.push_back( Entry( ifStmt, brkLabel ) );
    117                 } // if
    118 
    119                 Parent::mutate( ifStmt );
    120 
    121                 if ( labeledBlock ) {
    122                         if ( ! enclosingControlStructures.back().useBreakExit().empty() ) {
    123                                 set_breakLabel( enclosingControlStructures.back().useBreakExit() );
    124                         } // if
    125                         enclosingControlStructures.pop_back();
    126                 } // if
    127                 return ifStmt;
    128         }
    129 
    130         template< typename SwitchClass >
    131         Statement *MLEMutator::handleSwitchStmt( SwitchClass *switchStmt ) {
    132                 // generate a label for breaking out of a labeled switch
    133                 Label brkLabel = generator->newLabel("switchBreak", switchStmt);
    134                 enclosingControlStructures.push_back( Entry(switchStmt, brkLabel) );
    135                 mutateAll( switchStmt->get_statements(), *this );
    136 
    137                 Entry &e = enclosingControlStructures.back();
    138                 assert ( e == switchStmt );
    139 
    140                 // only generate break label if labeled break is used
    141                 if ( e.isBreakUsed() ) {
    142                         // for the purposes of keeping switch statements uniform (i.e. all statements that are direct children of a
    143                         // switch should be CastStmts), append the exit label + break to the last case statement; create a default
    144                         // case if there are no cases
    145                         std::list< Statement * > &statements = switchStmt->get_statements();
    146                         if ( statements.empty() ) {
    147                                 statements.push_back( CaseStmt::makeDefault() );
    148                         } // if
    149 
    150                         if ( CaseStmt * c = dynamic_cast< CaseStmt * >( statements.back() ) ) {
    151                                 Statement * stmt = new BranchStmt( Label("brkLabel"), BranchStmt::Break );
    152                                 stmt->labels.push_back( brkLabel );
    153                                 c->get_statements().push_back( stmt );
    154                         } else assert(0); // as of this point, all statements of a switch are still CaseStmts
    155                 } // if
    156 
    157                 assert ( enclosingControlStructures.back() == switchStmt );
    158                 enclosingControlStructures.pop_back();
    159                 return switchStmt;
    160         }
     78        }
     79
    16180
    16281        void addUnused( Statement * stmt, const Label & originalTarget ) {
     
    17998
    18099
    181         Statement *MLEMutator::mutate( BranchStmt *branchStmt ) throw ( SemanticError ) {
    182                 std::string originalTarget = branchStmt->get_originalTarget();
     100        Statement *MLEMutator::postmutate( BranchStmt *branchStmt ) throw ( SemanticError ) {
     101                std::string originalTarget = branchStmt->originalTarget;
    183102
    184103                std::list< Entry >::reverse_iterator targetEntry;
     
    215134                // branch error checks, get the appropriate label name and create a goto
    216135                Label exitLabel;
    217                 switch ( branchStmt->get_type() ) {
     136                switch ( branchStmt->type ) {
    218137                  case BranchStmt::Break:
    219138                                assert( targetEntry->useBreakExit() != "");
     
    229148
    230149                // add unused attribute to label to silence warnings
    231                 addUnused( targetEntry->get_controlStructure(), branchStmt->get_originalTarget() );
     150                addUnused( targetEntry->get_controlStructure(), branchStmt->originalTarget );
    232151
    233152                // transform break/continue statements into goto to simplify later handling of branches
     
    260179        }
    261180
    262         Statement *MLEMutator::mutate( WhileStmt *whileStmt ) {
    263                 return handleLoopStmt( whileStmt );
    264         }
    265 
    266         Statement *MLEMutator::mutate( ForStmt *forStmt ) {
    267                 return handleLoopStmt( forStmt );
    268         }
    269 
    270         Statement *MLEMutator::mutate( IfStmt *ifStmt ) {
    271                 return handleIfStmt( ifStmt );
    272         }
    273 
    274         Statement *MLEMutator::mutate( SwitchStmt *switchStmt ) {
    275                 return handleSwitchStmt( switchStmt );
     181        template< typename LoopClass >
     182        void MLEMutator::prehandleLoopStmt( LoopClass * loopStmt ) {
 183                // remember this as the most recent enclosing loop; the pass driver then mutates the body, which
 184                // determines whether brkLabel and contLabel are used with branch statements and recursively does
 185                // the same to nested loops
     186                Label brkLabel = generator->newLabel("loopBreak", loopStmt);
     187                Label contLabel = generator->newLabel("loopContinue", loopStmt);
     188                enclosingControlStructures.push_back( Entry( loopStmt, brkLabel, contLabel ) );
     189        }
     190
     191        template< typename LoopClass >
     192        Statement * MLEMutator::posthandleLoopStmt( LoopClass * loopStmt ) {
     193                assert( ! enclosingControlStructures.empty() );
     194                Entry &e = enclosingControlStructures.back();
     195                // sanity check that the enclosing loops have been popped correctly
     196                assert ( e == loopStmt );
     197
     198                // this will take the necessary steps to add definitions of the previous two labels, if they are used.
     199                loopStmt->set_body( mutateLoop( loopStmt->get_body(), e ) );
     200                enclosingControlStructures.pop_back();
     201                return loopStmt;
     202        }
     203
     204        void MLEMutator::premutate( WhileStmt * whileStmt ) {
     205                return prehandleLoopStmt( whileStmt );
     206        }
     207
     208        void MLEMutator::premutate( ForStmt * forStmt ) {
     209                return prehandleLoopStmt( forStmt );
     210        }
     211
     212        Statement * MLEMutator::postmutate( WhileStmt * whileStmt ) {
     213                return posthandleLoopStmt( whileStmt );
     214        }
     215
     216        Statement * MLEMutator::postmutate( ForStmt * forStmt ) {
     217                return posthandleLoopStmt( forStmt );
     218        }
     219
     220        void MLEMutator::premutate( IfStmt * ifStmt ) {
     221                // generate a label for breaking out of a labeled if
     222                bool labeledBlock = !(ifStmt->get_labels().empty());
     223                if ( labeledBlock ) {
     224                        Label brkLabel = generator->newLabel("blockBreak", ifStmt);
     225                        enclosingControlStructures.push_back( Entry( ifStmt, brkLabel ) );
     226                } // if
     227        }
     228
     229        Statement * MLEMutator::postmutate( IfStmt * ifStmt ) {
     230                bool labeledBlock = !(ifStmt->get_labels().empty());
     231                if ( labeledBlock ) {
     232                        if ( ! enclosingControlStructures.back().useBreakExit().empty() ) {
     233                                set_breakLabel( enclosingControlStructures.back().useBreakExit() );
     234                        } // if
     235                        enclosingControlStructures.pop_back();
     236                } // if
     237                return ifStmt;
     238        }
     239
     240        void MLEMutator::premutate( CaseStmt *caseStmt ) {
     241                visit_children = false;
     242                caseStmt->condition = maybeMutate( caseStmt->condition, *visitor );
     243                fixBlock( caseStmt->stmts );
     244        }
     245
     246        void MLEMutator::premutate( SwitchStmt *switchStmt ) {
     247                // generate a label for breaking out of a labeled switch
     248                Label brkLabel = generator->newLabel("switchBreak", switchStmt);
     249                enclosingControlStructures.push_back( Entry(switchStmt, brkLabel) );
     250        }
     251
     252        Statement * MLEMutator::postmutate( SwitchStmt * switchStmt ) {
     253                Entry &e = enclosingControlStructures.back();
     254                assert ( e == switchStmt );
     255
     256                // only generate break label if labeled break is used
     257                if ( e.isBreakUsed() ) {
      258                        // for the purposes of keeping switch statements uniform (i.e. all statements that are direct children of a
      259                        // switch should be CaseStmts), append the exit label + break to the last case statement; create a default
      260                        // case if there are no cases
     261                        std::list< Statement * > &statements = switchStmt->statements;
     262                        if ( statements.empty() ) {
     263                                statements.push_back( CaseStmt::makeDefault() );
     264                        } // if
     265
     266                        if ( CaseStmt * c = dynamic_cast< CaseStmt * >( statements.back() ) ) {
     267                                Statement * stmt = new BranchStmt( Label("brkLabel"), BranchStmt::Break );
     268                                stmt->labels.push_back( e.useBreakExit() );
     269                                c->stmts.push_back( stmt );
     270                        } else assert(0); // as of this point, all statements of a switch are still CaseStmts
     271                } // if
     272
     273                assert ( enclosingControlStructures.back() == switchStmt );
     274                enclosingControlStructures.pop_back();
     275                return switchStmt;
    276276        }
    277277} // namespace ControlStruct
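For orientation, this is the transformation the pass performs: each labeled break or continue is lowered to a goto aimed at a label generated just past (or at the end of) the enclosing control structure. A minimal before/after sketch; the generated name __L_loopBreak is hypothetical, the real names come from LabelGenerator::newLabel:

        // Cforall input: the labeled break exits the outer loop directly
        //     L: for ( int i = 0; i < n; i += 1 )
        //         for ( int j = 0; j < n; j += 1 )
        //             if ( a[i][j] == key ) break L;
        // after the pass, the labeled break becomes a goto to a generated label:
        void search( int n, int a[4][4], int key, int * row ) {
                for ( int i = 0; i < n; i += 1 ) {
                        for ( int j = 0; j < n; j += 1 ) {
                                if ( a[i][j] == key ) { *row = i; goto __L_loopBreak; }
                        }
                }
                __L_loopBreak: ;        // definition added only because the label is used
        }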
  • src/ControlStruct/MLEMutator.h

    rb158d8f rb6838214  
    2020#include <string>                  // for string
    2121
     22#include "Common/PassVisitor.h"
    2223#include "Common/SemanticError.h"  // for SemanticError
    2324#include "SynTree/Label.h"         // for Label
     
    2627
    2728namespace ControlStruct {
    28 class LabelGenerator;
     29        class LabelGenerator;
    2930
    30         class MLEMutator : public Mutator {
     31        class MLEMutator : public WithVisitorRef<MLEMutator>, public WithShortCircuiting {
    3132                class Entry;
    3233
    33                 typedef Mutator Parent;
    3434          public:
    3535                MLEMutator( std::map<Label, Statement *> *t, LabelGenerator *gen = 0 ) : targetTable( t ), breakLabel(std::string("")), generator( gen ) {}
    3636                ~MLEMutator();
    3737
    38                 virtual CompoundStmt *mutate( CompoundStmt *cmpndStmt ) override;
    39                 virtual Statement *mutate( WhileStmt *whileStmt ) override;
    40                 virtual Statement *mutate( ForStmt *forStmt ) override;
    41                 virtual Statement *mutate( BranchStmt *branchStmt ) throw ( SemanticError ) override;
    42                 virtual Statement *mutate( CaseStmt *caseStmt ) override;
    43                 virtual Statement *mutate( IfStmt *ifStmt ) override;
    44                 virtual Statement *mutate( SwitchStmt *switchStmt ) override;
     38                void premutate( CompoundStmt *cmpndStmt );
     39                Statement * postmutate( BranchStmt *branchStmt ) throw ( SemanticError );
     40                void premutate( WhileStmt *whileStmt );
     41                Statement * postmutate( WhileStmt *whileStmt );
     42                void premutate( ForStmt *forStmt );
     43                Statement * postmutate( ForStmt *forStmt );
     44                void premutate( CaseStmt *caseStmt );
     45                void premutate( IfStmt *ifStmt );
     46                Statement * postmutate( IfStmt *ifStmt );
     47                void premutate( SwitchStmt *switchStmt );
     48                Statement * postmutate( SwitchStmt *switchStmt );
    4549
    4650                Statement *mutateLoop( Statement *bodyLoop, Entry &e );
     
    5458                                loop( _loop ), breakExit( _breakExit ), contExit( _contExit ), breakUsed(false), contUsed(false) {}
    5559
    56                         bool operator==( const Statement *stmt ) { return ( loop == stmt ); }
    57                         bool operator!=( const Statement *stmt ) { return ( loop != stmt ); }
     60                        bool operator==( const Statement *stmt ) { return loop == stmt; }
     61                        bool operator!=( const Statement *stmt ) { return loop != stmt; }
    5862
    59                         bool operator==( const Entry &other ) { return ( loop == other.get_controlStructure() ); }
     63                        bool operator==( const Entry &other ) { return loop == other.get_controlStructure(); }
    6064
    6165                        Statement *get_controlStructure() const { return loop; }
     
    7882
    7983                template< typename LoopClass >
    80                 Statement *handleLoopStmt( LoopClass *loopStmt );
     84                void prehandleLoopStmt( LoopClass * loopStmt );
    8185
    82                 template< typename IfClass >
    83                 Statement *handleIfStmt( IfClass *switchStmt );
    84 
    85                 template< typename SwitchClass >
    86                 Statement *handleSwitchStmt( SwitchClass *switchStmt );
     86                template< typename LoopClass >
     87                Statement * posthandleLoopStmt( LoopClass * loopStmt );
    8788
    8889                void fixBlock( std::list< Statement * > &kids );
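The interface change above is the PassVisitor protocol: premutate runs top-down before children are visited (and, via WithShortCircuiting, may set visit_children = false), while postmutate runs bottom-up and may return a replacement node. A minimal sketch of the shape, using a hypothetical do-nothing pass:

        #include "Common/PassVisitor.h"

        struct LoopPassSketch : public WithShortCircuiting {
                void premutate( WhileStmt * ) { /* before children; may set visit_children = false */ }
                Statement * postmutate( WhileStmt * whileStmt ) { return whileStmt; } // after children; may return a replacement
        };

        // usage, following the pattern elsewhere in this changeset:
        //     PassVisitor<LoopPassSketch> pass;
        //     mutateAll( translationUnit, pass );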
  • src/GenPoly/Box.cc

    rb158d8f rb6838214  
    302302        Expression *makeOp( const std::string &name, Expression *arg ) {
    303303                UntypedExpr *expr = new UntypedExpr( new NameExpr( name ) );
    304                 expr->get_args().push_back( arg );
     304                expr->args.push_back( arg );
    305305                return expr;
    306306        }
     
    309309        Expression *makeOp( const std::string &name, Expression *lhs, Expression *rhs ) {
    310310                UntypedExpr *expr = new UntypedExpr( new NameExpr( name ) );
    311                 expr->get_args().push_back( lhs );
    312                 expr->get_args().push_back( rhs );
     311                expr->args.push_back( lhs );
     312                expr->args.push_back( rhs );
    313313                return expr;
    314314        }
     
    316316        /// Returns the dereference of a local pointer variable
    317317        Expression *derefVar( ObjectDecl *var ) {
    318                 return makeOp( "*?", new VariableExpr( var ) );
     318                return UntypedExpr::createDeref( new VariableExpr( var ) );
    319319        }
    320320
     
    831831                                if ( ! isPolyType( arg->get_type() ) ) {
    832832                                        UntypedExpr *deref = new UntypedExpr( new NameExpr( "*?" ) );
    833                                         deref->get_args().push_back( new CastExpr( new VariableExpr( param ), new PointerType( Type::Qualifiers(), arg->get_type()->clone() ) ) );
    834                                         deref->set_result( arg->get_type()->clone() );
     833                                        deref->args.push_back( new CastExpr( new VariableExpr( param ), new PointerType( Type::Qualifiers(), arg->get_type()->clone() ) ) );
     834                                        deref->result = arg->get_type()->clone();
     835                                        deref->result->set_lvalue( true );
    835836                                        return deref;
    836837                                } // if
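The derefVar change above swaps a hand-built application of the "*?" operator for the canonical factory. A sketch of the two constructions, using only the API visible in this changeset (that createDeref centralizes result-type setup is an assumption):

        // before: build the call to the dereference operator by hand
        UntypedExpr * expr = new UntypedExpr( new NameExpr( "*?" ) );
        expr->args.push_back( new VariableExpr( var ) );
        // after: one factory call, presumably keeping any result-type setup in one place
        Expression * deref = UntypedExpr::createDeref( new VariableExpr( var ) );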
  • src/GenPoly/Lvalue.cc

    rb158d8f rb6838214  
    4747                        if ( SymTab::dereferenceOperator ) {
    4848                                VariableExpr * deref = new VariableExpr( SymTab::dereferenceOperator );
    49                                 deref->set_result( new PointerType( Type::Qualifiers(), deref->get_result() ) );
    50                                 Type * base = InitTweak::getPointerBase( arg->get_result() );
    51                                 assertf( base, "expected pointer type in dereference (type was %s)", toString( arg->get_result() ).c_str() );
     49                                deref->result = new PointerType( Type::Qualifiers(), deref->result );
     50                                Type * base = InitTweak::getPointerBase( arg->result );
     51                                assertf( base, "expected pointer type in dereference (type was %s)", toString( arg->result ).c_str() );
    5252                                ApplicationExpr * ret = new ApplicationExpr( deref, { arg } );
    53                                 delete ret->get_result();
    54                                 ret->set_result( base->clone() );
    55                                 ret->get_result()->set_lvalue( true );
     53                                delete ret->result;
     54                                ret->result = base->clone();
     55                                ret->result->set_lvalue( true );
    5656                                return ret;
    5757                        } else {
     
    308308                                        int diff = depth1-depth2;
    309309                                        if ( diff == 0 ) {
     310                                                // conversion between references of the same depth
    310311                                                assertf( depth1 == depth2, "non-intrinsic reference with cast of reference to reference not yet supported: %d %d %s", depth1, depth2, toString( castExpr ).c_str() );
    311312                                                PRINT( std::cerr << castExpr << std::endl; )
    312313                                                return castExpr;
    313314                                        } else if ( diff < 0 ) {
    314                                                 Expression * ret = castExpr->get_arg();
     315                                                // conversion from reference to reference with less depth (e.g. int && -> int &): add dereferences
     316                                                Expression * ret = castExpr->arg;
    315317                                                for ( int i = 0; i < diff; ++i ) {
    316318                                                        ret = mkDeref( ret );
    317319                                                }
    318                                                 ret->set_env( castExpr->get_env() );
    319                                                 delete ret->get_result();
    320                                                 ret->set_result( castExpr->get_result() );
    321                                                 castExpr->set_env( nullptr );
    322                                                 castExpr->set_arg( nullptr );
    323                                                 castExpr->set_result( nullptr );
     320                                                ret->env = castExpr->env;
     321                                                delete ret->result;
     322                                                ret->result = castExpr->result;
     323                                                ret->result->set_lvalue( true ); // ensure result is lvalue
     324                                                castExpr->env = nullptr;
     325                                                castExpr->arg = nullptr;
     326                                                castExpr->result = nullptr;
    324327                                                delete castExpr;
    325328                                                return ret;
    326329                                        } else if ( diff > 0 ) {
    327                                                 Expression * ret = castExpr->get_arg();
     330                                                // conversion from reference to reference with more depth (e.g. int & -> int &&): add address-of
     331                                                Expression * ret = castExpr->arg;
    328332                                                for ( int i = 0; i < diff; ++i ) {
    329333                                                        ret = new AddressExpr( ret );
    330334                                                }
    331                                                 ret->set_env( castExpr->get_env() );
    332                                                 delete ret->get_result();
    333                                                 ret->set_result( castExpr->get_result() );
    334                                                 castExpr->set_env( nullptr );
    335                                                 castExpr->set_arg( nullptr );
    336                                                 castExpr->set_result( nullptr );
     335                                                ret->env = castExpr->env;
     336                                                delete ret->result;
     337                                                ret->result = castExpr->result;
     338                                                castExpr->env = nullptr;
     339                                                castExpr->arg = nullptr;
     340                                                castExpr->result = nullptr;
    337341                                                delete castExpr;
    338342                                                return ret;
     
    342346                                        PRINT( std::cerr << castExpr << std::endl; )
    343347                                        return castExpr;
    344                                 } else if ( castExpr->get_arg()->get_result()->get_lvalue() ) {
     348                                } else if ( castExpr->arg->result->get_lvalue() ) {
    345349                                        // conversion from lvalue to reference
    346350                                        // xxx - keep cast, but turn into pointer cast??
     
    348352                                        PRINT(
    349353                                                std::cerr << "convert lvalue to reference -- &" << std::endl;
    350                                                 std::cerr << castExpr->get_arg() << std::endl;
     354                                                std::cerr << castExpr->arg << std::endl;
    351355                                        )
    352                                         AddressExpr * ret = new AddressExpr( castExpr->get_arg() );
    353                                         if ( refType->get_base()->get_qualifiers() != castExpr->get_arg()->get_result()->get_qualifiers() ) {
     356                                        AddressExpr * ret = new AddressExpr( castExpr->arg );
     357                                        if ( refType->base->get_qualifiers() != castExpr->arg->result->get_qualifiers() ) {
    354358                                                // must keep cast if cast-to type is different from the actual type
    355                                                 castExpr->set_arg( ret );
     359                                                castExpr->arg = ret;
    356360                                                return castExpr;
    357361                                        }
    358                                         ret->set_env( castExpr->get_env() );
    359                                         delete ret->get_result();
    360                                         ret->set_result( castExpr->get_result() );
    361                                         castExpr->set_env( nullptr );
    362                                         castExpr->set_arg( nullptr );
    363                                         castExpr->set_result( nullptr );
     362                                        ret->env = castExpr->env;
     363                                        delete ret->result;
     364                                        ret->result = castExpr->result;
     365                                        castExpr->env = nullptr;
     366                                        castExpr->arg = nullptr;
     367                                        castExpr->result = nullptr;
    364368                                        delete castExpr;
    365369                                        return ret;
     
    368372                                }
    369373                                assertf( false, "Only conversions to reference from lvalue are currently supported: %s", toString( castExpr ).c_str() );
    370                         } else if ( ReferenceType * refType = dynamic_cast< ReferenceType * >( castExpr->get_arg()->get_result() ) ) {
     374                        } else if ( ReferenceType * refType = dynamic_cast< ReferenceType * >( castExpr->arg->result ) ) {
    371375                                (void)refType;
    372376                                // conversion from reference to rvalue
     
    375379                                        std::cerr << "was = " << castExpr << std::endl;
    376380                                )
    377                                 Expression * ret = castExpr->get_arg();
    378                                 TypeSubstitution * env = castExpr->get_env();
     381                                Expression * ret = castExpr->arg;
     382                                TypeSubstitution * env = castExpr->env;
    379383                                castExpr->set_env( nullptr );
    380384                                if ( ! isIntrinsicReference( ret ) ) {
     
    382386                                        ret = mkDeref( ret );
    383387                                }
    384                                 if ( ResolvExpr::typesCompatibleIgnoreQualifiers( castExpr->get_result(), castExpr->get_arg()->get_result()->stripReferences(), SymTab::Indexer() ) ) {
     388                                if ( ResolvExpr::typesCompatibleIgnoreQualifiers( castExpr->result, castExpr->arg->result->stripReferences(), SymTab::Indexer() ) ) {
    385389                                        // can remove cast if types are compatible, changing expression type to value type
    386                                         ret->set_result( castExpr->get_result()->clone() );
    387                                         castExpr->set_arg( nullptr );
     390                                        ret->result = castExpr->result->clone();
     391                                        ret->result->set_lvalue( true );  // ensure result is lvalue
     392                                        castExpr->arg = nullptr;
    388393                                        delete castExpr;
    389394                                } else {
    390395                                        // must keep cast if types are different
    391                                         castExpr->set_arg( ret );
     396                                        castExpr->arg = ret;
    392397                                        ret = castExpr;
    393398                                }
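In pointer terms, the reference-depth conversions above behave like this sketch: a reference of depth n is compiled as n levels of pointer, so shrinking the depth inserts dereferences and growing it inserts address-of (variable names are illustrative):

        int x = 7;
        int *  r1 = &x;        // int &  : depth 1, one pointer level
        int ** r2 = &r1;       // int && : depth 2, two pointer levels
        int *  down = *r2;     // int && -> int &  : depth shrinks, insert dereference(s)
        int ** up   = &r1;     // int &  -> int && : depth grows, insert address-of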
  • src/GenPoly/ScrubTyVars.cc

    rb158d8f rb6838214  
    2525
    2626namespace GenPoly {
    27         Type * ScrubTyVars::mutate( TypeInstType *typeInst ) {
     27        Type * ScrubTyVars::postmutate( TypeInstType * typeInst ) {
    2828                if ( ! tyVars ) {
    2929                        if ( typeInst->get_isFtype() ) {
     
    3131                                return new PointerType( Type::Qualifiers(), new FunctionType( Type::Qualifiers(), true ) );
    3232                        } else {
    33                                 PointerType *ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) );
     33                                PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) );
    3434                                delete typeInst;
    3535                                return ret;
     
    3737                }
    3838
    39                 TyVarMap::const_iterator tyVar = tyVars->find( typeInst->get_name() );
     39                TyVarMap::const_iterator tyVar = tyVars->find( typeInst->name );
    4040                if ( tyVar != tyVars->end() ) {
    4141                        switch ( tyVar->second.kind ) {
     
    4343                          case TypeDecl::Ttype:
    4444                                {
    45                                         PointerType *ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) );
     45                                        PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) );
    4646                                        delete typeInst;
    4747                                        return ret;
     
    5555        }
    5656
    57         Type * ScrubTyVars::mutateAggregateType( Type *ty ) {
     57        Type * ScrubTyVars::mutateAggregateType( Type * ty ) {
    5858                if ( shouldScrub( ty ) ) {
    59                         PointerType *ret = new PointerType( Type::Qualifiers(), new VoidType( ty->get_qualifiers() ) );
     59                        PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( ty->get_qualifiers() ) );
    6060                        delete ty;
    6161                        return ret;
     
    6464        }
    6565
    66         Type * ScrubTyVars::mutate( StructInstType *structInst ) {
     66        Type * ScrubTyVars::postmutate( StructInstType * structInst ) {
    6767                return mutateAggregateType( structInst );
    6868        }
    6969
    70         Type * ScrubTyVars::mutate( UnionInstType *unionInst ) {
     70        Type * ScrubTyVars::postmutate( UnionInstType * unionInst ) {
    7171                return mutateAggregateType( unionInst );
    7272        }
    7373
    74         Expression * ScrubTyVars::mutate( SizeofExpr *szeof ) {
     74        void ScrubTyVars::primeBaseScrub( Type * type ) {
     75                // need to determine whether type needs to be scrubbed to determine whether
     76                // automatic recursion is necessary
     77                if ( Type * t = shouldScrub( type ) ) {
     78                        visit_children = false;
     79                        GuardValue( dynType );
     80                        dynType = t;
     81                }
     82        }
     83
     84        Expression * ScrubTyVars::postmutate( SizeofExpr * szeof ) {
    7585                // sizeof( T ) => _sizeof_T parameter, which is the size of T
    76                 if ( Type *dynType = shouldScrub( szeof->get_type() ) ) {
     86                if ( dynType ) {
    7787                        Expression *expr = new NameExpr( sizeofName( mangleType( dynType ) ) );
    7888                        return expr;
    79                 } else {
    80                         return Mutator::mutate( szeof );
    8189                } // if
     90                return szeof;
    8291        }
    8392
    84         Expression * ScrubTyVars::mutate( AlignofExpr *algnof ) {
     93        Expression * ScrubTyVars::postmutate( AlignofExpr * algnof ) {
    8594                // alignof( T ) => _alignof_T parameter, which is the alignment of T
    86                 if ( Type *dynType = shouldScrub( algnof->get_type() ) ) {
     95                if ( dynType ) {
    8796                        Expression *expr = new NameExpr( alignofName( mangleType( dynType ) ) );
    8897                        return expr;
    89                 } else {
    90                         return Mutator::mutate( algnof );
    9198                } // if
     99                return algnof;
    92100        }
    93101
    94         Type * ScrubTyVars::mutate( PointerType *pointer ) {
    95 //              // special case of shouldScrub that takes all TypeInstType pointer bases, even if they're not dynamic
    96 //              Type *base = pointer->get_base();
    97 //              Type *dynType = 0;
    98 //              if ( dynamicOnly ) {
    99 //                      if ( TypeInstType *typeInst = dynamic_cast< TypeInstType* >( base ) ) {
    100 //                              if ( tyVars.find( typeInst->get_name() ) != tyVars.end() ) { dynType = typeInst; }
    101 //                      } else {
    102 //                              dynType = isDynType( base, tyVars );
    103 //                      }
    104 //              } else {
    105 //                      dynType = isPolyType( base, tyVars );
    106 //              }
    107 //              if ( dynType ) {
    108                 if ( Type *dynType = shouldScrub( pointer->get_base() ) ) {
    109                         Type *ret = dynType->acceptMutator( *this );
     102        Type * ScrubTyVars::postmutate( PointerType * pointer ) {
     103                if ( dynType ) {
     104                        Type * ret = dynType->acceptMutator( *visitor );
    110105                        ret->get_qualifiers() |= pointer->get_qualifiers();
    111                         pointer->set_base( 0 );
     106                        pointer->base = nullptr;
    112107                        delete pointer;
    113108                        return ret;
    114109                }
    115                 return Mutator::mutate( pointer );
     110                return pointer;
    116111        }
    117112} // namespace GenPoly
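The net effect on generated code: inside a polymorphic function, sizeof or alignof on a scrubbed type variable becomes a read of the corresponding size or alignment parameter. A rough sketch with a hypothetical mangled name, following the sizeofName/mangleType convention above:

        #include <cstddef>
        // Cforall source, conceptually:  forall( otype T ) size_t bytes() { return sizeof(T); }
        // after ScrubTyVars, T is erased and sizeof(T) reads the threaded-through size argument:
        std::size_t bytes( std::size_t _sizeof_T ) {   // _sizeof_T: hypothetical mangled parameter
                return _sizeof_T;
        }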
  • src/GenPoly/ScrubTyVars.h

    rb158d8f rb6838214  
    1818#include <cassert>            // for assert
    1919
     20#include "Common/PassVisitor.h"
    2021#include "GenPoly.h"          // for TyVarMap, isPolyType, isDynType
    2122#include "SynTree/Mutator.h"  // for Mutator
     
    2728
    2829namespace GenPoly {
    29         class ScrubTyVars : public Mutator {
     30        struct ScrubTyVars : public WithVisitorRef<ScrubTyVars>, public WithShortCircuiting, public WithGuards {
    3031                /// Whether to scrub all type variables from the provided map, dynamic type variables from the provided map, or all type variables
    3132                enum ScrubMode { FromMap, DynamicFromMap, All };
     
    5152                static SynTreeClass *scrubAll( SynTreeClass *target );
    5253
    53                 virtual Type* mutate( TypeInstType *typeInst );
    54                 virtual Type* mutate( StructInstType *structInst );
    55                 virtual Type* mutate( UnionInstType *unionInst );
    56                 virtual Expression* mutate( SizeofExpr *szeof );
    57                 virtual Expression* mutate( AlignofExpr *algnof );
    58                 virtual Type* mutate( PointerType *pointer );
     54                /// determine if children should be visited based on whether base type should be scrubbed.
     55                void primeBaseScrub( Type * );
     56
     57                void premutate( TypeInstType * ) { visit_children = false; }
     58                void premutate( StructInstType * ) { visit_children = false; }
     59                void premutate( UnionInstType * ) { visit_children = false; }
     60                void premutate( SizeofExpr * szeof ) { primeBaseScrub( szeof->type ); }
     61                void premutate( AlignofExpr * algnof ) { primeBaseScrub( algnof->type ); }
     62                void premutate( PointerType * pointer ) { primeBaseScrub( pointer->base ); }
     63
     64                Type * postmutate( TypeInstType * typeInst );
     65                Type * postmutate( StructInstType * structInst );
     66                Type * postmutate( UnionInstType * unionInst );
     67                Expression * postmutate( SizeofExpr * szeof );
     68                Expression * postmutate( AlignofExpr * algnof );
     69                Type * postmutate( PointerType * pointer );
    5970
    6071          private:
     
    7586                const TyVarMap *tyVars;  ///< Type variables to scrub
    7687                ScrubMode mode;          ///< which type variables to scrub? [FromMap]
     88
     89                Type * dynType = nullptr; ///< result of shouldScrub
    7790        };
    7891
    7992        template< typename SynTreeClass >
    8093        SynTreeClass * ScrubTyVars::scrub( SynTreeClass *target, const TyVarMap &tyVars ) {
    81                 ScrubTyVars scrubber( tyVars );
     94                PassVisitor<ScrubTyVars> scrubber( tyVars );
    8295                return static_cast< SynTreeClass * >( target->acceptMutator( scrubber ) );
    8396        }
     
    8598        template< typename SynTreeClass >
    8699        SynTreeClass * ScrubTyVars::scrubDynamic( SynTreeClass *target, const TyVarMap &tyVars ) {
    87                 ScrubTyVars scrubber( tyVars, ScrubTyVars::DynamicFromMap );
     100                PassVisitor<ScrubTyVars> scrubber( tyVars, ScrubTyVars::DynamicFromMap );
    88101                return static_cast< SynTreeClass * >( target->acceptMutator( scrubber ) );
    89102        }
     
    91104        template< typename SynTreeClass >
    92105        SynTreeClass * ScrubTyVars::scrubAll( SynTreeClass *target ) {
    93                 ScrubTyVars scrubber;
     106                PassVisitor<ScrubTyVars> scrubber;
    94107                return static_cast< SynTreeClass * >( target->acceptMutator( scrubber ) );
    95108        }
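Call sites are untouched by the PassVisitor conversion, since the static wrappers above hide the traversal. A hedged usage sketch, assuming a Type * ty and a TyVarMap tyVars are in scope:

        Type * fromMap = ScrubTyVars::scrub( ty->clone(), tyVars );        // scrub mapped type variables
        Type * dynOnly = ScrubTyVars::scrubDynamic( ty->clone(), tyVars ); // scrub only dynamic ones
        Type * all     = ScrubTyVars::scrubAll( ty->clone() );             // scrub every type variable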
  • src/InitTweak/FixInit.cc

    rb158d8f rb6838214  
    55// file "LICENCE" distributed with Cforall.
    66//
    7 // FixInit.h --
     7// FixInit.cc --
    88//
    99// Author           : Rob Schluntz
     
    197197                };
    198198
    199                 struct GenStructMemberCalls final : public WithGuards, public WithShortCircuiting, public WithIndexer {
     199                struct GenStructMemberCalls final : public WithGuards, public WithShortCircuiting, public WithIndexer, public WithVisitorRef<GenStructMemberCalls> {
    200200                        /// generate default/copy ctor and dtor calls for user-defined struct ctor/dtors
    201201                        /// for any member that is missing a corresponding ctor/dtor call.
     
    203203                        static void generate( std::list< Declaration * > & translationUnit );
    204204
    205                         void previsit( FunctionDecl * funcDecl );
    206                         void postvisit( FunctionDecl * funcDecl );
    207 
    208                         void previsit( MemberExpr * memberExpr );
    209                         void previsit( ApplicationExpr * appExpr );
     205                        void premutate( FunctionDecl * funcDecl );
     206                        DeclarationWithType * postmutate( FunctionDecl * funcDecl );
     207
     208                        void premutate( MemberExpr * memberExpr );
     209                        void premutate( ApplicationExpr * appExpr );
     210
      211                        /// Note: this postmutate used to live in a separate visitor (MutatingResolver). If this pass breaks, one place to
      212                        /// examine is whether it is safe for this part of the recursion to occur alongside the rest.
     213                        Expression * postmutate( UntypedExpr * expr );
    210214
    211215                        SemanticError errors;
     
    220224                        bool isCtor = false; // true if current function is a constructor
    221225                        StructDecl * structDecl = nullptr;
    222                 };
    223 
    224                 // very simple resolver-like mutator class - used to
    225                 // resolve UntypedExprs that are found within newly
    226                 // generated constructor/destructor calls
    227                 class MutatingResolver final : public Mutator {
    228                   public:
    229                         MutatingResolver( SymTab::Indexer & indexer ) : indexer( indexer ) {}
    230 
    231                         using Mutator::mutate;
    232                         virtual DeclarationWithType* mutate( ObjectDecl *objectDecl ) override;
    233                         virtual Expression* mutate( UntypedExpr *untypedExpr ) override;
    234 
    235                   private:
    236                         SymTab::Indexer & indexer;
    237226                };
    238227
     
    315304                void GenStructMemberCalls::generate( std::list< Declaration * > & translationUnit ) {
    316305                        PassVisitor<GenStructMemberCalls> warner;
    317                         acceptAll( translationUnit, warner );
     306                        mutateAll( translationUnit, warner );
    318307                }
    319308
     
    365354                        // arrays are not copy constructed, so this should always be an ExprStmt
    366355                        ImplicitCtorDtorStmt * stmt = genCtorDtor( fname, var, cpArg );
    367                         ExprStmt * exprStmt = strict_dynamic_cast< ExprStmt * >( stmt->get_callStmt() );
     356                        assertf( stmt, "ResolveCopyCtors: genCtorDtor returned nullptr: %s / %s / %s", fname.c_str(), toString( var ).c_str(), toString( cpArg ).c_str() );
     357                        ExprStmt * exprStmt = strict_dynamic_cast< ExprStmt * >( stmt->callStmt );
    368358                        Expression * resolved = exprStmt->expr;
    369359                        exprStmt->expr = nullptr; // take ownership of expr
     
    382372                        } // if
    383373                        delete stmt;
     374                        if ( TupleAssignExpr * assign = dynamic_cast< TupleAssignExpr * >( resolved ) ) {
     375                                // fix newly generated StmtExpr
     376                                postvisit( assign->stmtExpr );
     377                        }
    384378                        return resolved;
    385379                }
     
    475469                                static UniqueName retNamer("_tmp_stmtexpr_ret");
    476470
    477                                 // create variable that will hold the result of the stmt expr
    478471                                result = result->clone();
    479472                                env->apply( result );
     473                                if ( ! InitTweak::isConstructable( result ) ) {
     474                                        delete result;
     475                                        return;
     476                                }
     477
     478                                // create variable that will hold the result of the stmt expr
    480479                                ObjectDecl * ret = ObjectDecl::newObject( retNamer.newName(), result, nullptr );
    481                                 ret->get_type()->set_const( false );
    482                                 stmtExpr->get_returnDecls().push_front( ret );
     480                                ret->type->set_const( false );
     481                                stmtExpr->returnDecls.push_front( ret );
    483482
    484483                                // must have a non-empty body, otherwise it wouldn't have a result
    485                                 CompoundStmt * body = stmtExpr->get_statements();
     484                                CompoundStmt * body = stmtExpr->statements;
    486485                                assert( ! body->get_kids().empty() );
    487486                                // must be an ExprStmt, otherwise it wouldn't have a result
    488487                                ExprStmt * last = strict_dynamic_cast< ExprStmt * >( body->get_kids().back() );
    489                                 last->set_expr( makeCtorDtor( "?{}", ret, last->get_expr() ) );
    490 
    491                                 stmtExpr->get_dtors().push_front( makeCtorDtor( "^?{}", ret ) );
     488                                last->expr = makeCtorDtor( "?{}", ret, last->get_expr() );
     489
     490                                stmtExpr->dtors.push_front( makeCtorDtor( "^?{}", ret ) );
    492491                        } // if
    493492                }
     
    590589                        // to the outer context, rather than inside of the statement expression.
    591590                        visit_children = false;
    592                         std::list< Statement * > & stmts = stmtExpr->get_statements()->get_kids();
     591                        std::list< Statement * > & stmts = stmtExpr->statements->get_kids();
    593592                        for ( Statement *& stmt : stmts ) {
    594593                                stmt = stmt->acceptMutator( *visitor );
    595594                        } // for
    596                         assert( stmtExpr->get_result() );
    597                         Type * result = stmtExpr->get_result();
     595                        assert( stmtExpr->result );
     596                        Type * result = stmtExpr->result;
    598597                        if ( ! result->isVoid() ) {
    599                                 for ( ObjectDecl * obj : stmtExpr->get_returnDecls() ) {
     598                                for ( ObjectDecl * obj : stmtExpr->returnDecls ) {
    600599                                        stmtsToAddBefore.push_back( new DeclStmt( obj ) );
    601600                                } // for
    602601                                // add destructors after current statement
    603                                 for ( Expression * dtor : stmtExpr->get_dtors() ) {
     602                                for ( Expression * dtor : stmtExpr->dtors ) {
    604603                                        stmtsToAddAfter.push_back( new ExprStmt( dtor ) );
    605604                                } // for
    606605                                // must have a non-empty body, otherwise it wouldn't have a result
    607606                                assert( ! stmts.empty() );
    608                                 assert( ! stmtExpr->get_returnDecls().empty() );
    609                                 stmts.push_back( new ExprStmt( new VariableExpr( stmtExpr->get_returnDecls().front() ) ) );
    610                                 stmtExpr->get_returnDecls().clear();
    611                                 stmtExpr->get_dtors().clear();
    612                         }
    613                         assert( stmtExpr->get_returnDecls().empty() );
    614                         assert( stmtExpr->get_dtors().empty() );
     607                                assertf( ! stmtExpr->returnDecls.empty() || stmtExpr->dtors.empty(), "StmtExpr returns non-void, but no return decls: %s", toString( stmtExpr ).c_str() );
     608                                // if there is a return decl, add a use as the last statement; will not have return decl on non-constructable returns
     609                                if ( ! stmtExpr->returnDecls.empty() ) {
     610                                        stmts.push_back( new ExprStmt( new VariableExpr( stmtExpr->returnDecls.front() ) ) );
     611                                }
     612                                stmtExpr->returnDecls.clear();
     613                                stmtExpr->dtors.clear();
     614                        }
     615                        assert( stmtExpr->returnDecls.empty() );
     616                        assert( stmtExpr->dtors.empty() );
    615617                }
    616618
     
    937939                }
    938940
    939                 void GenStructMemberCalls::previsit( FunctionDecl * funcDecl ) {
     941                void GenStructMemberCalls::premutate( FunctionDecl * funcDecl ) {
    940942                        GuardValue( function );
    941943                        GuardValue( unhandled );
     
    971973                }
    972974
    973                 void GenStructMemberCalls::postvisit( FunctionDecl * funcDecl ) {
     975                DeclarationWithType * GenStructMemberCalls::postmutate( FunctionDecl * funcDecl ) {
    974976                        // remove the unhandled objects from usedUninit, because a call is inserted
    975977                        // to handle them - only objects that are later constructed are used uninitialized.
     
    10251027                                                Statement * callStmt = stmt.front();
    10261028
    1027                                                 MutatingResolver resolver( indexer );
    10281029                                                try {
    1029                                                         callStmt->acceptMutator( resolver );
     1030                                                        callStmt->acceptMutator( *visitor );
    10301031                                                        if ( isCtor ) {
    10311032                                                                function->get_statements()->push_front( callStmt );
     
    10431044                                throw errors;
    10441045                        }
     1046                        return funcDecl;
    10451047                }
    10461048
     
    10681070                }
    10691071
    1070                 void GenStructMemberCalls::previsit( ApplicationExpr * appExpr ) {
     1072                void GenStructMemberCalls::premutate( ApplicationExpr * appExpr ) {
    10711073                        if ( ! checkWarnings( function ) ) {
    10721074                                visit_children = false;
     
    10931095                }
    10941096
    1095                 void GenStructMemberCalls::previsit( MemberExpr * memberExpr ) {
     1097                void GenStructMemberCalls::premutate( MemberExpr * memberExpr ) {
    10961098                        if ( ! checkWarnings( function ) || ! isCtor ) {
    10971099                                visit_children = false;
     
    11211123                }
    11221124
    1123                 DeclarationWithType * MutatingResolver::mutate( ObjectDecl * objectDecl ) {
    1124                         // add object to the indexer assumes that there will be no name collisions
    1125                         // in generated code. If this changes, add mutate methods for entities with
    1126                         // scope and call {enter,leave}Scope explicitly.
    1127                         indexer.addId( objectDecl );
    1128                         return objectDecl;
    1129                 }
    1130 
    1131                 Expression * MutatingResolver::mutate( UntypedExpr * untypedExpr ) {
     1125                Expression * GenStructMemberCalls::postmutate( UntypedExpr * untypedExpr ) {
    11321126                        Expression * newExpr = untypedExpr;
    11331127                        ResolvExpr::findVoidExpression( newExpr, indexer );
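The StmtExpr handling above works roughly as follows: a non-void statement expression gets a fresh return temporary, its last expression is rewritten into a constructor call into that temporary, and a matching destructor is queued after the statement. A C++ analogue with explicit ctor/dtor helpers standing in for ?{} and ^?{} (the _tmp_stmtexpr_ret prefix follows the UniqueName above; everything else is hypothetical):

        struct Thing { int v; };
        void ctor( Thing & t, Thing src ) { t = src; }   // stands in for ?{}
        void dtor( Thing & ) {}                          // stands in for ^?{}
        Thing mk() { return Thing{ 42 }; }

        // Cforall:  x = ({ prep(); mk(); });
        void lowered( Thing & x, void (*prep)() ) {
                Thing _tmp_stmtexpr_ret0;                // return temporary, declared before the statement
                prep();
                ctor( _tmp_stmtexpr_ret0, mk() );        // construct the result into the temporary
                x = _tmp_stmtexpr_ret0;                  // value of the statement expression
                dtor( _tmp_stmtexpr_ret0 );              // destructor queued after the full statement
        }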
  • src/InitTweak/GenInit.cc

    rb158d8f rb6838214  
    3030#include "InitTweak.h"             // for isConstExpr, InitExpander, checkIn...
    3131#include "Parser/LinkageSpec.h"    // for isOverridable, C
     32#include "ResolvExpr/Resolver.h"
    3233#include "SymTab/Autogen.h"        // for genImplicitCall, SizeType
    3334#include "SymTab/Mangler.h"        // for Mangler
     
    4041#include "SynTree/Type.h"          // for Type, ArrayType, Type::Qualifiers
    4142#include "SynTree/Visitor.h"       // for acceptAll, maybeAccept
     43#include "Tuples/Tuples.h"         // for maybeImpure
    4244
    4345namespace InitTweak {
     
    8991        };
    9092
    91         struct HoistArrayDimension final : public WithDeclsToAdd, public WithShortCircuiting, public WithGuards {
     93        struct HoistArrayDimension final : public WithDeclsToAdd, public WithShortCircuiting, public WithGuards, public WithIndexer {
    9294                /// hoist dimension from array types in object declaration so that it uses a single
     9395                /// const variable of type size_t, so that side-effecting array dimensions are only
     
    104106                void premutate( FunctionType * ) { visit_children = false; }
    105107
     108                // need this so that enumerators are added to the indexer, due to premutate(AggregateDecl *)
     109                void premutate( EnumDecl * ) {}
     110
    106111                void hoist( Type * type );
    107112
     
    135140                                if ( varExpr->var == retVal ) return;
    136141                        }
    137                         stmtsToAddBefore.push_back( genCtorDtor( "?{}", retVal, returnStmt->get_expr() ) );
     142                        Statement * stmt = genCtorDtor( "?{}", retVal, returnStmt->expr );
     143                        assertf( stmt, "ReturnFixer: genCtorDtor returned nullptr: %s / %s", toString( retVal ).c_str(), toString( returnStmt->expr ).c_str() );
     144                        stmtsToAddBefore.push_back( stmt );
    138145
    139146                        // return the retVal object
     
    178185                        if ( ! arrayType->get_dimension() ) return; // xxx - recursive call to hoist?
    179186
    180                         // don't need to hoist dimension if it's a constexpr - only need to if there's potential for side effects.
    181                         if ( isConstExpr( arrayType->get_dimension() ) ) return;
     187                        // need to resolve array dimensions in order to accurately determine if constexpr
     188                        ResolvExpr::findSingleExpression( arrayType->dimension, SymTab::SizeType->clone(), indexer );
     189                        // array is variable-length when the dimension is not constexpr
     190                        arrayType->isVarLen = ! isConstExpr( arrayType->dimension );
     191                        // don't need to hoist dimension if it's definitely pure - only need to if there's potential for side effects.
     192                        if ( ! Tuples::maybeImpure( arrayType->dimension ) ) return;
    182193
    183194                        ObjectDecl * arrayDimension = new ObjectDecl( dimensionName.newName(), storageClasses, LinkageSpec::C, 0, SymTab::SizeType->clone(), new SingleInit( arrayType->get_dimension() ) );
     
    194205        void HoistArrayDimension::premutate( FunctionDecl * ) {
    195206                GuardValue( inFunction );
     207                inFunction = true;
    196208        }
    197209
     
    220232                Type * type = objDecl->get_type();
    221233                while ( ArrayType * at = dynamic_cast< ArrayType * >( type ) ) {
      234                        // an initialized VLA must always be constructed by CFA, since initializing a VLA is an error in plain C
     235                        if ( at->isVarLen && objDecl->init ) return true;
    222236                        type = at->get_base();
    223237                }
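The dimension hoisting above ensures a possibly side-effecting array bound is evaluated exactly once, into a const size_t, and the resolver step added in this changeset also marks the array as variable-length when the bound is not a constant expression. A small sketch (the name _dim0 stands in for dimensionName.newName() and is otherwise hypothetical):

        #include <cstddef>
        int next() { static int n = 4; return ++n; }     // assumed side-effecting dimension
        void sketch() {
                const std::size_t _dim0 = next();        // hoisted: dimension evaluated exactly once
                // int a[ _dim0 ];                       // the VLA then uses the hoisted constant (valid C99)
        }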
  • src/InitTweak/InitTweak.cc

    rb158d8f rb6838214  
    564564                void previsit( ConstantExpr * ) {}
    565565
     566                void previsit( VariableExpr * varExpr ) {
     567                        visit_children = false;
     568
     569                        if ( EnumInstType * inst = dynamic_cast< EnumInstType * >( varExpr->result ) ) {
     570                                long long int value;
     571                                if ( inst->baseEnum->valueOf( varExpr->var, value ) ) {
     572                                        // enumerators are const expr
     573                                        return;
     574                                }
     575                        }
     576                        isConstExpr = false;
     577                }
     578
    566579                bool isConstExpr = true;
    567580        };
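The new VariableExpr case means an enumerator now counts as a constant expression, so, for example, an array dimension that names an enumerator no longer forces hoisting or variable-length treatment:

        enum { BlockSize = 64 };
        int buffer[ BlockSize ];     // dimension is an enumerator: constexpr, no hoisted temporary needed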
  • src/Parser/DeclarationNode.cc

    rb158d8f rb6838214  
    343343        DeclarationNode * newnode = new DeclarationNode;
    344344        newnode->type = new TypeData( kind == OperKinds::PointTo ? TypeData::Pointer : TypeData::Reference );
     345        if ( kind == OperKinds::And ) {
      346                // T && is parsed as the single 'And' operator rather than as two references => add a second reference type
     347                TypeData * td = new TypeData( TypeData::Reference );
     348                td->base = newnode->type;
     349                newnode->type = td;
     350        }
    345351        if ( qualifiers ) {
    346352                return newnode->addQualifiers( qualifiers );
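Because the two ampersands of a doubly-referenced declarator arrive as the single 'And' token, the builder now expands that token into two Reference TypeData nodes. The pointer analogue, with the Cforall form in the comment:

        // Cforall:  int && rr;    // one '&&' token, meaning reference-to-reference
        // the fix builds:  Reference( Reference( int ) )
        int x = 0;
        int *  p  = &x;
        int ** pp = &p;              // two levels, matching the two reference nodes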
  • src/Parser/parser.yy

    rb158d8f rb6838214  
    1010// Created On       : Sat Sep  1 20:22:55 2001
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Nov 27 17:23:35 2017
    13 // Update Count     : 2992
     12// Last Modified On : Thu Dec 21 11:32:56 2017
     13// Update Count     : 2996
    1414//
    1515
     
    317317%type<decl> cfa_typedef_declaration cfa_variable_declaration cfa_variable_specifier
    318318
    319 %type<decl> c_declaration
     319%type<decl> c_declaration static_assert
    320320%type<decl> KR_function_declarator KR_function_no_ptr KR_function_ptr KR_function_array
    321321%type<decl> KR_declaration_list KR_declaration_list_opt
     
    835835        | exception_statement
    836836        | asm_statement
     837        ;
    837838
    838839labeled_statement:
     
    12821283        c_declaration pop ';'
    12831284        | cfa_declaration pop ';'                                                       // CFA
    1284         | STATICASSERT '(' constant_expression ',' string_literal ')' ';' // C11
     1285        | static_assert
     1286        ;
     1287
     1288static_assert:
     1289        STATICASSERT '(' constant_expression ',' string_literal ')' ';' // C11
    12851290                { throw SemanticError("Static assert is currently unimplemented."); $$ = nullptr; }     // FIX ME
    1286         ;
    12871291
    12881292// C declaration syntax is notoriously confusing and error prone. Cforall provides its own type, variable and function
     
    18901894                        $$ = distAttr( $2, $3 );
    18911895                }
     1896        | static_assert
    18921897        ;
    18931898
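Factoring the C11 rule into its own static_assert nonterminal lets a static assertion appear in each declaration context that references the rule, instead of in only the one production. The accepted form (still rejected with the 'unimplemented' SemanticError above):

        _Static_assert( sizeof(int) == 4, "int must be 32 bits" );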
  • src/ResolvExpr/AlternativeFinder.cc

    rb158d8f rb6838214  
    6060
    6161namespace ResolvExpr {
     62        struct AlternativeFinder::Finder : public WithShortCircuiting {
     63                Finder( AlternativeFinder & altFinder ) : altFinder( altFinder ), indexer( altFinder.indexer ), alternatives( altFinder.alternatives ), env( altFinder.env ), targetType( altFinder.targetType )  {}
     64
     65                void previsit( BaseSyntaxNode * ) { visit_children = false; }
     66
     67                void postvisit( ApplicationExpr * applicationExpr );
     68                void postvisit( UntypedExpr * untypedExpr );
     69                void postvisit( AddressExpr * addressExpr );
     70                void postvisit( LabelAddressExpr * labelExpr );
     71                void postvisit( CastExpr * castExpr );
     72                void postvisit( VirtualCastExpr * castExpr );
     73                void postvisit( UntypedMemberExpr * memberExpr );
     74                void postvisit( MemberExpr * memberExpr );
     75                void postvisit( NameExpr * variableExpr );
     76                void postvisit( VariableExpr * variableExpr );
     77                void postvisit( ConstantExpr * constantExpr );
     78                void postvisit( SizeofExpr * sizeofExpr );
     79                void postvisit( AlignofExpr * alignofExpr );
     80                void postvisit( UntypedOffsetofExpr * offsetofExpr );
     81                void postvisit( OffsetofExpr * offsetofExpr );
     82                void postvisit( OffsetPackExpr * offsetPackExpr );
     83                void postvisit( AttrExpr * attrExpr );
     84                void postvisit( LogicalExpr * logicalExpr );
     85                void postvisit( ConditionalExpr * conditionalExpr );
     86                void postvisit( CommaExpr * commaExpr );
     87                void postvisit( ImplicitCopyCtorExpr  * impCpCtorExpr );
     88                void postvisit( ConstructorExpr  * ctorExpr );
     89                void postvisit( RangeExpr  * rangeExpr );
     90                void postvisit( UntypedTupleExpr * tupleExpr );
     91                void postvisit( TupleExpr * tupleExpr );
     92                void postvisit( TupleIndexExpr * tupleExpr );
     93                void postvisit( TupleAssignExpr * tupleExpr );
     94                void postvisit( UniqueExpr * unqExpr );
     95                void postvisit( StmtExpr * stmtExpr );
     96                void postvisit( UntypedInitExpr * initExpr );
     97
     98                /// Adds alternatives for anonymous members
     99                void addAnonConversions( const Alternative & alt );
     100                /// Adds alternatives for member expressions, given the aggregate, conversion cost for that aggregate, and name of the member
     101                template< typename StructOrUnionType > void addAggMembers( StructOrUnionType *aggInst, Expression *expr, const Cost &newCost, const TypeEnvironment & env, Expression * member );
     102                /// Adds alternatives for member expressions where the left side has tuple type
     103                void addTupleMembers( TupleType * tupleType, Expression *expr, const Cost &newCost, const TypeEnvironment & env, Expression * member );
     104                /// Adds alternatives for offsetof expressions, given the base type and name of the member
     105                template< typename StructOrUnionType > void addOffsetof( StructOrUnionType *aggInst, const std::string &name );
     106                /// Takes a final result and checks if its assertions can be satisfied
     107                template<typename OutputIterator>
     108                void validateFunctionAlternative( const Alternative &func, ArgPack& result, const std::vector<ArgPack>& results, OutputIterator out );
     109                /// Finds matching alternatives for a function, given a set of arguments
     110                template<typename OutputIterator>
     111                void makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const ExplodedArgs& args, OutputIterator out );
     112                /// Checks if assertion parameters match for a new alternative
     113                template< typename OutputIterator >
     114                void inferParameters( const AssertionSet &need, AssertionSet &have, const Alternative &newAlt, OpenVarSet &openVars, OutputIterator out );
     115        private:
     116                AlternativeFinder & altFinder;
     117                const SymTab::Indexer &indexer;
     118                AltList & alternatives;
     119                const TypeEnvironment &env;
     120                Type *& targetType;
     121        };
     122
    62123        Expression *resolveInVoidContext( Expression *expr, const SymTab::Indexer &indexer, TypeEnvironment &env ) {
    63124                CastExpr *castToVoid = new CastExpr( expr );
     
    152213
    153214                void renameTypes( Expression *expr ) {
    154                         expr->get_result()->accept( global_renamer );
     215                        renameTyVars( expr->result );
    155216                }
    156217        } // namespace
     
    185246
    186247        void AlternativeFinder::find( Expression *expr, bool adjust, bool prune, bool failFast ) {
    187                 expr->accept( *this );
     248                PassVisitor<Finder> finder( *this );
     249                expr->accept( finder );
    188250                if ( failFast && alternatives.empty() ) {
    189251                        PRINT(
     
    244306        }
    245307
    246         void AlternativeFinder::addAnonConversions( const Alternative & alt ) {
     308        void AlternativeFinder::Finder::addAnonConversions( const Alternative & alt ) {
    247309                // adds anonymous member interpretations whenever an aggregate value type is seen.
    248310                // it's okay for the aggregate expression to have reference type -- cast it to the base type to treat the aggregate as the referenced value
     
    265327
    266328        template< typename StructOrUnionType >
    267         void AlternativeFinder::addAggMembers( StructOrUnionType *aggInst, Expression *expr, const Cost &newCost, const TypeEnvironment & env, Expression * member ) {
     329        void AlternativeFinder::Finder::addAggMembers( StructOrUnionType *aggInst, Expression *expr, const Cost &newCost, const TypeEnvironment & env, Expression * member ) {
    268330                // by this point, member must be a name expr
    269331                NameExpr * nameExpr = dynamic_cast< NameExpr * >( member );
     
    284346        }
    285347
    286         void AlternativeFinder::addTupleMembers( TupleType * tupleType, Expression *expr, const Cost &newCost, const TypeEnvironment & env, Expression * member ) {
     348        void AlternativeFinder::Finder::addTupleMembers( TupleType * tupleType, Expression *expr, const Cost &newCost, const TypeEnvironment & env, Expression * member ) {
    287349                if ( ConstantExpr * constantExpr = dynamic_cast< ConstantExpr * >( member ) ) {
    288350                        // get the value of the constant expression as an int, must be between 0 and the length of the tuple type to have meaning
     
    308370        }
    309371
    310         void AlternativeFinder::visit( ApplicationExpr *applicationExpr ) {
     372        void AlternativeFinder::Finder::postvisit( ApplicationExpr *applicationExpr ) {
    311373                alternatives.push_back( Alternative( applicationExpr->clone(), env, Cost::zero ) );
    312374        }
     
    485547                        Type *adjType = candidate->get_type()->clone();
    486548                        adjustExprType( adjType, newEnv, indexer );
    487                         adjType->accept( global_renamer );
     549                        renameTyVars( adjType );
    488550                        PRINT(
    489551                                std::cerr << "unifying ";
     
    541603
    542604        template< typename OutputIterator >
    543         void AlternativeFinder::inferParameters( const AssertionSet &need, AssertionSet &have, const Alternative &newAlt, OpenVarSet &openVars, OutputIterator out ) {
     605        void AlternativeFinder::Finder::inferParameters( const AssertionSet &need, AssertionSet &have, const Alternative &newAlt, OpenVarSet &openVars, OutputIterator out ) {
    544606//      PRINT(
    545607//          std::cerr << "inferParameters: assertions needed are" << std::endl;
     
    596658                ArgPack()
    597659                        : parent(0), expr(), cost(Cost::zero), env(), need(), have(), openVars(), nextArg(0),
    598 
    599660                          tupleStart(0), nextExpl(0), explAlt(0) {}
    600661
     
    706767                                                Type* argType;
    707768
    708                                                 if ( nTuples > 0 ) {
    709                                                         // first iteration, push empty tuple expression
     769                                                if ( nTuples > 0 || ! results[i].expr ) {
     770                                                        // first iteration or no expression to clone,
     771                                                        // push empty tuple expression
    710772                                                        newResult.parent = i;
    711773                                                        std::list<Expression*> emptyList;
     
    892954
    893955        template<typename OutputIterator>
    894         void AlternativeFinder::validateFunctionAlternative( const Alternative &func, ArgPack& result,
     956        void AlternativeFinder::Finder::validateFunctionAlternative( const Alternative &func, ArgPack& result,
    895957                        const std::vector<ArgPack>& results, OutputIterator out ) {
    896958                ApplicationExpr *appExpr = new ApplicationExpr( func.expr->clone() );
     
    915977
    916978        template<typename OutputIterator>
    917         void AlternativeFinder::makeFunctionAlternatives( const Alternative &func,
     979        void AlternativeFinder::Finder::makeFunctionAlternatives( const Alternative &func,
    918980                        FunctionType *funcType, const ExplodedArgs &args, OutputIterator out ) {
    919981                OpenVarSet funcOpenVars;
     
    10221084        }
    10231085
    1024         void AlternativeFinder::visit( UntypedExpr *untypedExpr ) {
     1086        void AlternativeFinder::Finder::postvisit( UntypedExpr *untypedExpr ) {
    10251087                AlternativeFinder funcFinder( indexer, env );
    10261088                funcFinder.findWithAdjustment( untypedExpr->get_function() );
     
    10291091
    10301092                std::vector< AlternativeFinder > argAlternatives;
    1031                 findSubExprs( untypedExpr->begin_args(), untypedExpr->end_args(),
     1093                altFinder.findSubExprs( untypedExpr->begin_args(), untypedExpr->end_args(),
    10321094                        back_inserter( argAlternatives ) );
    10331095
    10341096                // take care of possible tuple assignments
    10351097                // if not tuple assignment, assignment is taken care of as a normal function call
    1036                 Tuples::handleTupleAssignment( *this, untypedExpr, argAlternatives );
     1098                Tuples::handleTupleAssignment( altFinder, untypedExpr, argAlternatives );
    10371099
    10381100                // find function operators
     
    11721234                        // fix this issue in a more robust way.
    11731235                        targetType = nullptr;
    1174                         visit( untypedExpr );
     1236                        postvisit( untypedExpr );
    11751237                }
    11761238        }
     
    11811243        }
    11821244
    1183         void AlternativeFinder::visit( AddressExpr *addressExpr ) {
     1245        void AlternativeFinder::Finder::postvisit( AddressExpr *addressExpr ) {
    11841246                AlternativeFinder finder( indexer, env );
    11851247                finder.find( addressExpr->get_arg() );
     
    11921254        }
    11931255
    1194         void AlternativeFinder::visit( LabelAddressExpr * expr ) {
     1256        void AlternativeFinder::Finder::postvisit( LabelAddressExpr * expr ) {
    11951257                alternatives.push_back( Alternative{ expr->clone(), env, Cost::zero } );
    11961258        }
     
    12231285        }
    12241286
    1225         void AlternativeFinder::visit( CastExpr *castExpr ) {
     1287        void AlternativeFinder::Finder::postvisit( CastExpr *castExpr ) {
    12261288                Type *& toType = castExpr->get_result();
    12271289                assert( toType );
     
    12781340        }
    12791341
    1280         void AlternativeFinder::visit( VirtualCastExpr * castExpr ) {
     1342        void AlternativeFinder::Finder::postvisit( VirtualCastExpr * castExpr ) {
    12811343                assertf( castExpr->get_result(), "Implicit virtual cast targets not yet supported." );
    12821344                AlternativeFinder finder( indexer, env );
     
    12901352        }
    12911353
    1292         void AlternativeFinder::visit( UntypedMemberExpr *memberExpr ) {
     1354        void AlternativeFinder::Finder::postvisit( UntypedMemberExpr *memberExpr ) {
    12931355                AlternativeFinder funcFinder( indexer, env );
    12941356                funcFinder.findWithAdjustment( memberExpr->get_aggregate() );
     
    13121374        }
    13131375
    1314         void AlternativeFinder::visit( MemberExpr *memberExpr ) {
     1376        void AlternativeFinder::Finder::postvisit( MemberExpr *memberExpr ) {
    13151377                alternatives.push_back( Alternative( memberExpr->clone(), env, Cost::zero ) );
    13161378        }
    13171379
    1318         void AlternativeFinder::visit( NameExpr *nameExpr ) {
     1380        void AlternativeFinder::Finder::postvisit( NameExpr *nameExpr ) {
    13191381                std::list< SymTab::Indexer::IdData > declList;
    13201382                indexer.lookupId( nameExpr->get_name(), declList );
     
    13371399        }
    13381400
    1339         void AlternativeFinder::visit( VariableExpr *variableExpr ) {
     1401        void AlternativeFinder::Finder::postvisit( VariableExpr *variableExpr ) {
    13401402                // not sufficient to clone here, because variable's type may have changed
    13411403                // since the VariableExpr was originally created.
     
    13431405        }
    13441406
    1345         void AlternativeFinder::visit( ConstantExpr *constantExpr ) {
     1407        void AlternativeFinder::Finder::postvisit( ConstantExpr *constantExpr ) {
    13461408                alternatives.push_back( Alternative( constantExpr->clone(), env, Cost::zero ) );
    13471409        }
    13481410
    1349         void AlternativeFinder::visit( SizeofExpr *sizeofExpr ) {
     1411        void AlternativeFinder::Finder::postvisit( SizeofExpr *sizeofExpr ) {
    13501412                if ( sizeofExpr->get_isType() ) {
    13511413                        Type * newType = sizeofExpr->get_type()->clone();
     
    13681430        }
    13691431
    1370         void AlternativeFinder::visit( AlignofExpr *alignofExpr ) {
     1432        void AlternativeFinder::Finder::postvisit( AlignofExpr *alignofExpr ) {
    13711433                if ( alignofExpr->get_isType() ) {
    13721434                        Type * newType = alignofExpr->get_type()->clone();
     
    13901452
    13911453        template< typename StructOrUnionType >
    1392         void AlternativeFinder::addOffsetof( StructOrUnionType *aggInst, const std::string &name ) {
     1454        void AlternativeFinder::Finder::addOffsetof( StructOrUnionType *aggInst, const std::string &name ) {
    13931455                std::list< Declaration* > members;
    13941456                aggInst->lookup( name, members );
     
    14031465        }
    14041466
    1405         void AlternativeFinder::visit( UntypedOffsetofExpr *offsetofExpr ) {
     1467        void AlternativeFinder::Finder::postvisit( UntypedOffsetofExpr *offsetofExpr ) {
    14061468                AlternativeFinder funcFinder( indexer, env );
    14071469                // xxx - resolveTypeof?
     
    14131475        }
    14141476
    1415         void AlternativeFinder::visit( OffsetofExpr *offsetofExpr ) {
     1477        void AlternativeFinder::Finder::postvisit( OffsetofExpr *offsetofExpr ) {
    14161478                alternatives.push_back( Alternative( offsetofExpr->clone(), env, Cost::zero ) );
    14171479        }
    14181480
    1419         void AlternativeFinder::visit( OffsetPackExpr *offsetPackExpr ) {
     1481        void AlternativeFinder::Finder::postvisit( OffsetPackExpr *offsetPackExpr ) {
    14201482                alternatives.push_back( Alternative( offsetPackExpr->clone(), env, Cost::zero ) );
    14211483        }
     
    14441506        }
    14451507
    1446         void AlternativeFinder::visit( AttrExpr *attrExpr ) {
     1508        void AlternativeFinder::Finder::postvisit( AttrExpr *attrExpr ) {
    14471509                // assume no 'pointer-to-attribute'
    14481510                NameExpr *nameExpr = dynamic_cast< NameExpr* >( attrExpr->get_attr() );
     
    14581520                                        if ( function->get_parameters().size() == 1 ) {
    14591521                                                if ( attrExpr->get_isType() ) {
    1460                                                         resolveAttr( data, function, attrExpr->get_type(), env, *this );
     1522                                                        resolveAttr( data, function, attrExpr->get_type(), env, altFinder );
    14611523                                                } else {
    14621524                                                        AlternativeFinder finder( indexer, env );
     
    14641526                                                        for ( AltList::iterator choice = finder.alternatives.begin(); choice != finder.alternatives.end(); ++choice ) {
    14651527                                                                if ( choice->expr->get_result()->size() == 1 ) {
    1466                                                                         resolveAttr(data, function, choice->expr->get_result(), choice->env, *this );
     1528                                                                        resolveAttr( data, function, choice->expr->get_result(), choice->env, altFinder );
    14671529                                                                } // fi
    14681530                                                        } // for
     
    14791541        }
    14801542
    1481         void AlternativeFinder::visit( LogicalExpr *logicalExpr ) {
     1543        void AlternativeFinder::Finder::postvisit( LogicalExpr *logicalExpr ) {
    14821544                AlternativeFinder firstFinder( indexer, env );
    14831545                firstFinder.findWithAdjustment( logicalExpr->get_arg1() );
     
    14921554        }
    14931555
    1494         void AlternativeFinder::visit( ConditionalExpr *conditionalExpr ) {
     1556        void AlternativeFinder::Finder::postvisit( ConditionalExpr *conditionalExpr ) {
    14951557                // find alternatives for condition
    14961558                AlternativeFinder firstFinder( indexer, env );
     
    15241586        }
    15251587
    1526         void AlternativeFinder::visit( CommaExpr *commaExpr ) {
     1588        void AlternativeFinder::Finder::postvisit( CommaExpr *commaExpr ) {
    15271589                TypeEnvironment newEnv( env );
    15281590                Expression *newFirstArg = resolveInVoidContext( commaExpr->get_arg1(), indexer, newEnv );
     
    15351597        }
    15361598
    1537         void AlternativeFinder::visit( RangeExpr * rangeExpr ) {
     1599        void AlternativeFinder::Finder::postvisit( RangeExpr * rangeExpr ) {
    15381600                // resolve low and high, accept alternatives whose low and high types unify
    15391601                AlternativeFinder firstFinder( indexer, env );
     
    15571619        }
    15581620
    1559         void AlternativeFinder::visit( UntypedTupleExpr *tupleExpr ) {
     1621        void AlternativeFinder::Finder::postvisit( UntypedTupleExpr *tupleExpr ) {
    15601622                std::vector< AlternativeFinder > subExprAlternatives;
    1561                 findSubExprs( tupleExpr->get_exprs().begin(), tupleExpr->get_exprs().end(),
     1623                altFinder.findSubExprs( tupleExpr->get_exprs().begin(), tupleExpr->get_exprs().end(),
    15621624                        back_inserter( subExprAlternatives ) );
    15631625                std::vector< AltList > possibilities;
     
    15751637        }
    15761638
    1577         void AlternativeFinder::visit( TupleExpr *tupleExpr ) {
     1639        void AlternativeFinder::Finder::postvisit( TupleExpr *tupleExpr ) {
    15781640                alternatives.push_back( Alternative( tupleExpr->clone(), env, Cost::zero ) );
    15791641        }
    15801642
    1581         void AlternativeFinder::visit( ImplicitCopyCtorExpr * impCpCtorExpr ) {
     1643        void AlternativeFinder::Finder::postvisit( ImplicitCopyCtorExpr * impCpCtorExpr ) {
    15821644                alternatives.push_back( Alternative( impCpCtorExpr->clone(), env, Cost::zero ) );
    15831645        }
    15841646
    1585         void AlternativeFinder::visit( ConstructorExpr * ctorExpr ) {
     1647        void AlternativeFinder::Finder::postvisit( ConstructorExpr * ctorExpr ) {
    15861648                AlternativeFinder finder( indexer, env );
    15871649                // don't prune here, since it's guaranteed all alternatives will have the same type
     
    15931655        }
    15941656
    1595         void AlternativeFinder::visit( TupleIndexExpr *tupleExpr ) {
     1657        void AlternativeFinder::Finder::postvisit( TupleIndexExpr *tupleExpr ) {
    15961658                alternatives.push_back( Alternative( tupleExpr->clone(), env, Cost::zero ) );
    15971659        }
    15981660
    1599         void AlternativeFinder::visit( TupleAssignExpr *tupleAssignExpr ) {
     1661        void AlternativeFinder::Finder::postvisit( TupleAssignExpr *tupleAssignExpr ) {
    16001662                alternatives.push_back( Alternative( tupleAssignExpr->clone(), env, Cost::zero ) );
    16011663        }
    16021664
    1603         void AlternativeFinder::visit( UniqueExpr *unqExpr ) {
     1665        void AlternativeFinder::Finder::postvisit( UniqueExpr *unqExpr ) {
    16041666                AlternativeFinder finder( indexer, env );
    16051667                finder.findWithAdjustment( unqExpr->get_expr() );
     
    16111673        }
    16121674
    1613         void AlternativeFinder::visit( StmtExpr *stmtExpr ) {
     1675        void AlternativeFinder::Finder::postvisit( StmtExpr *stmtExpr ) {
    16141676                StmtExpr * newStmtExpr = stmtExpr->clone();
    16151677                ResolvExpr::resolveStmtExpr( newStmtExpr, indexer );
     
    16181680        }
    16191681
    1620         void AlternativeFinder::visit( UntypedInitExpr *initExpr ) {
     1682        void AlternativeFinder::Finder::postvisit( UntypedInitExpr *initExpr ) {
    16211683                // handle each option like a cast
    16221684                AltList candidates;
    1623                 PRINT( std::cerr << "untyped init expr: " << initExpr << std::endl; )
     1685                PRINT(
     1686                        std::cerr << "untyped init expr: " << initExpr << std::endl;
     1687                )
    16241688                // O(N^2) checks of d-types with e-types
    16251689                for ( InitAlternative & initAlt : initExpr->get_initAlts() ) {
     
    16371701                                AssertionSet needAssertions, haveAssertions;
    16381702                                OpenVarSet openVars;  // find things in env that don't have a "representative type" and claim those are open vars?
    1639                                 PRINT( std::cerr << "  @ " << toType << " " << initAlt.designation << std::endl; )
     1703                                PRINT(
     1704                                        std::cerr << "  @ " << toType << " " << initAlt.designation << std::endl;
     1705                                 )
    16401706                                // It's possible that a cast can throw away some values in a multiply-valued expression.  (An example is a
    16411707                                // cast-to-void, which casts from one value to zero.)  Figure out the prefix of the subexpression results
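
The AlternativeFinder.cc changes above move the resolver logic out of virtual `Visitor::visit` overrides and into a plain `Finder` struct whose `postvisit` overloads are driven by a `PassVisitor` wrapper (see `PassVisitor<Finder> finder( *this ); expr->accept( finder );` in `find`). A minimal, compilable sketch of that dispatch shape follows; the node types and the wrapper are hypothetical stand-ins, not the real Cforall AST classes or the actual `Common/PassVisitor.h` machinery, which is considerably more general.

        #include <iostream>
        #include <utility>

        // hypothetical stand-ins for two AST node kinds
        struct NameExpr {};
        struct ConstantExpr {};

        // generic wrapper in the spirit of PassVisitor< Finder >: owns a pass
        // object and forwards each node kind to the matching postvisit overload
        template< typename Pass >
        struct PassVisitor {
                Pass pass;
                template< typename... Args >
                PassVisitor( Args &&... args ) : pass( std::forward< Args >( args )... ) {}

                void visit( NameExpr * expr ) { pass.postvisit( expr ); }
                void visit( ConstantExpr * expr ) { pass.postvisit( expr ); }
        };

        // the pass itself is an ordinary struct, like AlternativeFinder::Finder:
        // no virtual functions, just one postvisit per node kind it handles
        struct Finder {
                void postvisit( NameExpr * ) { std::cout << "name expr" << std::endl; }
                void postvisit( ConstantExpr * ) { std::cout << "constant expr" << std::endl; }
        };

        int main() {
                PassVisitor< Finder > finder;
                NameExpr name;
                finder.visit( &name );          // prints "name expr"
        }

The payoff of this shape, visible throughout the diff, is that adding a node kind means adding one non-virtual `postvisit` overload rather than extending a virtual interface in the header.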
  • src/ResolvExpr/AlternativeFinder.h

    rb158d8f rb6838214  
    3838        using ExplodedArgs = std::vector< std::vector< ExplodedActual > >;
    3939
    40         class AlternativeFinder : public Visitor {
     40        class AlternativeFinder {
    4141          public:
    4242                AlternativeFinder( const SymTab::Indexer &indexer, const TypeEnvironment &env );
     
    9494                void findSubExprs( InputIterator begin, InputIterator end, OutputIterator out );
    9595          private:
    96                 virtual void visit( ApplicationExpr *applicationExpr );
    97                 virtual void visit( UntypedExpr *untypedExpr );
    98                 virtual void visit( AddressExpr *addressExpr );
    99                 virtual void visit( LabelAddressExpr *labelExpr );
    100                 virtual void visit( CastExpr *castExpr );
    101                 virtual void visit( VirtualCastExpr *castExpr );
    102                 virtual void visit( UntypedMemberExpr *memberExpr );
    103                 virtual void visit( MemberExpr *memberExpr );
    104                 virtual void visit( NameExpr *variableExpr );
    105                 virtual void visit( VariableExpr *variableExpr );
    106                 virtual void visit( ConstantExpr *constantExpr );
    107                 virtual void visit( SizeofExpr *sizeofExpr );
    108                 virtual void visit( AlignofExpr *alignofExpr );
    109                 virtual void visit( UntypedOffsetofExpr *offsetofExpr );
    110                 virtual void visit( OffsetofExpr *offsetofExpr );
    111                 virtual void visit( OffsetPackExpr *offsetPackExpr );
    112                 virtual void visit( AttrExpr *attrExpr );
    113                 virtual void visit( LogicalExpr *logicalExpr );
    114                 virtual void visit( ConditionalExpr *conditionalExpr );
    115                 virtual void visit( CommaExpr *commaExpr );
    116                 virtual void visit( ImplicitCopyCtorExpr * impCpCtorExpr );
    117                 virtual void visit( ConstructorExpr * ctorExpr );
    118                 virtual void visit( RangeExpr * rangeExpr );
    119                 virtual void visit( UntypedTupleExpr *tupleExpr );
    120                 virtual void visit( TupleExpr *tupleExpr );
    121                 virtual void visit( TupleIndexExpr *tupleExpr );
    122                 virtual void visit( TupleAssignExpr *tupleExpr );
    123                 virtual void visit( UniqueExpr *unqExpr );
    124                 virtual void visit( StmtExpr *stmtExpr );
    125                 virtual void visit( UntypedInitExpr *initExpr );
    126 
    127                 /// Adds alternatives for anonymous members
    128                 void addAnonConversions( const Alternative & alt );
    129                 /// Adds alternatives for member expressions, given the aggregate, conversion cost for that aggregate, and name of the member
    130                 template< typename StructOrUnionType > void addAggMembers( StructOrUnionType *aggInst, Expression *expr, const Cost &newCost, const TypeEnvironment & env, Expression * member );
    131                 /// Adds alternatives for member expressions where the left side has tuple type
    132                 void addTupleMembers( TupleType * tupleType, Expression *expr, const Cost &newCost, const TypeEnvironment & env, Expression * member );
    133                 /// Adds alternatives for offsetof expressions, given the base type and name of the member
    134                 template< typename StructOrUnionType > void addOffsetof( StructOrUnionType *aggInst, const std::string &name );
    135                 /// Takes a final result and checks if its assertions can be satisfied
    136                 template<typename OutputIterator>
    137                 void validateFunctionAlternative( const Alternative &func, ArgPack& result, const std::vector<ArgPack>& results, OutputIterator out );
    138                 /// Finds matching alternatives for a function, given a set of arguments
    139                 template<typename OutputIterator>
    140                 void makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const ExplodedArgs& args, OutputIterator out );
    141                 /// Checks if assertion parameters match for a new alternative
    142                 template< typename OutputIterator >
    143                 void inferParameters( const AssertionSet &need, AssertionSet &have, const Alternative &newAlt, OpenVarSet &openVars, OutputIterator out );
    144 
     96                struct Finder;
    14597                const SymTab::Indexer &indexer;
    14698                AltList alternatives;
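
The header change above deletes the long list of virtual `visit` declarations and leaves only the forward declaration `struct Finder;`, so the traversal plumbing no longer appears in the public interface. A minimal sketch of that forward-declared nested-struct idiom, with hypothetical names standing in for the real classes:

        #include <iostream>

        // header side: only the public interface is visible; the traversal
        // pass is a nested struct that is merely forward-declared
        class Resolver {                        // hypothetical stand-in for AlternativeFinder
          public:
                void find( int expr );
          private:
                struct Finder;                  // defined out-of-line in the .cc file
                int state = 0;
        };

        // implementation side: the nested struct is completed where it is used
        struct Resolver::Finder {
                Resolver & owner;
                void postvisit( int expr ) {
                        owner.state += expr;
                        std::cout << owner.state << std::endl;
                }
        };

        void Resolver::find( int expr ) {
                Finder finder{ *this };
                finder.postvisit( expr );
        }

        int main() {
                Resolver resolver;
                resolver.find( 2 );             // prints 2
                resolver.find( 3 );             // prints 5
        }

Because clients of the header never see `Finder`'s members, the set of handled node kinds can change without touching the header or recompiling its includers.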
  • src/ResolvExpr/CastCost.cc

    rb158d8f rb6838214  
    3131
    3232namespace ResolvExpr {
    33         class CastCost : public ConversionCost {
     33        struct CastCost : public ConversionCost {
    3434          public:
    35                 CastCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env );
     35                CastCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env, CostFunction costFunc );
    3636
    37                 virtual void visit( BasicType *basicType );
    38                 virtual void visit( PointerType *pointerType );
     37                using ConversionCost::previsit;
     38                using ConversionCost::postvisit;
     39                void postvisit( BasicType * basicType );
     40                void postvisit( PointerType * pointerType );
    3941        };
    4042
     
    5254                                // all typedefs should be gone by this point
    5355                                TypeDecl *type = strict_dynamic_cast< TypeDecl* >( namedType );
    54                                 if ( type->get_base() ) {
    55                                         return castCost( src, type->get_base(), indexer, env ) + Cost::safe;
     56                                if ( type->base ) {
     57                                        return castCost( src, type->base, indexer, env ) + Cost::safe;
    5658                                } // if
    5759                        } // if
     
    7476                } else if ( ReferenceType * refType = dynamic_cast< ReferenceType * > ( dest ) ) {
    7577                        PRINT( std::cerr << "conversionCost: dest is reference" << std::endl; )
    76                         return convertToReferenceCost( src, refType, indexer, env, [](Type * t1, Type * t2, const TypeEnvironment & env, const SymTab::Indexer & indexer) {
     78                        return convertToReferenceCost( src, refType, indexer, env, [](Type * t1, Type * t2, const SymTab::Indexer & indexer, const TypeEnvironment & env ) {
    7779                                return ptrsCastable( t1, t2, env, indexer );
    7880                        });
    7981                } else {
    80                         CastCost converter( dest, indexer, env );
     82                        PassVisitor<CastCost> converter( dest, indexer, env, castCost );
    8183                        src->accept( converter );
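
In CastCost.cc, `CastCost` keeps inheriting from `ConversionCost` but now participates in the same previsit/postvisit scheme, wrapped as `PassVisitor<CastCost>` at the call site. The two `using` declarations in the diff are essential: without them, the derived `postvisit` overloads would hide every inherited `previsit`/`postvisit` overload from name lookup. A compilable sketch of that behaviour, with hypothetical type classes standing in for the real ones:

        #include <iostream>

        // hypothetical stand-ins for the type classes
        struct BasicType {};
        struct PointerType {};
        struct ArrayType {};

        struct ConversionCost {
                void previsit( BasicType * ) { std::cout << "conversion previsit" << std::endl; }
                void postvisit( BasicType * ) { std::cout << "conversion: basic" << std::endl; }
                void postvisit( ArrayType * ) { std::cout << "conversion: array" << std::endl; }
        };

        struct CastCost : public ConversionCost {
                // without these using-declarations, the overloads below would
                // hide every inherited previsit/postvisit from name lookup
                using ConversionCost::previsit;
                using ConversionCost::postvisit;
                void postvisit( BasicType * ) { std::cout << "cast: basic" << std::endl; }
                void postvisit( PointerType * ) { std::cout << "cast: pointer" << std::endl; }
        };

        int main() {
                CastCost cost;
                BasicType basic;
                ArrayType array;
                cost.postvisit( &basic );       // cast-specific overload wins
                cost.postvisit( &array );       // falls back to ConversionCost's overload
        }

This way `CastCost` overrides only the node kinds whose casting rules differ (basic and pointer types here) and inherits the conversion-cost behaviour for everything else, which is exactly the delta the diff expresses.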