Changeset 9cdfb4d0
- Timestamp:
- Jan 22, 2018, 3:09:01 PM (5 years ago)
- Branches:
- ADT, aaron-thesis, arm-eh, cleanup-dtors, deferred_resn, demangler, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, new-env, no_list, persistent-indexer, pthread-emulation, qualifiedEnum, resolv-new, with_gc
- Children:
- e23d20b
- Parents:
- 326cd2b (diff), 4bf3b2b (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)
links above to see all the changes relative to each parent. - Files:
-
- 7 added
- 12 deleted
- 93 edited
- 26 moved
Legend:
- Unmodified
- Added
- Removed
-
doc/proposals/concurrency/.gitignore
r326cd2b r9cdfb4d0 24 24 build/*.toc 25 25 *.pdf 26 *.png 27 figures/*.tex 26 28 27 29 examples -
doc/proposals/concurrency/Makefile
r326cd2b r9cdfb4d0 1 1 ## Define the appropriate configuration variables. 2 2 3 TeXLIB = .:./style:./text:./annex:./build:../../LaTeXmacros:../../LaTeXmacros/listings:../../LaTeXmacros/enumitem:~/bibliographies: 3 TeXLIB = .:./style:./text:./annex:./build:../../LaTeXmacros:../../LaTeXmacros/listings:../../LaTeXmacros/enumitem:~/bibliographies:/usr/local/bibliographies: 4 4 LaTeX = TEXINPUTS=${TeXLIB} && export TEXINPUTS && latex -halt-on-error -output-directory=build -interaction=nonstopmode 5 5 BibTeX = BIBINPUTS=${TeXLIB} && export BIBINPUTS && bibtex -terse … … 75 75 build/*.tex \ 76 76 build/*.toc \ 77 build/*.lof \ 78 build/*.lol \ 79 build/*.lot \ 80 figures/*.tex \ 81 *.png \ 77 82 78 83 … … 117 122 fig2dev -L pstex_t -p $@ $< > $@_t 118 123 124 figures/%.tex: build/%.pstex 125 echo -n "\documentclass[preview]{standalone}\n" \ 126 "\usepackage[T1]{fontenc}\n" \ 127 "\usepackage[usenames]{color}\n" \ 128 "\usepackage{graphicx}\n" \ 129 "\usepackage{listings}\n" \ 130 "\usepackage{xspace}\n" \ 131 "\input{style}\n" \ 132 "\\\\begin{document}\n" \ 133 "{\\\\resizebox{3\\\\textwidth}{!}{\input{${basename ${notdir $@}}.pstex_t}}}\n" \ 134 "\end{document}" > $@ 135 136 %.png : build/%.pstex figures/%.tex 137 echo ${basename $@} 138 ${LaTeX} figures/${basename $@}.tex 139 dvips build/${basename $@}.dvi -o build/${basename $@}.ps 140 ps2pdf build/${basename $@}.ps 141 convert -negate ${basename $@}.pdf $@ 142 143 119 144 120 145 # Local Variables: # -
doc/proposals/concurrency/annex/glossary.tex
r326cd2b r9cdfb4d0 98 98 \newacronym{tls}{TLS}{Thread Local Storage} 99 99 \newacronym{api}{API}{Application Program Interface} 100 \newacronym{raii}{RAII}{Res source Acquisition Is Initialization}100 \newacronym{raii}{RAII}{Resource Acquisition Is Initialization} 101 101 \newacronym{numa}{NUMA}{Non-Uniform Memory Access} -
doc/proposals/concurrency/annex/local.bib
r326cd2b r9cdfb4d0 36 36 37 37 @article{TBB, 38 key = {TBB}, 38 39 keywords = {Intel, TBB}, 39 40 title = {Intel Thread Building Blocks}, … … 42 43 43 44 @manual{www-cfa, 45 key = {CFA}, 44 46 keywords = {Cforall}, 45 title = {Cforall Programmming Language}, 46 address = {https://plg.uwaterloo.ca/~cforall/} 47 author = {C$\forall$}, 48 title = {C$\forall$ Programmming Language}, 49 note = {\url{https://plg.uwaterloo.ca/~cforall}}, 47 50 } 48 51 49 @ article{rob-thesis,52 @mastersthesis{rob-thesis, 50 53 keywords = {Constructors, Destructors, Tuples}, 51 54 author = {Rob Schluntz}, 52 55 title = {Resource Management and Tuples in Cforall}, 53 year = 2017 56 year = 2017, 57 school = {University of Waterloo}, 58 note = {\url{https://uwspace.uwaterloo.ca/handle/10012/11830}}, 54 59 } 55 60 … … 64 69 65 70 @article{BankTransfer, 71 key = {Bank Transfer}, 66 72 keywords = {Bank Transfer}, 67 73 title = {Bank Account Transfer Problem}, … … 89 95 } 90 96 91 @book{Herlihy93, 92 title={Transactional memory: Architectural support for lock-free data structures}, 93 author={Herlihy, Maurice and Moss, J Eliot B}, 94 volume={21}, 95 number={2}, 96 year={1993}, 97 publisher={ACM} 97 @article{Herlihy93, 98 author = {Herlihy, Maurice and Moss, J. Eliot B.}, 99 title = {Transactional memory: architectural support for lock-free data structures}, 100 journal = {SIGARCH Comput. Archit. News}, 101 issue_date = {May 1993}, 102 volume = {21}, 103 number = {2}, 104 month = may, 105 year = {1993}, 106 pages = {289--300}, 107 numpages = {12}, 108 publisher = {ACM}, 109 address = {New York, NY, USA}, 98 110 } 99 111 100 112 @manual{affinityLinux, 113 key = {TBB}, 101 114 title = "{Linux man page - sched\_setaffinity(2)}" 102 115 } … … 122 135 } 123 136 124 125 137 @misc{NodeJs, 126 138 title = "{Node.js}", -
doc/proposals/concurrency/figures/system.fig
r326cd2b r9cdfb4d0 93 93 4 0 0 50 -1 0 11 0.0000 2 120 570 4125 7126 thread 4\001 94 94 -6 95 6 6975 4050 9525 7875 96 2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5 97 7125 5400 7575 5400 7575 5850 7125 5850 7125 5400 98 2 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5 99 7125 4200 7575 4200 7575 4650 7125 4650 7125 4200 100 2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5 101 7125 4800 7575 4800 7575 5250 7125 5250 7125 4800 102 2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5 103 6975 4050 9525 4050 9525 7875 6975 7875 6975 4050 104 3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10 105 7350 6900 7500 6975 7200 7050 7500 7125 7200 7200 7500 7275 106 7200 7350 7500 7425 7200 7500 7350 7575 107 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 108 -1.000 0.000 109 3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10 110 7350 6000 7500 6075 7200 6150 7500 6225 7200 6300 7500 6375 111 7200 6450 7500 6525 7200 6600 7350 6675 112 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 113 -1.000 0.000 114 4 0 0 50 -1 0 11 0.0000 2 120 945 7725 4500 Pthread stack\001 115 4 0 0 50 -1 0 11 0.0000 2 150 1530 7725 5100 Pthread stack (stolen)\001 116 4 0 0 50 -1 0 11 0.0000 2 120 540 7725 6375 Pthread\001 117 4 0 0 50 -1 0 11 0.0000 2 150 1065 7725 7275 $\\CFA$ thread\001 118 4 0 0 50 -1 0 11 0.0000 2 150 990 7725 5700 $\\CFA$ stack\001 119 -6 95 120 1 2 0 1 0 7 50 -1 -1 0.000 1 3.1416 3150 5250 750 450 2400 4800 3900 5700 96 121 2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2 … … 100 125 2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2 101 126 5550 3900 3825 5025 102 2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5103 7125 3525 7575 3525 7575 3975 7125 3975 7125 3525104 2 2 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 5105 6975 2175 9525 2175 9525 6000 6975 6000 6975 2175106 127 2 1 0 1 0 7 50 -1 -1 0.000 0 1 -1 0 0 2 107 128 900 6225 2400 5400 … … 122 143 2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5 123 144 5100 975 5850 975 5850 2625 5100 2625 5100 975 124 2 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5125 7125 2325 7575 2325 7575 2775 7125 2775 7125 2325126 2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5127 7125 2925 7575 2925 7575 3375 7125 3375 7125 2925128 145 2 2 0 1 0 7 50 -1 45 0.000 0 1 -1 0 0 5 129 146 525 7425 1275 7425 1275 9075 525 9075 525 7425 … … 143 160 2 2 0 1 0 7 50 -1 18 0.000 0 1 -1 0 0 5 144 161 2025 2625 2775 2625 2775 975 2025 975 2025 2625 145 3 2 0 2 0 7 49 -1 -1 0.000 1 0 0 10146 7350 5025 7500 5100 7200 5175 7500 5250 7200 5325 7500 5400147 7200 5475 7500 5550 7200 5625 7350 5700148 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000149 -1.000 0.000150 3 2 0 4 0 7 49 -1 -1 0.000 1 0 0 10151 7350 4125 7500 4200 7200 4275 7500 4350 7200 4425 7500 4500152 7200 4575 7500 4650 7200 4725 7350 4800153 0.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000154 -1.000 0.000155 162 4 0 0 50 -1 0 18 0.0000 2 30 225 4500 3150 ...\001 156 163 4 0 0 50 -1 0 18 0.0000 2 30 225 3750 4500 ...\001 157 164 4 0 0 50 -1 0 11 0.0000 2 120 705 2775 5325 Scheduler\001 158 4 0 0 50 -1 0 11 0.0000 2 120 945 7725 2625 Pthread stack\001159 4 0 0 50 -1 0 11 0.0000 2 150 1530 7725 3225 Pthread stack (stolen)\001160 4 0 0 50 -1 0 11 0.0000 2 120 540 7725 4500 Pthread\001161 4 0 0 50 -1 0 11 0.0000 2 150 1065 7725 5400 $\\CFA$ thread\001162 165 4 0 0 50 -1 0 18 0.0000 2 30 225 4950 6600 ...\001 163 166 4 0 0 50 -1 0 18 0.0000 2 30 225 4200 5850 ...\001 164 4 0 0 50 -1 0 11 0.0000 2 150 990 7725 3825 $\\CFA$ stack\001 -
doc/proposals/concurrency/text/basics.tex
r326cd2b r9cdfb4d0 9 9 At its core, concurrency is based on having multiple call-stacks and scheduling among threads of execution executing on these stacks. Concurrency without parallelism only requires having multiple call stacks (or contexts) for a single thread of execution. 10 10 11 Execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining. Execution with a single and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread perspective) across the stacks is called concurrency.12 13 Therefore, a minimal concurrency system can be achieved by creating coroutines , which instead of context-switching among each other, always ask an oracle where to context-switch next. While coroutines can execute on the caller?s stack-frame, stack-full coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model (akanon-preemptive scheduling). The oracle/scheduler can either be a stack-less or stack-full entity and correspondingly require one or two context-switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption.11 Execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining. Execution with a single and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread's perspective) across the stacks is called concurrency. 12 13 Therefore, a minimal concurrency system can be achieved by creating coroutines (see Section \ref{coroutine}), which instead of context-switching among each other, always ask an oracle where to context-switch next. While coroutines can execute on the caller's stack-frame, stack-full coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model (a.k.a., non-preemptive scheduling). The oracle/scheduler can either be a stack-less or stack-full entity and correspondingly require one or two context-switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption. 14 14 15 15 A scheduler introduces order of execution uncertainty, while preemption introduces uncertainty about where context switches occur. Mutual exclusion and synchronization are ways of limiting non-determinism in a concurrent system. Now it is important to understand that uncertainty is desirable; uncertainty can be used by runtime systems to significantly increase performance and is often the basis of giving a user the illusion that tasks are running in parallel. Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows. 16 16 17 17 \section{\protect\CFA's Thread Building Blocks} 18 One of the important features that are missing in C is threading . On modern architectures, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write efficient concurrent programs to take advantage of parallelism. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay.18 One of the important features that are missing in C is threading\footnote{While the C11 standard defines a ``threads.h'' header, it is minimal and defined as optional. As such, library support for threading is far from widespread. At the time of writing the thesis, neither \texttt{gcc} nor \texttt{clang} support ``threads.h'' in their respective standard libraries.}. On modern architectures, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write efficient concurrent programs to take advantage of parallelism. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay. 19 19 20 20 \section{Coroutines: A Stepping Stone}\label{coroutine} 21 While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. Coroutines need to deal with context switches and other context-management operations. Therefore, this proposal includes coroutines both as an intermediate step for the implementation of threads, and a first-class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolves around two features: independent call-stacks and \code{suspend}/\code{resume}.21 While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. \textbf{Coroutine}s are generalized routines which have predefined points where execution is suspended and can be resumed at a later time. Therefore, they need to deal with context switches and other context-management operations. This proposal includes coroutines both as an intermediate step for the implementation of threads, and a first-class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolves around two features: independent call-stacks and \code{suspend}/\code{resume}. 22 22 23 23 \begin{table} … … 129 129 \end{tabular} 130 130 \end{center} 131 \caption{Different implementations of a Fibonacci sequence generator in C.} ,131 \caption{Different implementations of a Fibonacci sequence generator in C.} 132 132 \label{lst:fibonacci-c} 133 133 \end{table} 134 134 135 A good example of a problem made easier with coroutines is generators, like the Fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Table\ref{lst:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approach suffer from strong coupling. The left and centre approaches require that the generator have knowledge of how the sequence is used, while the rightmost approach requires holding internal state between calls on behalf of the generator and makes it much harder to handle corner cases like the Fibonacci seed.135 A good example of a problem made easier with coroutines is generators, e.g., generating the Fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Listing \ref{lst:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approach suffer from strong coupling. The left and centre approaches require that the generator have knowledge of how the sequence is used, while the rightmost approach requires holding internal state between calls on behalf of the generator and makes it much harder to handle corner cases like the Fibonacci seed. 136 136 137 137 Listing \ref{lst:fibonacci-cfa} is an example of a solution to the Fibonacci problem using \CFA coroutines, where the coroutine stack holds sufficient state for the next generation. This solution has the advantage of having very strong decoupling between how the sequence is generated and how it is used. Indeed, this version is as easy to use as the \code{fibonacci_state} solution, while the implementation is very similar to the \code{fibonacci_func} example. … … 233 233 One important design challenge for implementing coroutines and threads (shown in section \ref{threads}) is that the runtime system needs to run code after the user-constructor runs to connect the fully constructed object into the system. In the case of coroutines, this challenge is simpler since there is no non-determinism from preemption or scheduling. However, the underlying challenge remains the same for coroutines and threads. 234 234 235 The runtime system needs to create the coroutine ?s stack and more importantly prepare it for the first resumption. The timing of the creation is non-trivial since users both expectto have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. There are several solutions to this problem but the chosen option effectively forces the design of the coroutine.235 The runtime system needs to create the coroutine's stack and, more importantly, prepare it for the first resumption. The timing of the creation is non-trivial since users expect both to have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. There are several solutions to this problem but the chosen option effectively forces the design of the coroutine. 236 236 237 237 Furthermore, \CFA faces an extra challenge as polymorphic routines create invisible thunks when cast to non-polymorphic routines and these thunks have function scope. For example, the following code, while looking benign, can run into undefined behaviour because of thunks: … … 268 268 } 269 269 \end{ccode} 270 The problem in this example is a storage management issue, the function pointer \code{_thunk0} is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes Undefined Behaviour; i.e., the stack-based thunk being destroyed before it can be used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that nested routine cannot be passed outside of the declaration scope. The case of coroutines and threads is simply an extension of this problem to multiple call stacks.270 The problem in this example is a storage management issue, the function pointer \code{_thunk0} is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes undefined behaviour; i.e., the stack-based thunk being destroyed before it can be used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that nested routine cannot be passed outside of the declaration scope. The case of coroutines and threads is simply an extension of this problem to multiple call stacks. 271 271 272 272 \subsection{Alternative: Composition} … … 310 310 symmetric_coroutine<>::yield_type 311 311 \end{cfacode} 312 Often, the canonical threading paradigm in languages is based on function pointers, pthreadbeing one of the most well-known examples. The main problem of this approach is that the thread usage is limited to a generic handle that must otherwise be wrapped in a custom type. Since the custom type is simple to write in \CFA and solves several issues, added support for routine/lambda based coroutines adds very little.313 314 A variation of this would be to use a simple function pointer in the same way pthread does for threads:312 Often, the canonical threading paradigm in languages is based on function pointers, \texttt{pthread} being one of the most well-known examples. The main problem of this approach is that the thread usage is limited to a generic handle that must otherwise be wrapped in a custom type. Since the custom type is simple to write in \CFA and solves several issues, added support for routine/lambda based coroutines adds very little. 313 314 A variation of this would be to use a simple function pointer in the same way \texttt{pthread} does for threads: 315 315 \begin{cfacode} 316 316 void foo( coroutine_t cid, void* arg ) { … … 329 329 \subsection{Alternative: Trait-Based Coroutines} 330 330 331 Finally, the underlying approach, which is the one closest to \CFA idioms, is to use trait-based lazy coroutines. This approach defines a coroutine as anything that satisfies the trait \code{is_coroutine} and is used as a coroutine.331 Finally, the underlying approach, which is the one closest to \CFA idioms, is to use trait-based lazy coroutines. This approach defines a coroutine as anything that satisfies the trait \code{is_coroutine} (as defined below) and is used as a coroutine. 332 332 333 333 \begin{cfacode} … … 340 340 forall( dtype T | is_coroutine(T) ) void resume (T&); 341 341 \end{cfacode} 342 This ensures an object is not a coroutine until \code{resume} is called on the object. Correspondingly, any object that is passed to \code{resume} is a coroutine since it must satisfy the \code{is_coroutine} trait to compile. The advantage of this approach is that users can easily create different types of coroutines, for example, changing the memory layout of a coroutine is trivial when implementing the \code{get_coroutine} routine. The \CFA keyword \code{coroutine} only has the effect of implementing the getter and forward declarations required for users to implement the main routine.342 This ensures that an object is not a coroutine until \code{resume} is called on the object. Correspondingly, any object that is passed to \code{resume} is a coroutine since it must satisfy the \code{is_coroutine} trait to compile. The advantage of this approach is that users can easily create different types of coroutines, for example, changing the memory layout of a coroutine is trivial when implementing the \code{get_coroutine} routine. The \CFA keyword \code{coroutine} simply has the effect of implementing the getter and forward declarations required for users to implement the main routine. 343 343 344 344 \begin{center} … … 385 385 \end{cfacode} 386 386 387 Obviously, for this thread implementation to be useful it must run some user code. Several other threading interfaces use a function-pointer representation as the interface of threads (for example \Csharp~\cite{Csharp} and Scala~\cite{Scala}). However, this proposal considers that statically tying a \code{main} routine to a thread supersedes this approach. Since the \code{main} routine is already a special routine in \CFA (where the program begins), it is a natural extension of the semantics usingoverloading to declare mains for different threads (the normal main being the main of the initial thread). As such the \code{main} routine of a thread can be defined as387 Obviously, for this thread implementation to be useful it must run some user code. Several other threading interfaces use a function-pointer representation as the interface of threads (for example \Csharp~\cite{Csharp} and Scala~\cite{Scala}). However, this proposal considers that statically tying a \code{main} routine to a thread supersedes this approach. Since the \code{main} routine is already a special routine in \CFA (where the program begins), it is a natural extension of the semantics to use overloading to declare mains for different threads (the normal main being the main of the initial thread). As such the \code{main} routine of a thread can be defined as 388 388 \begin{cfacode} 389 389 thread foo {}; … … 425 425 A consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \acrshort{api}. 426 426 427 Of course for threads to be useful, it must be possible to start and stop threads and wait for them to complete execution. While using an \acrshort{api} such as \code{fork} and \code{join} is relatively common in the literature, such an interface is unnecessary. Indeed, the simplest approach is to use \acrshort{raii} principles and have threads \code{fork} after the constructor has completed and \code{join} before the destructor runs.427 Of course, for threads to be useful, it must be possible to start and stop threads and wait for them to complete execution. While using an \acrshort{api} such as \code{fork} and \code{join} is relatively common in the literature, such an interface is unnecessary. Indeed, the simplest approach is to use \acrshort{raii} principles and have threads \code{fork} after the constructor has completed and \code{join} before the destructor runs. 428 428 \begin{cfacode} 429 429 thread World; … … 466 466 \end{cfacode} 467 467 468 However, one of the drawbacks of this approach is that threads always form a lattice, i.e., they are always destroyed in the opposite order of construction because of block structure. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created.468 However, one of the drawbacks of this approach is that threads always form a tree where nodes must always outlive their children, i.e., they are always destroyed in the opposite order of construction because of C scoping rules. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created. 469 469 470 470 \begin{cfacode} -
doc/proposals/concurrency/text/cforall.tex
r326cd2b r9cdfb4d0 52 52 % ====================================================================== 53 53 \section{Operators} 54 Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation occur, e.g.:54 Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation appear, e.g.: 55 55 \begin{cfacode} 56 56 int ++? (int op); //unary prefix increment … … 72 72 % ====================================================================== 73 73 \section{Constructors/Destructors} 74 Object lifetime is often a challenge in concurrency. \CFA uses the approach of giving concurrent meaning to object lifetime as a means of synchronization and/or mutual exclusion. Since \CFA relies heavily on the lifetime of objects, constructors and destructors is a core feature required for concurrency and parallelism. \CFA uses the following syntax for constructors and destructors 74 Object lifetime is often a challenge in concurrency. \CFA uses the approach of giving concurrent meaning to object lifetime as a means of synchronization and/or mutual exclusion. Since \CFA relies heavily on the lifetime of objects, constructors and destructors is a core feature required for concurrency and parallelism. \CFA uses the following syntax for constructors and destructors: 75 75 \begin{cfacode} 76 76 struct S { … … 107 107 % ====================================================================== 108 108 \section{Parametric Polymorphism} 109 Routines in \CFA can also be reused for multiple types. This capability is done using the \code{forall} clause, which gives \CFA its name. \code{forall} clauses allow separately compiled routines to support generic usage over multiple types. For example, the following sum function works for any type that supports construction from 0 and addition : 109 \label{s:ParametricPolymorphism} 110 Routines in \CFA can also be reused for multiple types. This capability is done using the \code{forall} clauses, which allow separately compiled routines to support generic usage over multiple types. For example, the following sum function works for any type that supports construction from 0 and addition: 110 111 \begin{cfacode} 111 112 //constraint type, 0 and + … … 124 125 Since writing constraints on types can become cumbersome for more constrained functions, \CFA also has the concept of traits. Traits are named collection of constraints that can be used both instead and in addition to regular constraints: 125 126 \begin{cfacode} 126 trait sum able( otype T ) {127 trait summable( otype T ) { 127 128 void ?{}(T *, zero_t); //constructor from 0 literal 128 129 T ?+?(T, T); //assortment of additions … … 131 132 T ?++(T *); 132 133 }; 133 forall( otype T | sum able(T) ) //use trait134 forall( otype T | summable(T) ) //use trait 134 135 T sum(T a[], size_t size); 135 136 \end{cfacode} … … 139 140 % ====================================================================== 140 141 \section{with Clause/Statement} 141 Since \CFA lacks the concept of a receiver, certain functions end -up needing to repeat variable names often. To remove this inconvenience, \CFA provides the \code{with} statement, which opens an aggregate scope making its fields directly accessible (like Pascal).142 Since \CFA lacks the concept of a receiver, certain functions end up needing to repeat variable names often. To remove this inconvenience, \CFA provides the \code{with} statement, which opens an aggregate scope making its fields directly accessible (like Pascal). 142 143 \begin{cfacode} 143 144 struct S { int i, j; }; -
doc/proposals/concurrency/text/concurrency.tex
r326cd2b r9cdfb4d0 4 4 % ====================================================================== 5 5 % ====================================================================== 6 Several tools can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared -state, some languages and libraries simply disallow mutable shared-state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely relate to networking concepts (channels~\cite{CSP,Go} for example). However, in languages that use routine calls as their core abstraction mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine calls). This distinction in turn means that, in order to be effective, programmers need to learn two sets of design patterns. While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.6 Several tools can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared state, some languages and libraries simply disallow mutable shared state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely relate to networking concepts (channels~\cite{CSP,Go} for example). However, in languages that use routine calls as their core abstraction mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine calls). This distinction in turn means that, in order to be effective, programmers need to learn two sets of design patterns. While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account. 7 7 8 8 Approaches based on shared memory are more closely related to non-concurrent paradigms since they often rely on basic constructs like routine calls and shared objects. At the lowest level, concurrent paradigms are implemented as atomic operations and locks. Many such mechanisms have been proposed, including semaphores~\cite{Dijkstra68b} and path expressions~\cite{Campbell74}. However, for productivity reasons it is desirable to have a higher-level construct be the core concurrency paradigm~\cite{HPP:Study}. … … 16 16 17 17 \subsection{Mutual-Exclusion} 18 As mentioned above, mutual-exclusion is the guarantee that only a fix number of threads can enter a critical section at once. However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use. Methods range from low-level locks, which are fast and flexible but require significant attention to be correct, to higher-level mutual-exclusion methods, which sacrifice some performance in order to improve ease of use. Ease of use comes by either guaranteeing some problems cannot occur (e.g., being deadlock free) or by offering a more explicit coupling between data and corresponding critical section. For example, the \CC \code{std::atomic<T>} offers an easy way to express mutual-exclusion on a restricted set of operations (e.g.:reading/writing large types atomically). Another challenge with low-level locks is composability. Locks have restricted composability because it takes careful organizing for multiple locks to be used while preventing deadlocks. Easing composability is another feature higher-level mutual-exclusion mechanisms often offer.18 As mentioned above, mutual-exclusion is the guarantee that only a fix number of threads can enter a critical section at once. However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use. Methods range from low-level locks, which are fast and flexible but require significant attention to be correct, to higher-level concurrency techniques, which sacrifice some performance in order to improve ease of use. Ease of use comes by either guaranteeing some problems cannot occur (e.g., being deadlock free) or by offering a more explicit coupling between data and corresponding critical section. For example, the \CC \code{std::atomic<T>} offers an easy way to express mutual-exclusion on a restricted set of operations (e.g., reading/writing large types atomically). Another challenge with low-level locks is composability. Locks have restricted composability because it takes careful organizing for multiple locks to be used while preventing deadlocks. Easing composability is another feature higher-level mutual-exclusion mechanisms often offer. 19 19 20 20 \subsection{Synchronization} 21 As for mutual-exclusion, low-level synchronization primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, e.g.: message passing, or offering a simpler solution to otherwise involved challenges. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronization happens within a critical section, where threads must acquire mutual-exclusion in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property is called barging. For example, where event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. The classic example is the thread that finishes using a resource and unblocks a thread waiting to use the resource, but the unblocked thread must compete againto acquire the resource. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use flag variables to detect barging threads are said to be using barging avoidance, while algorithms that baton-pass locks~\cite{Andrews89} between threads instead of releasing the locks are said to be using barging prevention.21 As with mutual-exclusion, low-level synchronization primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanisms often simplify usage by adding either better coupling between synchronization and data (e.g., message passing) or offering a simpler solution to otherwise involved challenges. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronization happens within a critical section, where threads must acquire mutual-exclusion in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property is called \textbf{barging}. For example, where event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. The classic example is the thread that finishes using a resource and unblocks a thread waiting to use the resource, but the unblocked thread must compete to acquire the resource. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use flag variables to detect barging threads are said to be using barging avoidance, while algorithms that baton-pass locks~\cite{Andrews89} between threads instead of releasing the locks are said to be using barging prevention. 22 22 23 23 % ====================================================================== … … 26 26 % ====================================================================== 27 27 % ====================================================================== 28 A monitor is a set of routines that ensure mutual exclusion when accessing shared state. This concept is generally associated with Object-Oriented Languages like Java~\cite{Java} or \uC~\cite{uC++book} but does not strictly require OO semantics. The only requirement is the ability to declare a handle to a shared object and a set of routines that act on it:28 A \textbf{monitor} is a set of routines that ensure mutual-exclusion when accessing shared state. More precisely, a monitor is a programming technique that associates mutual-exclusion to routine scopes, as opposed to mutex locks, where mutual-exclusion is defined by lock/release calls independently of any scoping of the calling routine. This strong association eases readability and maintainability, at the cost of flexibility. Note that both monitors and mutex locks, require an abstract handle to identify them. This concept is generally associated with object-oriented languages like Java~\cite{Java} or \uC~\cite{uC++book} but does not strictly require OO semantics. The only requirement is the ability to declare a handle to a shared object and a set of routines that act on it: 29 29 \begin{cfacode} 30 30 typedef /*some monitor type*/ monitor; 31 int f(monitor & m);31 int f(monitor & m); 32 32 33 33 int main() { … … 42 42 % ====================================================================== 43 43 % ====================================================================== 44 The above monitor example displays some of the intrinsic characteristics. First, it is necessary to use pass-by-reference over pass-by-value for monitor routines. This semantics is important, because at their core, monitors are implicit mutual-exclusion objects (locks), and these objects cannot be copied. Therefore, monitors are implicitlynon-copy-able objects (\code{dtype}).45 46 Another aspect to consider is when a monitor acquires its mutual exclusion. For example, a monitor may need to be passed through multiple helper routines that do not acquire the monitor mutual-exclusion on entry. Pass through can occur for generic helper routines (\code{swap}, \code{sort}, etc.) or specific helper routines like the following to implement an atomic counter:44 The above monitor example displays some of the intrinsic characteristics. First, it is necessary to use pass-by-reference over pass-by-value for monitor routines. This semantics is important, because at their core, monitors are implicit mutual-exclusion objects (locks), and these objects cannot be copied. Therefore, monitors are non-copy-able objects (\code{dtype}). 45 46 Another aspect to consider is when a monitor acquires its mutual exclusion. For example, a monitor may need to be passed through multiple helper routines that do not acquire the monitor mutual-exclusion on entry. Passthrough can occur for generic helper routines (\code{swap}, \code{sort}, etc.) or specific helper routines like the following to implement an atomic counter: 47 47 48 48 \begin{cfacode} … … 71 71 \end{tabular} 72 72 \end{center} 73 Notice how the counter is used without any explicit synchronization and yet supports thread-safe semantics for both reading and writing, which is similar in usage to \CC \code{atomic} template.73 Notice how the counter is used without any explicit synchronization and yet supports thread-safe semantics for both reading and writing, which is similar in usage to the \CC template \code{std::atomic}. 74 74 75 75 Here, the constructor (\code{?\{\}}) uses the \code{nomutex} keyword to signify that it does not acquire the monitor mutual-exclusion when constructing. This semantics is because an object not yet con\-structed should never be shared and therefore does not require mutual exclusion. Furthermore, it allows the implementation greater freedom when it initializes the monitor locking. The prefix increment operator uses \code{mutex} to protect the incrementing process from race conditions. Finally, there is a conversion operator from \code{counter_t} to \code{size_t}. This conversion may or may not require the \code{mutex} keyword depending on whether or not reading a \code{size_t} is an atomic operation. … … 93 93 \end{figure} 94 94 95 Having both \code{mutex} and \code{nomutex} keywords is redundant basedon the meaning of a routine having neither of these keywords. For example, it is reasonable that it should default to the safest option (\code{mutex}) when given a routine without qualifiers \code{void foo(counter_t & this)}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. On the other hand, \code{nomutex} is the ``normal'' parameter behaviour, it effectively states explicitly that ``this routine is not special''. Another alternative is making exactly one of these keywords mandatory, which provides the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor or not. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}.95 Having both \code{mutex} and \code{nomutex} keywords can be redundant, depending on the meaning of a routine having neither of these keywords. For example, it is reasonable that it should default to the safest option (\code{mutex}) when given a routine without qualifiers \code{void foo(counter_t & this)}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. On the other hand, \code{nomutex} is the ``normal'' parameter behaviour, it effectively states explicitly that ``this routine is not special''. Another alternative is making exactly one of these keywords mandatory, which provides the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor or not. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}. 96 96 97 97 The next semantic decision is to establish when \code{mutex} may be used as a type qualifier. Consider the following declarations: 98 98 \begin{cfacode} 99 int f1(monitor & mutex m);99 int f1(monitor & mutex m); 100 100 int f2(const monitor & mutex m); 101 int f3(monitor ** mutex m);102 int f4(monitor * mutex m []);103 int f5(graph(monitor *) & mutex m);101 int f3(monitor ** mutex m); 102 int f4(monitor * mutex m []); 103 int f5(graph(monitor *) & mutex m); 104 104 \end{cfacode} 105 105 The problem is to identify which object(s) should be acquired. Furthermore, each object needs to be acquired only once. In the case of simple routines like \code{f1} and \code{f2} it is easy to identify an exhaustive list of objects to acquire on entry. Adding indirections (\code{f3}) still allows the compiler and programmer to identify which object is acquired. However, adding in arrays (\code{f4}) makes it much harder. Array lengths are not necessarily known in C, and even then, making sure objects are only acquired once becomes none-trivial. This problem can be extended to absurd limits like \code{f5}, which uses a graph of monitors. To make the issue tractable, this project imposes the requirement that a routine may only acquire one monitor per parameter and it must be the type of the parameter with at most one level of indirection (ignoring potential qualifiers). Also note that while routine \code{f3} can be supported, meaning that monitor \code{**m} is acquired, passing an array to this routine would be type-safe and yet result in undefined behaviour because only the first element of the array is acquired. However, this ambiguity is part of the C type-system with respects to arrays. For this reason, \code{mutex} is disallowed in the context where arrays may be passed: 106 106 \begin{cfacode} 107 int f1(monitor & mutex m);//Okay : recommended case108 int f2(monitor * mutex m); //Okay : could be an array but probably not107 int f1(monitor & mutex m); //Okay : recommended case 108 int f2(monitor * mutex m); //Not Okay : Could be an array 109 109 int f3(monitor mutex m []); //Not Okay : Array of unknown length 110 int f4(monitor ** mutex m);//Not Okay : Could be an array111 int f5(monitor * mutex m []); //Not Okay : Array of unknown length110 int f4(monitor ** mutex m); //Not Okay : Could be an array 111 int f5(monitor * mutex m []); //Not Okay : Array of unknown length 112 112 \end{cfacode} 113 113 Note that not all array functions are actually distinct in the type system. However, even if the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic. 114 114 115 Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to acquiremutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls.115 Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to specify the object that acquires mutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls. 116 116 \begin{cfacode} 117 117 int f(MonitorA & mutex a, MonitorB & mutex b); … … 137 137 The \gls{multi-acq} monitor lock allows a monitor lock to be acquired by both \code{bar} or \code{baz} and acquired again in \code{foo}. In the calls to \code{bar} and \code{baz} the monitors are acquired in opposite order. 138 138 139 However, such use leads to the lock acquiring order problems. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle difference means that calling these routines concurrently may lead to deadlock and is therefore Undefined Behaviour. As shown~\cite{Lister77}, solving this problem requires:139 However, such use leads to lock acquiring order problems. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle difference means that calling these routines concurrently may lead to deadlock and is therefore undefined behaviour. As shown~\cite{Lister77}, solving this problem requires: 140 140 \begin{enumerate} 141 \item Dynamically tracking ofthe monitor-call order.141 \item Dynamically tracking the monitor-call order. 142 142 \item Implement rollback semantics. 143 143 \end{enumerate} … … 159 159 \subsection{\code{mutex} statement} \label{mutex-stmt} 160 160 161 The call semantics discussed above have one software engineering issue , only a namedroutine can acquire the mutual-exclusion of a set of monitor. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem~\cite{2FTwoHardThings}. Table \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitor is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.161 The call semantics discussed above have one software engineering issue: only a routine can acquire the mutual-exclusion of a set of monitor. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem~\cite{2FTwoHardThings}. Table \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitor is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters. 162 162 163 163 \begin{table} … … 216 216 \end{cfacode} 217 217 218 Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the \code{monitor} keyword. The monitor trait is 218 Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the \code{monitor} keyword. The monitor trait is: 219 219 \begin{cfacode} 220 220 trait is_monitor(dtype T) { … … 223 223 }; 224 224 \end{cfacode} 225 Note that the destructor of a monitor must be a \code{mutex} routine to prevent deallocation while a thread is accessing the monitor. As with any object, calls to a monitor, using \code{mutex} or otherwise, is Undefined Behaviour after the destructor has run.225 Note that the destructor of a monitor must be a \code{mutex} routine to prevent deallocation while a thread is accessing the monitor. As with any object, calls to a monitor, using \code{mutex} or otherwise, is undefined behaviour after the destructor has run. 226 226 227 227 % ====================================================================== … … 230 230 % ====================================================================== 231 231 % ====================================================================== 232 In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronization. With monitors, this capability is generally achieved with internal or external scheduling as in~\cite{Hoare74}. Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.233 234 First, here is a simple example of internal scheduling 232 In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronization. With monitors, this capability is generally achieved with internal or external scheduling as in~\cite{Hoare74}. With \textbf{scheduling} loosely defined as deciding which thread acquires the critical section next, \textbf{internal scheduling} means making the decision from inside the critical section (i.e., with access to the shared state), while \textbf{external scheduling} means making the decision when entering the critical section (i.e., without access to the shared state). Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side. 233 234 First, here is a simple example of internal scheduling: 235 235 236 236 \begin{cfacode} … … 253 253 } 254 254 \end{cfacode} 255 There are two details to note here. First, the \code{signal} is a delayed operation, it only unblocks the waiting thread when it reaches the end of the critical section. This semanticis needed to respect mutual-exclusion, i.e., the signaller and signalled thread cannot be in the monitor simultaneously. The alternative is to return immediately after the call to \code{signal}, which is significantly more restrictive. Second, in \CFA, while it is common to store a \code{condition} as a field of the monitor, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, ensuring a basic ordering.256 257 An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to resume immediately after (unless some other thread waited on the same condition). This guarantee offers the benefit of not having to loop around waits to recheck that a condition is met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency.255 There are two details to note here. First, \code{signal} is a delayed operation; it only unblocks the waiting thread when it reaches the end of the critical section. This semantics is needed to respect mutual-exclusion, i.e., the signaller and signalled thread cannot be in the monitor simultaneously. The alternative is to return immediately after the call to \code{signal}, which is significantly more restrictive. Second, in \CFA, while it is common to store a \code{condition} as a field of the monitor, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, ensuring a basic ordering. 256 257 An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to be the next thread to acquire the monitor (unless some other thread waited on the same condition). This guarantee offers the benefit of not having to loop around waits to recheck that a condition is met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design and implementation of \CFA concurrency. 258 258 259 259 % ====================================================================== … … 262 262 % ====================================================================== 263 263 % ====================================================================== 264 It is eas ier to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling is done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use the implicit condition variable as parameters and explicitly namesthe monitors (A and B) associated with the condition. Note that in \CFA, condition variables are tied to a \emph{group} of monitors on first use (called branding), which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors. The example below shows the simple case of having two threads (one for each column) and a single monitor A.264 It is easy to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling is done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use the implicit condition variable as parameters and explicitly name the monitors (A and B) associated with the condition. Note that in \CFA, condition variables are tied to a \emph{group} of monitors on first use (called branding), which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors. The example below shows the simple case of having two threads (one for each column) and a single monitor A. 265 265 266 266 \begin{multicols}{2} … … 299 299 \noindent This version uses \gls{bulk-acq} (denoted using the {\sf\&} symbol), but the presence of multiple monitors does not add a particularly new meaning. Synchronization happens between the two threads in exactly the same way and order. The only difference is that mutual exclusion covers a group of monitors. On the implementation side, handling multiple monitors does add a degree of complexity as the next few examples demonstrate. 300 300 301 While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well-known deadlock problem is the Nested Monitor Problem~\cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem 301 While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well-known deadlock problem is the Nested Monitor Problem~\cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem: 302 302 \begin{multicols}{2} 303 303 \begin{pseudo} … … 351 351 % ====================================================================== 352 352 353 A larger example is presented to show complex issues for \gls{bulk-acq} and all the implementation options are analyzed. Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code to implement the pseudo-code in listing \ref{lst:int-bulk-pseudo}. For the purpose of translating the given pseudo-code into \CFA-code, any method of introducing a monitor is acceptable, e.g., \code{mutex} parameters, global variables, pointer parameters, or using locals with the \code{mutex}-statement.353 A larger example is presented to show complex issues for \gls{bulk-acq} and its implementation options are analyzed. Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code to implement the pseudo-code in listing \ref{lst:int-bulk-pseudo}. For the purpose of translating the given pseudo-code into \CFA-code, any method of introducing a monitor is acceptable, e.g., \code{mutex} parameters, global variables, pointer parameters, or using locals with the \code{mutex} statement. 354 354 355 355 \begin{figure}[!t] … … 446 446 \end{figure} 447 447 448 The complexity begins at code sections 4 and 8 in listing \ref{lst:int-bulk-pseudo}, which are where the existing semantics of internal scheduling needs to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired and is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should ``release \code{A & B}'' (listing \ref{lst:int-bulk-pseudo} line \ref{line:releaseFirst}), it must actually transfer ownership of monitor \code{B} to the waiting thread. This ownership transfer is required in order to prevent barging into \code{B} by another thread, since both the signalling and signalled threads still need monitor \code{A}. There are three options.448 The complexity begins at code sections 4 and 8 in listing \ref{lst:int-bulk-pseudo}, which are where the existing semantics of internal scheduling needs to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired, which is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should ``release \code{A & B}'' (listing \ref{lst:int-bulk-pseudo} line \ref{line:releaseFirst}), it must actually transfer ownership of monitor \code{B} to the waiting thread. This ownership transfer is required in order to prevent barging into \code{B} by another thread, since both the signalling and signalled threads still need monitor \code{A}. There are three options: 449 449 450 450 \subsubsection{Delaying Signals} 451 The obvious solution to solve the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that that moment is when the last lock is no longer needed because this semantics fits most closely to the behaviour of single-monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from multiple objects to a single group of objects, effectively making the existing single-monitor semantic viable by simply changing monitors to monitor groups. This solution releases the monitors once every monitor in a group can be released. However, since some monitors are never released (i.e., the monitor of a thread), this interpretation means a group might never be released. A more interesting interpretation is to transfer the group until it can be disbanded, which means the group is not passed further and a thread can retain its locks.452 453 However, listing \ref{lst:int-secret} shows this solution can become much more complicated depending on what is executed while secretly holding B at line \ref{line:secret}, while avoiding the need to transfer ownership of a subset of the condition monitors. Listing \ref{lst:dependency} shows a slightly different example where a third thread is waiting on monitor \code{A}, using a different condition variable. Because the third thread is signalled when secretly holding \code{B}, the goal becomes unreachable. Depending on the order of signals (listing \ref{lst:dependency} line \ref{line:signal-ab} and \ref{line:signal-a}) two cases can happen 451 The obvious solution to the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that that moment is when the last lock is no longer needed, because this semantics fits most closely to the behaviour of single-monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from multiple objects to a single group of objects, effectively making the existing single-monitor semantic viable by simply changing monitors to monitor groups. This solution releases the monitors once every monitor in a group can be released. However, since some monitors are never released (e.g., the monitor of a thread), this interpretation means a group might never be released. A more interesting interpretation is to transfer the group until all its monitors are released, which means the group is not passed further and a thread can retain its locks. 452 453 However, listing \ref{lst:int-secret} shows this solution can become much more complicated depending on what is executed while secretly holding B at line \ref{line:secret}, while avoiding the need to transfer ownership of a subset of the condition monitors. Listing \ref{lst:dependency} shows a slightly different example where a third thread is waiting on monitor \code{A}, using a different condition variable. Because the third thread is signalled when secretly holding \code{B}, the goal becomes unreachable. Depending on the order of signals (listing \ref{lst:dependency} line \ref{line:signal-ab} and \ref{line:signal-a}) two cases can happen: 454 454 455 455 \paragraph{Case 1: thread $\alpha$ goes first.} In this case, the problem is that monitor \code{A} needs to be passed to thread $\beta$ when thread $\alpha$ is done with it. … … 459 459 Note that ordering is not determined by a race condition but by whether signalled threads are enqueued in FIFO or FILO order. However, regardless of the answer, users can move line \ref{line:signal-a} before line \ref{line:signal-ab} and get the reverse effect for listing \ref{lst:dependency}. 460 460 461 In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means knowing when to dispanda group becomes complex and inefficient (see next section) and therefore effectively precludes this approach.461 In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means knowing when to release a group becomes complex and inefficient (see next section) and therefore effectively precludes this approach. 462 462 463 463 \subsubsection{Dependency graphs} … … 501 501 \end{figure} 502 502 503 In listing \ref{lst:int-bulk-pseudo}, there is a solution that satisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases \code{A & B} and then the waiter transfers back ownership of \code{A} back to the signaller when it releases it, then the problem is solved (\code{B} is no longer in use at this point). Dynamically finding the correct order is therefore the second possible solution. The problem is effectively resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and it seems to increase in a manner close to polynomial. This complexity explosioncan be seen in listing \ref{lst:explosion}, which is just a direct extension to three monitors, requires at least three ownership transfer and has multiple solutions. Furthermore, the presence of multiple solutions for ownership transfer can cause deadlock problems if a specific solution is not consistently picked; In the same way that multiple lock acquiring order can cause deadlocks.503 In listing \ref{lst:int-bulk-pseudo}, there is a solution that satisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases \code{A & B} and then the waiter transfers back ownership of \code{A} back to the signaller when it releases it, then the problem is solved (\code{B} is no longer in use at this point). Dynamically finding the correct order is therefore the second possible solution. The problem is effectively resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and has a super-linear complexity. This complexity can be seen in listing \ref{lst:explosion}, which is just a direct extension to three monitors, requires at least three ownership transfer and has multiple solutions. Furthermore, the presence of multiple solutions for ownership transfer can cause deadlock problems if a specific solution is not consistently picked; In the same way that multiple lock acquiring order can cause deadlocks. 504 504 \begin{figure} 505 505 \begin{multicols}{2} … … 530 530 \end{figure} 531 531 532 Given the three threads example in listing \ref{lst:dependency}, figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows the dependency of that statement (e.g., $\alpha1$ must happen before $\alpha2$). The extra challenge is that this dependency graph is effectively post-mortem, but the runtime system needs to be able to build and solve these graphs as the dependenc y unfolds. Resolving dependency graphs being a complex and expensive endeavour, this solution is not the preferred one.532 Given the three threads example in listing \ref{lst:dependency}, figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows the dependency of that statement (e.g., $\alpha1$ must happen before $\alpha2$). The extra challenge is that this dependency graph is effectively post-mortem, but the runtime system needs to be able to build and solve these graphs as the dependencies unfold. Resolving dependency graphs being a complex and expensive endeavour, this solution is not the preferred one. 533 533 534 534 \subsubsection{Partial Signalling} \label{partial-sig} 535 Finally, the solution that is chosen for \CFA is to use partial signalling. Again using listing \ref{lst:int-bulk-pseudo}, the partial signalling solution transfers ownership of monitor \code{B} at lines \ref{line:signal1} to the waiter but does not wake the waiting thread since it is still using monitor \code{A}. Only when it reaches line \ref{line:lastRelease} does it actually wake up the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions ,passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency graph solving algorithms, which is why it was chosen. Furthermore, after being fully implemented, this solution does not appear to have any significant downsides.536 537 Using partial signalling, listing \ref{lst:dependency} can be solved easily 535 Finally, the solution that is chosen for \CFA is to use partial signalling. Again using listing \ref{lst:int-bulk-pseudo}, the partial signalling solution transfers ownership of monitor \code{B} at lines \ref{line:signal1} to the waiter but does not wake the waiting thread since it is still using monitor \code{A}. Only when it reaches line \ref{line:lastRelease} does it actually wake up the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency graph solving algorithms, which is why it was chosen. Furthermore, after being fully implemented, this solution does not appear to have any significant downsides. 536 537 Using partial signalling, listing \ref{lst:dependency} can be solved easily: 538 538 \begin{itemize} 539 539 \item When thread $\gamma$ reaches line \ref{line:release-ab} it transfers monitor \code{B} to thread $\alpha$ and continues to hold monitor \code{A}. … … 652 652 An important note is that, until now, signalling a monitor was a delayed operation. The ownership of the monitor is transferred only when the monitor would have otherwise been released, not at the point of the \code{signal} statement. However, in some cases, it may be more convenient for users to immediately transfer ownership to the thread that is waiting for cooperation, which is achieved using the \code{signal_block} routine. 653 653 654 The example in table \ref{tbl:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits , this behaviour requires additional synchronization when a two-way handshake is needed. To avoid this explicit synchronization, the \code{condition} type offers the \code{signal_block} routine, which handles the two-way handshake as shown in the example. This feature removes the need for a second condition variables and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the frondend and the back end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call.654 The example in table \ref{tbl:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits; this behaviour requires additional synchronization when a two-way handshake is needed. To avoid this explicit synchronization, the \code{condition} type offers the \code{signal_block} routine, which handles the two-way handshake as shown in the example. This feature removes the need for a second condition variables and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the front end and the back end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call. 655 655 656 656 % ====================================================================== … … 659 659 % ====================================================================== 660 660 % ====================================================================== 661 An alternative to internal scheduling is external scheduling , e.g., in \uC.662 \begin{ center}661 An alternative to internal scheduling is external scheduling (see Table~\ref{tbl:sched}). 662 \begin{table} 663 663 \begin{tabular}{|c|c|c|} 664 664 Internal Scheduling & External Scheduling & Go\\ … … 720 720 \end{gocode} 721 721 \end{tabular} 722 \end{center} 723 This method is more constrained and explicit, which helps users reduce the non-deterministic nature of concurrency. Indeed, as the following examples demonstrates, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. External scheduling can generally be done either in terms of control flow (e.g., Ada with \code{accept}, \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). Of course, both of these paradigms have their own strengths and weaknesses, but for this project control-flow semantics was chosen to stay consistent with the rest of the languages semantics. Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multiple-monitor routines. The previous example shows a simple use \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s. 722 \caption{Different forms of scheduling.} 723 \label{tbl:sched} 724 \end{table} 725 This method is more constrained and explicit, which helps users reduce the non-deterministic nature of concurrency. Indeed, as the following examples demonstrate, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. External scheduling can generally be done either in terms of control flow (e.g., Ada with \code{accept}, \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). Of course, both of these paradigms have their own strengths and weaknesses, but for this project, control-flow semantics was chosen to stay consistent with the rest of the languages semantics. Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multiple-monitor routines. The previous example shows a simple use \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s. 724 726 725 727 For the \code{P} member above using internal scheduling, the call to \code{wait} only guarantees that \code{V} is the last routine to access the monitor, allowing a third routine, say \code{isInUse()}, acquire mutual exclusion several times while routine \code{P} is waiting. On the other hand, external scheduling guarantees that while routine \code{P} is waiting, no other routine than \code{V} can acquire the monitor. … … 761 763 \end{tabular} 762 764 \end{center} 763 For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. Indeed, monitors are often expressed as an entry queue and some acceptor queue as in the following figure: 764 765 \begin{figure}[H] 766 \begin{center} 767 {\resizebox{0.4\textwidth}{!}{\input{monitor}}} 768 \end{center} 769 \label{fig:monitor} 765 For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. Indeed, monitors are often expressed as an entry queue and some acceptor queue as in Figure~\ref{fig:ClassicalMonitor}. 766 767 \begin{figure} 768 \centering 769 \subfloat[Classical Monitor] { 770 \label{fig:ClassicalMonitor} 771 {\resizebox{0.45\textwidth}{!}{\input{monitor}}} 772 }% subfloat 773 \qquad 774 \subfloat[\Gls{bulk-acq} Monitor] { 775 \label{fig:BulkMonitor} 776 {\resizebox{0.45\textwidth}{!}{\input{ext_monitor}}} 777 }% subfloat 778 \caption{External Scheduling Monitor} 770 779 \end{figure} 771 780 772 There are other alternatives to these pictures, but in the case of this picture, implementing a fast accept check is relatively easy. Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (e.g., 128) of mutex members. This approach requires a unique dense ordering of routines with an upper-bound and that ordering must be consistent across translation units. For OO languages these constraints are common, since objects only offer adding member routines consistently across translation units via inheritance. However, in \CFA users can extend objects with mutex routines that are only visible in certain translation unit. This means that establishing a program-wide dense-ordering among mutex routines can only be done in the program linking phase, and still could have issues when using dynamically shared objects. 773 774 The alternative is to alter the implementation like this: 775 \begin{center} 776 {\resizebox{0.4\textwidth}{!}{\input{ext_monitor}}} 777 \end{center} 781 There are other alternatives to these pictures, but in the case of the left picture, implementing a fast accept check is relatively easy. Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (e.g., 128) of mutex members. This approach requires a unique dense ordering of routines with an upper-bound and that ordering must be consistent across translation units. For OO languages these constraints are common, since objects only offer adding member routines consistently across translation units via inheritance. However, in \CFA users can extend objects with mutex routines that are only visible in certain translation unit. This means that establishing a program-wide dense-ordering among mutex routines can only be done in the program linking phase, and still could have issues when using dynamically shared objects. 782 783 The alternative is to alter the implementation as in Figure~\ref{fig:BulkMonitor}. 778 784 Here, the mutex routine called is associated with a thread on the entry queue while a list of acceptable routines is kept separate. Generating a mask dynamically means that the storage for the mask information can vary between calls to \code{waitfor}, allowing for more flexibility and extensions. Storing an array of accepted function pointers replaces the single instruction bitmask comparison with dereferencing a pointer followed by a linear search. Furthermore, supporting nested external scheduling (e.g., listing \ref{lst:nest-ext}) may now require additional searches for the \code{waitfor} statement to check if a routine is already queued. 779 785 … … 793 799 \end{figure} 794 800 795 Note that in the secondpicture, tasks need to always keep track of the monitors associated with mutex routines, and the routine mask needs to have both a function pointer and a set of monitors, as is discussed in the next section. These details are omitted from the picture for the sake of simplicity.801 Note that in the right picture, tasks need to always keep track of the monitors associated with mutex routines, and the routine mask needs to have both a function pointer and a set of monitors, as is discussed in the next section. These details are omitted from the picture for the sake of simplicity. 796 802 797 803 At this point, a decision must be made between flexibility and performance. Many design decisions in \CFA achieve both flexibility and performance, for example polymorphic routines add significant flexibility but inlining them means the optimizer can easily remove any runtime cost. Here, however, the cost of flexibility cannot be trivially removed. In the end, the most flexible approach has been chosen since it allows users to write programs that would otherwise be hard to write. This decision is based on the assumption that writing fast but inflexible locks is closer to a solved problem than writing locks that are as flexible as external scheduling in \CFA. … … 838 844 \end{cfacode} 839 845 840 Note that the set of monitors passed to the \code{waitfor} statement must be entirely contained in the set of monitors already acquired in the routine. \code{waitfor} used in any other context is Undefined Behaviour.841 842 An important behaviour to note is when a set of monitors only match partially 846 Note that the set of monitors passed to the \code{waitfor} statement must be entirely contained in the set of monitors already acquired in the routine. \code{waitfor} used in any other context is undefined behaviour. 847 848 An important behaviour to note is when a set of monitors only match partially: 843 849 844 850 \begin{cfacode} … … 862 868 } 863 869 \end{cfacode} 864 While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets does not wake -up the waiting thread. It is also important to note that in the case of external scheduling the order of parameters is irrelevant; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are indistinguishable waiting condition.870 While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets does not wakeup the waiting thread. It is also important to note that in the case of external scheduling the order of parameters is irrelevant; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are indistinguishable waiting condition. 865 871 866 872 % ====================================================================== … … 870 876 % ====================================================================== 871 877 872 Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted because the compiler validates at compile time the validity of the function type and the parameters used with the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the waitfor statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading but overloading is possible.878 Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted because the compiler validates at compile time the validity of the function type and the parameters used with the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the waitfor statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading, but overloading is possible. 873 879 \begin{figure} 874 880 \begin{cfacode}[caption={Various correct and incorrect uses of the waitfor statement},label={lst:waitfor}] … … 899 905 waitfor(f4, a1); //Incorrect : f4 ambiguous 900 906 901 waitfor(f2, a1, b2); //Undefined Behaviour : b2 not mutex907 waitfor(f2, a1, b2); //Undefined behaviour : b2 not mutex 902 908 } 903 909 \end{cfacode} 904 910 \end{figure} 905 911 906 Finally, for added flexibility, \CFA supports constructing a complex \code{waitfor} statement using the \code{or}, \code{timeout} and \code{else}. Indeed, multiple \code{waitfor} clauses can be chained together using \code{or}; this chain forms a single statement that uses baton pass to any function that fits one of the function+monitor set passed in. To enable users to tell which accepted function executed, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement, which is executed after the clause is triggered. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, which checks for a matching function call already arrived and otherwise continues. Any and all of these clauses can be preceded by a \code{when} condition to dynamically toggle the accept clauses on or off based on some current state. Listing \ref{lst:waitfor2} ,demonstrates several complex masks and some incorrect ones.912 Finally, for added flexibility, \CFA supports constructing a complex \code{waitfor} statement using the \code{or}, \code{timeout} and \code{else}. Indeed, multiple \code{waitfor} clauses can be chained together using \code{or}; this chain forms a single statement that uses baton pass to any function that fits one of the function+monitor set passed in. To enable users to tell which accepted function executed, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement, which is executed after the clause is triggered. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, which checks for a matching function call already arrived and otherwise continues. Any and all of these clauses can be preceded by a \code{when} condition to dynamically toggle the accept clauses on or off based on some current state. Listing \ref{lst:waitfor2} demonstrates several complex masks and some incorrect ones. 907 913 908 914 \begin{figure} … … 972 978 % ====================================================================== 973 979 % ====================================================================== 974 An interesting use for the \code{waitfor} statement is destructor semantics. Indeed, the \code{waitfor} statement can accept any \code{mutex} routine, which includes the destructor (see section \ref{data}). However, with the semantics discussed until now, waiting for the destructor does not make any sense since using an object after its destructor is called is undefined behaviour. The simplest approach is to disallow \code{waitfor} on a destructor. However, a more expressive approach is to flip ordering of execution when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current \code{mutex} routine, similarly to how a condition is signalled.980 An interesting use for the \code{waitfor} statement is destructor semantics. Indeed, the \code{waitfor} statement can accept any \code{mutex} routine, which includes the destructor (see section \ref{data}). However, with the semantics discussed until now, waiting for the destructor does not make any sense, since using an object after its destructor is called is undefined behaviour. The simplest approach is to disallow \code{waitfor} on a destructor. However, a more expressive approach is to flip ordering of execution when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current \code{mutex} routine, similarly to how a condition is signalled. 975 981 \begin{figure} 976 982 \begin{cfacode}[caption={Example of an executor which executes action in series until the destructor is called.},label={lst:dtor-order}] -
doc/proposals/concurrency/text/frontpgs.tex
r326cd2b r9cdfb4d0 39 39 \vspace*{2.0cm} 40 40 41 Waterloo, Ontario, Canada, 201 7\\41 Waterloo, Ontario, Canada, 2018 \\ 42 42 43 43 \vspace*{1.0cm} 44 44 45 \copyright\ Thierry Delisle 201 7\\45 \copyright\ Thierry Delisle 2018 \\ 46 46 \end{center} 47 47 \end{titlepage} … … 154 154 % \newpage 155 155 156 % L I S T O F T A B L E S 157 % ----------------------------- 158 \addcontentsline{toc}{chapter}{List of Acronyms} 159 \printglossary[type=\acronymtype,title={List of Acronyms}] 160 \cleardoublepage 161 \phantomsection % allows hyperref to link to the correct page 162 156 163 % Change page numbering back to Arabic numerals 157 164 \pagenumbering{arabic} -
doc/proposals/concurrency/text/future.tex
r326cd2b r9cdfb4d0 11 11 12 12 \subsection{Performance} \label{futur:perf} 13 This thesis presents a first implementation of the \CFA runtime. Therefore, there is still significant work to improve performance. Many of the data structures and algorithms may change in the future to more efficient versions. For example, the number of monitors in a single \gls{bulk-acq} is only bound by the stack size, this is probably unnecessarily generous. It may be possible that limiting the number helps increase performance. However, it is not obvious that the benefit would be significant.13 This thesis presents a first implementation of the \CFA concurrency runtime. Therefore, there is still significant work to improve performance. Many of the data structures and algorithms may change in the future to more efficient versions. For example, the number of monitors in a single \gls{bulk-acq} is only bound by the stack size, this is probably unnecessarily generous. It may be possible that limiting the number helps increase performance. However, it is not obvious that the benefit would be significant. 14 14 15 15 \subsection{Flexible Scheduling} \label{futur:sched} 16 16 An important part of concurrency is scheduling. Different scheduling algorithms can affect performance (both in terms of average and variation). However, no single scheduler is optimal for all workloads and therefore there is value in being able to change the scheduler for given programs. One solution is to offer various tweaking options to users, allowing the scheduler to be adjusted to the requirements of the workload. However, in order to be truly flexible, it would be interesting to allow users to add arbitrary data and arbitrary scheduling algorithms. For example, a web server could attach Type-of-Service information to threads and have a ``ToS aware'' scheduling algorithm tailored to this specific web server. This path of flexible schedulers will be explored for \CFA. 17 17 18 \subsection{Non-Blocking I O} \label{futur:nbio}19 While most of the parallelism tools are aimed at data parallelism and control-flow parallelism, many modern workloads are not bound on computation but on IO operations, a common case being web servers and XaaS (anything as a service). These types of workloads often require significant engineering around amortizing costs of blocking IO operations. At its core, Non-Blocking IO is an operating system level feature that allows queuing IO operations (e.g., network operations) and registering for notifications instead of waiting for requests to complete. In this context, the role of the language makes Non-Blocking IO easily available and with low overhead. The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises, which can be seen in frameworks like Node.js~\cite{NodeJs} for JavaScript, Spring MVC~\cite{SpringMVC} for Java and Django~\cite{Django} for Python. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear.18 \subsection{Non-Blocking I/O} \label{futur:nbio} 19 While most of the parallelism tools are aimed at data parallelism and control-flow parallelism, many modern workloads are not bound on computation but on IO operations, a common case being web servers and XaaS (anything as a service). These types of workloads often require significant engineering around amortizing costs of blocking IO operations. At its core, non-blocking I/O is an operating system level feature that allows queuing IO operations (e.g., network operations) and registering for notifications instead of waiting for requests to complete. In this context, the role of the language makes Non-Blocking IO easily available and with low overhead. The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises, which can be seen in frameworks like Node.js~\cite{NodeJs} for JavaScript, Spring MVC~\cite{SpringMVC} for Java and Django~\cite{Django} for Python. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear. 20 20 21 21 \subsection{Other Concurrency Tools} \label{futur:tools} -
doc/proposals/concurrency/text/internals.tex
r326cd2b r9cdfb4d0 3 3 There are several challenges specific to \CFA when implementing concurrency. These challenges are a direct result of \gls{bulk-acq} and loose object definitions. These two constraints are the root cause of most design decisions in the implementation. Furthermore, to avoid contention from dynamically allocating memory in a concurrent environment, the internal-scheduling design is (almost) entirely free of mallocs. This approach avoids the chicken and egg problem~\cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal means that memory management is a constant concern in the design of the system. 4 4 5 The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues and all queues are designed with intrusive nodes, where each node has pre-allocated link fields for chaining, to avoid the need for memory allocation. Since several concurrency operations can use an unbound amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient.The only way to use a variable amount of memory without requiring memory allocation is to pre-allocate large buffers of memory eagerly and store the information in these buffers. Conveniently, the call stack fits that description and is easy to use, which is why it is used heavily in the implementation of internal scheduling, particularly variable-length arrays. Since stack allocation is based on scopes, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable-length array. The threads and the condition both have a fixed amount of memory, while muteroutines and blocking calls allow for an unbound amount, within the stack size.5 The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues and all queues are designed with intrusive nodes, where each node has pre-allocated link fields for chaining, to avoid the need for memory allocation. Since several concurrency operations can use an unbound amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient.The only way to use a variable amount of memory without requiring memory allocation is to pre-allocate large buffers of memory eagerly and store the information in these buffers. Conveniently, the call stack fits that description and is easy to use, which is why it is used heavily in the implementation of internal scheduling, particularly variable-length arrays. Since stack allocation is based on scopes, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable-length array. The threads and the condition both have a fixed amount of memory, while \code{mutex} routines and blocking calls allow for an unbound amount, within the stack size. 6 6 7 7 Note that since the major contributions of this thesis are extending monitor semantics to \gls{bulk-acq} and loose object definitions, any challenges that are not resulting of these characteristics of \CFA are considered as solved problems and therefore not discussed. … … 13 13 % ====================================================================== 14 14 15 The first step towards the monitor implementation is simple mute routines. In the single monitor case, mutual-exclusion is done using the entry/exit procedure in listing \ref{lst:entry1}. The entry/exit procedures do not have to be extended to support multiple monitors. Indeed it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlock~\cite{Havender68}. In \CFA, ordering of monitor acquisition relies on memory ordering. This approach is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is Undefined Behaviour. When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values. This array persists for the entire duration of the mutual-exclusion and its ordering reused extensively.15 The first step towards the monitor implementation is simple \code{mutex} routines. In the single monitor case, mutual-exclusion is done using the entry/exit procedure in listing \ref{lst:entry1}. The entry/exit procedures do not have to be extended to support multiple monitors. Indeed it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlock~\cite{Havender68}. In \CFA, ordering of monitor acquisition relies on memory ordering. This approach is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is undefined behaviour. When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values. This array persists for the entire duration of the mutual-exclusion and its ordering reused extensively. 16 16 \begin{figure} 17 17 \begin{multicols}{2} … … 39 39 \end{figure} 40 40 41 \subsection{ 41 \subsection{Details: Interaction with polymorphism} 42 42 Depending on the choice of semantics for when monitor locks are acquired, interaction between monitors and \CFA's concept of polymorphism can be more complex to support. However, it is shown that entry-point locking solves most of the issues. 43 43 44 First of all, interaction between \code{otype} polymorphism and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. It is important to present the difference between the two acquiring options: \glspl{callsite-locking} and entry-point locking, i.e., acquiring the monitors before making a mutex routine-call or as the first operation of the mutex routine-call. For example:44 First of all, interaction between \code{otype} polymorphism (see Section~\ref{s:ParametricPolymorphism}) and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. It is important to present the difference between the two acquiring options: \glspl{callsite-locking} and entry-point locking, i.e., acquiring the monitors before making a mutex routine-call or as the first operation of the mutex routine-call. For example: 45 45 \begin{table}[H] 46 46 \begin{center} … … 109 109 \end{cfacode} 110 110 111 Both entry point and \gls{callsite-locking} are feasible implementations. The current \CFA implementation uses entry-point locking because it requires less work when using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. It is harder to use \gls{raii} for call-site locking, as it does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e. : the function body. For example, the monitor call can appear in the middle of an expression. Furthermore, entry-point locking requires less code generation since any useful routine multiple times,but there is only one entry point for many call sites.111 Both entry point and \gls{callsite-locking} are feasible implementations. The current \CFA implementation uses entry-point locking because it requires less work when using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. It is harder to use \gls{raii} for call-site locking, as it does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e., the function body. For example, the monitor call can appear in the middle of an expression. Furthermore, entry-point locking requires less code generation since any useful routine is called multiple times but there is only one entry point for many call sites. 112 112 113 113 % ====================================================================== … … 127 127 \end{figure} 128 128 129 \subsection{Processors} 130 Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically \texttt{pthread}s in the current implementation of \CFA. Indeed, any parallelism must go through operating-system libraries. However, \glspl{uthread} are still the main source of concurrency, processors are simply the underlying source of parallelism. Indeed, processor \glspl{kthread} simply fetch a \gls{uthread} from the scheduler and run it; they are effectively executers for user-threads. The main benefit of this approach is that it offers a well-defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics. 131 132 \subsection{Stack Management} 133 One of the challenges of this system is to reduce the footprint as much as possible. Specifically, all \texttt{pthread}s created also have a stack created with them, which should be used as much as possible. Normally, coroutines also create their own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the \gls{kthread} stack, effectively stealing the processor stack. The exception to this rule is the Main Processor, i.e., the initial \gls{kthread} that is given to any program. In order to respect C user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor, which can grow very large. 134 129 135 \subsection{Context Switching} 130 As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading, because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that the instruction pointer can be left untouched since the context-switch is always inside the same function. Threads, however, do not context-switch between each other directly. They context-switch to the scheduler. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen. Obviously, this doubles the context-switch cost because threads must context-switch to an intermediate stack. The alternative 1-step context-switch uses the stack of the ``from'' thread to schedule and then context-switches directly to the ``to'' thread. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). Additionally, for users in need for optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch (akin to the Microsoft \code{SwitchToFiber}~\cite{switchToWindows} routine). This option is not currently present in \CFA but the changes required to add it are strictly additive. 131 132 \subsection{Processors} 133 Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically pthreads in the current implementation of \CFA. Indeed, any parallelism must go through operating-system libraries. However, \glspl{uthread} are still the main source of concurrency, processors are simply the underlying source of parallelism. Indeed, processor \glspl{kthread} simply fetch a \gls{uthread} from the scheduler and run it; they are effectively executers for user-threads. The main benefit of this approach is that it offers a well-defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics. 134 135 \subsection{Stack Management} 136 One of the challenges of this system is to reduce the footprint as much as possible. Specifically, all pthreads created also have a stack created with them, which should be used as much as possible. Normally, coroutines also create there own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the \gls{kthread} stack, effectively stealing the processor stack. The exception to this rule is the Main Processor, i.e. the initial \gls{kthread} that is given to any program. In order to respect C user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor, which can grow very large. 136 As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading, because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that the instruction pointer can be left untouched since the context-switch is always inside the same function. Threads, however, do not context-switch between each other directly. They context-switch to the scheduler. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen. Obviously, this doubles the context-switch cost because threads must context-switch to an intermediate stack. The alternative 1-step context-switch uses the stack of the ``from'' thread to schedule and then context-switches directly to the ``to'' thread. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). Additionally, for users in need for optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch (akin to the Microsoft \code{SwitchToFiber}~\cite{switchToWindows} routine). This option is not currently present in \CFA, but the changes required to add it are strictly additive. 137 137 138 138 \subsection{Preemption} \label{preemption} 139 139 Finally, an important aspect for any complete threading system is preemption. As mentioned in chapter \ref{basics}, preemption introduces an extra degree of uncertainty, which enables users to have multiple threads interleave transparently, rather than having to cooperate among threads for proper scheduling and CPU distribution. Indeed, preemption is desirable because it adds a degree of isolation among threads. In a fully cooperative system, any thread that runs a long loop can starve other threads, while in a preemptive system, starvation can still occur but it does not rely on every thread having to yield or block on a regular basis, which reduces significantly a programmer burden. Obviously, preemption is not optimal for every workload. However any preemptive system can become a cooperative system by making the time slices extremely large. Therefore, \CFA uses a preemptive threading system. 140 140 141 Preemption in \CFA is based on kernel timers, which are used to run a discrete-event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption, it inserts the time in a sorted order and sets a kernel timer for the closest one, effectively stepping through preemption events on each signal sent by the timer. These timers use the Linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread. This results in an implementation problem, because when delivering signals to a process, the kernel can deliver the signal to any kernel thread for which the signal is not blocked, i.e.:141 Preemption in \CFA\footnote{Note that the implementation of preemption is strongly tied with the underlying threading system. For this reason, only the Linux implementation is cover, \CFA does not run on Windows at the time of writting} is based on kernel timers, which are used to run a discrete-event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption, it inserts the time in a sorted order and sets a kernel timer for the closest one, effectively stepping through preemption events on each signal sent by the timer. These timers use the Linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread. This results in an implementation problem, because when delivering signals to a process, the kernel can deliver the signal to any kernel thread for which the signal is not blocked, i.e.: 142 142 \begin{quote} 143 143 A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked. If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which to deliver the signal. 144 144 SIGNAL(7) - Linux Programmer's Manual 145 145 \end{quote} 146 For the sake of simplicity and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every kernel thread except one. Now because of how involuntary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread.147 148 Involuntary context-switching is done by sending signal {\tt SIGUSER1} to the corresponding proces\-sor and having the thread yield from inside the signal handler. This approach effectively context-switches away from the signal handler back to the kernel and the signal-handler frame is eventually unwound when the thread is scheduled again. As a result, a signal-handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal-handlers save and restore signal masks because user-thread migration can cause a signal mask to migrate from one kernel thread to another. This behaviour is only a problem if all kernel threads, among which a user thread can migrate, differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions.}. However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel-alarm thread can cause deadlocks. For this reason, the alarm thread is in a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). This unblocking is also done using {\tt SIGALRM}, but sent through the \code{pthread_sigqueue}. Indeed, \code{sigwait} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel.146 For the sake of simplicity, and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every kernel thread except one. 147 148 Now because of how involuntary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread. Hence, involuntary context-switching is done by sending signal {\tt SIGUSR1} to the corresponding proces\-sor and having the thread yield from inside the signal handler. This approach effectively context-switches away from the signal handler back to the kernel and the signal handler frame is eventually unwound when the thread is scheduled again. As a result, a signal handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal handlers save and restore signal masks because user-thread migration can cause a signal mask to migrate from one kernel thread to another. This behaviour is only a problem if all kernel threads, among which a user thread can migrate, differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions.}. However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel-alarm thread can cause deadlocks. For this reason, the alarm thread is in a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). This unblocking is also done using {\tt SIGALRM}, but sent through the \code{pthread_sigqueue}. Indeed, \code{sigwait} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel. 149 149 150 150 \subsection{Scheduler} … … 156 156 % ====================================================================== 157 157 % ====================================================================== 158 The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig: monitor} for convenience):158 The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:ClassicalMonitor} for convenience): 159 159 160 160 \begin{figure}[H] … … 177 177 \end{figure} 178 178 179 This picture and the proper entry and leave algorithms (see listing \ref{lst:entry2}) is the fundamental implementation of internal scheduling. Note that when a thread is moved from the condition to the AS-stack, it is conceptually split the threadinto N pieces, where N is the number of monitors specified in the parameter list. The thread is woken up when all the pieces have popped from the AS-stacks and made active. In this picture, the threads are split into halves but this is only because there are two monitors. For a specific signalling operation every monitor needs a piece of thread on its AS-stack.179 This picture and the proper entry and leave algorithms (see listing \ref{lst:entry2}) is the fundamental implementation of internal scheduling. Note that when a thread is moved from the condition to the AS-stack, it is conceptually split into N pieces, where N is the number of monitors specified in the parameter list. The thread is woken up when all the pieces have popped from the AS-stacks and made active. In this picture, the threads are split into halves but this is only because there are two monitors. For a specific signalling operation every monitor needs a piece of thread on its AS-stack. 180 180 181 181 \begin{figure}[b] … … 210 210 \end{figure} 211 211 212 Some important things to notice about the exit routine.The solution discussed in \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe as well as preventing any potential barging. The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the call stack of the \code{wait} and \code{signal_block} routines.212 The solution discussed in \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe as well as preventing any potential barging. The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the call stack of the \code{wait} and \code{signal_block} routines. 213 213 214 214 \begin{figure}[H] … … 220 220 \end{figure} 221 221 222 Figure \ref{fig:structs} shows a high-level representation of these data structures. The main idea behind them is that, a thread cannot contain an arbitrary number of intrusive stacks for linking onto monitor. The \code{condition node} is the data structure that is queued onto a condition variable and, when signalled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. Once all the criteria have been popped from their respective AS-stacks, the thread is woken up, which is what is shown in listing \ref{lst:entry2}.222 Figure \ref{fig:structs} shows a high-level representation of these data structures. The main idea behind them is that, a thread cannot contain an arbitrary number of intrusive ``next'' pointers for linking onto monitors. The \code{condition node} is the data structure that is queued onto a condition variable and, when signalled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. Once all the criteria have been popped from their respective AS-stacks, the thread is woken up, which is what is shown in listing \ref{lst:entry2}. 223 223 224 224 % ====================================================================== … … 227 227 % ====================================================================== 228 228 % ====================================================================== 229 Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. For internal scheduling, these queues are part of condition variables, which are still unique for a given scheduling operation ( e.g., no signal statement uses multiple conditions). However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired. These monitors being the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. This requires an algorithm to choose which monitor holds the relevant queue. It is also important that said algorithm be independent of the order in which users list parameters. The proposed algorithm is to fall back on monitor lock ordering (sorting by address) and specify that the monitor that is acquired first is the one with the relevant waiting queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint.230 231 This algorithm choice has two consequences 229 Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. For internal scheduling, these queues are part of condition variables, which are still unique for a given scheduling operation (i.e., no signal statement uses multiple conditions). However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired. These monitors being the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. This requires an algorithm to choose which monitor holds the relevant queue. It is also important that said algorithm be independent of the order in which users list parameters. The proposed algorithm is to fall back on monitor lock ordering (sorting by address) and specify that the monitor that is acquired first is the one with the relevant waiting queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint. 230 231 This algorithm choice has two consequences: 232 232 \begin{itemize} 233 233 \item The queue of the monitor with the lowest address is no longer a true FIFO queue because threads can be moved to the front of the queue. These queues need to contain a set of monitors for each of the waiting threads. Therefore, another thread whose set contains the same lowest address monitor but different lower priority monitors may arrive first but enter the critical section after a thread with the correct pairing. 234 234 \item The queue of the lowest priority monitor is both required and potentially unused. Indeed, since it is not known at compile time which monitor is the monitor which has the lowest address, every monitor needs to have the correct queues even though it is possible that some queues go unused for the entire duration of the program, for example if a monitor is only used in a specific pair. 235 235 \end{itemize} 236 Therefore, the following modifications need to be made to support external scheduling 236 Therefore, the following modifications need to be made to support external scheduling: 237 237 \begin{itemize} 238 \item The threads waiting on the entry -queue need to keep track of which routine it is trying to enter, and using which set of monitors. The \code{mutex} routine already has all the required information on its stackso the thread only needs to keep a pointer to that information.238 \item The threads waiting on the entry queue need to keep track of which routine they are trying to enter, and using which set of monitors. The \code{mutex} routine already has all the required information on its stack, so the thread only needs to keep a pointer to that information. 239 239 \item The monitors need to keep a mask of acceptable routines. This mask contains for each acceptable routine, a routine pointer and an array of monitors to go with it. It also needs storage to keep track of which routine was accepted. Since this information is not specific to any monitor, the monitors actually contain a pointer to an integer on the stack of the waiting thread. Note that if a thread has acquired two monitors but executes a \code{waitfor} with only one monitor as a parameter, setting the mask of acceptable routines to both monitors will not cause any problems since the extra monitor will not change ownership regardless. This becomes relevant when \code{when} clauses affect the number of monitors passed to a \code{waitfor} statement. 240 \item The entry/exit routine need to be updated as shown in listing \ref{lst:entry3}.240 \item The entry/exit routines need to be updated as shown in listing \ref{lst:entry3}. 241 241 \end{itemize} 242 242 243 243 \subsection{External Scheduling - Destructors} 244 Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine. This routine is needed because of the storage requirements of the call order inversion. Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. For regular \code{waitfor} statements, the call stack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later. The waitforsemantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor}244 Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine. This routine is needed because of the storage requirements of the call order inversion. Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. For regular \code{waitfor} statements, the call stack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later. The \code{waitfor} semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor} 245 245 246 246 \begin{figure} -
doc/proposals/concurrency/text/intro.tex
r326cd2b r9cdfb4d0 2 2 \chapter{Introduction} 3 3 % ====================================================================== 4 5 This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, call \CFA. Furthermore, the proposed \acrshort{api} doubles as an early definition of the \CFA language and library. This thesis also comes with an implementation of the concurrency library for \CFA as well as all the required language features added to the source-to-source translator. 4 This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, called \CFA. Furthermore, the proposed \acrshort{api} doubles as an early definition of the \CFA language and library. This thesis also provides an implementation of the concurrency library for \CFA as well as all the required language features added to the source-to-source translator. 6 5 7 6 There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the programmer. While these two concepts are often combined, they are in fact distinct, requiring different tools~\cite{Buhr05a}. Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization. 7 8 In the context of this thesis, a \textbf{thread} is a fundamental unit of execution that runs a sequence of code, generally on a program stack. Having multiple simultaneous threads gives rise to concurrency and generally requires some kind of locking mechanism to ensure proper execution. Correspondingly, \textbf{concurrency} is defined as the concepts and challenges that occur when multiple independent (sharing memory, timing dependencies, etc.) concurrent threads are introduced. Accordingly, \textbf{locking} (and by extension locks) are defined as a mechanism that prevents the progress of certain threads in order to avoid problems due to concurrency. Finally, in this thesis \textbf{parallelism} is distinct from concurrency and is defined as running multiple threads simultaneously. More precisely, parallelism implies \emph{actual} simultaneous execution as opposed to concurrency which only requires \emph{apparent} simultaneous execution. As such, parallelism is only observable in the differences in performance or, more generally, differences in timing. -
doc/proposals/concurrency/text/parallelism.tex
r326cd2b r9cdfb4d0 7 7 % # # # # # # # ####### ####### ####### ####### ### ##### # # 8 8 \chapter{Parallelism} 9 Historically, computer performance was about processor speeds and instruction s count. However, with heat dissipation being a direct consequence of speed increase, parallelism has become the new source for increased performance~\cite{Sutter05, Sutter05b}. In this decade, it is notlonger reasonable to create a high-performance application without caring about parallelism. Indeed, parallelism is an important aspect of performance and more specifically throughput and hardware utilization. The lowest-level approach of parallelism is to use \glspl{kthread} in combination with semantics like \code{fork}, \code{join}, etc. However, since these have significant costs and limitations, \glspl{kthread} are now mostly used as an implementation tool rather than a user oriented one. There are several alternatives to solve these issues that all have strengths and weaknesses. While there are many variations of the presented paradigms, most of these variations do not actually change the guarantees or the semantics, they simply move costs in order to achieve better performance for certain workloads.9 Historically, computer performance was about processor speeds and instruction counts. However, with heat dissipation being a direct consequence of speed increase, parallelism has become the new source for increased performance~\cite{Sutter05, Sutter05b}. In this decade, it is no longer reasonable to create a high-performance application without caring about parallelism. Indeed, parallelism is an important aspect of performance and more specifically throughput and hardware utilization. The lowest-level approach of parallelism is to use \glspl{kthread} in combination with semantics like \code{fork}, \code{join}, etc. However, since these have significant costs and limitations, \glspl{kthread} are now mostly used as an implementation tool rather than a user oriented one. There are several alternatives to solve these issues that all have strengths and weaknesses. While there are many variations of the presented paradigms, most of these variations do not actually change the guarantees or the semantics, they simply move costs in order to achieve better performance for certain workloads. 10 10 11 11 \section{Paradigms} 12 12 \subsection{User-Level Threads} 13 A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provides but can be used on a much larger scale. This approach is the most powerful solution as it allows all the features of multithreading, while removing several of the more expensive costs of kernel threads. The downside is that almost none of the low-level threading problems are hidden; users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong guarantees but the parallelism toolkit offers very little to reduce complexity in itself.13 A direct improvement on the \gls{kthread} approach is to use \glspl{uthread}. These threads offer most of the same features that the operating system already provides but can be used on a much larger scale. This approach is the most powerful solution as it allows all the features of multithreading, while removing several of the more expensive costs of kernel threads. The downside is that almost none of the low-level threading problems are hidden; users still have to think about data races, deadlocks and synchronization issues. These issues can be somewhat alleviated by a concurrency toolkit with strong guarantees, but the parallelism toolkit offers very little to reduce complexity in itself. 14 14 15 15 Examples of languages that support \glspl{uthread} are Erlang~\cite{Erlang} and \uC~\cite{uC++book}. 16 16 17 17 \subsection{Fibers : User-Level Threads Without Preemption} \label{fibers} 18 A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantic differences with \glspl{uthread}. The significant difference between \glspl{uthread} and \glspl{fiber} is the lack of \gls{preemption} in the latter. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.18 A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantic differences with \glspl{uthread}. The significant difference between \glspl{uthread} and \glspl{fiber} is the lack of \gls{preemption} in the latter. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths, but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers. 19 19 20 20 An example of a language that uses fibers is Go~\cite{Go} … … 26 26 27 27 \subsection{Paradigm Performance} 28 While the choice between the three paradigms listed above may have significant performance implication , it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantees that the \gls{pool}-based system has the best performance thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilization, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are largeenough the paradigm choice is largely amortized by the actual work done.28 While the choice between the three paradigms listed above may have significant performance implications, it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantees equivalent performance across paradigms and that the \gls{pool}-based system has the best efficiency thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilization, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large, enough the paradigm choice is largely amortized by the actual work done. 29 29 30 30 \section{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel} 31 A \gls{cfacluster} is a group of \gls {kthread} executed in isolation. \Glspl{uthread} are scheduled on the \glspl{kthread} of a given \gls{cfacluster}, allowing organization between \glspl{uthread} and \glspl{kthread}. It is important that \glspl{kthread} belonging to a same \glspl{cfacluster} have homogeneous settings, otherwise migrating a \gls{uthread} from one \gls{kthread} to the other can cause issues. A \gls{cfacluster} also offers a plugable scheduler that can optimize the workload generated by the \glspl{uthread}.31 A \gls{cfacluster} is a group of \glspl{kthread} executed in isolation. \Glspl{uthread} are scheduled on the \glspl{kthread} of a given \gls{cfacluster}, allowing organization between \glspl{uthread} and \glspl{kthread}. It is important that \glspl{kthread} belonging to a same \glspl{cfacluster} have homogeneous settings, otherwise migrating a \gls{uthread} from one \gls{kthread} to the other can cause issues. A \gls{cfacluster} also offers a pluggable scheduler that can optimize the workload generated by the \glspl{uthread}. 32 32 33 \Glspl{cfacluster} have not been fully implemented in the context of this thesis , currently \CFA only supports one \gls{cfacluster}, the initial one.33 \Glspl{cfacluster} have not been fully implemented in the context of this thesis. Currently \CFA only supports one \gls{cfacluster}, the initial one. 34 34 35 35 \subsection{Future Work: Machine Setup}\label{machine} 36 While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, a system using \acrshort{numa} configurations may benefit from users being able to tie clusters and \/or kernel threads to certain CPU cores. OS support for CPU affinity is now common~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.36 While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, a system using \acrshort{numa} configurations may benefit from users being able to tie clusters and/or kernel threads to certain CPU cores. OS support for CPU affinity is now common~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity. 37 37 38 38 \subsection{Paradigms}\label{cfaparadigms} -
doc/proposals/concurrency/text/results.tex
r326cd2b r9cdfb4d0 38 38 39 39 \section{Micro Benchmarks} 40 All benchmarks are run using the same harness to produce the results, seen as the \code{BENCH()} macro in the following examples. This macro uses the following logic to benchmark the code 40 All benchmarks are run using the same harness to produce the results, seen as the \code{BENCH()} macro in the following examples. This macro uses the following logic to benchmark the code: 41 41 \begin{pseudo} 42 #define BENCH(run, result) 43 before = gettime(); 44 run; 45 after = gettime(); 42 #define BENCH(run, result) \ 43 before = gettime(); \ 44 run; \ 45 after = gettime(); \ 46 46 result = (after - before) / N; 47 47 \end{pseudo} … … 49 49 50 50 \subsection{Context-Switching} 51 The first interesting benchmark is to measure how long context-switches take. The simplest approach to do this is to yield on a thread, which executes a 2-step context switch. In order to make the comparison fair, coroutines also execute a 2-step context-switch (\gls{uthread} to \gls{kthread} then \gls{kthread} to \gls{uthread}), which is a resume/suspend cycle instead of a yield. Listing \ref{lst:ctx-switch} shows the code for coroutines and threads with the results in table \ref{tab:ctx-switch}. All omitted tests are functionally identical to one of these tests.51 The first interesting benchmark is to measure how long context-switches take. The simplest approach to do this is to yield on a thread, which executes a 2-step context switch. Yielding causes the thread to context-switch to the scheduler and back, more precisely: from the \gls{uthread} to the \gls{kthread} then from the \gls{kthread} back to the same \gls{uthread} (or a different one in the general case). In order to make the comparison fair, coroutines also execute a 2-step context-switch by resuming another coroutine which does nothing but suspending in a tight loop, which is a resume/suspend cycle instead of a yield. Listing \ref{lst:ctx-switch} shows the code for coroutines and threads with the results in table \ref{tab:ctx-switch}. All omitted tests are functionally identical to one of these tests. The difference between coroutines and threads can be attributed to the cost of scheduling. 52 52 \begin{figure} 53 53 \begin{multicols}{2} … … 115 115 116 116 \subsection{Mutual-Exclusion} 117 The next interesting benchmark is to measure the overhead to enter/leave a critical-section. For monitors, the simplest approach is to measure how long it takes to enter and leave a monitor routine. Listing \ref{lst:mutex} shows the code for \CFA. To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a pthread mutexlock is also measured. The results can be shown in table \ref{tab:mutex}.117 The next interesting benchmark is to measure the overhead to enter/leave a critical-section. For monitors, the simplest approach is to measure how long it takes to enter and leave a monitor routine. Listing \ref{lst:mutex} shows the code for \CFA. To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a \code{pthread_mutex} lock is also measured. The results can be shown in table \ref{tab:mutex}. 118 118 119 119 \begin{figure} … … 199 199 \multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\ 200 200 \hline 201 Pthreads Condition Variable & 5902.5 & 6093.29 & 714.78 \\ 201 202 \uC \code{signal} & 322 & 323 & 3.36 \\ 202 203 \CFA \code{signal}, 1 \code{monitor} & 352.5 & 353.11 & 3.66 \\ … … 265 266 266 267 \subsection{Object Creation} 267 Finally, the last benchmark measures the cost of creation for concurrent objects. Listing \ref{lst:creation} shows the code for pthreads and \CFA threads, with results shown in table \ref{tab:creation}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests. The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine, the creation cost is very low.268 269 \begin{figure} 270 \begin{center} 271 pthread 268 Finally, the last benchmark measures the cost of creation for concurrent objects. Listing \ref{lst:creation} shows the code for \texttt{pthread}s and \CFA threads, with results shown in table \ref{tab:creation}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests. The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine, the creation cost is very low. 269 270 \begin{figure} 271 \begin{center} 272 \texttt{pthread} 272 273 \begin{ccode} 273 274 int main() { … … 306 307 \end{cfacode} 307 308 \end{center} 308 \begin{cfacode}[caption={Benchmark code for pthreads and \CFA to measure object creation},label={lst:creation}]309 \begin{cfacode}[caption={Benchmark code for \texttt{pthread}s and \CFA to measure object creation},label={lst:creation}] 309 310 \end{cfacode} 310 311 \end{figure} -
doc/proposals/concurrency/text/together.tex
r326cd2b r9cdfb4d0 7 7 8 8 \section{Threads As Monitors} 9 As it was subtly alluded in section \ref{threads}, \code{thread}s in \CFA are in fact monitors, which means that all monitor features are available when using threads. For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine 9 As it was subtly alluded in section \ref{threads}, \code{thread}s in \CFA are in fact monitors, which means that all monitor features are available when using threads. For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine: 10 10 \begin{figure}[H] 11 11 \begin{cfacode}[caption={Toy simulator using \code{thread}s and \code{monitor}s.},label={lst:engine-v1}] … … 38 38 \end{cfacode} 39 39 \end{figure} 40 One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner 40 One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner: 41 41 \begin{figure}[H] 42 42 \begin{cfacode}[caption={Same toy simulator with proper termination condition.},label={lst:engine-v2}] -
doc/proposals/concurrency/thesis.tex
r326cd2b r9cdfb4d0 23 23 \usepackage{calc} 24 24 \usepackage{xspace} 25 \usepackage[labelformat=simple]{subfig} 26 \renewcommand{\thesubfigure}{(\alph{subfigure})} 25 27 \usepackage{graphicx} 26 28 \usepackage{tabularx} … … 34 36 \usepackage[usenames]{color} 35 37 \usepackage[pagewise]{lineno} 38 \renewcommand{\linenumberfont}{\scriptsize\sffamily} 36 39 \usepackage{fancyhdr} 37 40 \usepackage{float} 38 \renewcommand{\linenumberfont}{\scriptsize\sffamily}39 41 \usepackage{siunitx} 40 42 \sisetup{ binary-units=true } 41 43 \input{style} % bespoke macros used in the document 44 \usepackage{url} 42 45 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref} 43 46 \usepackage{breakurl} 47 \urlstyle{rm} 44 48 45 49 \usepackage{tikz} 46 50 \def\checkmark{\tikz\fill[scale=0.4](0,.35) -- (.25,0) -- (1,.7) -- (.25,.15) -- cycle;} 47 48 \renewcommand{\UrlFont}{\small\sf}49 51 50 52 \setlength{\topmargin}{-0.45in} % move running title into header … … 123 125 \input{future} 124 126 125 \clearpage126 \printglossary[type=\acronymtype]127 \printglossary128 127 129 128 \clearpage 129 130 % B I B L I O G R A P H Y 131 % ----------------------------- 132 \addcontentsline{toc}{chapter}{Bibliography} 130 133 \bibliographystyle{plain} 131 \bibliography{cw92,distSharedMem,lfp92,mlw92,parallel,parallelIO,partheory,pl,pldi92,ps,realtime,techreportsPAB,visual,local} 134 \bibliography{pl,local} 135 \cleardoublepage 136 \phantomsection % allows hyperref to link to the correct page 137 138 % G L O S S A R Y 139 % ----------------------------- 140 \addcontentsline{toc}{chapter}{Glossary} 141 \printglossary 142 \cleardoublepage 143 \phantomsection % allows hyperref to link to the correct page 132 144 133 145 -
doc/proposals/concurrency/version
r326cd2b r9cdfb4d0 1 0.11. 2991 0.11.403 -
src/CodeGen/FixMain.cc
r326cd2b r9cdfb4d0 23 23 24 24 #include "Common/SemanticError.h" // for SemanticError 25 #include "CodeGen/GenType.h" // for GenType 25 26 #include "SynTree/Declaration.h" // for FunctionDecl, operator<< 26 27 #include "SynTree/Type.h" // for FunctionType … … 29 30 bool FixMain::replace_main = false; 30 31 std::unique_ptr<FunctionDecl> FixMain::main_signature = nullptr; 32 33 template<typename container> 34 std::string genTypeAt(const container& p, size_t idx) { 35 return genType((*std::next(p.begin(), idx))->get_type(), ""); 36 } 31 37 32 38 void FixMain::registerMain(FunctionDecl* functionDecl) … … 43 49 44 50 os << main_signature->get_scopedMangleName() << "("; 45 switch(main_signature->get_functionType()->get_parameters().size()) { 46 case 3: os << "argc, argv, envp"; break; 47 case 2: os << "argc, argv"; break; 51 const auto& params = main_signature->get_functionType()->get_parameters(); 52 switch(params.size()) { 53 case 3: os << "(" << genTypeAt(params, 0) << ")argc, (" << genTypeAt(params, 1) << ")argv, (" << genTypeAt(params, 2) << ")envp"; break; 54 case 2: os << "(" << genTypeAt(params, 0) << ")argc, (" << genTypeAt(params, 1) << ")argv"; break; 48 55 case 0: break; 49 56 default : assert(false); -
src/CodeTools/TrackLoc.cc
r326cd2b r9cdfb4d0 64 64 } 65 65 else { 66 assertf( false, "Top level node has no CodeLocation %s", name.c_str() );66 assertf( false, "Top level node has no CodeLocation %s", toString( node ).c_str() ); 67 67 } 68 68 } -
src/Common/PassVisitor.h
r326cd2b r9cdfb4d0 19 19 #include "SynTree/Expression.h" 20 20 #include "SynTree/Constant.h" 21 #include "SynTree/TypeSubstitution.h" 21 22 class TypeSubstitution; 22 23 23 24 #include "PassVisitor.proto.h" … … 403 404 }; 404 405 406 #include "SynTree/TypeSubstitution.h" 405 407 #include "PassVisitor.impl.h" -
src/Common/PassVisitor.impl.h
r326cd2b r9cdfb4d0 62 62 63 63 template< typename pass_type > 64 staticinline void acceptAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& visitor ) {64 inline void acceptAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& visitor ) { 65 65 DeclList_t* beforeDecls = visitor.get_beforeDecls(); 66 66 DeclList_t* afterDecls = visitor.get_afterDecls(); … … 90 90 91 91 template< typename pass_type > 92 staticinline void mutateAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& mutator ) {92 inline void mutateAll( std::list< Declaration* > &decls, PassVisitor< pass_type >& mutator ) { 93 93 DeclList_t* beforeDecls = mutator.get_beforeDecls(); 94 94 DeclList_t* afterDecls = mutator.get_afterDecls(); -
src/Concurrency/Keywords.cc
r326cd2b r9cdfb4d0 257 257 // Generic keyword implementation 258 258 //============================================================================================= 259 void fixupGenerics(FunctionType * func, StructDecl * decl) { 260 cloneAll(decl->parameters, func->forall); 261 for ( TypeDecl * td : func->forall ) { 262 strict_dynamic_cast<StructInstType*>( 263 func->parameters.front()->get_type()->stripReferences() 264 )->parameters.push_back( 265 new TypeExpr( new TypeInstType( noQualifiers, td->name, td ) ) 266 ); 267 } 268 } 269 259 270 void ConcurrentSueKeyword::postvisit(StructDecl * decl) { 260 271 if( decl->name == type_name && decl->body ) { … … 301 312 ); 302 313 303 get_type->get_parameters().push_back( this_decl );314 get_type->get_parameters().push_back( this_decl->clone() ); 304 315 get_type->get_returnVals().push_back( 305 316 new ObjectDecl( … … 318 329 ) 319 330 ); 331 fixupGenerics(get_type, decl); 320 332 321 333 FunctionDecl * get_decl = new FunctionDecl( … … 343 355 nullptr 344 356 ); 345 } 357 fixupGenerics(main_type, decl); 358 } 359 360 delete this_decl; 346 361 347 362 declsToAddBefore.push_back( forward ); … … 377 392 new MemberExpr( 378 393 field, 379 UntypedExpr::createDeref( new VariableExpr( func->get_functionType()->get_parameters().front() ) ) 394 new CastExpr( 395 new VariableExpr( func->get_functionType()->get_parameters().front() ), 396 func->get_functionType()->get_parameters().front()->get_type()->stripReferences()->clone() 397 ) 380 398 ) 381 399 ) -
src/ControlStruct/LabelFixer.cc
r326cd2b r9cdfb4d0 44 44 45 45 void LabelFixer::postvisit( FunctionDecl * functionDecl ) { 46 MLEMutatormlemut( resolveJumps(), generator );46 PassVisitor<MLEMutator> mlemut( resolveJumps(), generator ); 47 47 functionDecl->acceptMutator( mlemut ); 48 48 } -
src/ControlStruct/MLEMutator.cc
r326cd2b r9cdfb4d0 46 46 void MLEMutator::fixBlock( std::list< Statement * > &kids ) { 47 47 for ( std::list< Statement * >::iterator k = kids.begin(); k != kids.end(); k++ ) { 48 *k = (*k)->acceptMutator(* this);48 *k = (*k)->acceptMutator(*visitor); 49 49 50 50 if ( ! get_breakLabel().empty() ) { … … 57 57 } 58 58 59 CompoundStmt* MLEMutator::mutate( CompoundStmt *cmpndStmt ) { 60 bool labeledBlock = !(cmpndStmt->get_labels().empty()); 59 void MLEMutator::premutate( CompoundStmt *cmpndStmt ) { 60 visit_children = false; 61 bool labeledBlock = !(cmpndStmt->labels.empty()); 61 62 if ( labeledBlock ) { 62 63 Label brkLabel = generator->newLabel("blockBreak", cmpndStmt); … … 65 66 66 67 // a child statement may set the break label - if they do, attach it to the next statement 67 std::list< Statement * > &kids = cmpndStmt-> get_kids();68 std::list< Statement * > &kids = cmpndStmt->kids; 68 69 fixBlock( kids ); 69 70 … … 75 76 enclosingControlStructures.pop_back(); 76 77 } // if 77 78 return cmpndStmt; 79 } 80 81 template< typename LoopClass > 82 Statement *MLEMutator::handleLoopStmt( LoopClass *loopStmt ) { 83 // remember this as the most recent enclosing loop, then mutate the body of the loop -- this will determine 84 // whether brkLabel and contLabel are used with branch statements and will recursively do the same to nested 85 // loops 86 Label brkLabel = generator->newLabel("loopBreak", loopStmt); 87 Label contLabel = generator->newLabel("loopContinue", loopStmt); 88 enclosingControlStructures.push_back( Entry( loopStmt, brkLabel, contLabel ) ); 89 loopStmt->set_body ( loopStmt->get_body()->acceptMutator( *this ) ); 90 91 assert( ! enclosingControlStructures.empty() ); 92 Entry &e = enclosingControlStructures.back(); 93 // sanity check that the enclosing loops have been popped correctly 94 assert ( e == loopStmt ); 95 96 // this will take the necessary steps to add definitions of the previous two labels, if they are used. 97 loopStmt->set_body( mutateLoop( loopStmt->get_body(), e ) ); 98 enclosingControlStructures.pop_back(); 99 100 return loopStmt; 101 } 102 103 Statement *MLEMutator::mutate( CaseStmt *caseStmt ) { 104 caseStmt->set_condition( maybeMutate( caseStmt->get_condition(), *this ) ); 105 fixBlock( caseStmt->get_statements() ); 106 107 return caseStmt; 108 } 109 110 template< typename IfClass > 111 Statement *MLEMutator::handleIfStmt( IfClass *ifStmt ) { 112 // generate a label for breaking out of a labeled if 113 bool labeledBlock = !(ifStmt->get_labels().empty()); 114 if ( labeledBlock ) { 115 Label brkLabel = generator->newLabel("blockBreak", ifStmt); 116 enclosingControlStructures.push_back( Entry( ifStmt, brkLabel ) ); 117 } // if 118 119 Parent::mutate( ifStmt ); 120 121 if ( labeledBlock ) { 122 if ( ! enclosingControlStructures.back().useBreakExit().empty() ) { 123 set_breakLabel( enclosingControlStructures.back().useBreakExit() ); 124 } // if 125 enclosingControlStructures.pop_back(); 126 } // if 127 return ifStmt; 128 } 129 130 template< typename SwitchClass > 131 Statement *MLEMutator::handleSwitchStmt( SwitchClass *switchStmt ) { 132 // generate a label for breaking out of a labeled switch 133 Label brkLabel = generator->newLabel("switchBreak", switchStmt); 134 enclosingControlStructures.push_back( Entry(switchStmt, brkLabel) ); 135 mutateAll( switchStmt->get_statements(), *this ); 136 137 Entry &e = enclosingControlStructures.back(); 138 assert ( e == switchStmt ); 139 140 // only generate break label if labeled break is used 141 if ( e.isBreakUsed() ) { 142 // for the purposes of keeping switch statements uniform (i.e. all statements that are direct children of a 143 // switch should be CastStmts), append the exit label + break to the last case statement; create a default 144 // case if there are no cases 145 std::list< Statement * > &statements = switchStmt->get_statements(); 146 if ( statements.empty() ) { 147 statements.push_back( CaseStmt::makeDefault() ); 148 } // if 149 150 if ( CaseStmt * c = dynamic_cast< CaseStmt * >( statements.back() ) ) { 151 Statement * stmt = new BranchStmt( Label("brkLabel"), BranchStmt::Break ); 152 stmt->labels.push_back( brkLabel ); 153 c->get_statements().push_back( stmt ); 154 } else assert(0); // as of this point, all statements of a switch are still CaseStmts 155 } // if 156 157 assert ( enclosingControlStructures.back() == switchStmt ); 158 enclosingControlStructures.pop_back(); 159 return switchStmt; 160 } 78 } 79 161 80 162 81 void addUnused( Statement * stmt, const Label & originalTarget ) { … … 179 98 180 99 181 Statement *MLEMutator:: mutate( BranchStmt *branchStmt ) throw ( SemanticError ) {182 std::string originalTarget = branchStmt-> get_originalTarget();100 Statement *MLEMutator::postmutate( BranchStmt *branchStmt ) throw ( SemanticError ) { 101 std::string originalTarget = branchStmt->originalTarget; 183 102 184 103 std::list< Entry >::reverse_iterator targetEntry; … … 215 134 // branch error checks, get the appropriate label name and create a goto 216 135 Label exitLabel; 217 switch ( branchStmt-> get_type()) {136 switch ( branchStmt->type ) { 218 137 case BranchStmt::Break: 219 138 assert( targetEntry->useBreakExit() != ""); … … 229 148 230 149 // add unused attribute to label to silence warnings 231 addUnused( targetEntry->get_controlStructure(), branchStmt-> get_originalTarget());150 addUnused( targetEntry->get_controlStructure(), branchStmt->originalTarget ); 232 151 233 152 // transform break/continue statements into goto to simplify later handling of branches … … 260 179 } 261 180 262 Statement *MLEMutator::mutate( WhileStmt *whileStmt ) { 263 return handleLoopStmt( whileStmt ); 264 } 265 266 Statement *MLEMutator::mutate( ForStmt *forStmt ) { 267 return handleLoopStmt( forStmt ); 268 } 269 270 Statement *MLEMutator::mutate( IfStmt *ifStmt ) { 271 return handleIfStmt( ifStmt ); 272 } 273 274 Statement *MLEMutator::mutate( SwitchStmt *switchStmt ) { 275 return handleSwitchStmt( switchStmt ); 181 template< typename LoopClass > 182 void MLEMutator::prehandleLoopStmt( LoopClass * loopStmt ) { 183 // remember this as the most recent enclosing loop, then mutate the body of the loop -- this will determine 184 // whether brkLabel and contLabel are used with branch statements and will recursively do the same to nested 185 // loops 186 Label brkLabel = generator->newLabel("loopBreak", loopStmt); 187 Label contLabel = generator->newLabel("loopContinue", loopStmt); 188 enclosingControlStructures.push_back( Entry( loopStmt, brkLabel, contLabel ) ); 189 } 190 191 template< typename LoopClass > 192 Statement * MLEMutator::posthandleLoopStmt( LoopClass * loopStmt ) { 193 assert( ! enclosingControlStructures.empty() ); 194 Entry &e = enclosingControlStructures.back(); 195 // sanity check that the enclosing loops have been popped correctly 196 assert ( e == loopStmt ); 197 198 // this will take the necessary steps to add definitions of the previous two labels, if they are used. 199 loopStmt->set_body( mutateLoop( loopStmt->get_body(), e ) ); 200 enclosingControlStructures.pop_back(); 201 return loopStmt; 202 } 203 204 void MLEMutator::premutate( WhileStmt * whileStmt ) { 205 return prehandleLoopStmt( whileStmt ); 206 } 207 208 void MLEMutator::premutate( ForStmt * forStmt ) { 209 return prehandleLoopStmt( forStmt ); 210 } 211 212 Statement * MLEMutator::postmutate( WhileStmt * whileStmt ) { 213 return posthandleLoopStmt( whileStmt ); 214 } 215 216 Statement * MLEMutator::postmutate( ForStmt * forStmt ) { 217 return posthandleLoopStmt( forStmt ); 218 } 219 220 void MLEMutator::premutate( IfStmt * ifStmt ) { 221 // generate a label for breaking out of a labeled if 222 bool labeledBlock = !(ifStmt->get_labels().empty()); 223 if ( labeledBlock ) { 224 Label brkLabel = generator->newLabel("blockBreak", ifStmt); 225 enclosingControlStructures.push_back( Entry( ifStmt, brkLabel ) ); 226 } // if 227 } 228 229 Statement * MLEMutator::postmutate( IfStmt * ifStmt ) { 230 bool labeledBlock = !(ifStmt->get_labels().empty()); 231 if ( labeledBlock ) { 232 if ( ! enclosingControlStructures.back().useBreakExit().empty() ) { 233 set_breakLabel( enclosingControlStructures.back().useBreakExit() ); 234 } // if 235 enclosingControlStructures.pop_back(); 236 } // if 237 return ifStmt; 238 } 239 240 void MLEMutator::premutate( CaseStmt *caseStmt ) { 241 visit_children = false; 242 caseStmt->condition = maybeMutate( caseStmt->condition, *visitor ); 243 fixBlock( caseStmt->stmts ); 244 } 245 246 void MLEMutator::premutate( SwitchStmt *switchStmt ) { 247 // generate a label for breaking out of a labeled switch 248 Label brkLabel = generator->newLabel("switchBreak", switchStmt); 249 enclosingControlStructures.push_back( Entry(switchStmt, brkLabel) ); 250 } 251 252 Statement * MLEMutator::postmutate( SwitchStmt * switchStmt ) { 253 Entry &e = enclosingControlStructures.back(); 254 assert ( e == switchStmt ); 255 256 // only generate break label if labeled break is used 257 if ( e.isBreakUsed() ) { 258 // for the purposes of keeping switch statements uniform (i.e. all statements that are direct children of a 259 // switch should be CastStmts), append the exit label + break to the last case statement; create a default 260 // case if there are no cases 261 std::list< Statement * > &statements = switchStmt->statements; 262 if ( statements.empty() ) { 263 statements.push_back( CaseStmt::makeDefault() ); 264 } // if 265 266 if ( CaseStmt * c = dynamic_cast< CaseStmt * >( statements.back() ) ) { 267 Statement * stmt = new BranchStmt( Label("brkLabel"), BranchStmt::Break ); 268 stmt->labels.push_back( e.useBreakExit() ); 269 c->stmts.push_back( stmt ); 270 } else assert(0); // as of this point, all statements of a switch are still CaseStmts 271 } // if 272 273 assert ( enclosingControlStructures.back() == switchStmt ); 274 enclosingControlStructures.pop_back(); 275 return switchStmt; 276 276 } 277 277 } // namespace ControlStruct -
src/ControlStruct/MLEMutator.h
r326cd2b r9cdfb4d0 20 20 #include <string> // for string 21 21 22 #include "Common/PassVisitor.h" 22 23 #include "Common/SemanticError.h" // for SemanticError 23 24 #include "SynTree/Label.h" // for Label … … 26 27 27 28 namespace ControlStruct { 28 class LabelGenerator;29 class LabelGenerator; 29 30 30 class MLEMutator : public Mutator{31 class MLEMutator : public WithVisitorRef<MLEMutator>, public WithShortCircuiting { 31 32 class Entry; 32 33 33 typedef Mutator Parent;34 34 public: 35 35 MLEMutator( std::map<Label, Statement *> *t, LabelGenerator *gen = 0 ) : targetTable( t ), breakLabel(std::string("")), generator( gen ) {} 36 36 ~MLEMutator(); 37 37 38 virtual CompoundStmt *mutate( CompoundStmt *cmpndStmt ) override; 39 virtual Statement *mutate( WhileStmt *whileStmt ) override; 40 virtual Statement *mutate( ForStmt *forStmt ) override; 41 virtual Statement *mutate( BranchStmt *branchStmt ) throw ( SemanticError ) override; 42 virtual Statement *mutate( CaseStmt *caseStmt ) override; 43 virtual Statement *mutate( IfStmt *ifStmt ) override; 44 virtual Statement *mutate( SwitchStmt *switchStmt ) override; 38 void premutate( CompoundStmt *cmpndStmt ); 39 Statement * postmutate( BranchStmt *branchStmt ) throw ( SemanticError ); 40 void premutate( WhileStmt *whileStmt ); 41 Statement * postmutate( WhileStmt *whileStmt ); 42 void premutate( ForStmt *forStmt ); 43 Statement * postmutate( ForStmt *forStmt ); 44 void premutate( CaseStmt *caseStmt ); 45 void premutate( IfStmt *ifStmt ); 46 Statement * postmutate( IfStmt *ifStmt ); 47 void premutate( SwitchStmt *switchStmt ); 48 Statement * postmutate( SwitchStmt *switchStmt ); 45 49 46 50 Statement *mutateLoop( Statement *bodyLoop, Entry &e ); … … 54 58 loop( _loop ), breakExit( _breakExit ), contExit( _contExit ), breakUsed(false), contUsed(false) {} 55 59 56 bool operator==( const Statement *stmt ) { return ( loop == stmt ); }57 bool operator!=( const Statement *stmt ) { return ( loop != stmt ); }60 bool operator==( const Statement *stmt ) { return loop == stmt; } 61 bool operator!=( const Statement *stmt ) { return loop != stmt; } 58 62 59 bool operator==( const Entry &other ) { return ( loop == other.get_controlStructure()); }63 bool operator==( const Entry &other ) { return loop == other.get_controlStructure(); } 60 64 61 65 Statement *get_controlStructure() const { return loop; } … … 78 82 79 83 template< typename LoopClass > 80 Statement *handleLoopStmt( LoopClass *loopStmt );84 void prehandleLoopStmt( LoopClass * loopStmt ); 81 85 82 template< typename IfClass > 83 Statement *handleIfStmt( IfClass *switchStmt ); 84 85 template< typename SwitchClass > 86 Statement *handleSwitchStmt( SwitchClass *switchStmt ); 86 template< typename LoopClass > 87 Statement * posthandleLoopStmt( LoopClass * loopStmt ); 87 88 88 89 void fixBlock( std::list< Statement * > &kids ); -
src/GenPoly/Box.cc
r326cd2b r9cdfb4d0 302 302 Expression *makeOp( const std::string &name, Expression *arg ) { 303 303 UntypedExpr *expr = new UntypedExpr( new NameExpr( name ) ); 304 expr-> get_args().push_back( arg );304 expr->args.push_back( arg ); 305 305 return expr; 306 306 } … … 309 309 Expression *makeOp( const std::string &name, Expression *lhs, Expression *rhs ) { 310 310 UntypedExpr *expr = new UntypedExpr( new NameExpr( name ) ); 311 expr-> get_args().push_back( lhs );312 expr-> get_args().push_back( rhs );311 expr->args.push_back( lhs ); 312 expr->args.push_back( rhs ); 313 313 return expr; 314 314 } … … 316 316 /// Returns the dereference of a local pointer variable 317 317 Expression *derefVar( ObjectDecl *var ) { 318 return makeOp( "*?",new VariableExpr( var ) );318 return UntypedExpr::createDeref( new VariableExpr( var ) ); 319 319 } 320 320 … … 831 831 if ( ! isPolyType( arg->get_type() ) ) { 832 832 UntypedExpr *deref = new UntypedExpr( new NameExpr( "*?" ) ); 833 deref->get_args().push_back( new CastExpr( new VariableExpr( param ), new PointerType( Type::Qualifiers(), arg->get_type()->clone() ) ) ); 834 deref->set_result( arg->get_type()->clone() ); 833 deref->args.push_back( new CastExpr( new VariableExpr( param ), new PointerType( Type::Qualifiers(), arg->get_type()->clone() ) ) ); 834 deref->result = arg->get_type()->clone(); 835 deref->result->set_lvalue( true ); 835 836 return deref; 836 837 } // if -
src/GenPoly/Lvalue.cc
r326cd2b r9cdfb4d0 47 47 if ( SymTab::dereferenceOperator ) { 48 48 VariableExpr * deref = new VariableExpr( SymTab::dereferenceOperator ); 49 deref-> set_result( new PointerType( Type::Qualifiers(), deref->get_result() ));50 Type * base = InitTweak::getPointerBase( arg-> get_result());51 assertf( base, "expected pointer type in dereference (type was %s)", toString( arg-> get_result()).c_str() );49 deref->result = new PointerType( Type::Qualifiers(), deref->result ); 50 Type * base = InitTweak::getPointerBase( arg->result ); 51 assertf( base, "expected pointer type in dereference (type was %s)", toString( arg->result ).c_str() ); 52 52 ApplicationExpr * ret = new ApplicationExpr( deref, { arg } ); 53 delete ret-> get_result();54 ret-> set_result( base->clone());55 ret-> get_result()->set_lvalue( true );53 delete ret->result; 54 ret->result = base->clone(); 55 ret->result->set_lvalue( true ); 56 56 return ret; 57 57 } else { … … 308 308 int diff = depth1-depth2; 309 309 if ( diff == 0 ) { 310 // conversion between references of the same depth 310 311 assertf( depth1 == depth2, "non-intrinsic reference with cast of reference to reference not yet supported: %d %d %s", depth1, depth2, toString( castExpr ).c_str() ); 311 312 PRINT( std::cerr << castExpr << std::endl; ) 312 313 return castExpr; 313 314 } else if ( diff < 0 ) { 314 Expression * ret = castExpr->get_arg(); 315 // conversion from reference to reference with less depth (e.g. int && -> int &): add dereferences 316 Expression * ret = castExpr->arg; 315 317 for ( int i = 0; i < diff; ++i ) { 316 318 ret = mkDeref( ret ); 317 319 } 318 ret->set_env( castExpr->get_env() ); 319 delete ret->get_result(); 320 ret->set_result( castExpr->get_result() ); 321 castExpr->set_env( nullptr ); 322 castExpr->set_arg( nullptr ); 323 castExpr->set_result( nullptr ); 320 ret->env = castExpr->env; 321 delete ret->result; 322 ret->result = castExpr->result; 323 ret->result->set_lvalue( true ); // ensure result is lvalue 324 castExpr->env = nullptr; 325 castExpr->arg = nullptr; 326 castExpr->result = nullptr; 324 327 delete castExpr; 325 328 return ret; 326 329 } else if ( diff > 0 ) { 327 Expression * ret = castExpr->get_arg(); 330 // conversion from reference to reference with more depth (e.g. int & -> int &&): add address-of 331 Expression * ret = castExpr->arg; 328 332 for ( int i = 0; i < diff; ++i ) { 329 333 ret = new AddressExpr( ret ); 330 334 } 331 ret-> set_env( castExpr->get_env() );332 delete ret-> get_result();333 ret-> set_result( castExpr->get_result() );334 castExpr-> set_env( nullptr );335 castExpr-> set_arg( nullptr );336 castExpr-> set_result( nullptr );335 ret->env = castExpr->env; 336 delete ret->result; 337 ret->result = castExpr->result; 338 castExpr->env = nullptr; 339 castExpr->arg = nullptr; 340 castExpr->result = nullptr; 337 341 delete castExpr; 338 342 return ret; … … 342 346 PRINT( std::cerr << castExpr << std::endl; ) 343 347 return castExpr; 344 } else if ( castExpr-> get_arg()->get_result()->get_lvalue() ) {348 } else if ( castExpr->arg->result->get_lvalue() ) { 345 349 // conversion from lvalue to reference 346 350 // xxx - keep cast, but turn into pointer cast?? … … 348 352 PRINT( 349 353 std::cerr << "convert lvalue to reference -- &" << std::endl; 350 std::cerr << castExpr-> get_arg()<< std::endl;354 std::cerr << castExpr->arg << std::endl; 351 355 ) 352 AddressExpr * ret = new AddressExpr( castExpr-> get_arg());353 if ( refType-> get_base()->get_qualifiers() != castExpr->get_arg()->get_result()->get_qualifiers() ) {356 AddressExpr * ret = new AddressExpr( castExpr->arg ); 357 if ( refType->base->get_qualifiers() != castExpr->arg->result->get_qualifiers() ) { 354 358 // must keep cast if cast-to type is different from the actual type 355 castExpr-> set_arg( ret );359 castExpr->arg = ret; 356 360 return castExpr; 357 361 } 358 ret-> set_env( castExpr->get_env() );359 delete ret-> get_result();360 ret-> set_result( castExpr->get_result() );361 castExpr-> set_env( nullptr );362 castExpr-> set_arg( nullptr );363 castExpr-> set_result( nullptr );362 ret->env = castExpr->env; 363 delete ret->result; 364 ret->result = castExpr->result; 365 castExpr->env = nullptr; 366 castExpr->arg = nullptr; 367 castExpr->result = nullptr; 364 368 delete castExpr; 365 369 return ret; … … 368 372 } 369 373 assertf( false, "Only conversions to reference from lvalue are currently supported: %s", toString( castExpr ).c_str() ); 370 } else if ( ReferenceType * refType = dynamic_cast< ReferenceType * >( castExpr-> get_arg()->get_result()) ) {374 } else if ( ReferenceType * refType = dynamic_cast< ReferenceType * >( castExpr->arg->result ) ) { 371 375 (void)refType; 372 376 // conversion from reference to rvalue … … 375 379 std::cerr << "was = " << castExpr << std::endl; 376 380 ) 377 Expression * ret = castExpr-> get_arg();378 TypeSubstitution * env = castExpr-> get_env();381 Expression * ret = castExpr->arg; 382 TypeSubstitution * env = castExpr->env; 379 383 castExpr->set_env( nullptr ); 380 384 if ( ! isIntrinsicReference( ret ) ) { … … 382 386 ret = mkDeref( ret ); 383 387 } 384 if ( ResolvExpr::typesCompatibleIgnoreQualifiers( castExpr-> get_result(), castExpr->get_arg()->get_result()->stripReferences(), SymTab::Indexer() ) ) {388 if ( ResolvExpr::typesCompatibleIgnoreQualifiers( castExpr->result, castExpr->arg->result->stripReferences(), SymTab::Indexer() ) ) { 385 389 // can remove cast if types are compatible, changing expression type to value type 386 ret->set_result( castExpr->get_result()->clone() ); 387 castExpr->set_arg( nullptr ); 390 ret->result = castExpr->result->clone(); 391 ret->result->set_lvalue( true ); // ensure result is lvalue 392 castExpr->arg = nullptr; 388 393 delete castExpr; 389 394 } else { 390 395 // must keep cast if types are different 391 castExpr-> set_arg( ret );396 castExpr->arg = ret; 392 397 ret = castExpr; 393 398 } -
src/GenPoly/ScrubTyVars.cc
r326cd2b r9cdfb4d0 25 25 26 26 namespace GenPoly { 27 Type * ScrubTyVars:: mutate( TypeInstType *typeInst ) {27 Type * ScrubTyVars::postmutate( TypeInstType * typeInst ) { 28 28 if ( ! tyVars ) { 29 29 if ( typeInst->get_isFtype() ) { … … 31 31 return new PointerType( Type::Qualifiers(), new FunctionType( Type::Qualifiers(), true ) ); 32 32 } else { 33 PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) );33 PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) ); 34 34 delete typeInst; 35 35 return ret; … … 37 37 } 38 38 39 TyVarMap::const_iterator tyVar = tyVars->find( typeInst-> get_name());39 TyVarMap::const_iterator tyVar = tyVars->find( typeInst->name ); 40 40 if ( tyVar != tyVars->end() ) { 41 41 switch ( tyVar->second.kind ) { … … 43 43 case TypeDecl::Ttype: 44 44 { 45 PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) );45 PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( typeInst->get_qualifiers() ) ); 46 46 delete typeInst; 47 47 return ret; … … 55 55 } 56 56 57 Type * ScrubTyVars::mutateAggregateType( Type * ty ) {57 Type * ScrubTyVars::mutateAggregateType( Type * ty ) { 58 58 if ( shouldScrub( ty ) ) { 59 PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( ty->get_qualifiers() ) );59 PointerType * ret = new PointerType( Type::Qualifiers(), new VoidType( ty->get_qualifiers() ) ); 60 60 delete ty; 61 61 return ret; … … 64 64 } 65 65 66 Type * ScrubTyVars:: mutate( StructInstType *structInst ) {66 Type * ScrubTyVars::postmutate( StructInstType * structInst ) { 67 67 return mutateAggregateType( structInst ); 68 68 } 69 69 70 Type * ScrubTyVars:: mutate( UnionInstType *unionInst ) {70 Type * ScrubTyVars::postmutate( UnionInstType * unionInst ) { 71 71 return mutateAggregateType( unionInst ); 72 72 } 73 73 74 Expression * ScrubTyVars::mutate( SizeofExpr *szeof ) { 74 void ScrubTyVars::primeBaseScrub( Type * type ) { 75 // need to determine whether type needs to be scrubbed to determine whether 76 // automatic recursion is necessary 77 if ( Type * t = shouldScrub( type ) ) { 78 visit_children = false; 79 GuardValue( dynType ); 80 dynType = t; 81 } 82 } 83 84 Expression * ScrubTyVars::postmutate( SizeofExpr * szeof ) { 75 85 // sizeof( T ) => _sizeof_T parameter, which is the size of T 76 if ( Type *dynType = shouldScrub( szeof->get_type() )) {86 if ( dynType ) { 77 87 Expression *expr = new NameExpr( sizeofName( mangleType( dynType ) ) ); 78 88 return expr; 79 } else {80 return Mutator::mutate( szeof );81 89 } // if 90 return szeof; 82 91 } 83 92 84 Expression * ScrubTyVars:: mutate( AlignofExpr *algnof ) {93 Expression * ScrubTyVars::postmutate( AlignofExpr * algnof ) { 85 94 // alignof( T ) => _alignof_T parameter, which is the alignment of T 86 if ( Type *dynType = shouldScrub( algnof->get_type() )) {95 if ( dynType ) { 87 96 Expression *expr = new NameExpr( alignofName( mangleType( dynType ) ) ); 88 97 return expr; 89 } else {90 return Mutator::mutate( algnof );91 98 } // if 99 return algnof; 92 100 } 93 101 94 Type * ScrubTyVars::mutate( PointerType *pointer ) { 95 // // special case of shouldScrub that takes all TypeInstType pointer bases, even if they're not dynamic 96 // Type *base = pointer->get_base(); 97 // Type *dynType = 0; 98 // if ( dynamicOnly ) { 99 // if ( TypeInstType *typeInst = dynamic_cast< TypeInstType* >( base ) ) { 100 // if ( tyVars.find( typeInst->get_name() ) != tyVars.end() ) { dynType = typeInst; } 101 // } else { 102 // dynType = isDynType( base, tyVars ); 103 // } 104 // } else { 105 // dynType = isPolyType( base, tyVars ); 106 // } 107 // if ( dynType ) { 108 if ( Type *dynType = shouldScrub( pointer->get_base() ) ) { 109 Type *ret = dynType->acceptMutator( *this ); 102 Type * ScrubTyVars::postmutate( PointerType * pointer ) { 103 if ( dynType ) { 104 Type * ret = dynType->acceptMutator( *visitor ); 110 105 ret->get_qualifiers() |= pointer->get_qualifiers(); 111 pointer-> set_base( 0 );106 pointer->base = nullptr; 112 107 delete pointer; 113 108 return ret; 114 109 } 115 return Mutator::mutate( pointer );110 return pointer; 116 111 } 117 112 } // namespace GenPoly -
src/GenPoly/ScrubTyVars.h
r326cd2b r9cdfb4d0 18 18 #include <cassert> // for assert 19 19 20 #include "Common/PassVisitor.h" 20 21 #include "GenPoly.h" // for TyVarMap, isPolyType, isDynType 21 22 #include "SynTree/Mutator.h" // for Mutator … … 27 28 28 29 namespace GenPoly { 29 class ScrubTyVars : public Mutator{30 struct ScrubTyVars : public WithVisitorRef<ScrubTyVars>, public WithShortCircuiting, public WithGuards { 30 31 /// Whether to scrub all type variables from the provided map, dynamic type variables from the provided map, or all type variables 31 32 enum ScrubMode { FromMap, DynamicFromMap, All }; … … 51 52 static SynTreeClass *scrubAll( SynTreeClass *target ); 52 53 53 virtual Type* mutate( TypeInstType *typeInst ); 54 virtual Type* mutate( StructInstType *structInst ); 55 virtual Type* mutate( UnionInstType *unionInst ); 56 virtual Expression* mutate( SizeofExpr *szeof ); 57 virtual Expression* mutate( AlignofExpr *algnof ); 58 virtual Type* mutate( PointerType *pointer ); 54 /// determine if children should be visited based on whether base type should be scrubbed. 55 void primeBaseScrub( Type * ); 56 57 void premutate( TypeInstType * ) { visit_children = false; } 58 void premutate( StructInstType * ) { visit_children = false; } 59 void premutate( UnionInstType * ) { visit_children = false; } 60 void premutate( SizeofExpr * szeof ) { primeBaseScrub( szeof->type ); } 61 void premutate( AlignofExpr * algnof ) { primeBaseScrub( algnof->type ); } 62 void premutate( PointerType * pointer ) { primeBaseScrub( pointer->base ); } 63 64 Type * postmutate( TypeInstType * typeInst ); 65 Type * postmutate( StructInstType * structInst ); 66 Type * postmutate( UnionInstType * unionInst ); 67 Expression * postmutate( SizeofExpr * szeof ); 68 Expression * postmutate( AlignofExpr * algnof ); 69 Type * postmutate( PointerType * pointer ); 59 70 60 71 private: … … 75 86 const TyVarMap *tyVars; ///< Type variables to scrub 76 87 ScrubMode mode; ///< which type variables to scrub? [FromMap] 88 89 Type * dynType = nullptr; ///< result of shouldScrub 77 90 }; 78 91 79 92 template< typename SynTreeClass > 80 93 SynTreeClass * ScrubTyVars::scrub( SynTreeClass *target, const TyVarMap &tyVars ) { 81 ScrubTyVarsscrubber( tyVars );94 PassVisitor<ScrubTyVars> scrubber( tyVars ); 82 95 return static_cast< SynTreeClass * >( target->acceptMutator( scrubber ) ); 83 96 } … … 85 98 template< typename SynTreeClass > 86 99 SynTreeClass * ScrubTyVars::scrubDynamic( SynTreeClass *target, const TyVarMap &tyVars ) { 87 ScrubTyVarsscrubber( tyVars, ScrubTyVars::DynamicFromMap );100 PassVisitor<ScrubTyVars> scrubber( tyVars, ScrubTyVars::DynamicFromMap ); 88 101 return static_cast< SynTreeClass * >( target->acceptMutator( scrubber ) ); 89 102 } … … 91 104 template< typename SynTreeClass > 92 105 SynTreeClass * ScrubTyVars::scrubAll( SynTreeClass *target ) { 93 ScrubTyVarsscrubber;106 PassVisitor<ScrubTyVars> scrubber; 94 107 return static_cast< SynTreeClass * >( target->acceptMutator( scrubber ) ); 95 108 } -
src/InitTweak/FixInit.cc
r326cd2b r9cdfb4d0 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // FixInit. h--7 // FixInit.cc -- 8 8 // 9 9 // Author : Rob Schluntz … … 197 197 }; 198 198 199 struct GenStructMemberCalls final : public WithGuards, public WithShortCircuiting, public WithIndexer {199 struct GenStructMemberCalls final : public WithGuards, public WithShortCircuiting, public WithIndexer, public WithVisitorRef<GenStructMemberCalls> { 200 200 /// generate default/copy ctor and dtor calls for user-defined struct ctor/dtors 201 201 /// for any member that is missing a corresponding ctor/dtor call. … … 203 203 static void generate( std::list< Declaration * > & translationUnit ); 204 204 205 void previsit( FunctionDecl * funcDecl ); 206 void postvisit( FunctionDecl * funcDecl ); 207 208 void previsit( MemberExpr * memberExpr ); 209 void previsit( ApplicationExpr * appExpr ); 205 void premutate( FunctionDecl * funcDecl ); 206 DeclarationWithType * postmutate( FunctionDecl * funcDecl ); 207 208 void premutate( MemberExpr * memberExpr ); 209 void premutate( ApplicationExpr * appExpr ); 210 211 /// Note: this post mutate used to be in a separate visitor. If this pass breaks, one place to examine is whether it is 212 /// okay for this part of the recursion to occur alongside the rest. 213 Expression * postmutate( UntypedExpr * expr ); 210 214 211 215 SemanticError errors; … … 220 224 bool isCtor = false; // true if current function is a constructor 221 225 StructDecl * structDecl = nullptr; 222 };223 224 // very simple resolver-like mutator class - used to225 // resolve UntypedExprs that are found within newly226 // generated constructor/destructor calls227 class MutatingResolver final : public Mutator {228 public:229 MutatingResolver( SymTab::Indexer & indexer ) : indexer( indexer ) {}230 231 using Mutator::mutate;232 virtual DeclarationWithType* mutate( ObjectDecl *objectDecl ) override;233 virtual Expression* mutate( UntypedExpr *untypedExpr ) override;234 235 private:236 SymTab::Indexer & indexer;237 226 }; 238 227 … … 315 304 void GenStructMemberCalls::generate( std::list< Declaration * > & translationUnit ) { 316 305 PassVisitor<GenStructMemberCalls> warner; 317 acceptAll( translationUnit, warner );306 mutateAll( translationUnit, warner ); 318 307 } 319 308 … … 365 354 // arrays are not copy constructed, so this should always be an ExprStmt 366 355 ImplicitCtorDtorStmt * stmt = genCtorDtor( fname, var, cpArg ); 367 ExprStmt * exprStmt = strict_dynamic_cast< ExprStmt * >( stmt->get_callStmt() ); 356 assertf( stmt, "ResolveCopyCtors: genCtorDtor returned nullptr: %s / %s / %s", fname.c_str(), toString( var ).c_str(), toString( cpArg ).c_str() ); 357 ExprStmt * exprStmt = strict_dynamic_cast< ExprStmt * >( stmt->callStmt ); 368 358 Expression * resolved = exprStmt->expr; 369 359 exprStmt->expr = nullptr; // take ownership of expr … … 382 372 } // if 383 373 delete stmt; 374 if ( TupleAssignExpr * assign = dynamic_cast< TupleAssignExpr * >( resolved ) ) { 375 // fix newly generated StmtExpr 376 postvisit( assign->stmtExpr ); 377 } 384 378 return resolved; 385 379 } … … 475 469 static UniqueName retNamer("_tmp_stmtexpr_ret"); 476 470 477 // create variable that will hold the result of the stmt expr478 471 result = result->clone(); 479 472 env->apply( result ); 473 if ( ! InitTweak::isConstructable( result ) ) { 474 delete result; 475 return; 476 } 477 478 // create variable that will hold the result of the stmt expr 480 479 ObjectDecl * ret = ObjectDecl::newObject( retNamer.newName(), result, nullptr ); 481 ret-> get_type()->set_const( false );482 stmtExpr-> get_returnDecls().push_front( ret );480 ret->type->set_const( false ); 481 stmtExpr->returnDecls.push_front( ret ); 483 482 484 483 // must have a non-empty body, otherwise it wouldn't have a result 485 CompoundStmt * body = stmtExpr-> get_statements();484 CompoundStmt * body = stmtExpr->statements; 486 485 assert( ! body->get_kids().empty() ); 487 486 // must be an ExprStmt, otherwise it wouldn't have a result 488 487 ExprStmt * last = strict_dynamic_cast< ExprStmt * >( body->get_kids().back() ); 489 last-> set_expr( makeCtorDtor( "?{}", ret, last->get_expr()) );490 491 stmtExpr-> get_dtors().push_front( makeCtorDtor( "^?{}", ret ) );488 last->expr = makeCtorDtor( "?{}", ret, last->get_expr() ); 489 490 stmtExpr->dtors.push_front( makeCtorDtor( "^?{}", ret ) ); 492 491 } // if 493 492 } … … 590 589 // to the outer context, rather than inside of the statement expression. 591 590 visit_children = false; 592 std::list< Statement * > & stmts = stmtExpr-> get_statements()->get_kids();591 std::list< Statement * > & stmts = stmtExpr->statements->get_kids(); 593 592 for ( Statement *& stmt : stmts ) { 594 593 stmt = stmt->acceptMutator( *visitor ); 595 594 } // for 596 assert( stmtExpr-> get_result());597 Type * result = stmtExpr-> get_result();595 assert( stmtExpr->result ); 596 Type * result = stmtExpr->result; 598 597 if ( ! result->isVoid() ) { 599 for ( ObjectDecl * obj : stmtExpr-> get_returnDecls()) {598 for ( ObjectDecl * obj : stmtExpr->returnDecls ) { 600 599 stmtsToAddBefore.push_back( new DeclStmt( obj ) ); 601 600 } // for 602 601 // add destructors after current statement 603 for ( Expression * dtor : stmtExpr-> get_dtors()) {602 for ( Expression * dtor : stmtExpr->dtors ) { 604 603 stmtsToAddAfter.push_back( new ExprStmt( dtor ) ); 605 604 } // for 606 605 // must have a non-empty body, otherwise it wouldn't have a result 607 606 assert( ! stmts.empty() ); 608 assert( ! stmtExpr->get_returnDecls().empty() ); 609 stmts.push_back( new ExprStmt( new VariableExpr( stmtExpr->get_returnDecls().front() ) ) ); 610 stmtExpr->get_returnDecls().clear(); 611 stmtExpr->get_dtors().clear(); 612 } 613 assert( stmtExpr->get_returnDecls().empty() ); 614 assert( stmtExpr->get_dtors().empty() ); 607 assertf( ! stmtExpr->returnDecls.empty() || stmtExpr->dtors.empty(), "StmtExpr returns non-void, but no return decls: %s", toString( stmtExpr ).c_str() ); 608 // if there is a return decl, add a use as the last statement; will not have return decl on non-constructable returns 609 if ( ! stmtExpr->returnDecls.empty() ) { 610 stmts.push_back( new ExprStmt( new VariableExpr( stmtExpr->returnDecls.front() ) ) ); 611 } 612 stmtExpr->returnDecls.clear(); 613 stmtExpr->dtors.clear(); 614 } 615 assert( stmtExpr->returnDecls.empty() ); 616 assert( stmtExpr->dtors.empty() ); 615 617 } 616 618 … … 937 939 } 938 940 939 void GenStructMemberCalls::pre visit( FunctionDecl * funcDecl ) {941 void GenStructMemberCalls::premutate( FunctionDecl * funcDecl ) { 940 942 GuardValue( function ); 941 943 GuardValue( unhandled ); … … 971 973 } 972 974 973 void GenStructMemberCalls::postvisit( FunctionDecl * funcDecl ) {975 DeclarationWithType * GenStructMemberCalls::postmutate( FunctionDecl * funcDecl ) { 974 976 // remove the unhandled objects from usedUninit, because a call is inserted 975 977 // to handle them - only objects that are later constructed are used uninitialized. … … 1025 1027 Statement * callStmt = stmt.front(); 1026 1028 1027 MutatingResolver resolver( indexer );1028 1029 try { 1029 callStmt->acceptMutator( resolver );1030 callStmt->acceptMutator( *visitor ); 1030 1031 if ( isCtor ) { 1031 1032 function->get_statements()->push_front( callStmt ); … … 1043 1044 throw errors; 1044 1045 } 1046 return funcDecl; 1045 1047 } 1046 1048 … … 1068 1070 } 1069 1071 1070 void GenStructMemberCalls::pre visit( ApplicationExpr * appExpr ) {1072 void GenStructMemberCalls::premutate( ApplicationExpr * appExpr ) { 1071 1073 if ( ! checkWarnings( function ) ) { 1072 1074 visit_children = false; … … 1093 1095 } 1094 1096 1095 void GenStructMemberCalls::pre visit( MemberExpr * memberExpr ) {1097 void GenStructMemberCalls::premutate( MemberExpr * memberExpr ) { 1096 1098 if ( ! checkWarnings( function ) || ! isCtor ) { 1097 1099 visit_children = false; … … 1121 1123 } 1122 1124 1123 DeclarationWithType * MutatingResolver::mutate( ObjectDecl * objectDecl ) { 1124 // add object to the indexer assumes that there will be no name collisions 1125 // in generated code. If this changes, add mutate methods for entities with 1126 // scope and call {enter,leave}Scope explicitly. 1127 indexer.addId( objectDecl ); 1128 return objectDecl; 1129 } 1130 1131 Expression * MutatingResolver::mutate( UntypedExpr * untypedExpr ) { 1125 Expression * GenStructMemberCalls::postmutate( UntypedExpr * untypedExpr ) { 1132 1126 Expression * newExpr = untypedExpr; 1133 1127 ResolvExpr::findVoidExpression( newExpr, indexer ); -
src/InitTweak/GenInit.cc
r326cd2b r9cdfb4d0 30 30 #include "InitTweak.h" // for isConstExpr, InitExpander, checkIn... 31 31 #include "Parser/LinkageSpec.h" // for isOverridable, C 32 #include "ResolvExpr/Resolver.h" 32 33 #include "SymTab/Autogen.h" // for genImplicitCall, SizeType 33 34 #include "SymTab/Mangler.h" // for Mangler … … 40 41 #include "SynTree/Type.h" // for Type, ArrayType, Type::Qualifiers 41 42 #include "SynTree/Visitor.h" // for acceptAll, maybeAccept 43 #include "Tuples/Tuples.h" // for maybeImpure 42 44 43 45 namespace InitTweak { … … 89 91 }; 90 92 91 struct HoistArrayDimension final : public WithDeclsToAdd, public WithShortCircuiting, public WithGuards {93 struct HoistArrayDimension final : public WithDeclsToAdd, public WithShortCircuiting, public WithGuards, public WithIndexer { 92 94 /// hoist dimension from array types in object declaration so that it uses a single 93 95 /// const variable of type size_t, so that side effecting array dimensions are only … … 104 106 void premutate( FunctionType * ) { visit_children = false; } 105 107 108 // need this so that enumerators are added to the indexer, due to premutate(AggregateDecl *) 109 void premutate( EnumDecl * ) {} 110 106 111 void hoist( Type * type ); 107 112 … … 135 140 if ( varExpr->var == retVal ) return; 136 141 } 137 stmtsToAddBefore.push_back( genCtorDtor( "?{}", retVal, returnStmt->get_expr() ) ); 142 Statement * stmt = genCtorDtor( "?{}", retVal, returnStmt->expr ); 143 assertf( stmt, "ReturnFixer: genCtorDtor returned nullptr: %s / %s", toString( retVal ).c_str(), toString( returnStmt->expr ).c_str() ); 144 stmtsToAddBefore.push_back( stmt ); 138 145 139 146 // return the retVal object … … 178 185 if ( ! arrayType->get_dimension() ) return; // xxx - recursive call to hoist? 179 186 180 // don't need to hoist dimension if it's a constexpr - only need to if there's potential for side effects. 181 if ( isConstExpr( arrayType->get_dimension() ) ) return; 187 // need to resolve array dimensions in order to accurately determine if constexpr 188 ResolvExpr::findSingleExpression( arrayType->dimension, SymTab::SizeType->clone(), indexer ); 189 // array is variable-length when the dimension is not constexpr 190 arrayType->isVarLen = ! isConstExpr( arrayType->dimension ); 191 // don't need to hoist dimension if it's definitely pure - only need to if there's potential for side effects. 192 if ( ! Tuples::maybeImpure( arrayType->dimension ) ) return; 182 193 183 194 ObjectDecl * arrayDimension = new ObjectDecl( dimensionName.newName(), storageClasses, LinkageSpec::C, 0, SymTab::SizeType->clone(), new SingleInit( arrayType->get_dimension() ) ); … … 194 205 void HoistArrayDimension::premutate( FunctionDecl * ) { 195 206 GuardValue( inFunction ); 207 inFunction = true; 196 208 } 197 209 … … 220 232 Type * type = objDecl->get_type(); 221 233 while ( ArrayType * at = dynamic_cast< ArrayType * >( type ) ) { 234 // must always construct VLAs with an initializer, since this is an error in C 235 if ( at->isVarLen && objDecl->init ) return true; 222 236 type = at->get_base(); 223 237 } -
src/InitTweak/InitTweak.cc
r326cd2b r9cdfb4d0 564 564 void previsit( ConstantExpr * ) {} 565 565 566 void previsit( VariableExpr * varExpr ) { 567 visit_children = false; 568 569 if ( EnumInstType * inst = dynamic_cast< EnumInstType * >( varExpr->result ) ) { 570 long long int value; 571 if ( inst->baseEnum->valueOf( varExpr->var, value ) ) { 572 // enumerators are const expr 573 return; 574 } 575 } 576 isConstExpr = false; 577 } 578 566 579 bool isConstExpr = true; 567 580 }; -
src/Parser/DeclarationNode.cc
r326cd2b r9cdfb4d0 343 343 DeclarationNode * newnode = new DeclarationNode; 344 344 newnode->type = new TypeData( kind == OperKinds::PointTo ? TypeData::Pointer : TypeData::Reference ); 345 if ( kind == OperKinds::And ) { 346 // T && is parsed as 'And' operator rather than two references => add a second reference type 347 TypeData * td = new TypeData( TypeData::Reference ); 348 td->base = newnode->type; 349 newnode->type = td; 350 } 345 351 if ( qualifiers ) { 346 352 return newnode->addQualifiers( qualifiers ); -
src/Parser/parser.yy
r326cd2b r9cdfb4d0 10 10 // Created On : Sat Sep 1 20:22:55 2001 11 11 // Last Modified By : Peter A. Buhr 12 // Last Modified On : Mon Nov 27 17:23:35201713 // Update Count : 299 212 // Last Modified On : Thu Dec 21 11:32:56 2017 13 // Update Count : 2996 14 14 // 15 15 … … 317 317 %type<decl> cfa_typedef_declaration cfa_variable_declaration cfa_variable_specifier 318 318 319 %type<decl> c_declaration 319 %type<decl> c_declaration static_assert 320 320 %type<decl> KR_function_declarator KR_function_no_ptr KR_function_ptr KR_function_array 321 321 %type<decl> KR_declaration_list KR_declaration_list_opt … … 835 835 | exception_statement 836 836 | asm_statement 837 ; 837 838 838 839 labeled_statement: … … 1282 1283 c_declaration pop ';' 1283 1284 | cfa_declaration pop ';' // CFA 1284 | STATICASSERT '(' constant_expression ',' string_literal ')' ';' // C11 1285 | static_assert 1286 ; 1287 1288 static_assert: 1289 STATICASSERT '(' constant_expression ',' string_literal ')' ';' // C11 1285 1290 { throw SemanticError("Static assert is currently unimplemented."); $$ = nullptr; } // FIX ME 1286 ;1287 1291 1288 1292 // C declaration syntax is notoriously confusing and error prone. Cforall provides its own type, variable and function … … 1890 1894 $$ = distAttr( $2, $3 ); 1891 1895 } 1896 | static_assert 1892 1897 ; 1893 1898 -
src/ResolvExpr/CastCost.cc
r326cd2b r9cdfb4d0 31 31 32 32 namespace ResolvExpr { 33 classCastCost : public ConversionCost {33 struct CastCost : public ConversionCost { 34 34 public: 35 CastCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env );35 CastCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env, CostFunction costFunc ); 36 36 37 virtual void visit( BasicType *basicType ); 38 virtual void visit( PointerType *pointerType ); 37 using ConversionCost::previsit; 38 using ConversionCost::postvisit; 39 void postvisit( BasicType * basicType ); 40 void postvisit( PointerType * pointerType ); 39 41 }; 40 42 … … 52 54 // all typedefs should be gone by this point 53 55 TypeDecl *type = strict_dynamic_cast< TypeDecl* >( namedType ); 54 if ( type-> get_base()) {55 return castCost( src, type-> get_base(), indexer, env ) + Cost::safe;56 if ( type->base ) { 57 return castCost( src, type->base, indexer, env ) + Cost::safe; 56 58 } // if 57 59 } // if … … 74 76 } else if ( ReferenceType * refType = dynamic_cast< ReferenceType * > ( dest ) ) { 75 77 PRINT( std::cerr << "conversionCost: dest is reference" << std::endl; ) 76 return convertToReferenceCost( src, refType, indexer, env, [](Type * t1, Type * t2, const TypeEnvironment & env, const SymTab::Indexer & indexer) {78 return convertToReferenceCost( src, refType, indexer, env, [](Type * t1, Type * t2, const SymTab::Indexer & indexer, const TypeEnvironment & env ) { 77 79 return ptrsCastable( t1, t2, env, indexer ); 78 80 }); 79 81 } else { 80 CastCost converter( dest, indexer, env);82 PassVisitor<CastCost> converter( dest, indexer, env, castCost ); 81 83 src->accept( converter ); 82 if ( converter. get_cost() == Cost::infinity ) {84 if ( converter.pass.get_cost() == Cost::infinity ) { 83 85 return Cost::infinity; 84 86 } else { 85 87 // xxx - why are we adding cost 0 here? 86 return converter. get_cost() + Cost::zero;88 return converter.pass.get_cost() + Cost::zero; 87 89 } // if 88 90 } // if 89 91 } 90 92 91 CastCost::CastCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env )92 : ConversionCost( dest, indexer, env ) {93 CastCost::CastCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env, CostFunction costFunc ) 94 : ConversionCost( dest, indexer, env, costFunc ) { 93 95 } 94 96 95 void CastCost:: visit( BasicType *basicType ) {97 void CastCost::postvisit( BasicType *basicType ) { 96 98 PointerType *destAsPointer = dynamic_cast< PointerType* >( dest ); 97 99 if ( destAsPointer && basicType->isInteger() ) { … … 103 105 } 104 106 105 void CastCost:: visit( PointerType *pointerType ) {107 void CastCost::postvisit( PointerType *pointerType ) { 106 108 if ( PointerType *destAsPtr = dynamic_cast< PointerType* >( dest ) ) { 107 if ( pointerType->get_qualifiers() <= destAsPtr->get_qualifiers() && typesCompatibleIgnoreQualifiers( pointerType-> get_base(), destAsPtr->get_base(), indexer, env ) ) {109 if ( pointerType->get_qualifiers() <= destAsPtr->get_qualifiers() && typesCompatibleIgnoreQualifiers( pointerType->base, destAsPtr->base, indexer, env ) ) { 108 110 cost = Cost::safe; 109 111 } else { 110 112 TypeEnvironment newEnv( env ); 111 newEnv.add( pointerType-> get_forall());112 newEnv.add( pointerType-> get_base()->get_forall());113 int castResult = ptrsCastable( pointerType-> get_base(), destAsPtr->get_base(), newEnv, indexer );113 newEnv.add( pointerType->forall ); 114 newEnv.add( pointerType->base->forall ); 115 int castResult = ptrsCastable( pointerType->base, destAsPtr->base, newEnv, indexer ); 114 116 if ( castResult > 0 ) { 115 117 cost = Cost::safe; -
src/ResolvExpr/CommonType.cc
r326cd2b r9cdfb4d0 91 91 // special case where one type has a reference depth of 1 larger than the other 92 92 if ( diff > 0 || diff < 0 ) { 93 // std::cerr << "reference depth diff: " << diff << std::endl; 93 94 Type * result = nullptr; 94 if ( ReferenceType * ref1 = dynamic_cast< ReferenceType * >( type1 ) ) { 95 ReferenceType * ref1 = dynamic_cast< ReferenceType * >( type1 ); 96 ReferenceType * ref2 = dynamic_cast< ReferenceType * >( type2 ); 97 if ( diff > 0 ) { 98 // deeper on the left 99 assert( ref1 ); 100 result = handleReference( ref1->base, type2, widenFirst, widenSecond, indexer, env, openVars ); 101 } else { 102 // deeper on the right 103 assert( ref2 ); 104 result = handleReference( type1, ref2->base, widenFirst, widenSecond, indexer, env, openVars ); 105 } 106 if ( result && ref1 ) { 95 107 // formal is reference, so result should be reference 96 result = handleReference( ref1->base, type2, widenFirst, widenSecond, indexer, env, openVars ); 97 if ( result ) result = new ReferenceType( ref1->get_qualifiers(), result ); 98 } else { 99 // formal is value, so result should be value 100 ReferenceType * ref2 = strict_dynamic_cast< ReferenceType * > ( type2 ); 101 result = handleReference( type1, ref2->base, widenFirst, widenSecond, indexer, env, openVars ); 108 // std::cerr << "formal is reference; result should be reference" << std::endl; 109 result = new ReferenceType( ref1->get_qualifiers(), result ); 102 110 } 103 111 // std::cerr << "common type of reference [" << type1 << "] and [" << type2 << "] is [" << result << "]" << std::endl; … … 180 188 } 181 189 182 void CommonType::visit( __attribute((unused)) VoidType *voidType) {}190 void CommonType::visit( VoidType * ) {} 183 191 184 192 void CommonType::visit( BasicType *basicType ) { … … 246 254 } 247 255 248 void CommonType::visit( __attribute((unused)) ArrayType *arrayType) {}256 void CommonType::visit( ArrayType * ) {} 249 257 250 258 void CommonType::visit( ReferenceType *refType ) { … … 283 291 } 284 292 285 void CommonType::visit( __attribute((unused)) FunctionType *functionType) {}286 void CommonType::visit( __attribute((unused)) StructInstType *aggregateUseType) {}287 void CommonType::visit( __attribute((unused)) UnionInstType *aggregateUseType) {}293 void CommonType::visit( FunctionType * ) {} 294 void CommonType::visit( StructInstType * ) {} 295 void CommonType::visit( UnionInstType * ) {} 288 296 289 297 void CommonType::visit( EnumInstType *enumInstType ) { … … 296 304 } 297 305 298 void CommonType::visit( __attribute((unused)) TraitInstType *aggregateUseType) {306 void CommonType::visit( TraitInstType * ) { 299 307 } 300 308 … … 321 329 } 322 330 323 void CommonType::visit( __attribute((unused)) TupleType *tupleType) {}324 void CommonType::visit( __attribute((unused)) VarArgsType *varArgsType) {}331 void CommonType::visit( TupleType * ) {} 332 void CommonType::visit( VarArgsType * ) {} 325 333 326 334 void CommonType::visit( ZeroType *zeroType ) { -
src/ResolvExpr/ConversionCost.cc
r326cd2b r9cdfb4d0 44 44 EqvClass eqvClass; 45 45 NamedTypeDecl *namedType; 46 PRINT( std::cerr << "type inst " << destAsTypeInst-> get_name(); )47 if ( env.lookup( destAsTypeInst-> get_name(), eqvClass ) ) {46 PRINT( std::cerr << "type inst " << destAsTypeInst->name; ) 47 if ( env.lookup( destAsTypeInst->name, eqvClass ) ) { 48 48 if ( eqvClass.type ) { 49 49 return conversionCost( src, eqvClass.type, indexer, env ); … … 51 51 return Cost::infinity; 52 52 } 53 } else if ( ( namedType = indexer.lookupType( destAsTypeInst-> get_name()) ) ) {53 } else if ( ( namedType = indexer.lookupType( destAsTypeInst->name ) ) ) { 54 54 PRINT( std::cerr << " found" << std::endl; ) 55 55 TypeDecl *type = dynamic_cast< TypeDecl* >( namedType ); 56 56 // all typedefs should be gone by this point 57 57 assert( type ); 58 if ( type-> get_base()) {59 return conversionCost( src, type-> get_base(), indexer, env ) + Cost::safe;58 if ( type->base ) { 59 return conversionCost( src, type->base, indexer, env ) + Cost::safe; 60 60 } // if 61 61 } // if … … 77 77 } else if ( ReferenceType * refType = dynamic_cast< ReferenceType * > ( dest ) ) { 78 78 PRINT( std::cerr << "conversionCost: dest is reference" << std::endl; ) 79 return convertToReferenceCost( src, refType, indexer, env, [](Type * t1, Type * t2, const TypeEnvironment & env, const SymTab::Indexer &){79 return convertToReferenceCost( src, refType, indexer, env, [](Type * t1, Type * t2, const SymTab::Indexer &, const TypeEnvironment & env ){ 80 80 return ptrsAssignable( t1, t2, env ); 81 81 }); 82 82 } else { 83 ConversionCost converter( dest, indexer, env);83 PassVisitor<ConversionCost> converter( dest, indexer, env, conversionCost ); 84 84 src->accept( converter ); 85 if ( converter. get_cost() == Cost::infinity ) {85 if ( converter.pass.get_cost() == Cost::infinity ) { 86 86 return Cost::infinity; 87 87 } else { 88 return converter. get_cost() + Cost::zero;88 return converter.pass.get_cost() + Cost::zero; 89 89 } // if 90 90 } // if … … 92 92 93 93 Cost convertToReferenceCost( Type * src, Type * dest, int diff, const SymTab::Indexer & indexer, const TypeEnvironment & env, PtrsFunction func ) { 94 PRINT( std::cerr << "convert to reference cost... diff " << diff << std::endl; )94 PRINT( std::cerr << "convert to reference cost... diff " << diff << " " << src << " / " << dest << std::endl; ) 95 95 if ( diff > 0 ) { 96 96 // TODO: document this 97 Cost cost = convertToReferenceCost( strict_dynamic_cast< ReferenceType * >( src )-> get_base(), dest, diff-1, indexer, env, func );97 Cost cost = convertToReferenceCost( strict_dynamic_cast< ReferenceType * >( src )->base, dest, diff-1, indexer, env, func ); 98 98 cost.incReference(); 99 99 return cost; 100 100 } else if ( diff < -1 ) { 101 101 // TODO: document this 102 Cost cost = convertToReferenceCost( src, strict_dynamic_cast< ReferenceType * >( dest )-> get_base(), diff+1, indexer, env, func );102 Cost cost = convertToReferenceCost( src, strict_dynamic_cast< ReferenceType * >( dest )->base, diff+1, indexer, env, func ); 103 103 cost.incReference(); 104 104 return cost; … … 108 108 if ( srcAsRef && destAsRef ) { // pointer-like conversions between references 109 109 PRINT( std::cerr << "converting between references" << std::endl; ) 110 if ( srcAsRef->get_base()->get_qualifiers() <= destAsRef->get_base()->get_qualifiers() && typesCompatibleIgnoreQualifiers( srcAsRef->get_base(), destAsRef->get_base(), indexer, env ) ) { 111 return Cost::safe; 110 Type::Qualifiers tq1 = srcAsRef->base->get_qualifiers(); 111 Type::Qualifiers tq2 = destAsRef->base->get_qualifiers(); 112 if ( tq1 <= tq2 && typesCompatibleIgnoreQualifiers( srcAsRef->base, destAsRef->base, indexer, env ) ) { 113 PRINT( std::cerr << " :: compatible and good qualifiers" << std::endl; ) 114 if ( tq1 == tq2 ) { 115 // types are the same 116 return Cost::zero; 117 } else { 118 // types are the same, except otherPointer has more qualifiers 119 return Cost::safe; 120 } 112 121 } else { // xxx - this discards reference qualifiers from consideration -- reducing qualifiers is a safe conversion; is this right? 113 int assignResult = func( srcAsRef-> get_base(), destAsRef->get_base(), env, indexer);122 int assignResult = func( srcAsRef->base, destAsRef->base, indexer, env ); 114 123 PRINT( std::cerr << "comparing references: " << assignResult << " " << srcAsRef << " " << destAsRef << std::endl; ) 115 124 if ( assignResult > 0 ) { … … 121 130 } else { 122 131 PRINT( std::cerr << "reference to rvalue conversion" << std::endl; ) 123 ConversionCost converter( dest, indexer, env);132 PassVisitor<ConversionCost> converter( dest, indexer, env, conversionCost ); 124 133 src->accept( converter ); 125 return converter. get_cost();134 return converter.pass.get_cost(); 126 135 } // if 127 136 } else { … … 129 138 assert( diff == -1 && destAsRef ); 130 139 PRINT( std::cerr << "dest is: " << dest << " / src is: " << src << std::endl; ) 131 if ( typesCompatibleIgnoreQualifiers( src, destAsRef-> get_base(), indexer, env ) ) {140 if ( typesCompatibleIgnoreQualifiers( src, destAsRef->base, indexer, env ) ) { 132 141 PRINT( std::cerr << "converting compatible base type" << std::endl; ) 133 142 if ( src->get_lvalue() ) { … … 137 146 ) 138 147 // lvalue-to-reference conversion: cv lvalue T => cv T & 139 if ( src->get_qualifiers() == destAsRef-> get_base()->get_qualifiers() ) {148 if ( src->get_qualifiers() == destAsRef->base->get_qualifiers() ) { 140 149 return Cost::reference; // cost needs to be non-zero to add cast 141 } if ( src->get_qualifiers() < destAsRef-> get_base()->get_qualifiers() ) {150 } if ( src->get_qualifiers() < destAsRef->base->get_qualifiers() ) { 142 151 return Cost::safe; // cost needs to be higher than previous cast to differentiate adding qualifiers vs. keeping same 143 152 } else { 144 153 return Cost::unsafe; 145 154 } // if 146 } else if ( destAsRef-> get_base()->get_const() ) {155 } else if ( destAsRef->base->get_const() ) { 147 156 PRINT( std::cerr << "rvalue to const ref conversion" << std::endl; ) 148 157 // rvalue-to-const-reference conversion: T => const T & … … 164 173 } 165 174 166 ConversionCost::ConversionCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env )167 : dest( dest ), indexer( indexer ), cost( Cost::infinity ), env( env ) {175 ConversionCost::ConversionCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env, CostFunction costFunc ) 176 : dest( dest ), indexer( indexer ), cost( Cost::infinity ), env( env ), costFunc( costFunc ) { 168 177 } 169 178 … … 248 257 }; 249 258 250 void ConversionCost:: visit( __attribute((unused)) VoidType *voidType) {259 void ConversionCost::postvisit( VoidType * ) { 251 260 cost = Cost::infinity; 252 261 } 253 262 254 void ConversionCost:: visit(BasicType *basicType) {263 void ConversionCost::postvisit(BasicType *basicType) { 255 264 if ( BasicType *destAsBasic = dynamic_cast< BasicType* >( dest ) ) { 256 265 int tableResult = costMatrix[ basicType->get_kind() ][ destAsBasic->get_kind() ]; … … 269 278 } 270 279 271 void ConversionCost:: visit( PointerType * pointerType ) {280 void ConversionCost::postvisit( PointerType * pointerType ) { 272 281 if ( PointerType *destAsPtr = dynamic_cast< PointerType* >( dest ) ) { 273 PRINT( std::cerr << pointerType << " ===> " << destAsPtr; ) 274 Type::Qualifiers tq1 = pointerType->get_base()->get_qualifiers(); 275 Type::Qualifiers tq2 = destAsPtr->get_base()->get_qualifiers(); 276 if ( tq1 <= tq2 && typesCompatibleIgnoreQualifiers( pointerType->get_base(), destAsPtr->get_base(), indexer, env ) ) { 282 PRINT( std::cerr << pointerType << " ===> " << destAsPtr << std::endl; ) 283 Type::Qualifiers tq1 = pointerType->base->get_qualifiers(); 284 Type::Qualifiers tq2 = destAsPtr->base->get_qualifiers(); 285 if ( tq1 <= tq2 && typesCompatibleIgnoreQualifiers( pointerType->base, destAsPtr->base, indexer, env ) ) { 286 PRINT( std::cerr << " :: compatible and good qualifiers" << std::endl; ) 277 287 if ( tq1 == tq2 ) { 278 288 // types are the same … … 280 290 } else { 281 291 // types are the same, except otherPointer has more qualifiers 282 PRINT( std::cerr << " :: compatible and good qualifiers" << std::endl; )283 292 cost = Cost::safe; 284 293 } 285 } else { // xxx - this discards qualifiers from consideration -- reducing qualifiers is a safe conversion; is this right?294 } else { 286 295 int assignResult = ptrsAssignable( pointerType->base, destAsPtr->base, env ); 287 296 PRINT( std::cerr << " :: " << assignResult << std::endl; ) 288 if ( assignResult > 0 && pointerType->get_base()->get_qualifiers() <= destAsPtr->get_qualifiers() ) { 289 cost = Cost::safe; 297 if ( assignResult > 0 && tq1 <= tq2 ) { 298 // xxx - want the case where qualifiers are added to be more expensive than the case where qualifiers are the same. Is 1 safe vs. 2 safe correct? 299 if ( tq1 == tq2 ) { 300 cost = Cost::safe; 301 } else if ( tq1 < tq2 ) { 302 cost = Cost::safe+Cost::safe; 303 } 290 304 } else if ( assignResult < 0 ) { 291 305 cost = Cost::unsafe; … … 298 312 } 299 313 300 void ConversionCost:: visit( ArrayType * ) {}301 302 void ConversionCost:: visit( ReferenceType * refType ) {314 void ConversionCost::postvisit( ArrayType * ) {} 315 316 void ConversionCost::postvisit( ReferenceType * refType ) { 303 317 // Note: dest can never be a reference, since it would have been caught in an earlier check 304 318 assert( ! dynamic_cast< ReferenceType * >( dest ) ); … … 306 320 // recursively compute conversion cost from T1 to T2. 307 321 // cv can be safely dropped because of 'implicit dereference' behavior. 308 refType->base->accept( *this);322 cost = costFunc( refType->base, dest, indexer, env ); 309 323 if ( refType->base->get_qualifiers() == dest->get_qualifiers() ) { 310 324 cost.incReference(); // prefer exact qualifiers … … 317 331 } 318 332 319 void ConversionCost:: visit( FunctionType * ) {}320 321 void ConversionCost:: visit( StructInstType * inst ) {333 void ConversionCost::postvisit( FunctionType * ) {} 334 335 void ConversionCost::postvisit( StructInstType * inst ) { 322 336 if ( StructInstType *destAsInst = dynamic_cast< StructInstType* >( dest ) ) { 323 337 if ( inst->name == destAsInst->name ) { … … 327 341 } 328 342 329 void ConversionCost:: visit( UnionInstType * inst ) {343 void ConversionCost::postvisit( UnionInstType * inst ) { 330 344 if ( UnionInstType *destAsInst = dynamic_cast< UnionInstType* >( dest ) ) { 331 345 if ( inst->name == destAsInst->name ) { … … 335 349 } 336 350 337 void ConversionCost:: visit( EnumInstType * ) {351 void ConversionCost::postvisit( EnumInstType * ) { 338 352 static Type::Qualifiers q; 339 353 static BasicType integer( q, BasicType::SignedInt ); 340 integer.accept( *this); // safe if dest >= int354 cost = costFunc( &integer, dest, indexer, env ); // safe if dest >= int 341 355 if ( cost < Cost::unsafe ) { 342 356 cost.incSafe(); … … 344 358 } 345 359 346 void ConversionCost:: visit( TraitInstType * ) {}347 348 void ConversionCost:: visit( TypeInstType *inst ) {360 void ConversionCost::postvisit( TraitInstType * ) {} 361 362 void ConversionCost::postvisit( TypeInstType *inst ) { 349 363 EqvClass eqvClass; 350 364 NamedTypeDecl *namedType; 351 if ( env.lookup( inst-> get_name(), eqvClass ) ) {352 cost = co nversionCost( eqvClass.type, dest, indexer, env );365 if ( env.lookup( inst->name, eqvClass ) ) { 366 cost = costFunc( eqvClass.type, dest, indexer, env ); 353 367 } else if ( TypeInstType *destAsInst = dynamic_cast< TypeInstType* >( dest ) ) { 354 if ( inst-> get_name() == destAsInst->get_name()) {368 if ( inst->name == destAsInst->name ) { 355 369 cost = Cost::zero; 356 370 } 357 } else if ( ( namedType = indexer.lookupType( inst-> get_name()) ) ) {371 } else if ( ( namedType = indexer.lookupType( inst->name ) ) ) { 358 372 TypeDecl *type = dynamic_cast< TypeDecl* >( namedType ); 359 373 // all typedefs should be gone by this point 360 374 assert( type ); 361 if ( type-> get_base()) {362 cost = co nversionCost( type->get_base(), dest, indexer, env ) + Cost::safe;363 } // if 364 } // if 365 } 366 367 void ConversionCost:: visit( TupleType * tupleType ) {375 if ( type->base ) { 376 cost = costFunc( type->base, dest, indexer, env ) + Cost::safe; 377 } // if 378 } // if 379 } 380 381 void ConversionCost::postvisit( TupleType * tupleType ) { 368 382 Cost c = Cost::zero; 369 383 if ( TupleType * destAsTuple = dynamic_cast< TupleType * >( dest ) ) { 370 std::list< Type * >::const_iterator srcIt = tupleType-> get_types().begin();371 std::list< Type * >::const_iterator destIt = destAsTuple-> get_types().begin();372 while ( srcIt != tupleType-> get_types().end() && destIt != destAsTuple->get_types().end() ) {373 Cost newCost = co nversionCost( *srcIt++, *destIt++, indexer, env );384 std::list< Type * >::const_iterator srcIt = tupleType->types.begin(); 385 std::list< Type * >::const_iterator destIt = destAsTuple->types.begin(); 386 while ( srcIt != tupleType->types.end() && destIt != destAsTuple->types.end() ) { 387 Cost newCost = costFunc( *srcIt++, *destIt++, indexer, env ); 374 388 if ( newCost == Cost::infinity ) { 375 389 return; … … 377 391 c += newCost; 378 392 } // while 379 if ( destIt != destAsTuple-> get_types().end() ) {393 if ( destIt != destAsTuple->types.end() ) { 380 394 cost = Cost::infinity; 381 395 } else { … … 385 399 } 386 400 387 void ConversionCost:: visit( VarArgsType * ) {401 void ConversionCost::postvisit( VarArgsType * ) { 388 402 if ( dynamic_cast< VarArgsType* >( dest ) ) { 389 403 cost = Cost::zero; … … 391 405 } 392 406 393 void ConversionCost:: visit( ZeroType * ) {407 void ConversionCost::postvisit( ZeroType * ) { 394 408 if ( dynamic_cast< ZeroType * >( dest ) ) { 395 409 cost = Cost::zero; … … 408 422 } 409 423 410 void ConversionCost:: visit( OneType * ) {424 void ConversionCost::postvisit( OneType * ) { 411 425 if ( dynamic_cast< OneType * >( dest ) ) { 412 426 cost = Cost::zero; -
src/ResolvExpr/ConversionCost.h
r326cd2b r9cdfb4d0 19 19 20 20 #include "Cost.h" // for Cost 21 22 #include "Common/PassVisitor.h" 21 23 #include "SynTree/Visitor.h" // for Visitor 22 24 #include "SynTree/SynTree.h" // for Visitor Nodes … … 29 31 class TypeEnvironment; 30 32 31 class ConversionCost : public Visitor { 33 typedef std::function<Cost(Type *, Type *, const SymTab::Indexer &, const TypeEnvironment &)> CostFunction; 34 struct ConversionCost : public WithShortCircuiting { 32 35 public: 33 ConversionCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env );36 ConversionCost( Type *dest, const SymTab::Indexer &indexer, const TypeEnvironment &env, CostFunction ); 34 37 35 38 Cost get_cost() const { return cost; } 36 39 37 virtual void visit(VoidType *voidType); 38 virtual void visit(BasicType *basicType); 39 virtual void visit(PointerType *pointerType); 40 virtual void visit(ArrayType *arrayType); 41 virtual void visit(ReferenceType *refType); 42 virtual void visit(FunctionType *functionType); 43 virtual void visit(StructInstType *aggregateUseType); 44 virtual void visit(UnionInstType *aggregateUseType); 45 virtual void visit(EnumInstType *aggregateUseType); 46 virtual void visit(TraitInstType *aggregateUseType); 47 virtual void visit(TypeInstType *aggregateUseType); 48 virtual void visit(TupleType *tupleType); 49 virtual void visit(VarArgsType *varArgsType); 50 virtual void visit(ZeroType *zeroType); 51 virtual void visit(OneType *oneType); 40 void previsit( BaseSyntaxNode * ) { visit_children = false; } 41 42 void postvisit( VoidType * voidType ); 43 void postvisit( BasicType * basicType ); 44 void postvisit( PointerType * pointerType ); 45 void postvisit( ArrayType * arrayType ); 46 void postvisit( ReferenceType * refType ); 47 void postvisit( FunctionType * functionType ); 48 void postvisit( StructInstType * aggregateUseType ); 49 void postvisit( UnionInstType * aggregateUseType ); 50 void postvisit( EnumInstType * aggregateUseType ); 51 void postvisit( TraitInstType * aggregateUseType ); 52 void postvisit( TypeInstType * aggregateUseType ); 53 void postvisit( TupleType * tupleType ); 54 void postvisit( VarArgsType * varArgsType ); 55 void postvisit( ZeroType * zeroType ); 56 void postvisit( OneType * oneType ); 52 57 protected: 53 58 Type *dest; … … 55 60 Cost cost; 56 61 const TypeEnvironment &env; 62 CostFunction costFunc; 57 63 }; 58 64 59 typedef std::function<int(Type *, Type *, const TypeEnvironment &, const SymTab::Indexer&)> PtrsFunction;65 typedef std::function<int(Type *, Type *, const SymTab::Indexer &, const TypeEnvironment &)> PtrsFunction; 60 66 Cost convertToReferenceCost( Type * src, ReferenceType * dest, const SymTab::Indexer & indexer, const TypeEnvironment & env, PtrsFunction func ); 61 67 } // namespace ResolvExpr -
src/ResolvExpr/CurrentObject.cc
r326cd2b r9cdfb4d0 38 38 39 39 namespace ResolvExpr { 40 long long int getConstValue( ConstantExpr * constExpr ) {41 if ( BasicType * basicType = dynamic_cast< BasicType * >( constExpr->get_result() ) ) {42 if ( basicType->isInteger() ) {43 return constExpr->get_constant()->get_ival();44 } else {45 assertf( false, "Non-integer constant expression in getConstValue %s", toString( constExpr ).c_str() ); // xxx - might be semantic error46 }47 } else if ( dynamic_cast< OneType * >( constExpr->get_result() ) ) {48 return 1;49 } else if ( dynamic_cast< ZeroType * >( constExpr->get_result() ) ) {50 return 0;51 } else {52 assertf( false, "unhandled type on getConstValue %s", toString( constExpr->get_result() ).c_str() ); // xxx - might be semantic error53 }54 }55 56 40 template< typename AggrInst > 57 41 TypeSubstitution makeGenericSubstitution( AggrInst * inst ) { … … 141 125 base = at->get_base(); 142 126 memberIter = createMemberIterator( base ); 127 if ( at->isVarLen ) throw SemanticError( "VLA initialization does not support @=", at ); 143 128 setSize( at->get_dimension() ); 144 129 } … … 151 136 void setSize( Expression * expr ) { 152 137 if ( ConstantExpr * constExpr = dynamic_cast< ConstantExpr * >( expr ) ) { 153 size = getConstValue( constExpr ); 154 PRINT( std::cerr << "array type with size: " << size << std::endl; ) 138 try { 139 size = constExpr->intValue(); 140 PRINT( std::cerr << "array type with size: " << size << std::endl; ) 141 } catch ( SemanticError & ) { 142 throw SemanticError( "Constant expression of non-integral type in array dimension: ", expr ); 143 } 155 144 } else if ( CastExpr * castExpr = dynamic_cast< CastExpr * >( expr ) ) { 156 145 setSize( castExpr->get_arg() ); // xxx - need to perform the conversion specified by the cast 146 } else if ( VariableExpr * varExpr = dynamic_cast< VariableExpr * >( expr ) ) { 147 if ( EnumInstType * inst = dynamic_cast< EnumInstType * > ( varExpr->result ) ) { 148 long long int value; 149 if ( inst->baseEnum->valueOf( varExpr->var, value ) ) { 150 size = value; 151 } 152 } 157 153 } else { 158 154 assertf( false, "unhandled expression in setSize: %s", toString( expr ).c_str() ); // xxx - if not a constant expression, it's not simple to determine how long the array actually is, which is necessary for initialization to be done correctly -- fix this … … 164 160 // need to permit integer-constant-expressions, including: integer constants, enumeration constants, character constants, sizeof expressions, _Alignof expressions, cast expressions 165 161 if ( ConstantExpr * constExpr = dynamic_cast< ConstantExpr * >( expr ) ) { 166 index = getConstValue( constExpr ); 162 try { 163 index = constExpr->intValue(); 164 } catch( SemanticError & ) { 165 throw SemanticError( "Constant expression of non-integral type in array designator: ", expr ); 166 } 167 167 } else if ( CastExpr * castExpr = dynamic_cast< CastExpr * >( expr ) ) { 168 168 setPosition( castExpr->get_arg() ); 169 169 } else if ( VariableExpr * varExpr = dynamic_cast< VariableExpr * >( expr ) ) { 170 assertf( dynamic_cast<EnumInstType *> ( varExpr->get_result() ), "ArrayIterator given variable that isn't an enum constant : %s", toString( expr ).c_str() ); 171 index = 0; // xxx - get actual value of enum constant 170 EnumInstType * inst = dynamic_cast<EnumInstType *>( varExpr->get_result() ); 171 assertf( inst, "ArrayIterator given variable that isn't an enum constant : %s", toString( expr ).c_str() ); 172 long long int value; 173 if ( inst->baseEnum->valueOf( varExpr->var, value ) ) { 174 index = value; 175 } 172 176 } else if ( dynamic_cast< SizeofExpr * >( expr ) || dynamic_cast< AlignofExpr * >( expr ) ) { 173 177 index = 0; // xxx - get actual sizeof/alignof value? -
src/ResolvExpr/FindOpenVars.cc
r326cd2b r9cdfb4d0 19 19 #include <map> // for map<>::mapped_type 20 20 21 #include "Common/PassVisitor.h" 21 22 #include "SynTree/Declaration.h" // for TypeDecl, DeclarationWithType (ptr ... 22 23 #include "SynTree/Type.h" // for Type, Type::ForallList, ArrayType 23 #include "SynTree/Visitor.h" // for Visitor24 24 25 25 namespace ResolvExpr { 26 class FindOpenVars : public Visitor { 27 public: 26 struct FindOpenVars : public WithGuards { 28 27 FindOpenVars( OpenVarSet &openVars, OpenVarSet &closedVars, AssertionSet &needAssertions, AssertionSet &haveAssertions, bool firstIsOpen ); 29 28 30 private: 31 virtual void visit(PointerType *pointerType); 32 virtual void visit(ArrayType *arrayType); 33 virtual void visit(FunctionType *functionType); 34 virtual void visit(TupleType *tupleType); 29 void previsit( PointerType * pointerType ); 30 void previsit( ArrayType * arrayType ); 31 void previsit( FunctionType * functionType ); 32 void previsit( TupleType * tupleType ); 35 33 36 34 void common_action( Type *type ); … … 42 40 43 41 void findOpenVars( Type *type, OpenVarSet &openVars, OpenVarSet &closedVars, AssertionSet &needAssertions, AssertionSet &haveAssertions, bool firstIsOpen ) { 44 FindOpenVarsfinder( openVars, closedVars, needAssertions, haveAssertions, firstIsOpen );42 PassVisitor<FindOpenVars> finder( openVars, closedVars, needAssertions, haveAssertions, firstIsOpen ); 45 43 type->accept( finder ); 46 44 } … … 70 68 } // for 71 69 } // if 72 /// std::c out<< "type is ";73 /// type->print( std::c out);74 /// std::c out<< std::endl << "need is" << std::endl;75 /// printAssertionSet( needAssertions, std::c out);76 /// std::c out<< std::endl << "have is" << std::endl;77 /// printAssertionSet( haveAssertions, std::c out);70 /// std::cerr << "type is "; 71 /// type->print( std::cerr ); 72 /// std::cerr << std::endl << "need is" << std::endl; 73 /// printAssertionSet( needAssertions, std::cerr ); 74 /// std::cerr << std::endl << "have is" << std::endl; 75 /// printAssertionSet( haveAssertions, std::cerr ); 78 76 } 79 77 80 void FindOpenVars:: visit(PointerType *pointerType) {78 void FindOpenVars::previsit(PointerType *pointerType) { 81 79 common_action( pointerType ); 82 Visitor::visit( pointerType );83 80 } 84 81 85 void FindOpenVars:: visit(ArrayType *arrayType) {82 void FindOpenVars::previsit(ArrayType *arrayType) { 86 83 common_action( arrayType ); 87 Visitor::visit( arrayType );88 84 } 89 85 90 void FindOpenVars:: visit(FunctionType *functionType) {86 void FindOpenVars::previsit(FunctionType *functionType) { 91 87 common_action( functionType ); 92 88 nextIsOpen = ! nextIsOpen; 93 Visitor::visit( functionType ); 94 nextIsOpen = ! nextIsOpen; 89 GuardAction( [this](){ nextIsOpen = ! nextIsOpen; } ); 95 90 } 96 91 97 void FindOpenVars:: visit(TupleType *tupleType) {92 void FindOpenVars::previsit(TupleType *tupleType) { 98 93 common_action( tupleType ); 99 Visitor::visit( tupleType );100 94 } 101 95 } // namespace ResolvExpr -
src/ResolvExpr/Occurs.cc
r326cd2b r9cdfb4d0 5 5 // file "LICENCE" distributed with Cforall. 6 6 // 7 // Occurs.cc -- 7 // Occurs.cc -- 8 8 // 9 9 // Author : Richard C. Bilson … … 17 17 #include <string> // for string 18 18 19 #include "Common/PassVisitor.h" 19 20 #include "SynTree/Type.h" // for TypeInstType, Type 20 #include "SynTree/Visitor.h" // for Visitor21 21 #include "TypeEnvironment.h" // for EqvClass, TypeEnvironment 22 22 23 23 namespace ResolvExpr { 24 class Occurs : public Visitor { 25 public: 24 struct Occurs : public WithVisitorRef<Occurs> { 26 25 Occurs( std::string varName, const TypeEnvironment &env ); 27 bool get_result() const { return result; } 28 virtual void visit( TypeInstType *typeInst ); 29 private: 26 void previsit( TypeInstType * typeInst ); 27 30 28 bool result; 31 29 std::set< std::string > eqvVars; 32 const TypeEnvironment & env;30 const TypeEnvironment &tenv; 33 31 }; 34 32 35 33 bool occurs( Type *type, std::string varName, const TypeEnvironment &env ) { 36 Occursoccur( varName, env );34 PassVisitor<Occurs> occur( varName, env ); 37 35 type->accept( occur ); 38 return occur. get_result();36 return occur.pass.result; 39 37 } 40 38 41 Occurs::Occurs( std::string varName, const TypeEnvironment & env ) : result( false ),env( env ) {39 Occurs::Occurs( std::string varName, const TypeEnvironment & env ) : result( false ), tenv( env ) { 42 40 EqvClass eqvClass; 43 if ( env.lookup( varName, eqvClass ) ) {41 if ( tenv.lookup( varName, eqvClass ) ) { 44 42 eqvVars = eqvClass.vars; 45 43 } else { … … 48 46 } 49 47 50 void Occurs:: visit( TypeInstType *typeInst ) {48 void Occurs::previsit( TypeInstType * typeInst ) { 51 49 EqvClass eqvClass; 52 /// std::c out<< "searching for vars: ";53 /// std::copy( eqvVars.begin(), eqvVars.end(), std::ostream_iterator< std::string >( std::c out, " " ) );54 /// std::c out<< std::endl;50 /// std::cerr << "searching for vars: "; 51 /// std::copy( eqvVars.begin(), eqvVars.end(), std::ostream_iterator< std::string >( std::cerr, " " ) ); 52 /// std::cerr << std::endl; 55 53 if ( eqvVars.find( typeInst->get_name() ) != eqvVars.end() ) { 56 54 result = true; 57 } else if ( env.lookup( typeInst->get_name(), eqvClass ) ) {55 } else if ( tenv.lookup( typeInst->get_name(), eqvClass ) ) { 58 56 if ( eqvClass.type ) { 59 /// std::c out<< typeInst->get_name() << " is bound to";60 /// eqvClass.type->print( std::c out);61 /// std::c out<< std::endl;62 eqvClass.type->accept( * this);57 /// std::cerr << typeInst->get_name() << " is bound to"; 58 /// eqvClass.type->print( std::cerr ); 59 /// std::cerr << std::endl; 60 eqvClass.type->accept( *visitor ); 63 61 } // if 64 62 } // if -
src/ResolvExpr/PolyCost.cc
r326cd2b r9cdfb4d0 14 14 // 15 15 16 #include "Common/PassVisitor.h" 16 17 #include "SymTab/Indexer.h" // for Indexer 17 18 #include "SynTree/Type.h" // for TypeInstType, Type 18 #include "SynTree/Visitor.h" // for Visitor19 19 #include "TypeEnvironment.h" // for EqvClass, TypeEnvironment 20 20 21 21 namespace ResolvExpr { 22 class PolyCost : public Visitor { 23 public: 22 struct PolyCost { 24 23 PolyCost( const TypeEnvironment &env, const SymTab::Indexer &indexer ); 25 int get_result() const { return result; } 26 private: 27 virtual void visit(TypeInstType *aggregateUseType); 24 25 void previsit( TypeInstType * aggregateUseType ); 28 26 int result; 29 const TypeEnvironment & env;27 const TypeEnvironment &tenv; 30 28 const SymTab::Indexer &indexer; 31 29 }; 32 30 33 31 int polyCost( Type *type, const TypeEnvironment & env, const SymTab::Indexer &indexer ) { 34 P olyCostcoster( env, indexer );32 PassVisitor<PolyCost> coster( env, indexer ); 35 33 type->accept( coster ); 36 return coster. get_result();34 return coster.pass.result; 37 35 } 38 36 39 PolyCost::PolyCost( const TypeEnvironment & env, const SymTab::Indexer & indexer ) : result( 0 ), env( env ), indexer( indexer ) {37 PolyCost::PolyCost( const TypeEnvironment & env, const SymTab::Indexer & indexer ) : result( 0 ), tenv( env ), indexer( indexer ) { 40 38 } 41 39 42 void PolyCost:: visit(TypeInstType * typeInst) {40 void PolyCost::previsit(TypeInstType * typeInst) { 43 41 EqvClass eqvClass; 44 if ( env.lookup( typeInst->name, eqvClass ) ) {42 if ( tenv.lookup( typeInst->name, eqvClass ) ) { 45 43 if ( eqvClass.type ) { 46 44 if ( TypeInstType * otherTypeInst = dynamic_cast< TypeInstType* >( eqvClass.type ) ) { -
src/ResolvExpr/PtrsAssignable.cc
r326cd2b r9cdfb4d0 67 67 PtrsAssignable::PtrsAssignable( Type *dest, const TypeEnvironment &env ) : dest( dest ), result( 0 ), env( env ) {} 68 68 69 void PtrsAssignable::visit( __attribute((unused)) VoidType *voidType) {69 void PtrsAssignable::visit( VoidType * ) { 70 70 // T * = void * is disallowed - this is a change from C, where any 71 71 // void * can be assigned or passed to a non-void pointer without a cast. -
src/ResolvExpr/Resolver.cc
r326cd2b r9cdfb4d0 155 155 } // if 156 156 #endif 157 assertf( finder.get_alternatives().size() == 1, "findSingleExpression: must have exactly one alternative at the end .");157 assertf( finder.get_alternatives().size() == 1, "findSingleExpression: must have exactly one alternative at the end: (%zd) %s", finder.get_alternatives().size(), toString( untyped ).c_str() ); 158 158 Alternative &choice = finder.get_alternatives().front(); 159 159 Expression *newExpr = choice.expr->clone(); … … 171 171 172 172 namespace { 173 /// resolve `untyped` to the expression whose type satisfies `pred` with the lowest cost; kindStr is used for providing better error messages 174 template<typename Pred> 175 void findKindExpression(Expression *& untyped, const SymTab::Indexer & indexer, const std::string & kindStr, Pred pred) { 176 TypeEnvironment env; 177 AlternativeFinder finder( indexer, env ); 178 finder.findWithAdjustment( untyped ); 179 180 AltList candidates; 181 for ( Alternative & alt : finder.get_alternatives() ) { 182 if ( pred( alt.expr->result ) ) { 183 candidates.push_back( std::move( alt ) ); 184 } 185 } 186 187 // choose the lowest cost expression among the candidates 188 AltList winners; 189 findMinCost( candidates.begin(), candidates.end(), back_inserter( winners ) ); 190 if ( winners.size() == 0 ) { 191 throw SemanticError( "No reasonable alternatives for " + kindStr + " expression: ", untyped ); 192 } else if ( winners.size() != 1 ) { 193 std::ostringstream stream; 194 stream << "Cannot choose between " << winners.size() << " alternatives for " + kindStr + " expression\n"; 195 untyped->print( stream ); 196 stream << "Alternatives are:\n"; 197 printAlts( winners, stream, 1 ); 198 throw SemanticError( stream.str() ); 199 } 200 201 // there is one unambiguous interpretation - move the expression into the with statement 202 Alternative & alt = winners.front(); 203 finishExpr( alt.expr, alt.env, untyped->env ); 204 delete untyped; 205 untyped = alt.expr; 206 alt.expr = nullptr; 207 } 208 173 209 bool isIntegralType( Type *type ) { 174 210 if ( dynamic_cast< EnumInstType * >( type ) ) { … … 184 220 185 221 void findIntegralExpression( Expression *& untyped, const SymTab::Indexer &indexer ) { 186 TypeEnvironment env; 187 AlternativeFinder finder( indexer, env ); 188 finder.find( untyped ); 189 #if 0 190 if ( finder.get_alternatives().size() != 1 ) { 191 std::cout << "untyped expr is "; 192 untyped->print( std::cout ); 193 std::cout << std::endl << "alternatives are:"; 194 for ( std::list< Alternative >::const_iterator i = finder.get_alternatives().begin(); i != finder.get_alternatives().end(); ++i ) { 195 i->print( std::cout ); 196 } // for 197 } // if 198 #endif 199 Expression *newExpr = 0; 200 const TypeEnvironment *newEnv = 0; 201 for ( AltList::const_iterator i = finder.get_alternatives().begin(); i != finder.get_alternatives().end(); ++i ) { 202 if ( i->expr->get_result()->size() == 1 && isIntegralType( i->expr->get_result() ) ) { 203 if ( newExpr ) { 204 throw SemanticError( "Too many interpretations for case control expression", untyped ); 205 } else { 206 newExpr = i->expr->clone(); 207 newEnv = &i->env; 208 } // if 209 } // if 210 } // for 211 if ( ! newExpr ) { 212 throw SemanticError( "No interpretations for case control expression", untyped ); 213 } // if 214 finishExpr( newExpr, *newEnv, untyped->env ); 215 delete untyped; 216 untyped = newExpr; 217 } 218 222 findKindExpression( untyped, indexer, "condition", isIntegralType ); 223 } 219 224 } 220 225 … … 311 316 312 317 void Resolver::previsit( IfStmt *ifStmt ) { 313 find SingleExpression( ifStmt->condition, indexer );318 findIntegralExpression( ifStmt->condition, indexer ); 314 319 } 315 320 316 321 void Resolver::previsit( WhileStmt *whileStmt ) { 317 find SingleExpression( whileStmt->condition, indexer );322 findIntegralExpression( whileStmt->condition, indexer ); 318 323 } 319 324 320 325 void Resolver::previsit( ForStmt *forStmt ) { 321 326 if ( forStmt->condition ) { 322 find SingleExpression( forStmt->condition, indexer );327 findIntegralExpression( forStmt->condition, indexer ); 323 328 } // if 324 329 … … 579 584 } 580 585 586 581 587 void Resolver::previsit( WithStmt * withStmt ) { 582 588 for ( Expression *& expr : withStmt->exprs ) { 583 TypeEnvironment env;584 AlternativeFinder finder( indexer, env );585 finder.findWithAdjustment( expr );586 587 589 // only struct- and union-typed expressions are viable candidates 588 AltList candidates; 589 for ( Alternative & alt : finder.get_alternatives() ) { 590 if ( isStructOrUnion( alt.expr->result ) ) { 591 candidates.push_back( std::move( alt ) ); 592 } 593 } 594 595 // choose the lowest cost expression among the candidates 596 AltList winners; 597 findMinCost( candidates.begin(), candidates.end(), back_inserter( winners ) ); 598 if ( winners.size() == 0 ) { 599 throw SemanticError( "No reasonable alternatives for with statement expression: ", expr ); 600 } else if ( winners.size() != 1 ) { 601 std::ostringstream stream; 602 stream << "Cannot choose between " << winners.size() << " alternatives for with statement expression\n"; 603 expr->print( stream ); 604 stream << "Alternatives are:\n"; 605 printAlts( winners, stream, 1 ); 606 throw SemanticError( stream.str() ); 607 } 608 609 // there is one unambiguous interpretation - move the expression into the with statement 610 Alternative & alt = winners.front(); 611 finishExpr( alt.expr, alt.env, expr->env ); 612 delete expr; 613 expr = alt.expr; 614 alt.expr = nullptr; 590 findKindExpression( expr, indexer, "with statement", isStructOrUnion ); 615 591 616 592 // if with expression might be impure, create a temporary so that it is evaluated once -
src/ResolvExpr/Resolver.h
r326cd2b r9cdfb4d0 33 33 void findVoidExpression( Expression *& untyped, const SymTab::Indexer &indexer ); 34 34 void findSingleExpression( Expression *& untyped, const SymTab::Indexer &indexer ); 35 void findSingleExpression( Expression *& untyped, Type * type, const SymTab::Indexer &indexer ); 35 36 void resolveCtorInit( ConstructorInit * ctorInit, const SymTab::Indexer & indexer ); 36 37 void resolveStmtExpr( StmtExpr * stmtExpr, const SymTab::Indexer & indexer ); -
src/SymTab/Indexer.cc
r326cd2b r9cdfb4d0 572 572 } 573 573 574 void Indexer::addMembers( AggregateDecl * aggr, Expression * expr ) { 575 for ( Declaration * decl : aggr->members ) { 576 if ( DeclarationWithType * dwt = dynamic_cast< DeclarationWithType * >( decl ) ) { 577 addId( dwt, expr ); 578 if ( dwt->name == "" ) { 579 Type * t = dwt->get_type()->stripReferences(); 580 if ( dynamic_cast< StructInstType * >( t ) || dynamic_cast< UnionInstType * >( t ) ) { 581 Expression * base = expr->clone(); 582 ResolvExpr::referenceToRvalueConversion( base ); 583 addMembers( t->getAggr(), new MemberExpr( dwt, base ) ); 584 } 585 } 586 } 587 } 588 } 589 574 590 void Indexer::addWith( WithStmt * stmt ) { 575 591 for ( Expression * expr : stmt->exprs ) { … … 578 594 assertf( aggr, "WithStmt expr has non-aggregate type: %s", toString( expr->result ).c_str() ); 579 595 580 for ( Declaration * decl : aggr->members ) { 581 if ( DeclarationWithType * dwt = dynamic_cast< DeclarationWithType * >( decl ) ) { 582 addId( dwt, expr ); 583 } 584 } 596 addMembers( aggr, expr ); 585 597 } 586 598 } -
src/SymTab/Indexer.h
r326cd2b r9cdfb4d0 86 86 void addWith( WithStmt * ); 87 87 88 /// adds all of the members of the Aggregate (addWith helper) 89 void addMembers( AggregateDecl * aggr, Expression * expr ); 90 88 91 /// convenience function for adding a list of Ids to the indexer 89 92 void addIds( const std::list< DeclarationWithType * > & decls ); -
src/SymTab/Validate.cc
r326cd2b r9cdfb4d0 94 94 template< typename AggDecl > void handleAggregate( AggDecl *aggregateDecl ); 95 95 96 bool inStruct = false;96 AggregateDecl * parentAggr = nullptr; 97 97 }; 98 98 … … 303 303 template< typename AggDecl > 304 304 void HoistStruct::handleAggregate( AggDecl *aggregateDecl ) { 305 if ( inStruct) {305 if ( parentAggr ) { 306 306 // Add elements in stack order corresponding to nesting structure. 307 307 declsToAddBefore.push_front( aggregateDecl ); 308 308 } else { 309 GuardValue( inStruct);310 inStruct = true;309 GuardValue( parentAggr ); 310 parentAggr = aggregateDecl; 311 311 } // if 312 312 // Always remove the hoisted aggregate from the inner structure. … … 754 754 throw SemanticError( "Cannot redefine typedef: " + tyDecl->name ); 755 755 } 756 // cannot redefine VLA typedefs 756 // Cannot redefine VLA typedefs. Note: this is slightly incorrect, because our notion of VLAs 757 // at this point in the translator is imprecise. In particular, this will disallow redefining typedefs 758 // with arrays whose dimension is an enumerator or a cast of a constant/enumerator. The effort required 759 // to fix this corner case likely outweighs the utility of allowing it. 757 760 if ( isVariableLength( t1 ) || isVariableLength( t2 ) ) { 758 761 throw SemanticError( "Cannot redefine typedef: " + tyDecl->name ); -
src/SynTree/AggregateDecl.cc
r326cd2b r9cdfb4d0 86 86 std::string TraitDecl::typeString() const { return "trait"; } 87 87 88 namespace { 89 long long int getConstValue( Expression * expr ) { 90 if ( CastExpr * castExpr = dynamic_cast< CastExpr * > ( expr ) ) { 91 return getConstValue( castExpr->arg ); 92 } else if ( ConstantExpr * constExpr = dynamic_cast< ConstantExpr * >( expr ) ) { 93 return constExpr->intValue(); 94 // can be -1, +1, etc. 95 // } else if ( UntypedExpr * untypedExpr = dynamic_cast< UntypedExpr * >( expr ) ) { 96 // if ( untypedExpr-> ) 97 } else { 98 assertf( false, "Unhandled expression type in getConstValue for enumerators: %s", toString( expr ).c_str() ); 99 } 100 } 101 } 102 103 bool EnumDecl::valueOf( Declaration * enumerator, long long int & value ) { 104 if ( enumValues.empty() ) { 105 long long int currentValue = 0; 106 for ( Declaration * member : members ) { 107 ObjectDecl * field = strict_dynamic_cast< ObjectDecl * >( member ); 108 if ( field->init ) { 109 SingleInit * init = strict_dynamic_cast< SingleInit * >( field->init ); 110 currentValue = getConstValue( init->value ); 111 } 112 assertf( enumValues.count( field->name ) == 0, "Enum %s has multiple members with the name %s", name.c_str(), field->name.c_str() ); 113 enumValues[ field->name ] = currentValue; 114 ++currentValue; 115 } 116 } 117 if ( enumValues.count( enumerator->name ) ) { 118 value = enumValues[ enumerator->name ]; 119 return true; 120 } 121 return false; 122 } 123 88 124 // Local Variables: // 89 125 // tab-width: 4 // -
src/SynTree/Declaration.h
r326cd2b r9cdfb4d0 319 319 EnumDecl( const EnumDecl &other ) : Parent( other ) {} 320 320 321 bool valueOf( Declaration * enumerator, long long int & value ); 322 321 323 virtual EnumDecl *clone() const override { return new EnumDecl( *this ); } 322 324 virtual void accept( Visitor &v ) override { v.visit( this ); } 323 325 virtual Declaration *acceptMutator( Mutator &m ) override { return m.mutate( this ); } 324 326 private: 327 std::map< std::string, long long int > enumValues; 325 328 virtual std::string typeString() const override; 326 329 }; -
src/SynTree/Expression.cc
r326cd2b r9cdfb4d0 83 83 } 84 84 85 long long int ConstantExpr::intValue() const { 86 if ( BasicType * basicType = dynamic_cast< BasicType * >( result ) ) { 87 if ( basicType->isInteger() ) { 88 return get_constant()->get_ival(); 89 } 90 } else if ( dynamic_cast< OneType * >( result ) ) { 91 return 1; 92 } else if ( dynamic_cast< ZeroType * >( result ) ) { 93 return 0; 94 } 95 throw SemanticError( "Constant expression of non-integral type ", this ); 96 } 97 85 98 VariableExpr::VariableExpr( DeclarationWithType *_var ) : Expression(), var( _var ) { 86 99 assert( var ); … … 95 108 // assert( inst->baseEnum ); 96 109 // EnumDecl * decl = inst->baseEnum; 97 // for ( Declaration * member : decl->members ) { 98 // if ( member == _var ) { 99 // type->set_lvalue( false ); 100 // } 110 // long long int value; 111 // if ( decl->valueOf( var, value ) ) { 112 // type->set_lvalue( false ); 101 113 // } 102 114 // } … … 403 415 } else { 404 416 // references have been removed, in which case dereference returns an lvalue of the base type. 405 ret-> get_result()->set_lvalue( true );417 ret->result->set_lvalue( true ); 406 418 } 407 419 } … … 589 601 if ( ! body.empty() ) { 590 602 if ( ExprStmt * exprStmt = dynamic_cast< ExprStmt * >( body.back() ) ) { 591 set_result( maybeClone( exprStmt->get_expr()->get_result() ));603 result = maybeClone( exprStmt->expr->result ); 592 604 } 593 605 } 594 606 // ensure that StmtExpr has a result type 595 607 if ( ! result ) { 596 set_result( new VoidType( Type::Qualifiers()) );608 result = new VoidType( Type::Qualifiers() ); 597 609 } 598 610 } -
src/SynTree/Expression.h
r326cd2b r9cdfb4d0 295 295 296 296 Constant * get_constant() { return & constant; } 297 const Constant * get_constant() const { return & constant; } 297 298 void set_constant( const Constant & newValue ) { constant = newValue; } 299 300 long long int intValue() const; 298 301 299 302 virtual ConstantExpr * clone() const { return new ConstantExpr( * this ); } -
src/SynTree/Label.h
r326cd2b r9cdfb4d0 33 33 std::list< Attribute * >& get_attributes() { return attributes; } 34 34 35 operator std::string() { return name; }35 operator std::string() const { return name; } 36 36 bool empty() { return name.empty(); } 37 37 private: -
src/SynTree/Type.h
r326cd2b r9cdfb4d0 356 356 bool isTtype() const; 357 357 358 bool isUnprototyped() const { return isVarArgs && parameters.size() == 0; } 359 358 360 virtual FunctionType *clone() const override { return new FunctionType( *this ); } 359 361 virtual void accept( Visitor & v ) override { v.visit( this ); } -
src/SynTree/TypeSubstitution.cc
r326cd2b r9cdfb4d0 107 107 108 108 void TypeSubstitution::normalize() { 109 PassVisitor<Substituter> sub( *this, true ); 109 110 do { 110 sub Count = 0;111 freeOnly = true;111 sub.pass.subCount = 0; 112 sub.pass.freeOnly = true; 112 113 for ( TypeEnvType::iterator i = typeEnv.begin(); i != typeEnv.end(); ++i ) { 113 i->second = i->second->acceptMutator( *this);114 i->second = i->second->acceptMutator( sub ); 114 115 } 115 } while ( sub Count );116 } 117 118 Type * TypeSubstitution:: mutate( TypeInstType *inst ) {119 BoundVarsType::const_iterator bound = boundVars.find( inst-> get_name());116 } while ( sub.pass.subCount ); 117 } 118 119 Type * TypeSubstitution::Substituter::postmutate( TypeInstType *inst ) { 120 BoundVarsType::const_iterator bound = boundVars.find( inst->name ); 120 121 if ( bound != boundVars.end() ) return inst; 121 122 122 TypeEnvType::const_iterator i = typeEnv.find( inst->get_name() );123 if ( i == typeEnv.end() ) {123 TypeEnvType::const_iterator i = sub.typeEnv.find( inst->get_name() ); 124 if ( i == sub.typeEnv.end() ) { 124 125 return inst; 125 126 } else { 126 /// std::c out<< "found " << inst->get_name() << ", replacing with ";127 /// i->second->print( std::c out);128 /// std::c out<< std::endl;127 /// std::cerr << "found " << inst->get_name() << ", replacing with "; 128 /// i->second->print( std::cerr ); 129 /// std::cerr << std::endl; 129 130 subCount++; 130 Type * newtype = i->second->clone();131 Type * newtype = i->second->clone(); 131 132 newtype->get_qualifiers() |= inst->get_qualifiers(); 132 133 delete inst; … … 135 136 } 136 137 137 Expression * TypeSubstitution:: mutate( NameExpr *nameExpr ) {138 VarEnvType::const_iterator i = varEnv.find( nameExpr->get_name());139 if ( i == varEnv.end() ) {138 Expression * TypeSubstitution::Substituter::postmutate( NameExpr * nameExpr ) { 139 VarEnvType::const_iterator i = sub.varEnv.find( nameExpr->name ); 140 if ( i == sub.varEnv.end() ) { 140 141 return nameExpr; 141 142 } else { … … 146 147 } 147 148 148 template< typename TypeClass > 149 Type *TypeSubstitution::handleType( TypeClass *type ) { 150 ValueGuard<BoundVarsType> oldBoundVars( boundVars ); 149 void TypeSubstitution::Substituter::premutate( Type * type ) { 150 GuardValue( boundVars ); 151 151 // bind type variables from forall-qualifiers 152 152 if ( freeOnly ) { 153 for ( Type::ForallList::const_iterator tyvar = type-> get_forall().begin(); tyvar != type->get_forall().end(); ++tyvar ) {154 boundVars.insert( (*tyvar )->get_name());153 for ( Type::ForallList::const_iterator tyvar = type->forall.begin(); tyvar != type->forall.end(); ++tyvar ) { 154 boundVars.insert( (*tyvar)->name ); 155 155 } // for 156 156 } // if 157 Type *ret = Mutator::mutate( type );158 return ret;159 157 } 160 158 161 159 template< typename TypeClass > 162 Type *TypeSubstitution::handleAggregateType( TypeClass *type ) {163 ValueGuard<BoundVarsType> oldBoundVars( boundVars );160 void TypeSubstitution::Substituter::handleAggregateType( TypeClass * type ) { 161 GuardValue( boundVars ); 164 162 // bind type variables from forall-qualifiers 165 163 if ( freeOnly ) { 166 for ( Type::ForallList::const_iterator tyvar = type-> get_forall().begin(); tyvar != type->get_forall().end(); ++tyvar ) {167 boundVars.insert( (*tyvar )->get_name());164 for ( Type::ForallList::const_iterator tyvar = type->forall.begin(); tyvar != type->forall.end(); ++tyvar ) { 165 boundVars.insert( (*tyvar)->name ); 168 166 } // for 169 167 // bind type variables from generic type instantiations 170 168 std::list< TypeDecl* > *baseParameters = type->get_baseParameters(); 171 if ( baseParameters && ! type-> get_parameters().empty() ) {169 if ( baseParameters && ! type->parameters.empty() ) { 172 170 for ( std::list< TypeDecl* >::const_iterator tyvar = baseParameters->begin(); tyvar != baseParameters->end(); ++tyvar ) { 173 boundVars.insert( (*tyvar)-> get_name());171 boundVars.insert( (*tyvar)->name ); 174 172 } // for 175 173 } // if 176 174 } // if 177 Type *ret = Mutator::mutate( type ); 178 return ret; 179 } 180 181 Type * TypeSubstitution::mutate( VoidType *voidType ) { 182 return handleType( voidType ); 183 } 184 185 Type * TypeSubstitution::mutate( BasicType *basicType ) { 186 return handleType( basicType ); 187 } 188 189 Type * TypeSubstitution::mutate( PointerType *pointerType ) { 190 return handleType( pointerType ); 191 } 192 193 Type * TypeSubstitution::mutate( ArrayType *arrayType ) { 194 return handleType( arrayType ); 195 } 196 197 Type * TypeSubstitution::mutate( FunctionType *functionType ) { 198 return handleType( functionType ); 199 } 200 201 Type * TypeSubstitution::mutate( StructInstType *aggregateUseType ) { 202 return handleAggregateType( aggregateUseType ); 203 } 204 205 Type * TypeSubstitution::mutate( UnionInstType *aggregateUseType ) { 206 return handleAggregateType( aggregateUseType ); 207 } 208 209 Type * TypeSubstitution::mutate( EnumInstType *aggregateUseType ) { 210 return handleType( aggregateUseType ); 211 } 212 213 Type * TypeSubstitution::mutate( TraitInstType *aggregateUseType ) { 214 return handleType( aggregateUseType ); 215 } 216 217 Type * TypeSubstitution::mutate( TupleType *tupleType ) { 218 return handleType( tupleType ); 219 } 220 221 Type * TypeSubstitution::mutate( VarArgsType *varArgsType ) { 222 return handleType( varArgsType ); 223 } 224 225 Type * TypeSubstitution::mutate( ZeroType *zeroType ) { 226 return handleType( zeroType ); 227 } 228 229 Type * TypeSubstitution::mutate( OneType *oneType ) { 230 return handleType( oneType ); 175 } 176 177 void TypeSubstitution::Substituter::premutate( StructInstType * aggregateUseType ) { 178 handleAggregateType( aggregateUseType ); 179 } 180 181 void TypeSubstitution::Substituter::premutate( UnionInstType *aggregateUseType ) { 182 handleAggregateType( aggregateUseType ); 231 183 } 232 184 -
src/SynTree/TypeSubstitution.h
r326cd2b r9cdfb4d0 27 27 #include "SynTree/Declaration.h" // for TypeDecl, Declaration (ptr only) 28 28 #include "SynTree/Expression.h" // for Expression (ptr only), NameExpr (p... 29 #include "SynTree/Mutator.h" // for Mutator30 29 #include "SynTree/Type.h" // for Type, ArrayType (ptr only), BasicT... 31 30 32 class TypeSubstitution : public Mutator { 33 typedef Mutator Parent; 31 class TypeSubstitution { 34 32 public: 35 33 TypeSubstitution(); … … 64 62 TypeSubstitution *clone() const { return new TypeSubstitution( *this ); } 65 63 private: 66 virtual Type* mutate(TypeInstType *aggregateUseType);67 virtual Expression* mutate(NameExpr *nameExpr);68 64 69 /// Records type variable bindings from forall-statements 70 template< typename TypeClass > Type *handleType( TypeClass *type ); 71 /// Records type variable bindings from forall-statements and instantiations of generic types 72 template< typename TypeClass > Type *handleAggregateType( TypeClass *type ); 73 74 virtual Type* mutate(VoidType *basicType); 75 virtual Type* mutate(BasicType *basicType); 76 virtual Type* mutate(PointerType *pointerType); 77 virtual Type* mutate(ArrayType *arrayType); 78 virtual Type* mutate(FunctionType *functionType); 79 virtual Type* mutate(StructInstType *aggregateUseType);