Changeset c0d00b6

Jenkinsfile

-              r9d06142
+              rc0d00b6
         arch_name               = ''
         architecture    = ''
         do_alltests             = false
         do_benchmark    = false
 …
                 sh 'make clean > /dev/null'
                 sh 'make > /dev/null 2>&1'
+        }
+        }
         catch (Exception caughtError) {
                 err = caughtError //rethrow error later
 …
 def build() {
         build_stage('Build') {
                 def install_dir = pwd tmp: true
                 //Configure the compilation (Output is not relevant)
                 //Use the current directory as the installation target so nothing
 …
                 if( !do_benchmark ) return
-                //Write the commit id to Benchmark
-                writeFile  file: 'bench.csv', text:'data=' + gitRefNewValue + ',' + arch_name + ','
                 //Append bench results
                 sh 'make -C src/benchmark --no-print-directory csv-data >> bench.csv'
+                sh 'make -C src/benchmark --no-print-directory jenkins githash=' + gitRefNewValue + ' arch=' + arch_name + ' | tee bench.json'
+        }
+}
 …
                 //Then publish the results
                 sh 'curl --silent --data @bench.csv http://plg2:8082/jenkins/publish > /dev/null || true'
+                sh 'curl -H "Content-Type: application/json" --silent --data @bench.json http://plg2:8082/jenkins/publish > /dev/null || true'
+        }
+}

doc/proposals/concurrency/Makefile

-              r9d06142
+              rc0d00b6
 PICTURES = ${addprefix build/, ${addsuffix .pstex, \
         system \
+        monitor_structs \
 }}
 …
         dvips $< -o $@
 build/${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex ../../LaTeXmacros/common.tex ../../LaTeXmacros/indexstyle
+build/${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex ../../LaTeXmacros/common.tex ../../LaTeXmacros/indexstyle annex/local.bib
         @ if [ ! -r ${basename $@}.ind ] ; then touch ${basename $@}.ind ; fi                           # Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
 …
         @ -${BibTeX} ${basename $@}
         @ echo "Glossary"
         makeglossaries -q -s ${basename $@}.ist ${basename $@}                                          # Make index from *.aux entries and input index at end of document
+        @ makeglossaries -q -s ${basename $@}.ist ${basename $@}                                                # Make index from *.aux entries and input index at end of document
         @ echo ".dvi generation"
         @ -build/bump_ver.sh

doc/proposals/concurrency/annex/local.bib

-              r9d06142
+              rc0d00b6
         year            = 2017
+}
+@manual{Cpp-Transactions,
+        keywords        = {C++, Transactional Memory},
+        title           = {Technical Specification for C++ Extensions for Transactional Memory},
+        organization= {International Standard ISO/IEC TS 19841:2015 },
+        publisher   = {American National Standards Institute},
+        address = {http://www.iso.org},
+        year            = 2015,
+}
+@article{BankTransfer,
+        keywords        = {Bank Transfer},
+        title   = {Bank Account Transfer Problem},
+        publisher       = {Wiki Wiki Web},
+        address = {http://wiki.c2.com},
+        year            = 2010
+}
+@misc{2FTwoHardThings,
+        keywords        = {Hard Problem},
+        title   = {TwoHardThings},
+        author  = {Martin Fowler},
+        address = {https://martinfowler.com/bliki/TwoHardThings.html},
+        year            = 2009
+}
+@article{IntrusiveData,
+        title           = {Intrusive Data Structures},
+        author  = {Jiri Soukup},
+        journal = {CppReport},
+        year            = 1998,
+        month           = May,
+        volume  = {10/No5.},
+        pages           = 22
+}
+@misc{affinityLinux,
+        title           = "{Linux man page - sched\_setaffinity(2)}"
+}
+@misc{affinityWindows,
+        title           = "{Windows (vs.85) - SetThreadAffinityMask function}"
+}
+@misc{affinityFreebsd,
+        title           = "{FreeBSD General Commands Manual - CPUSET(1)}"
+}
+@misc{affinityNetbsd,
+        title           = "{NetBSD Library Functions Manual - AFFINITY(3)}"
+}
+@misc{affinityMacosx,
+        title           = "{Affinity API Release Notes for OS X v10.5}"
+}

doc/proposals/concurrency/figures/int_monitor.fig

-              r9d06142
+              rc0d00b6
 -2
 2
+1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 600.000 2625.000 600 2325 300 2625 600 2925
+3225 4500 7425 4800
+3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3375 4650 80 80 3375 4650 3455 4730
+3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4725 4650 105 105 4725 4650 4830 4755
+3 0 1 -1 -1 0 0 4 0.000 1 0.0000 6225 4650 105 105 6225 4650 6330 4755
+0 -1 0 0 0 12 0.0000 2 135 1035 4950 4725 blocked task\001
+0 -1 0 0 0 12 0.0000 2 135 870 3525 4725 active task\001
+0 -1 0 0 0 12 0.0000 2 180 930 6450 4725 routine ptrs\001
+1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 675.000 2700.000 675 2400 375 2700 675 3000
+4533 2866 4655 3129
+1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 4657.017 2997.000 4655 2873 4533 2997 4655 3121
+1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+2866 4655 3129
 -6
+8445 1695 8655 1905
+3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 8550 1800 105 105 8550 1800 8655 1905
+1 -1 0 0 0 10 0.0000 2 75 75 8550 1860 a\001
+4725 2866 4847 3129
+1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 4849.017 2997.000 4847 2873 4725 2997 4847 3121
+1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+2866 4847 3129
 -6
+8445 1395 8655 1605
+3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 8550 1500 105 105 8550 1500 8655 1605
+1 -1 0 0 0 10 0.0000 2 105 90 8550 1560 b\001
+4911 2866 5033 3129
+1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 5035.017 2997.000 5033 2873 4911 2997 5033 3121
+1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+2866 5033 3129
 -6
+3945 1695 4155 1905
+3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4050 1800 105 105 4050 1800 4155 1905
+1 -1 0 0 0 10 0.0000 2 75 75 4050 1860 a\001
+9027 2866 9149 3129
+1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9024.983 2997.000 9027 2873 9149 2997 9027 3121
+1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+2866 9027 3129
 -6
+3945 1395 4155 1605
+3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4050 1500 105 105 4050 1500 4155 1605
+1 -1 0 0 0 10 0.0000 2 105 90 4050 1560 b\001
+9253 2866 9375 3129
+1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9250.983 2997.000 9253 2873 9375 2997 9253 3121
+1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+2866 9253 3129
+-6
+9478 2866 9600 3129
+1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9475.983 2997.000 9478 2873 9600 2997 9478 3121
+1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+2866 9478 3129
 -6
 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 7650 3675 80 80 7650 3675 7730 3755
 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3150 3675 80 80 3150 3675 3230 3755
+3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4047 1793 125 125 4047 1793 3929 1752
+3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4050 1500 125 125 4050 1500 3932 1459
+3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8550 1500 125 125 8550 1500 8432 1459
+3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8550 1800 125 125 8550 1800 8432 1759
+3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1200 2850 125 125 1200 2850 1082 2809
+3 0 1 0 7 50 -1 -1 0.000 1 0.0000 900 2850 125 125 900 2850 782 2809
+3 0 1 -1 -1 0 0 4 0.000 1 0.0000 6225 4650 105 105 6225 4650 6330 4755
+3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3150 4650 80 80 3150 4650 3230 4730
+3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4575 4650 105 105 4575 4650 4680 4755
 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
 1950 4200 2100
 …
 4050 3300 4200
 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
 2925 1350 2925
+3000 1425 3000
 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
 2325 1350 2325
+2400 1425 2400
 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
 2625 1425 2850
+2700 1500 2925
 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
 2325 1275 2550
+2400 1350 2625
 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+2625 1350 2625
+3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
+2775 1275 2645 1125 2645 1050 2775 1125 2905 1275 2905
+2775
+3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
+2775 900 2645 750 2645 675 2775 750 2905 900 2905
+2775
+3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
+3000 4725 2870 4575 2870 4500 3000 4575 3130 4725 3130
+3000
+3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
+3000 5025 2870 4875 2870 4800 3000 4875 3130 5025 3130
+3000
+3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
+3000 9225 2870 9075 2870 9000 3000 9075 3130 9225 3130
+3000
+3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
+3000 9525 2870 9375 2870 9300 3000 9375 3130 9525 3130
+3000
+1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+2775 975 2775
+1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+2775 1350 2775
+3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
+4950 4800 4820 4650 4820 4575 4950 4650 5080 4800 5080
+4950
+1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+4950 4875 4950
+3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
+4970 3450 4840 3300 4840 3225 4970 3300 5100 3450 5100
+4970
+2700 1425 2700
 1 -1 0 0 0 12 0.0000 2 135 315 2850 4275 exit\001
 1 -1 0 0 0 12 0.0000 2 135 315 7350 4275 exit\001
 …
 1 -1 0 0 0 12 0.0000 2 135 495 4050 1275 queue\001
 1 -1 0 0 0 12 0.0000 2 165 420 4050 1050 entry\001
+0 0 50 -1 0 11 0.0000 2 120 705 450 2250 Condition\001
+0 0 50 -1 0 11 0.0000 2 165 630 3600 5025 signalled\001
+0 0 50 -1 0 11 0.0000 2 165 525 4950 5025 waiting\001
+0 0 50 -1 0 11 0.0000 2 120 705 600 2325 Condition\001
+0 -1 0 0 0 12 0.0000 2 180 930 6450 4725 routine ptrs\001
+0 -1 0 0 0 12 0.0000 2 135 1050 3300 4725 active thread\001
+0 -1 0 0 0 12 0.0000 2 135 1215 4725 4725 blocked thread\001

doc/proposals/concurrency/text/basics.tex

-              r9d06142
+              rc0d00b6
 Execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining. Execution with a single and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread perspective) across the stacks is called concurrency.
 Therefore, a minimal concurrency system can be achieved by creating coroutines, which instead of context switching among each other, always ask an oracle where to context switch next. While coroutines can execute on the caller's stack-frame, stackfull coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model \cit. The oracle/scheduler can either be a stackless or stackfull entity and correspondingly require one or two context switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption.
 A scheduler introduces order of execution uncertainty, while preemption introduces uncertainty about where context-switches occur. Mutual-exclusion and synchronisation are ways of limiting non-determinism in a concurrent system. Now it is important to understand that uncertainty is desireable; uncertainty can be used by runtime systems to significantly increase performance and is often the basis of giving a user the illusion that tasks are running in parallel. Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows\cit.
+Therefore, a minimal concurrency system can be achieved by creating coroutines, which, instead of context switching among each other, always ask an oracle where to context switch next. While coroutines can execute on the caller's stack-frame, stackful coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model (a.k.a. non-preemptive scheduling). The oracle/scheduler can either be a stackless or stackful entity and correspondingly requires one or two context switches to run a different coroutine. In any case, a subset of concurrency-related challenges starts to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption.
+A scheduler introduces order of execution uncertainty, while preemption introduces uncertainty about where context-switches occur. Mutual-exclusion and synchronisation are ways of limiting non-determinism in a concurrent system. Now it is important to understand that uncertainty is desirable; uncertainty can be used by runtime systems to significantly increase performance and is often the basis of giving a user the illusion that tasks are running in parallel. Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows.
 \section{\protect\CFA 's Thread Building Blocks}
 …
 \subsection{Alternative: Lamda Objects}
 For coroutines as for threads, many implementations are based on routine pointers or function objects\cit. For example, Boost implements coroutines in terms of four functor object types:
+For coroutines as for threads, many implementations are based on routine pointers or function objects\cite{Butenhof97, ANSI14:C++, MS:VisualC++, BoostCoroutines15}. For example, Boost implements coroutines in terms of four functor object types:
 \begin{cfacode}
 asymmetric_coroutine<>::pull_type

doc/proposals/concurrency/text/concurrency.tex

-              r9d06142
+              rc0d00b6
 Approaches based on shared memory are more closely related to non-concurrent paradigms since they often rely on basic constructs like routine calls and shared objects. At the lowest level, concurrent paradigms are implemented as atomic operations and locks. Many such mechanisms have been proposed, including semaphores~\cite{Dijkstra68b} and path expressions~\cite{Campbell74}. However, for productivity reasons it is desireable to have a higher-level construct be the core concurrency paradigm~\cite{HPP:Study}.
 An approach that is worth mentioning because it is gaining in popularity is transactionnal memory~\cite{Dice10}[Check citation]. While this approach is even pursued by system languages like \CC\cit, the performance and feature set is currently too restrictive to be the main concurrency paradigm for systems language, which is why it was rejected as the core paradigm for concurrency in \CFA.
+An approach that is worth mentioning because it is gaining in popularity is transactional memory~\cite{Dice10}[Check citation]. While this approach is even pursued by systems languages like \CC\cite{Cpp-Transactions}, the performance and feature set are currently too restrictive to be the main concurrency paradigm for a systems language, which is why it was rejected as the core paradigm for concurrency in \CFA.
 One of the most natural, elegant, and efficient mechanisms for synchronization and communication, especially for shared-memory systems, is the \emph{monitor}. Monitors were first proposed by Brinch Hansen~\cite{Hansen73} and later described and extended by C.A.R.~Hoare~\cite{Hoare74}. Many programming languages---e.g., Concurrent Pascal~\cite{ConcurrentPascal}, Mesa~\cite{Mesa}, Modula~\cite{Modula-2}, Turing~\cite{Turing:old}, Modula-3~\cite{Modula-3}, NeWS~\cite{NeWS}, Emerald~\cite{Emerald}, \uC~\cite{Buhr92a} and Java~\cite{Java}---provide monitors as explicit language constructs. In addition, operating-system kernels and device drivers have a monitor-like structure, although they often use lower-level primitives such as semaphores or locks to simulate monitors. For these reasons, this project proposes monitors as the core concurrency-construct.
 …
 The \gls{multi-acq} monitor lock allows a monitor lock to be acquired by both \code{bar} or \code{baz} and acquired again in \code{foo}. In the calls to \code{bar} and \code{baz} the monitors are acquired in opposite order.
 However, such use leads to the lock acquiring order problem. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle mistake means that calling these routines concurrently may lead to deadlock and is therefore undefined behavior. As shown\cit, solving this problem requires:
+However, such use leads to the lock acquiring order problem. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle mistake means that calling these routines concurrently may lead to deadlock and is therefore undefined behavior. As shown\cite{Lister77}, solving this problem requires:
 \begin{enumerate}
         \item Dynamically tracking of the monitor-call order.
         \item Implement rollback semantics.
 \end{enumerate}
 While the first requirement is already a significant constraint on the system, implementing a general rollback semantics in a C-like language is prohibitively complex \cit. In \CFA, users simply need to be carefull when acquiring multiple monitors at the same time or only use \gls{bulk-acq} of all the monitors. While \CFA provides only a partial solution, many system provide no solution and the \CFA partial solution handles many useful cases.
+While the first requirement is already a significant constraint on the system, implementing general rollback semantics in a C-like language is still prohibitively complex \cite{Dice10}. In \CFA, users simply need to be careful when acquiring multiple monitors at the same time or only use \gls{bulk-acq} of all the monitors. While \CFA provides only a partial solution, many systems provide no solution and the \CFA partial solution handles many useful cases.
 For example, \gls{multi-acq} and \gls{bulk-acq} can be used together in interesting ways:
 …
+}
 \end{cfacode}
 This example shows a trivial solution to the bank-account transfer-problem\cit. Without \gls{multi-acq} and \gls{bulk-acq}, the solution to this problem is much more involved and requires carefull engineering.
+This example shows a trivial solution to the bank-account transfer-problem\cite{BankTransfer}. Without \gls{multi-acq} and \gls{bulk-acq}, the solution to this problem is much more involved and requires careful engineering.
 \subsection{\code{mutex} statement} \label{mutex-stmt}
 The call semantics discussed aboved have one software engineering issue, only a named routine can acquire the mutual-exclusion of a set of monitor. \CFA offers the \code{mutex} statement to workaround the need for unnecessary names, avoiding a major software engineering problem\cit. Listing \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitor is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
+The call semantics discussed above have one software engineering issue: only a named routine can acquire the mutual-exclusion of a set of monitors. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem\cite{2FTwoHardThings}. Listing \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitors is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
 \begin{figure}
 …
 % ======================================================================
 % ======================================================================
 In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronisation. With monitors, this capability is generally achieved with internal or external scheduling as in\cit. Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
+In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronisation. With monitors, this capability is generally achieved with internal or external scheduling as in \cite{Hoare74}. Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
 First, here is a simple example of such a technique:
 …
 This version uses \gls{bulk-acq} (denoted using the {\sf\&} symbol), but the presence of multiple monitors does not add a particularly new meaning. Synchronization happens between the two threads in exactly the same way and order. The only difference is that mutual exclusion covers more monitors. On the implementation side, handling multiple monitors does add a degree of complexity as the next few examples demonstrate.
 While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well known deadlock problem is the Nested Monitor Problem\cit, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem :
+While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well-known deadlock problem is the Nested Monitor Problem~\cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem:
 \begin{multicols}{2}
 \begin{pseudo}
 …
 For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. Indeed, monitors are often expressed as an entry queue and some acceptor queue as in the following figure:
+\begin{figure}[H]
 \begin{center}
 {\resizebox{0.4\textwidth}{!}{\input{monitor}}}
 \end{center}
+\label{fig:monitor}
+\end{figure}
 There are other alternatives to these pictures, but in the case of this picture, implementing a fast accept check is relatively easy. Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (e.g., 128) of mutex members. This technique cannot be used in \CFA because it relies on the fact that the monitor type enumerates (declares) all the acceptable routines. For OO languages this does not compromise much since monitors already have an exhaustive list of member routines. However, for \CFA this is not the case; routines can be added to a type anywhere after its declaration. It is important to note that the bitmask approach does not actually require an exhaustive list of routines, but it requires a dense unique ordering of routines with an upper-bound and that ordering must be consistent across translation units.

doc/proposals/concurrency/text/future.tex

-              r9d06142
+              rc0d00b6
 \section{Flexible Scheduling} \label{futur:sched}
+An important part of concurrency is scheduling. Different scheduling algorithms can affect performance (both in terms of average and variation). However, no single scheduler is optimal for all workloads and therefore there is value in being able to change the scheduler for given programs. One solution is to offer various tweaking options to users, allowing the scheduler to be adjusted to the requirements of the workload. However, in order to be truly flexible, it would be interesting to allow users to add arbitrary data and arbitrary scheduling algorithms to the scheduler. For example, a web server could attach Type-of-Service information to threads and have a ``ToS aware'' scheduling algorithm tailored to this specific web server. This path of flexible schedulers will be explored for \CFA.
 \section{Non-Blocking IO} \label{futur:nbio}
 While most of the parallelism tools
+However, many modern workloads are not bound on computation but on IO operations, an common case being webservers and XaaS (anything as a service). These type of workloads often require significant engineering around amortising costs of blocking IO operations. While improving throughtput of these operations is outside what \CFA can do as a language, it can help users to make better use of the CPU time otherwise spent waiting on IO operations. The current trend is to use asynchronous programming using tools like callbacks and/or futurs and promises\cit. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear
+However, many modern workloads are not bound on computation but on IO operations, a common case being webservers and XaaS (anything as a service). These types of workloads often require significant engineering around amortising costs of blocking IO operations. While improving throughput of these operations is outside what \CFA can do as a language, it can help users to make better use of the CPU time otherwise spent waiting on IO operations. The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises\cit. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear
 \section{Other concurrency tools} \label{futur:tools}
+While monitors offer a flexible and powerful concurrent core for \CFA, other concurrency tools are also necessary for a complete multi-paradigm concurrency package. Examples of such tools can include simple locks and condition variables, futures and promises\cite{promises}, and executors. These additional features are useful when monitors offer a level of abstraction which is inadequate for certain tasks.
 \section{Implicit threading} \label{futur:implcit}
 Simpler applications can benefit greatly from having implicit parallelism. That is, parallelism that does not rely on the user to write concurrency. This type of parallelism can be achieved both at the language level and at the library level. The cannonical example of implcit parallelism is parallel for loops, which are the simplest example of a divide and conquer algorithm\cit. Listing \ref{lst:parfor} shows three different code examples that accomplish pointwise sums of large arrays. Note that none of these example explicitly declare any concurrency or parallelism objects.
+Simpler applications can benefit greatly from having implicit parallelism. That is, parallelism that does not rely on the user to write concurrency. This type of parallelism can be achieved both at the language level and at the library level. The canonical example of implicit parallelism is parallel for loops, which are the simplest example of a divide and conquer algorithm\cite{uC++book}. Listing \ref{lst:parfor} shows three different code examples that accomplish pointwise sums of large arrays. Note that none of these examples explicitly declares any concurrency or parallelism objects.
 \begin{figure}
 …
 \end{figure}
+Implicit parallelism is a general solution and therefore is
+\section{Multiple Paradigms} \label{futur:paradigms}
+Implicit parallelism is a general solution and therefore has its limitations. However, it is a quick and simple approach to parallelism which may very well be sufficient for smaller applications and reduces the amount of boiler-plate that is needed to start benefiting from parallelism in modern CPUs.
-\section{Transactions} \label{futur:transaction}
-Concurrency and parallelism is still a very active field that strongly benefits from hardware advances. As such certain features that aren't necessarily mature enough in their current state could become relevant in the lifetime of \CFA.

doc/proposals/concurrency/text/internals.tex

-              r9d06142
+              rc0d00b6
 \chapter{Behind the scene}
 There are several challenges specific to \CFA when implementing concurrency. These challenges are direct results of \gls{bulk-acq} and loose object definitions. These two constraints are to root cause of most design decisions in the implementation. Furthermore, to avoid the head-aches of dynamically allocating memory in a concurrent environment, the internal-scheduling design is (almost) entirely free of mallocs and other dynamic memory allocation scheme. This is to avoid the chicken and egg problem \cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal, means that memory management is a constant concern in the design of the system.
 The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues. These queues need to be intrinsic\cit to avoid the need memory allocation. This entails that all the fields needed to keep track of all needed information. Since many conconcurrency operations can use an unbound amount of memory (depending on \gls{bulk-acq}) statically defining information in the intrusive fields of threads is insufficient. The only variable sized container that does not require memory allocation is the callstack, which is heavily used in the implementation of internal scheduling. Particularly the GCC extension variable length arrays which is used extensively.
+There are several challenges specific to \CFA when implementing concurrency. These challenges are a direct result of \gls{bulk-acq} and loose object-definitions. These two constraints are the root cause of most design decisions in the implementation. Furthermore, to avoid contention from dynamically allocating memory in a concurrent environment, the internal-scheduling design is (almost) entirely free of mallocs. This is to avoid the chicken and egg problem \cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal means that memory management is a constant concern in the design of the system.
+The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues. The queue design needs to be intrusive\cite{IntrusiveData} to avoid the need for memory allocation, which entails that all the nodes need specific fields to keep track of all needed information. Since many concurrency operations can use an unbounded amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient. The only variable-sized container that does not require memory allocation is the callstack, which is heavily used in the implementation of internal scheduling. In particular, variable-length arrays are used extensively.
 Since stack allocation is based around scope, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable length. The threads and the condition both allow a fixed amount of memory to be stored, while mutex-routines and the actual blocking call allow for an unbounded amount (though the latter is preferable in terms of performance).
 Note that since the major contributions of this thesis are extending monitor semantics to \gls{bulk-acq} and loose object definitions, any challenges that are not resulting of these characteristiques of \CFA are consired as problems which have already been solved and therefore will not be discussed further.
+Note that since the major contributions of this thesis are extending monitor semantics to \gls{bulk-acq} and loose object definitions, any challenges that do not result from these characteristics of \CFA are considered solved problems and are therefore not discussed further.
 % ======================================================================
 …
 % ======================================================================
 The first step towards the monitor implementation is simple mutex-routines using monitors. In the single monitor case, this is done using the entry/exit procedure highlighted in listing \ref{lst:entry1}. This entry/exit procedure doesn't actually have to be extended to support multiple monitors, indeed it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlocks\cit. In \CFA, ordering of monitor relies on memory ordering, this is sufficient because all objects are guaranteed to have distinct non-overlaping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is undefined behavior. When a mutex call is made, the concerned monitors are agregated into an variable-length pointer array and sorted based on pointer values. This array is concerved during the entire duration of the mutual-exclusion and it's ordering reused extensively.
+The first step towards the monitor implementation is simple mutex-routines using monitors. In the single monitor case, this is done using the entry/exit procedure highlighted in listing \ref{lst:entry1}. This entry/exit procedure does not actually have to be extended to support multiple monitors; indeed, it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlocks\cite{Havender68}. In \CFA, ordering of monitors relies on memory ordering. This is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is undefined behavior. When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values. This array persists for the entire duration of the mutual-exclusion and its ordering is reused extensively.
 \begin{figure}
 \begin{multicols}{2}
 …
 \end{tabular}
 \end{center}
 \caption{Callsite vs entry-point locking for mutex calls}
+\caption{Call-site vs entry-point locking for mutex calls}
 \label{fig:locking-site}
 \end{figure}
 Note the \code{mutex} keyword relies on the type system, which means that in cases where a generic monitor routine is actually desired, writing a mutex routine is possible with the proper trait, for example:
+Note the \code{mutex} keyword relies on the type system, which means that in cases where a generic monitor routine is desired, writing the mutex routine is possible with the proper trait, for example:
 \begin{cfacode}
 //Incorrect: T is not a monitor
+//Incorrect: T may not be a monitor
 forall(dtype T)
 void foo(T * mutex t);
 …
 \end{cfacode}
 Both entry-point and callsite locking are valid implementations. The current \CFA implementations uses entry-point locking because it seems to require less work if done using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. The same could be said of callsite locking, the difference being that the later does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e.: the function body.
+Both entry-point and callsite locking are feasible implementations. The current \CFA implementation uses entry-point locking because it requires less work when using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. The same could be said of callsite locking, the difference being that the latter does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e., the function body. Furthermore, entry-point locking requires less code generation: since any useful routine is called at least as often as it is defined, there can be only one entry-point but many callsites.
 % ======================================================================
 …
 % ======================================================================
 Figure \ref{fig:system1} shows a high-level picture if the \CFA runtime system in regards to concurrency.
+Figure \ref{fig:system1} shows a high-level picture of the \CFA runtime system in regards to concurrency. Each component of the picture is explained in detail in the following sections.
 \begin{figure}
 …
 \subsection{Context Switching}
 As mentionned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading. This is because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumptions means that the basic recipe for context-switch is only to copy all callee-saved registers unto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that instruction pointer can be left untouched since the context-switch always inside the same function. In the case of coroutines, that is the entire story. Threads however do not simply context-switch between each other directly. The context-switch to processors which is where the scheduling happens. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the "kernel" where scheduling and other system operation happen. Obiously, this has the cost of doubling the context-switch cost from because threads must context-switch to an intermediate stack. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield}(see section \ref{results}). additionally, for users in need for optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch to use manually (or as part of monitors). This option is not currently present in \CFA but the changes required to add it are strictly additive.
+As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading. This is because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that the instruction pointer can be left untouched since the context-switch is always inside the same function. Threads however do not context-switch between each other directly. They context-switch to the scheduler. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen. Obviously, this has the cost of doubling the context-switch cost because threads must context-switch to an intermediate stack. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). Additionally, for users in need of optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch to use manually (or as part of monitors). This option is not currently present in \CFA but the changes required to add it are strictly additive.
 \subsection{Processors}
 Parallelism in \CFA are built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically pthreads in the current implementation of \CFA. Indeed, any parallelism must go through operatiing system librairies. However, \gls{cfathread} are still the main source of concurrency, processors are simply the underlying source of parallelism. Indeed, processor kernel threads simply fetch a user-level thread from the scheduler and run, they are effectively executers for user-threads. The main benefit of this approach is that it offers a well defined boundary between kernel code and user-code, for example kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics.
+Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically pthreads in the current implementation of \CFA. Indeed, any parallelism must go through operating-system libraries. However, \glspl{uthread} are still the main source of concurrency; processors are simply the underlying source of parallelism. Indeed, processor \glspl{kthread} simply fetch a \gls{uthread} from the scheduler and run it; they are effectively executors for user-threads. The main benefit of this approach is that it offers a well defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics.
 \subsection{Stack management}
 One of the challenges of this system is to reduce the footprint as much as possible. Specifically, all pthreads created also have a stack created with them, which should be used as much as possible. Normally, coroutines also create there own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the kernel thread stack, effectively stealing the processor stack. The exception to this rule is the Main Processor, i.e. the initial kernel thread that is given to any program. In order to respect user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor.
 \subsection{Preemption}
 Finally, an important aspect for any complete threading system is preemption. As mentionned in chapter \ref{basics}, preemption introduces an extra degree of unceretainty, which enables users to have multiple threads interleave transparrently between eachother, rather than having to cooperate between thread for proper scheduling and CPU distribution. Indeed, preemption is desireable because it adds a degree of isolation between tasks. In a fully cooperative system, any thread that runs into a long loop can starve other threads, while in a preemptive system starvation can still occur but it does not rely on every thread having to yield or block on a regular basis, which reduces significantly programmer burden. Obviously, preemption is not optimal for every workload, however any preemptive system can become a cooperative system by making the time-slices extremely large. Which is why \CFA uses a preemptive threading system.
 Preemption in \CFA is based on kernel timers which are used to run a discreet event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption it sorts these expiration times in a list and sets a kernel timer for the closest one, effectiveling stepping between preemption events on each signals sent by the timer. These timers use the linux signal {\tt SIGALRM}, which is delivered to the process. This is important because when delivering signals to a process, the kernel documentation states that the signal can be delivered to any kernel thread for which the signal isn't block i.e. :
+\subsection{Preemption} \label{preemption}
+Finally, an important aspect for any complete threading system is preemption. As mentioned in chapter \ref{basics}, preemption introduces an extra degree of uncertainty, which enables users to have multiple threads interleave transparently, rather than having to cooperate among threads for proper scheduling and CPU distribution. Indeed, preemption is desirable because it adds a degree of isolation among threads. In a fully cooperative system, any thread that runs into a long loop can starve other threads, while in a preemptive system starvation can still occur but it does not rely on every thread having to yield or block on a regular basis, which significantly reduces the programmer's burden. Obviously, preemption is not optimal for every workload; however, any preemptive system can become a cooperative system by making the time-slices extremely large, which is why \CFA uses a preemptive threading system.
+Preemption in \CFA is based on kernel timers, which are used to run a discrete-event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption, it sorts these expiration times in a list and sets a kernel timer for the closest one, effectively stepping between preemption events on each signal sent by the timer. These timers use the Linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread. This results in an implementation problem, because when delivering signals to a process, the kernel documentation states that the signal can be delivered to any kernel thread for which the signal is not blocked, i.e. :
 \begin{quote}
 A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked. If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which to deliver the signal.
 …
 For the sake of simplicity and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every thread except one. Now because of how involontary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread.
+Involontary context-switching is done by sending {\tt SIGUSER1} to the corresponding processor and having the thread yield from inside the signal handler. Effectively context-switch away from the signal-handler back to the kernel and the signal-handler frame will be unwound when the thread is scheduled again. This means that a signal-handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal-handlers save and restore signal masks because user-thread migration can cause signal mask to migrate from one kernel thread to another. This is only a problem if all kernel threads among which a user thread can migrate differ in terms of signal masks. However, since the kernel thread hanlding preemption requires a different signal mask, executing user threads on the kernel alarm thread can cause deadlocks. For this reason, the alarm thread is on a tight loop around a system call to \code{sigwait} or more specifically \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g. on thread termination). This is also done using {\tt SIGALRM}, but sent throught the \code{pthread_sigqueue}. Indeed, \code{sigwait} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel.
+\subsection{Scheduler} \footnote{ I'm not sure what to write here, is this section even needed. }
+Finally, an aspect that was not mentionned yet is the scheduling algorithm. Currently, the \CFA scheduler uses a single ready queue for all processors. Will this is not the highest performance algorithm, it has the significant advantage of being robust to heterogenous workloads. This is a very simple scheduling approach but is sufficient to for the context of this thesis.
+What to do here?
+However, when
+As will be mentionned \ref{futur:sched} it needs to be updated when clusters will be
+clusters
+Among the most pressing updates to the \CFA
+uses single queue
+in future should move to multiple queues with workstealing
+general purpouse means robust > fast
+worksharing can higher standard deviation in performance
+Involuntary context-switching is done by sending signal {\tt SIGUSR1} to the corresponding processor and having the thread yield from inside the signal handler. This effectively context-switches away from the signal-handler back to the kernel, and the signal-handler frame is eventually unwound when the thread is scheduled again. This approach means that a signal-handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal-handlers save and restore signal masks because user-thread migration can cause the signal mask to migrate from one kernel thread to another. This behaviour is only a problem if all kernel threads among which a user thread can migrate differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions}. However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel alarm thread can cause deadlocks. For this reason, the alarm thread is on a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). This unblocking is also done using {\tt SIGALRM}, but sent through \code{pthread_sigqueue}. Indeed, \code{sigwaitinfo} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel.
+\subsection{Scheduler}
+Finally, an aspect that was not mentioned yet is the scheduling algorithm. Currently, the \CFA scheduler uses a single ready queue for all processors, which is the simplest approach to scheduling. Further discussion on scheduling is present in section \ref{futur:sched}.
 % ======================================================================
 …
 % ======================================================================
 % ======================================================================
+To ease the understanding of monitors, like many other concepts, they are generelly represented graphically. While non-scheduled monitors are simple enough for a graphical representation to be useful, internal scheduling is complex enough to justify a visual representation. The following figure is the traditionnal illustration of a monitor :
+The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:monitor} for convenience) :
+\begin{figure}[H]
 \begin{center}
 {\resizebox{0.4\textwidth}{!}{\input{monitor}}}
 \end{center}
+This picture has several components, the two most important being the entry-queue and the AS-stack. The entry-queue is a (almost) FIFO list where threads waiting to enter are parked, while the AS-stack is a FILO list used for threads that have been signaled or otherwise marked as running next. For \CFA, the previous picture does not have support for blocking multiple monitors on a single condition. To support \gls{bulk-acq} two changes to this picture are required. First, it doesn't make sense to tie the condition to a single monitor since blocking two monitors as one would require arbitrarily picking a monitor to hold the condition. Secondly, the object waiting on the conditions and AS-stack cannot simply contain the waiting thread since a single thread can potentially wait on multiple monitors. As mentionned in section \ref{intsched}, the handling in multiple monitors is done by partially passing, which entails that each concerned monitor needs to have a node object. However, for waiting on the condition, since all threads need to wait together, a single object needs to be queued in the condition. Moving out the condition and updating the node types yields :
+\caption{Traditional illustration of a monitor}
+\label{fig:monitor}
+\end{figure}
+This picture has several components, the two most important being the entry-queue and the AS-stack. The entry-queue is an (almost) FIFO list where threads waiting to enter are parked, while the acceptor-signalor (AS) stack is a FILO list used for threads that have been signalled or otherwise marked as running next.
+For \CFA, this picture does not have support for blocking multiple monitors on a single condition. To support \gls{bulk-acq} two changes to this picture are required. First, it is no longer helpful to attach the condition to a single monitor. Secondly, the thread waiting on the condition has to be separated across multiple monitors, which yields :
+\begin{figure}[H]
 \begin{center}
 {\resizebox{0.8\textwidth}{!}{\input{int_monitor}}}
 \end{center}
+This picture and the proper entry and leave algorithms is the fundamental implementation of internal scheduling (see listing \ref{lst:entry2}).
+\caption{Illustration of \CFA monitor}
+\label{fig:monitor_cfa}
+\end{figure}
+This picture and the proper entry and leave algorithms are the fundamental implementation of internal scheduling (see listing \ref{lst:entry2}). Note that when a thread is moved from the condition to the AS-stack, it is split into two pieces. The thread is woken up when all the pieces have moved from the AS-stacks to the active thread seat. In this picture, the threads are split into halves but this is only because there are two monitors in this picture. For a specific signaling operation every monitor needs a piece of thread on its AS-stack.
 \begin{figure}[b]
 …
 \end{figure}
+Some important things to notice about the exit routine. The solution discussed in \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a seperate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe as well as preventing any potential barging.
+The data structure used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the callstack of the \code{wait} and \code{signal_block} routines.
+There are some important things to notice about the exit routine. The solution discussed in \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe as well as preventing any potential barging. The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the callstack of the \code{wait} and \code{signal_block} routines.
+\begin{figure}[H]
+\begin{center}
+{\resizebox{0.8\textwidth}{!}{\input{monitor_structs.pstex_t}}}
+\end{center}
+\caption{Data structures involved in internal/external scheduling}
+\label{fig:structs}
+\end{figure}
+Figure \ref{fig:structs} shows a high level representation of these data-structures. The main idea behind them is that, while figure \ref{fig:monitor_cfa} is a nice illustration in theory, in practice breaking a thread into multiple pieces to put onto intrusive stacks does not make sense. The \code{condition node} is the data structure that is queued into a condition variable and, when signaled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. Once all the criteria have been popped from their respective AS-stacks, the thread is woken-up, which is what is shown in listing \ref{lst:entry2}.
 % ======================================================================
 …
 % ======================================================================
 % ======================================================================
+Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that entry-queues are no longer specific to a single monitor, as mentionned in section \ref{extsched}. This means that some kind of entry-queues must be used that is aware of both monitors and which holds threads that are currently waiting to enter the critical section. This challenge is solved for internal scheduling by having the entry-queues in conditions no longer be tied to a monitor, effectively allowing conditions to be moved outside of monitors. However, in the case of external scheduling, acceptable routines must be aware of the entry queues, which means they must be stored inside at least one of the monitors that will be acquired. This in turn adds the requirement that a systematic algorithm of disambiguating which monitor holds the relevant queue regardless of user ordering. The proposed algorithm is to fall back on monitor lock ordering and specify that the monitor that is acquired first is the one with the relevant entry queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint.
+This algorithm choice has two consequences, the entry queue of the highest priority monitor is no longer a true FIFO queue and the queue of the lowest priority monitor is both required and probably unused. The queue can no longer be a FIFO queue because instead of simply containing the waiting threads in order of arrival, they also contain a set of monitors. Therefore, another thread whos set contains the same highest priority monitor but different lower priority monitors may arrive first but enter the critical section after a thread with the correct pairing. Secondly, since it is not known at compile time which monitor will be the lowest priority monitor, every monitor needs to have the correct queues even though it is probable that some queues will go unused for the entire duration of the program, for example if a monitor is only used in a pair.
+Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. For internal scheduling, these queues are part of condition variables which are still unique for a given scheduling operation (e.g., no single statement uses multiple conditions). However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired, the monitors being the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. This requires an algorithm to choose which monitor holds the relevant queue. It is also important that said algorithm be independent of the order in which users list parameters. The proposed algorithm is to fall back on monitor lock ordering and specify that the monitor that is acquired first is the one with the relevant waiting queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint.
+This algorithm choice has two consequences :
+\begin{itemize}
+        \item The queue of the highest priority monitor is no longer a true FIFO queue because threads can be moved to the front of the queue. These queues need to contain a set of monitors for each of the waiting threads. Therefore, another thread whose set contains the same highest priority monitor but different lower priority monitors may arrive first but enter the critical section after a thread with the correct pairing.
+        \item The queue of the lowest priority monitor is both required and potentially unused. Indeed, since it is not known at compile time which monitor will be the lowest priority monitor, every monitor needs to have the correct queues even though it is possible that some queues will go unused for the entire duration of the program, for example if a monitor is only used in a specific pair.
+\end{itemize}
 Therefore, the following modifications need to be made to support external scheduling :
 \begin{itemize}
         \item The threads waiting on the entry-queue need to keep track of which routine is trying to enter, and using which set of monitors. The \code{mutex} routine already has all the required information on it's stack so the thread only needs to keep a pointer to that information.
+        \item The threads waiting on the entry-queue need to keep track of which routine is trying to enter, and using which set of monitors. The \code{mutex} routine already has all the required information on its stack so the thread only needs to keep a pointer to that information.
         \item The monitors need to keep a mask of acceptable routines. This mask contains for each acceptable routine, a routine pointer and an array of monitors to go with it. It also needs storage to keep track of which routine was accepted. Since this information is not specific to any monitor, the monitors actually contain a pointer to an integer on the stack of the waiting thread. Note that the complete mask can be pushed to any owned monitors, regardless of \code{when} statements, the \code{waitfor} statement is used in a context where the thread already has full ownership of (at least) every concerned monitor and therefore monitors will refuse all calls no matter what.
         \item The entry/exit routine need to be updated as shown in listing \ref{lst:entry3}.
 \end{itemize}
+\subsection{External scheduling - destructors}
 Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine. This routine is needed because of the storage requirements of the call order inversion. Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. For regular \code{waitfor} statements, the callstack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later. The waitfor semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor}.
 …
         continue
 elif matches waitfor mask
         push waiter to AS-stack
+        push criterions to AS-stack
         continue
 else
 …
                 if all monitors ready
                         wake-up thread
+                endif
+        endif
         if entry queue not empty
                 wake-up thread
+        endif
 \end{pseudo}
 \end{multicols}
 …
 Waitfor
 \begin{pseudo}
-lock all monitors
 if matching thread is already there
         if found destructor
 …
                 push self to AS-stack
                 baton pass
+        endif
         return
+endif
 if non-blocking
         Unlock all monitors
         Return
+endif
 push self to AS-stack

doc/proposals/concurrency/text/parallelism.tex

-              r9d06142
+              rc0d00b6
 Examples of languages that support \glspl{uthread} are Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
 \subsection{Fibers : user-level threads without preemption}
+\subsection{Fibers : user-level threads without preemption} \label{fibers}
 A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantic differences with \glspl{uthread}. The significant difference between \glspl{uthread} and \glspl{fiber} is the lack of \gls{preemption} in the latter. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths of \glspl{fiber} but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.
 …
 \subsection{Future Work: Machine setup}\label{machine}
 While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heteregenous setups. For example, system using \acrshort{numa} configurations may benefit from users being able to tie clusters and/or kernel threads to certains CPU cores. OS support for CPU affinity is now common \cit, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
+While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, systems using \acrshort{numa} configurations may benefit from users being able to tie clusters and\/or kernel threads to certain CPU cores. OS support for CPU affinity is now common \cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
 \subsection{Paradigms}\label{cfaparadigms}
 Given these building blocks, it is possible to reproduce all three of the popular paradigms. Indeed, \glspl{uthread} is the default paradigm in \CFA. However, disabling \gls{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policy can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application. Finally, it is possible to build executors for thread pools from \glspl{uthread} or \glspl{fiber}.
+% \subsection{Paradigms}\label{cfaparadigms}
+% Given these building blocks, it is possible to reproduce all three of the popular paradigms. Indeed, \glspl{uthread} is the default paradigm in \CFA. However, disabling \gls{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policy can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application. Finally, it is possible to build executors for thread pools from \glspl{uthread} or \glspl{fiber}.

doc/proposals/concurrency/text/results.tex

-              r9d06142
+              rc0d00b6
 % ======================================================================
 % ======================================================================
 \chapter{Performance results}
+\chapter{Performance results} \label{results}
 % ======================================================================
 % ======================================================================
 \section{Machine setup}
 \begin{figure}
+Table \ref{tab:machine} shows the characteristics of the machine used to run the benchmarks. All tests were run on this machine.
+\begin{figure}[H]
 \begin{center}
 \begin{tabular}{| l | r | l | r |}
 …
 \section{Micro benchmarks}
+All benchmarks are run using the same harness to produce the results, seen as the \code{BENCH()} macro in the following examples. This macro uses the following logic to benchmark the code :
+\begin{pseudo}
+#define BENCH(run, result)
+        before = gettime();
+        run;
+        after = gettime();
+        result = (after - before) / N;
+\end{pseudo}
+The method used to get time is \code{clock_gettime(CLOCK_THREAD_CPUTIME_ID);}. Each benchmark uses many iterations of a simple call to measure the cost of the call. The specific number of iterations depends on the specific benchmark.
+\subsection{Context-switching}
+The first interesting benchmark is to measure how long context-switches take. The simplest approach to do this is to yield on a thread, which executes a 2-step context switch. In order to make the comparison fair, coroutines also execute a 2-step context-switch, which is a resume/suspend cycle instead of a yield. Listing \ref{lst:ctx-switch} shows the code for coroutines and threads. All omitted tests are functionally identical to one of these tests. The results are shown in table \ref{tab:ctx-switch}.
+\begin{figure}
+\begin{multicols}{2}
+\CFA Coroutines
+\begin{cfacode}
+coroutine GreatSuspender {};
+void main(GreatSuspender& this) {
+        while(true) { suspend(); }
+}
+int main() {
+        GreatSuspender s;
+        resume(s);
+        BENCH(
+                for(size_t i=0; i<n; i++) {
+                        resume(s);
+                },
+                result
+        )
+        printf("%llu\n", result);
+}
+\end{cfacode}
+\columnbreak
+\CFA Threads
+\begin{cfacode}
+int main() {
+        BENCH(
+                for(size_t i=0; i<n; i++) {
+                        yield();
+                },
+                result
+        )
+        printf("%llu\n", result);
+}
+\end{cfacode}
+\end{multicols}
+\caption{\CFA benchmark code used to measure context-switches for coroutines and threads.}
+\label{lst:ctx-switch}
+\end{figure}
 \begin{figure}
 …
 \caption{Context Switch comparison. All numbers are in nanoseconds (\si{\nano\second})}
 \label{tab:ctx-switch}
+\end{figure}
+\subsection{Mutual-exclusion}
+The next interesting benchmark is to measure the overhead to enter/leave a critical-section. For monitors, the simplest approach is to measure how long it takes to enter and leave a monitor routine. Listing \ref{lst:mutex} shows the code for \CFA. To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a pthread mutex lock are also measured. The results are shown in table \ref{tab:mutex}.
+\begin{figure}
+\begin{cfacode}
+monitor M {};
+void __attribute__((noinline)) call( M & mutex m /*, m2, m3, m4*/ ) {}
+int main() {
+        M m/*, m2, m3, m4*/;
+        BENCH(
+                for(size_t i=0; i<n; i++) {
+                        call(m/*, m2, m3, m4*/);
+                },
+                result
+        )
+        printf("%llu\n", result);
+}
+\end{cfacode}
+\caption{\CFA benchmark code used to measure mutex routines.}
+\label{lst:mutex}
 \end{figure}
 …
 \end{figure}
+\subsection{Internal scheduling}
+The internal scheduling benchmark measures the cost of waiting on and signaling a condition variable. Listing \ref{lst:int-sched} shows the code for \CFA. The results are shown in table \ref{tab:int-sched}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests.
+\begin{figure}
+\begin{cfacode}
+volatile int go = 0;
+condition c;
+monitor M {};
+M m1;
+void __attribute__((noinline)) do_call( M & mutex a1 ) { signal(c); }
+thread T {};
+void ^?{}( T & mutex this ) {}
+void main( T & this ) {
+        while(go == 0) { yield(); }
+        while(go == 1) { do_call(m1); }
+}
+int  __attribute__((noinline)) do_wait( M & mutex a1 ) {
+        go = 1;
+        BENCH(
+                for(size_t i=0; i<n; i++) {
+                        wait(c);
+                },
+                result
+        )
+        printf("%llu\n", result);
+        go = 0;
+        return 0;
+}
+int main() {
+        T t;
+        return do_wait(m1);
+}
+\end{cfacode}
+\caption{Benchmark code for internal scheduling}
+\label{lst:int-sched}
+\end{figure}
 \begin{figure}
 \begin{center}
 …
 \end{figure}
+\subsection{External scheduling}
+The external scheduling benchmark measures the cost of the \code{waitfor} statement (\code{_Accept} in \uC). Listing \ref{lst:ext-sched} shows the code for \CFA. The results are shown in table \ref{tab:ext-sched}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests.
+\begin{figure}
+\begin{cfacode}
+volatile int go = 0;
+monitor M {};
+M m1;
+thread T {};
+void __attribute__((noinline)) do_call( M & mutex a1 ) {}
+void ^?{}( T & mutex this ) {}
+void main( T & this ) {
+        while(go == 0) { yield(); }
+        while(go == 1) { do_call(m1); }
+}
+int  __attribute__((noinline)) do_wait( M & mutex a1 ) {
+        go = 1;
+        BENCH(
+                for(size_t i=0; i<n; i++) {
+                        waitfor(do_call, a1);
+                },
+                result
+        )
+        printf("%llu\n", result);
+        go = 0;
+        return 0;
+}
+int main() {
+        T t;
+        return do_wait(m1);
+}
+\end{cfacode}
+\caption{Benchmark code for external scheduling}
+\label{lst:ext-sched}
+\end{figure}
 \begin{figure}
 \begin{center}
 …
 \end{figure}
+\begin{figure}
+\begin{center}
+\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
+\cline{2-4}
+\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
+\hline
+Pthreads                & 26974.5       & 26977 & 124.12 \\
+\CFA Coroutines & 5             & 5             & 0      \\
+\CFA Threads    & 1122.5        & 1109.86       & 36.54  \\
+\uC Coroutines  & 106           & 107.04        & 1.61   \\
+\uC Threads             & 525.5 & 533.04        & 11.14  \\
+\subsection{Object creation}
+Finally, the last benchmark measured is the cost of creation for concurrent objects. Listing \ref{lst:creation} shows the code for pthreads and \CFA threads. The results are shown in table \ref{tab:creation}. As with all other benchmarks, all omitted tests are functionally identical to one of these tests. The only note here is that the callstacks of \CFA coroutines are lazily created, therefore without priming the coroutine, the creation cost is very low.
+\begin{figure}
+\begin{multicols}{2}
+pthread
+\begin{cfacode}
+int main() {
+        BENCH(
+                for(size_t i=0; i<n; i++) {
+                        pthread_t thread;
+                        if(pthread_create(
+                                &thread,
+                                NULL,
+                                foo,
+                                NULL
+                        ) < 0) {
+                                perror( "failure" );
+                                return 1;
+                        }
+                        if(pthread_join(
+                                thread,
+                                NULL
+                        ) < 0) {
+                                perror( "failure" );
+                                return 1;
+                        }
+                },
+                result
+        )
+        printf("%llu\n", result);
+}
+\end{cfacode}
+\columnbreak
+\CFA Threads
+\begin{cfacode}
+int main() {
+        BENCH(
+                for(size_t i=0; i<n; i++) {
+                        MyThread m;
+                },
+                result
+        )
+        printf("%llu\n", result);
+}
+\end{cfacode}
+\end{multicols}
+\caption{Benchmark code for pthreads and \CFA to measure object creation}
+\label{lst:creation}
+\end{figure}
+\begin{figure}
+\begin{center}
+\begin{tabular}{| l | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] | S[table-format=5.2,table-number-alignment=right] |}
+\cline{2-4}
+\multicolumn{1}{c |}{} & \multicolumn{1}{c |}{ Median } &\multicolumn{1}{c |}{ Average } & \multicolumn{1}{c |}{ Standard Deviation} \\
+\hline
+Pthreads                        & 26974.5       & 26977 & 124.12 \\
+\CFA Coroutines Lazy    & 5             & 5             & 0      \\
+\CFA Coroutines Eager   & 335.0 & 357.67        & 34.2   \\
+\CFA Threads            & 1122.5        & 1109.86       & 36.54  \\
+\uC Coroutines          & 106           & 107.04        & 1.61   \\
+\uC Threads                     & 525.5 & 533.04        & 11.14  \\
 \hline
 \end{tabular}

doc/proposals/concurrency/text/together.tex

-              r9d06142
+              rc0d00b6
 \section{Threads as monitors}
 As it was subtely alluded in section \ref{threads}, \code{threads} in \CFA are in fact monitors. This means that all the monitors features are available when using threads. For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine :
+As was subtly alluded to in section \ref{threads}, \code{threads} in \CFA are in fact monitors, which means that all monitor features are available when using threads. For example, here is a very simple two-thread pipeline that could be used for a simulator of a game engine :
 \begin{cfacode}
 // Visualization declaration
 …
+        }
+}
+// Call destructor for simulator once simulator finishes
+// Call destructor for renderer to signify shutdown
 \end{cfacode}
 \section{Fibers \& Threads}
+As mentioned in section \ref{preemption}, \CFA uses preemptive threads by default but can use fibers on demand. Currently, using fibers is done by adding the following line of code to the program~:
+\begin{cfacode}
+unsigned int default_preemption() {
+        return 0;
+}
+\end{cfacode}
+This function is called by the kernel to fetch the default preemption rate, where 0 signifies an infinite time-slice, i.e., no preemption. However, once clusters are fully implemented, it will be possible to create fibers and uthreads on the same system :
+\begin{figure}
+\begin{cfacode}
+//Cluster forward declaration
+struct cluster;
+//Processor forward declaration
+struct processor;
+//Construct clusters with a preemption rate
+void ?{}(cluster& this, unsigned int rate);
+//Construct processor and add it to cluster
+void ?{}(processor& this, cluster& cluster);
+//Construct thread and schedule it on cluster
+void ?{}(thread& this, cluster& cluster);
+//Declare two clusters
+cluster thread_cluster = { 10`ms };                     //Preempt every 10 ms
+cluster fibers_cluster = { 0 };                         //Never preempt
+//Construct 4 processors
+processor processors[4] = {
+        //2 for the thread cluster
+        thread_cluster;
+        thread_cluster;
+        //2 for the fibers cluster
+        fibers_cluster;
+        fibers_cluster;
+};
+//Declares thread
+thread UThread {};
+void ?{}(UThread& this) {
+        //Construct underlying thread to automatically
+        //be scheduled on the thread cluster
+        (this){ thread_cluster }
+}
+void main(UThread & this);
+//Declares fibers
+thread Fiber {};
+void ?{}(Fiber& this) {
+        //Construct underlying thread to automatically
+        //be scheduled on the fiber cluster
+        (this.__thread){ fibers_cluster }
+}
+void main(Fiber & this);
+\end{cfacode}
+\end{figure}

doc/proposals/concurrency/version

r9d06142	rc0d00b6
1		0.11.47
	1	0.11.129

src/Common/Debug.h

-              r9d06142
+              rc0d00b6
 #include "SynTree/Declaration.h"
+/// debug codegen a translation unit
+static inline void debugCodeGen( const std::list< Declaration * > & translationUnit, const std::string & label ) {
+        std::list< Declaration * > decls;
+#define DEBUG
+        filter( translationUnit.begin(), translationUnit.end(), back_inserter( decls ), []( Declaration * decl ) {
+                return ! LinkageSpec::isBuiltin( decl->get_linkage() );
+        });
+namespace Debug {
+        /// debug codegen a translation unit
+        static inline void codeGen( __attribute__((unused)) const std::list< Declaration * > & translationUnit, __attribute__((unused)) const std::string & label ) {
+        #ifdef DEBUG
+                std::list< Declaration * > decls;
+        std::cerr << "======" << label << "======" << std::endl;
+        CodeGen::generate( decls, std::cerr, false, true );
+} // dump
+                filter( translationUnit.begin(), translationUnit.end(), back_inserter( decls ), []( Declaration * decl ) {
+                        return ! LinkageSpec::isBuiltin( decl->get_linkage() );
+                });
+                std::cerr << "======" << label << "======" << std::endl;
+                CodeGen::generate( decls, std::cerr, false, true );
+        #endif
+        } // dump
+        static inline void treeDump( __attribute__((unused)) const std::list< Declaration * > & translationUnit, __attribute__((unused)) const std::string & label ) {
+        #ifdef DEBUG
+                std::list< Declaration * > decls;
+                filter( translationUnit.begin(), translationUnit.end(), back_inserter( decls ), []( Declaration * decl ) {
+                        return ! LinkageSpec::isBuiltin( decl->get_linkage() );
+                });
+                std::cerr << "======" << label << "======" << std::endl;
+                printAll( decls, std::cerr );
+        #endif
+        } // dump
+}
 // Local Variables: //

src/Concurrency/Keywords.cc

r9d06142	rc0d00b6
553	553	),
554	554	new ListInit(
555		map_range < std::list<Initializer> > ( args, [~~this~~](DeclarationWithType var ){
	555	map_range < std::list<Initializer> > ( args, [](DeclarationWithType var ){
556	556	Type * type = var->get_type()->clone();
557	557	type->set_mutex( false );

src/GenPoly/Box.cc

-              r9d06142
+              rc0d00b6
                         Expression *postmutate( OffsetofExpr *offsetofExpr );
                         Expression *postmutate( OffsetPackExpr *offsetPackExpr );
+                        void premutate( StructDecl * );
+                        void premutate( UnionDecl * );
                         void beginScope();
 …
                         /// adds type parameters to the layout call; will generate the appropriate parameters if needed
                         void addOtypeParamsToLayoutCall( UntypedExpr *layoutCall, const std::list< Type* > &otypeParams );
+                        /// change the type of generic aggregate members to char[]
+                        void mutateMembers( AggregateDecl * aggrDecl );
                         /// Enters a new scope for type-variables, adding the type variables from ty
 …
                 void PolyGenericCalculator::premutate( TypedefDecl *typedefDecl ) {
+                        assert(false);
                         beginTypeScope( typedefDecl->get_base() );
+                }
 …
+                }
+                /// Converts polymorphic type T into a suitable monomorphic representation, currently: __attribute__((aligned(8))) char[size_T]. Returns a newly allocated ArrayType built from freshly allocated nodes.
+                Type * polyToMonoType( Type * declType ) {
+                        Type * charType = new BasicType( Type::Qualifiers(), BasicType::Kind::Char);
+                        Expression * size = new NameExpr( sizeofName( mangleType(declType) ) ); // dimension is a name derived from the mangled type, not a literal
+                        Attribute * aligned = new Attribute( "aligned", std::list<Expression*>{ new ConstantExpr( Constant::from_int(8) ) } ); // alignment is hard-coded to 8
+                        return new ArrayType( Type::Qualifiers(), charType, size,
+                                true, false, std::list<Attribute *>{ aligned } ); // NOTE(review): the two flags are presumably isVarLen/isStatic -- confirm against ArrayType's constructor
+                }
+                void PolyGenericCalculator::mutateMembers( AggregateDecl * aggrDecl ) { // rewrites, in place, the type of qualifying fields of aggrDecl
+                        std::set< std::string > genericParams; // names of the aggregate's own type parameters
+                        for ( TypeDecl * td : aggrDecl->parameters ) {
+                                genericParams.insert( td->name );
+                        }
+                        for ( Declaration * decl : aggrDecl->members ) {
+                                if ( ObjectDecl * field = dynamic_cast< ObjectDecl * >( decl ) ) { // members that are not ObjectDecls are left untouched
+                                        Type * ty = replaceTypeInst( field->type, env ); // NOTE(review): presumably applies env's substitution; ty is only used for the check below
+                                        if ( TypeInstType *typeInst = dynamic_cast< TypeInstType* >( ty ) ) {
+                                                // do not try to monomorphize the aggregate's own generic parameters; only other type variables found in scopeTyVars qualify
+                                                if ( scopeTyVars.find( typeInst->get_name() ) != scopeTyVars.end() && ! genericParams.count( typeInst->name ) ) {
+                                                        // polymorphic aggregate members should be converted into monomorphic members.
+                                                        // Using char[size_T] here respects the expected sizing rules of an aggregate type.
+                                                        Type * newType = polyToMonoType( field->type ); // NOTE(review): built from the field's original type, not the substituted ty -- confirm this is intended
+                                                        delete field->type; // the old type node is released before being replaced
+                                                        field->type = newType;
+                                                }
+                                        }
+                                }
+                        }
+                }
+                void PolyGenericCalculator::premutate( StructDecl * structDecl ) { // struct declarations get their polymorphic fields rewritten by mutateMembers
+                        mutateMembers( structDecl );
+                }
+                void PolyGenericCalculator::premutate( UnionDecl * unionDecl ) { // union declarations are handled exactly like structs
+                        mutateMembers( unionDecl );
+                }
                 void PolyGenericCalculator::premutate( DeclStmt *declStmt ) {
                         if ( ObjectDecl *objectDecl = dynamic_cast< ObjectDecl *>( declStmt->get_decl() ) ) {
 …
                                         // change initialization of a polymorphic value object to allocate via a VLA
                                         // (alloca was previously used, but can't be safely used in loops)
+                                        Type *declType = objectDecl->get_type();
+                                        ObjectDecl *newBuf = new ObjectDecl( bufNamer.newName(), Type::StorageClasses(), LinkageSpec::C, 0,
+                                                new ArrayType( Type::Qualifiers(), new BasicType( Type::Qualifiers(), BasicType::Kind::Char), new NameExpr( sizeofName( mangleType(declType) ) ),
+                                                true, false, std::list<Attribute*>{ new Attribute( "aligned", std::list<Expression*>{ new ConstantExpr( Constant::from_int(8) ) } ) } ), 0 );
+                                        ObjectDecl *newBuf = ObjectDecl::newObject( bufNamer.newName(), polyToMonoType( objectDecl->type ), nullptr );
                                         stmtsToAddBefore.push_back( new DeclStmt( noLabels, newBuf ) );

src/GenPoly/InstantiateGeneric.cc

-              r9d06142
+              rc0d00b6
 #include "Common/utility.h"            // for deleteAll, cloneAll
 #include "GenPoly.h"                   // for isPolyType, typesPolyCompatible
+#include "ResolvExpr/typeops.h"
 #include "ScopedSet.h"                 // for ScopedSet, ScopedSet<>::iterator
 #include "ScrubTyVars.h"               // for ScrubTyVars
 …
                 return gt;
+        }
+        /// Pass that adds a cast to dtype-static member expressions so that type information is not lost in GenericInstantiator; it is run over the translation unit before the instantiator.
+        struct FixDtypeStatic final {
+                Expression * postmutate( MemberExpr * memberExpr ); // entry point: dispatches on the aggregate's instance type
+                template<typename AggrInst>
+                Expression * fixMemberExpr( AggrInst * inst, MemberExpr * memberExpr ); // shared implementation for the struct and union instance types
+        };
         /// Mutator pass that replaces concrete instantiations of generic types with actual struct declarations, scoped appropriately
 …
         void instantiateGeneric( std::list< Declaration* > &translationUnit ) {
+                PassVisitor<FixDtypeStatic> fixer;
                 PassVisitor<GenericInstantiator> instantiator;
+                mutateAll( translationUnit, fixer );
                 mutateAll( translationUnit, instantiator );
+        }
+        bool isDtypeStatic( const std::list< TypeDecl* >& baseParams ) { // true when no parameter in baseParams reports isComplete()
+                return std::all_of( baseParams.begin(), baseParams.end(), []( TypeDecl * td ) { return ! td->isComplete(); } ); // std::all_of yields true for an empty list
+        }
 …
+        }
+        template< typename AggrInst >
+        Expression * FixDtypeStatic::fixMemberExpr( AggrInst * inst, MemberExpr * memberExpr ) { // returns memberExpr itself, or memberExpr wrapped in a CastExpr
+                // need to cast dtype-static member expressions to their actual type before that type is erased.
+                auto & baseParams = *inst->get_baseParameters(); // NOTE(review): dereferenced without a null check -- assumes the instance always has base parameters
+                if ( isDtypeStatic( baseParams ) ) {
+                        if ( ! ResolvExpr::typesCompatible( memberExpr->result, memberExpr->member->get_type(), SymTab::Indexer() ) ) { // compared using a fresh, empty Indexer
+                                // type of member and type of expression differ, so add cast to actual type
+                                return new CastExpr( memberExpr, memberExpr->result->clone() ); // cast target is a clone of the expression's own result type
+                        }
+                }
+                return memberExpr;
+        }
+        Expression * FixDtypeStatic::postmutate( MemberExpr * memberExpr ) { // only member accesses whose aggregate type passes isGenericType are considered
+                Type * aggrType = memberExpr->aggregate->result;
+                if ( isGenericType( aggrType ) ) {
+                        if ( StructInstType * inst = dynamic_cast< StructInstType * >( aggrType ) ) {
+                                return fixMemberExpr( inst, memberExpr );
+                        } else if ( UnionInstType * inst = dynamic_cast< UnionInstType * >( aggrType ) ) {
+                                return fixMemberExpr( inst, memberExpr );
+                        }
+                }
+                return memberExpr; // neither a struct nor a union instance, or not generic: expression is returned unchanged
+        }
 } // namespace GenPoly

src/InitTweak/FixInit.cc

-              r9d06142
+              rc0d00b6
                         if ( skipCopyConstruct( result ) ) return; // skip certain non-copyable types
+                        // type may involve type variables, so apply type substitution to get temporary variable's actual type.
+                        // type may involve type variables, so apply type substitution to get temporary variable's actual type,
+                        // since result type may not be substituted (e.g., if the type does not appear in the parameter list)
                         // Use applyFree so that types bound in function pointers are not substituted, e.g. in forall(dtype T) void (*)(T).
-                        result = result->clone();
                         env->applyFree( result );
                         ObjectDecl * tmp = ObjectDecl::newObject( "__tmp", result, nullptr );
 …
                         if ( returnDecl ) {
+                                UntypedExpr * assign = new UntypedExpr( new NameExpr( "?=?" ) );
+                                assign->get_args().push_back( new VariableExpr( returnDecl ) );
+                                assign->get_args().push_back( callExpr );
+                                // know the result type of the assignment is the type of the LHS (minus the pointer), so
+                                // add that onto the assignment expression so that later steps have the necessary information
+                                assign->set_result( returnDecl->get_type()->clone() );
+                                ApplicationExpr * assign = createBitwiseAssignment( new VariableExpr( returnDecl ), callExpr );
                                 Expression * retExpr = new CommaExpr( assign, new VariableExpr( returnDecl ) );
                                 // move env from callExpr to retExpr
 …
+                }
-                void addIds( SymTab::Indexer & indexer, const std::list< DeclarationWithType * > & decls ) {
-                        for ( auto d : decls ) {
-                                indexer.addId( d );
+                        }
+                }
-                void addTypes( SymTab::Indexer & indexer, const std::list< TypeDecl * > & tds ) {
-                        for ( auto td : tds ) {
-                                indexer.addType( td );
-                                addIds( indexer, td->assertions );
+                        }
+                }
                 void GenStructMemberCalls::previsit( StructDecl * structDecl ) {
                         if ( ! dtorStruct && structDecl->name == "__Destructor" ) {
 …
                                 // need to explicitly re-add function parameters to the indexer in order to resolve copy constructors
                                 auto guard = makeFuncGuard( [this]() { indexer.enterScope(); }, [this]() { indexer.leaveScope(); } );
+                                addTypes( indexer, function->type->forall );
+                                addIds( indexer, function->type->returnVals );
+                                addIds( indexer, function->type->parameters );
+                                indexer.addFunctionType( function->type );
                                 // need to iterate through members in reverse in order for
 …
                                         // insert and resolve default/copy constructor call for each field that's unhandled
                                         std::list< Statement * > stmt;
                                         Expression * arg2 = 0;
+                                        Expression * arg2 = nullptr;
                                         if ( isCopyConstructor( function ) ) {
                                                 // if copy ctor, need to pass second-param-of-this-function.field
 …
                         assert( ctorExpr->result && ctorExpr->get_result()->size() == 1 );
-                        // xxx - ideally we would reuse the temporary generated from the copy constructor passes from within firstArg if it exists and not generate a temporary if it's unnecessary.
-                        ObjectDecl * tmp = ObjectDecl::newObject( tempNamer.newName(), ctorExpr->get_result()->clone(), nullptr );
-                        declsToAddBefore.push_back( tmp );
                         // xxx - this can be TupleAssignExpr now. Need to properly handle this case.
                         ApplicationExpr * callExpr = strict_dynamic_cast< ApplicationExpr * > ( ctorExpr->get_callExpr() );
 …
                         ctorExpr->set_callExpr( nullptr );
                         ctorExpr->set_env( nullptr );
+                        // xxx - ideally we would reuse the temporary generated from the copy constructor passes from within firstArg if it exists and not generate a temporary if it's unnecessary.
+                        ObjectDecl * tmp = ObjectDecl::newObject( tempNamer.newName(), callExpr->args.front()->result->clone(), nullptr );
+                        declsToAddBefore.push_back( tmp );
                         delete ctorExpr;

src/InitTweak/InitTweak.cc

-              r9d06142
+              rc0d00b6
 #include "Parser/LinkageSpec.h"    // for Spec, isBuiltin, Intrinsic
 #include "ResolvExpr/typeops.h"    // for typesCompatibleIgnoreQualifiers
+#include "SymTab/Autogen.h"
 #include "SymTab/Indexer.h"        // for Indexer
 #include "SynTree/Attribute.h"     // for Attribute
 …
         class InitExpander::ExpanderImpl {
         public:
+                virtual ~ExpanderImpl() = default;
                 virtual std::list< Expression * > next( std::list< Expression * > & indices ) = 0;
                 virtual Statement * buildListInit( UntypedExpr * callExpr, std::list< Expression * > & indices ) = 0;
 …
         public:
                 InitImpl( Initializer * init ) : init( init ) {}
+                virtual ~InitImpl() = default;
                 virtual std::list< Expression * > next( __attribute((unused)) std::list< Expression * > & indices ) {
 …
         public:
                 ExprImpl( Expression * expr ) : arg( expr ) {}
+                ~ExprImpl() { delete arg; }
+                virtual ~ExprImpl() { delete arg; }
                 virtual std::list< Expression * > next( std::list< Expression * > & indices ) {
 …
+        }
+        ApplicationExpr * createBitwiseAssignment( Expression * dst, Expression * src ) { // builds an application of a synthesized "?=?" to ( dst, src ); both operands may be wrapped below
+                static FunctionDecl * assign = nullptr; // fake operator declaration: built on the first call, reused by every later call
+                if ( ! assign ) {
+                        // temporary? Generate a fake assignment operator to represent bitwise assignments.
+                        // This operator could easily exist as a real function, but it's tricky because nothing should resolve to this function.
+                        TypeDecl * td = new TypeDecl( "T", noStorageClasses, nullptr, TypeDecl::Dtype, true ); // type variable "T" (Dtype kind) over which the fake operator is declared
+                        assign = new FunctionDecl( "?=?", noStorageClasses, LinkageSpec::Intrinsic, SymTab::genAssignType( new TypeInstType( noQualifiers, td->name, td ) ), nullptr ); // intrinsic-linkage "?=?" whose function type is genAssignType applied to an instance of T
+                }
+                if ( dynamic_cast< ReferenceType * >( dst->result ) ) { // dst's result is already a reference type: wrap it in an AddressExpr
+                        dst = new AddressExpr( dst );
+                } else { // otherwise cast dst to a reference to a clone of its own result type
+                        dst = new CastExpr( dst, new ReferenceType( noQualifiers, dst->result->clone() ) );
+                }
+                if ( dynamic_cast< ReferenceType * >( src->result ) ) { // src's result is a reference type: cast it to a reference to a clone of result->stripReferences()
+                        src = new CastExpr( src, new ReferenceType( noQualifiers, src->result->stripReferences()->clone() ) );
+                }
+                return new ApplicationExpr( VariableExpr::functionPointer( assign ), { dst, src } ); // NOTE(review): the result is heap-allocated; presumably the caller takes ownership -- confirm
+        }
         class ConstExprChecker : public Visitor {
         public:

src/InitTweak/InitTweak.h

-              r9d06142
+              rc0d00b6
         /// returns the first parameter of a constructor/destructor/assignment function
         ObjectDecl * getParamThis( FunctionType * ftype );
+        /// generate a bitwise assignment operation.
+        ApplicationExpr * createBitwiseAssignment( Expression * dst, Expression * src );
         /// transform Initializer into an argument list that can be passed to a call expression

src/Makefile.in

-              r9d06142
+              rc0d00b6
         ResolvExpr/driver_cfa_cpp-TypeEnvironment.$(OBJEXT) \
         ResolvExpr/driver_cfa_cpp-CurrentObject.$(OBJEXT) \
+        ResolvExpr/driver_cfa_cpp-ExplodedActual.$(OBJEXT) \
         SymTab/driver_cfa_cpp-Indexer.$(OBJEXT) \
         SymTab/driver_cfa_cpp-Mangler.$(OBJEXT) \
 …
         ResolvExpr/FindOpenVars.cc ResolvExpr/PolyCost.cc \
         ResolvExpr/Occurs.cc ResolvExpr/TypeEnvironment.cc \
+        ResolvExpr/CurrentObject.cc SymTab/Indexer.cc \
+        SymTab/Mangler.cc SymTab/Validate.cc SymTab/FixFunction.cc \
+        SymTab/ImplementationType.cc SymTab/TypeEquality.cc \
+        SymTab/Autogen.cc SynTree/Type.cc SynTree/VoidType.cc \
+        SynTree/BasicType.cc SynTree/PointerType.cc \
+        SynTree/ArrayType.cc SynTree/ReferenceType.cc \
+        SynTree/FunctionType.cc SynTree/ReferenceToType.cc \
+        SynTree/TupleType.cc SynTree/TypeofType.cc SynTree/AttrType.cc \
+        ResolvExpr/CurrentObject.cc ResolvExpr/ExplodedActual.cc \
+        SymTab/Indexer.cc SymTab/Mangler.cc SymTab/Validate.cc \
+        SymTab/FixFunction.cc SymTab/ImplementationType.cc \
+        SymTab/TypeEquality.cc SymTab/Autogen.cc SynTree/Type.cc \
+        SynTree/VoidType.cc SynTree/BasicType.cc \
+        SynTree/PointerType.cc SynTree/ArrayType.cc \
+        SynTree/ReferenceType.cc SynTree/FunctionType.cc \
+        SynTree/ReferenceToType.cc SynTree/TupleType.cc \
+        SynTree/TypeofType.cc SynTree/AttrType.cc \
         SynTree/VarArgsType.cc SynTree/ZeroOneType.cc \
         SynTree/Constant.cc SynTree/Expression.cc SynTree/TupleExpr.cc \
 …
         ResolvExpr/$(am__dirstamp) \
         ResolvExpr/$(DEPDIR)/$(am__dirstamp)
+ResolvExpr/driver_cfa_cpp-ExplodedActual.$(OBJEXT):  \
+        ResolvExpr/$(am__dirstamp) \
+        ResolvExpr/$(DEPDIR)/$(am__dirstamp)
 SymTab/$(am__dirstamp):
         @$(MKDIR_P) SymTab
 …
 @AMDEP_TRUE@@am__include@ @am__quote@ResolvExpr/$(DEPDIR)/driver_cfa_cpp-ConversionCost.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@ResolvExpr/$(DEPDIR)/driver_cfa_cpp-CurrentObject.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@ResolvExpr/$(DEPDIR)/driver_cfa_cpp-ExplodedActual.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@ResolvExpr/$(DEPDIR)/driver_cfa_cpp-FindOpenVars.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@ResolvExpr/$(DEPDIR)/driver_cfa_cpp-Occurs.Po@am__quote@
 …
 @am__fastdepCXX_FALSE@  $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(driver_cfa_cpp_CXXFLAGS) $(CXXFLAGS) -c -o ResolvExpr/driver_cfa_cpp-CurrentObject.obj `if test -f 'ResolvExpr/CurrentObject.cc'; then $(CYGPATH_W) 'ResolvExpr/CurrentObject.cc'; else $(CYGPATH_W) '$(srcdir)/ResolvExpr/CurrentObject.cc'; fi`
+ResolvExpr/driver_cfa_cpp-ExplodedActual.o: ResolvExpr/ExplodedActual.cc
+@am__fastdepCXX_TRUE@   $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(driver_cfa_cpp_CXXFLAGS) $(CXXFLAGS) -MT ResolvExpr/driver_cfa_cpp-ExplodedActual.o -MD -MP -MF ResolvExpr/$(DEPDIR)/driver_cfa_cpp-ExplodedActual.Tpo -c -o ResolvExpr/driver_cfa_cpp-ExplodedActual.o `test -f 'ResolvExpr/ExplodedActual.cc' || echo '$(srcdir)/'`ResolvExpr/ExplodedActual.cc
+@am__fastdepCXX_TRUE@   $(AM_V_at)$(am__mv) ResolvExpr/$(DEPDIR)/driver_cfa_cpp-ExplodedActual.Tpo ResolvExpr/$(DEPDIR)/driver_cfa_cpp-ExplodedActual.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@      $(AM_V_CXX)source='ResolvExpr/ExplodedActual.cc' object='ResolvExpr/driver_cfa_cpp-ExplodedActual.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@      DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@  $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(driver_cfa_cpp_CXXFLAGS) $(CXXFLAGS) -c -o ResolvExpr/driver_cfa_cpp-ExplodedActual.o `test -f 'ResolvExpr/ExplodedActual.cc' || echo '$(srcdir)/'`ResolvExpr/ExplodedActual.cc
+ResolvExpr/driver_cfa_cpp-ExplodedActual.obj: ResolvExpr/ExplodedActual.cc
+@am__fastdepCXX_TRUE@   $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(driver_cfa_cpp_CXXFLAGS) $(CXXFLAGS) -MT ResolvExpr/driver_cfa_cpp-ExplodedActual.obj -MD -MP -MF ResolvExpr/$(DEPDIR)/driver_cfa_cpp-ExplodedActual.Tpo -c -o ResolvExpr/driver_cfa_cpp-ExplodedActual.obj `if test -f 'ResolvExpr/ExplodedActual.cc'; then $(CYGPATH_W) 'ResolvExpr/ExplodedActual.cc'; else $(CYGPATH_W) '$(srcdir)/ResolvExpr/ExplodedActual.cc'; fi`
+@am__fastdepCXX_TRUE@   $(AM_V_at)$(am__mv) ResolvExpr/$(DEPDIR)/driver_cfa_cpp-ExplodedActual.Tpo ResolvExpr/$(DEPDIR)/driver_cfa_cpp-ExplodedActual.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@      $(AM_V_CXX)source='ResolvExpr/ExplodedActual.cc' object='ResolvExpr/driver_cfa_cpp-ExplodedActual.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@      DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@  $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(driver_cfa_cpp_CXXFLAGS) $(CXXFLAGS) -c -o ResolvExpr/driver_cfa_cpp-ExplodedActual.obj `if test -f 'ResolvExpr/ExplodedActual.cc'; then $(CYGPATH_W) 'ResolvExpr/ExplodedActual.cc'; else $(CYGPATH_W) '$(srcdir)/ResolvExpr/ExplodedActual.cc'; fi`
 SymTab/driver_cfa_cpp-Indexer.o: SymTab/Indexer.cc
 @am__fastdepCXX_TRUE@   $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(driver_cfa_cpp_CXXFLAGS) $(CXXFLAGS) -MT SymTab/driver_cfa_cpp-Indexer.o -MD -MP -MF SymTab/$(DEPDIR)/driver_cfa_cpp-Indexer.Tpo -c -o SymTab/driver_cfa_cpp-Indexer.o `test -f 'SymTab/Indexer.cc' || echo '$(srcdir)/'`SymTab/Indexer.cc

src/Parser/DeclarationNode.cc

-              r9d06142
+              rc0d00b6
 // Created On       : Sat May 16 12:34:05 2015
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Sat Sep 23 18:16:48 2017
 // Update Count     : 1024
+// Last Modified On : Mon Nov 20 09:21:52 2017
+// Update Count     : 1031
 //
 …
 DeclarationNode * DeclarationNode::addQualifiers( DeclarationNode * q ) {
         if ( ! q ) { delete q; return this; }
+        if ( ! q ) { delete q; return this; }                           // empty qualifier
         checkSpecifiers( q );
         copySpecifiers( q );
+        if ( ! q->type ) {
+                delete q;
+                return this;
+        } // if
+        if ( ! q->type ) { delete q; return this; }
         if ( ! type ) {
                 type = q->type;                                                                 // reuse this structure
+                type = q->type;                                                                 // reuse structure
                 q->type = nullptr;
                 delete q;
 …
         } // if
         if ( q->type->forall ) {
                 if ( type->forall ) {
                         type->forall->appendList( q->type->forall );
+        if ( q->type->forall ) {                                                        // forall qualifier ?
+                if ( type->forall ) {                                                   // polymorphic routine ?
+                        type->forall->appendList( q->type->forall ); // augment forall qualifier
                 } else {
+                        if ( type->kind == TypeData::Aggregate ) {
+                                type->aggregate.params = q->type->forall;
+                                // change implicit typedef from TYPEDEFname to TYPEGENname
+                                typedefTable.changeKind( *type->aggregate.name, TypedefTable::TG );
+                        } else {
+                                type->forall = q->type->forall;
+                        if ( type->kind == TypeData::Aggregate ) {      // struct/union ?
+                                if ( type->aggregate.params ) {                 // polymorphic ?
+                                        type->aggregate.params->appendList( q->type->forall ); // augment forall qualifier
+                                } else {                                                                // not polymorphic
+                                        type->aggregate.params = q->type->forall; // make polymorphic type
+                                        // change implicit typedef from TYPEDEFname to TYPEGENname
+                                        typedefTable.changeKind( *type->aggregate.name, TypedefTable::TG );
+                                } // if
+                        } else {                                                                        // not polymorphic
+                                type->forall = q->type->forall;                 // make polymorphic routine
                         } // if
                 } // if
                 q->type->forall = nullptr;
+                q->type->forall = nullptr;                                              // forall qualifier moved
         } // if

src/Parser/parser.yy

-              r9d06142
+              rc0d00b6
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Wed Oct 25 12:28:54 2017
 // Update Count     : 2893
+// Last Modified On : Mon Nov 20 09:45:36 2017
+// Update Count     : 2945
 //
 …
         } // for
 } // distExt
+// There is an ambiguity for inline generic-routine return-types and generic routines.
+//   forall( otype T ) struct S { int i; } bar( T ) {}
+// Does the forall bind to the struct or the routine, and how would it be possible to explicitly specify the binding?
+//   forall( otype T ) struct S { int T; } forall( otype W ) bar( W ) {}
+void rebindForall( DeclarationNode * declSpec, DeclarationNode * funcDecl ) {
+        // nothing to rebind unless the return specifier is an aggregate definition
+        if ( declSpec->type->kind != TypeData::Aggregate ) return;
+        // move forall from aggregate to function type, leaving the aggregate without parameters
+        funcDecl->type->forall = declSpec->type->aggregate.params;
+        declSpec->type->aggregate.params = nullptr;
+} // rebindForall
 bool forall = false;                                                                    // does the aggregate have one or more forall qualifiers ?
 …
+// Handle single shift/reduce conflict for dangling else by shifting the ELSE token. For example, this string
+// is ambiguous:
+// .---------.                          matches IF '(' comma_expression ')' statement . (reduce)
+// if ( C ) S1 else S2
+// `-----------------'          matches IF '(' comma_expression ')' statement . (shift) ELSE statement */
+// Handle shift/reduce conflict for dangling else by shifting the ELSE token. For example, this string is ambiguous:
+//   .---------.                                matches IF '(' comma_expression ')' statement . (reduce)
+//   if ( C ) S1 else S2
+//   `-----------------'                matches IF '(' comma_expression ')' statement . (shift) ELSE statement */
 // Similar issues exist with the waitfor statement.
 …
 %precedence TIMEOUT     // token precedence for start of TIMEOUT in WAITFOR statement
 %precedence ELSE        // token precedence for start of else clause in IF/WAITFOR statement
+// Handle shift/reduce conflict for generic type by shifting the '(' token. For example, this string is ambiguous:
+//   forall( otype T ) struct Foo { T v; };
+//       .-----.                                matches pointer to function returning a generic (which is impossible without a type)
+//   Foo ( *fp )( int );
+//   `---'                                              matches start of TYPEGENname '('
+// Must be:
+// Foo( int ) ( *fp )( int );
+// Order of these lines matters (low-to-high precedence).
+%precedence TYPEGENname
+%precedence '('
 %locations                      // support location tracking for error messages
 …
 typegen_name:                                                                                   // CFA
+        TYPEGENname '(' ')'
+        TYPEGENname
+                { $$ = DeclarationNode::newFromTypeGen( $1, nullptr ); }
+        | TYPEGENname '(' ')'
                 { $$ = DeclarationNode::newFromTypeGen( $1, nullptr ); }
         | TYPEGENname '(' type_list ')'
 …
+                }
         | aggregate_key attribute_list_opt typegen_name         // CFA
+                { $$ = $3->addQualifiers( $2 ); }
+                {
+                        // Create new generic declaration with same name as previous forward declaration, where the IDENTIFIER is
+                        // switched to a TYPEGENname. Link any generic arguments from typegen_name to new generic declaration and
+                        // delete newFromTypeGen.
+                        $$ = DeclarationNode::newAggregate( $1, $3->type->symbolic.name, $3->type->symbolic.actuals, nullptr, false )->addQualifiers( $2 );
+                        $3->type->symbolic.name = nullptr;
+                        $3->type->symbolic.actuals = nullptr;
+                        delete $3;
+                }
+        ;
 …
         | declaration_specifier function_declarator with_clause_opt compound_statement
+                {
+                        rebindForall( $1, $2 );
                         typedefTable.addToEnclosingScope( TypedefTable::ID );
                         typedefTable.leaveScope();
 …
         | declaration_specifier KR_function_declarator KR_declaration_list_opt with_clause_opt compound_statement
+                {
+                        rebindForall( $1, $2 );
                         typedefTable.addToEnclosingScope( TypedefTable::ID );
                         typedefTable.leaveScope();

src/ResolvExpr/Alternative.cc

-              r9d06142
+              rc0d00b6
 #include <ostream>                       // for operator<<, ostream, basic_o...
 #include <string>                        // for operator<<, char_traits, string
+#include <utility>                       // for move
 #include "Common/utility.h"              // for maybeClone
 …
                 os << std::endl;
+        }
+        void splice( AltList& dst, AltList& src ) {
+                // grow dst once up front so the appends below do not reallocate
+                dst.reserve( dst.size() + src.size() );
+                // number of elements to transfer, fixed before any append happens
+                const AltList::size_type count = src.size();
+                for ( AltList::size_type i = 0; i < count; ++i ) {
+                        dst.push_back( std::move( src[i] ) );
+                }
+                // discard the moved-from elements left behind in src
+                src.clear();
+        }
+        void spliceBegin( AltList& dst, AltList& src ) {
+                // step 1: append dst's elements after src's own, which empties dst
+                splice( src, dst );
+                // step 2: exchange the two lists, so dst holds src's elements followed by its own and src is empty
+                src.swap( dst );
+        }
 } // namespace ResolvExpr

src/ResolvExpr/Alternative.h

-              r9d06142
+              rc0d00b6
 #include <iosfwd>             // for ostream
 #include <list>               // for list
+#include <vector>             // for vector
 #include "Cost.h"             // for Cost
 …
 namespace ResolvExpr {
-        struct Alternative;
-        typedef std::list< Alternative > AltList;
         struct Alternative {
                 Alternative();
 …
                 void print( std::ostream &os, Indenter indent = {} ) const;
+                /// Returns the stored expression, but released from management of this Alternative
+                Expression* release_expr() {
+                        Expression* tmp = expr;
+                        expr = nullptr;
+                        return tmp;
+                }
                 Cost cost;
                 Cost cvtCost;
 …
                 TypeEnvironment env;
         };
+        typedef std::vector< Alternative > AltList;
+        /// Moves all elements from src to the end of dst
+        void splice( AltList& dst, AltList& src );
+        /// Moves all elements from src to the beginning of dst
+        void spliceBegin( AltList& dst, AltList& src );
 } // namespace ResolvExpr

src/ResolvExpr/AlternativeFinder.cc

-              r9d06142
+              rc0d00b6
 #include <algorithm>               // for copy
 #include <cassert>                 // for strict_dynamic_cast, assert, assertf
+#include <cstddef>                 // for size_t
 #include <iostream>                // for operator<<, cerr, ostream, endl
 #include <iterator>                // for back_insert_iterator, back_inserter
 #include <list>                    // for _List_iterator, list, _List_const_...
 #include <map>                     // for _Rb_tree_iterator, map, _Rb_tree_c...
 #include <memory>                  // for allocator_traits<>::value_type
+#include <memory>                  // for allocator_traits<>::value_type, unique_ptr
 #include <utility>                 // for pair
+#include <vector>                  // for vector
 #include "Alternative.h"           // for AltList, Alternative
 …
 #include "Common/utility.h"        // for deleteAll, printAll, CodeLocation
 #include "Cost.h"                  // for Cost, Cost::zero, operator<<, Cost...
+#include "ExplodedActual.h"        // for ExplodedActual
 #include "InitTweak/InitTweak.h"   // for getFunctionName
 #include "RenameVars.h"            // for RenameVars, global_renamer
 …
 #define PRINT( text ) if ( resolvep ) { text }
 //#define DEBUG_COST
+using std::move;
+/// Returns a by-value copy of x; works for any type T that can be copy-constructed from a const reference
+template<typename T>
+T copy(const T& x) { return x; }
 namespace ResolvExpr {
 …
                 expr->accept( *this );
                 if ( failFast && alternatives.empty() ) {
+                        PRINT(
+                                std::cerr << "No reasonable alternatives for expression " << expr << std::endl;
+                        )
                         throw SemanticError( "No reasonable alternatives for expression ", expr );
+                }
 …
                                 printAlts( alternatives, std::cerr );
+                        )
                         AltList::iterator oldBegin = alternatives.begin();
                         pruneAlternatives( alternatives.begin(), alternatives.end(), front_inserter( alternatives ) );
                         if ( failFast && alternatives.begin() == oldBegin ) {
+                        AltList pruned;
+                        pruneAlternatives( alternatives.begin(), alternatives.end(), back_inserter( pruned ) );
+                        if ( failFast && pruned.empty() ) {
                                 std::ostringstream stream;
                                 AltList winners;
 …
                                 throw SemanticError( stream.str() );
+                        }
                         alternatives.erase( oldBegin, alternatives.end() );
+                        alternatives = move(pruned);
                         PRINT(
                                 std::cerr << "there are " << oldsize << " alternatives before elimination" << std::endl;
 …
                 tmpCost.incPoly( -tmpCost.get_polyCost() );
                 if ( tmpCost != Cost::zero ) {
-                // if ( convCost != Cost::zero ) {
                         Type *newType = formalType->clone();
                         env.apply( newType );
 …
 ///     needAssertions.insert( needAssertions.end(), (*tyvar)->get_assertions().begin(), (*tyvar)->get_assertions().end() );
+                }
+        }
-        /// instantiate a single argument by matching actuals from [actualIt, actualEnd) against formalType,
-        /// producing expression(s) in out and their total cost in cost.
-        template< typename AltIterator, typename OutputIterator >
-        bool instantiateArgument( Type * formalType, Initializer * defaultValue, AltIterator & actualIt, AltIterator actualEnd, OpenVarSet & openVars, TypeEnvironment & resultEnv, AssertionSet & resultNeed, AssertionSet & resultHave, const SymTab::Indexer & indexer, Cost & cost, OutputIterator out ) {
-                if ( TupleType * tupleType = dynamic_cast< TupleType * >( formalType ) ) {
-                        // formalType is a TupleType - group actuals into a TupleExpr whose type unifies with the TupleType
-                        std::list< Expression * > exprs;
-                        for ( Type * type : *tupleType ) {
-                                if ( ! instantiateArgument( type, defaultValue, actualIt, actualEnd, openVars, resultEnv, resultNeed, resultHave, indexer, cost, back_inserter( exprs ) ) ) {
-                                        deleteAll( exprs );
-                                        return false;
+                                }
+                        }
-                        *out++ = new TupleExpr( exprs );
-                } else if ( TypeInstType * ttype = Tuples::isTtype( formalType ) ) {
-                        // xxx - mixing default arguments with variadic??
-                        std::list< Expression * > exprs;
-                        for ( ; actualIt != actualEnd; ++actualIt ) {
-                                exprs.push_back( actualIt->expr->clone() );
-                                cost += actualIt->cost;
+                        }
-                        Expression * arg = nullptr;
-                        if ( exprs.size() == 1 && Tuples::isTtype( exprs.front()->get_result() ) ) {
-                                // the case where a ttype value is passed directly is special, e.g. for argument forwarding purposes
-                                // xxx - what if passing multiple arguments, last of which is ttype?
-                                // xxx - what would happen if unify was changed so that unifying tuple types flattened both before unifying lists? then pass in TupleType(ttype) below.
-                                arg = exprs.front();
-                        } else {
-                                arg = new TupleExpr( exprs );
+                        }
-                        assert( arg && arg->get_result() );
-                        if ( ! unify( ttype, arg->get_result(), resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
-                                return false;
+                        }
-                        *out++ = arg;
-                } else if ( actualIt != actualEnd ) {
-                        // both actualType and formalType are atomic (non-tuple) types - if they unify
-                        // then accept actual as an argument, otherwise return false (fail to instantiate argument)
-                        Expression * actual = actualIt->expr;
-                        Type * actualType = actual->get_result();
-                        PRINT(
-                                std::cerr << "formal type is ";
-                                formalType->print( std::cerr );
-                                std::cerr << std::endl << "actual type is ";
-                                actualType->print( std::cerr );
-                                std::cerr << std::endl;
+                        )
-                        if ( ! unify( formalType, actualType, resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
-                                // std::cerr << "unify failed" << std::endl;
-                                return false;
+                        }
-                        // move the expression from the alternative to the output iterator
-                        *out++ = actual;
-                        actualIt->expr = nullptr;
-                        cost += actualIt->cost;
-                        ++actualIt;
-                } else {
-                        // End of actuals - Handle default values
-                        if ( SingleInit *si = dynamic_cast<SingleInit *>( defaultValue )) {
-                                if ( CastExpr * castExpr = dynamic_cast< CastExpr * >( si->get_value() ) ) {
-                                        // so far, only constant expressions are accepted as default values
-                                        if ( ConstantExpr *cnstexpr = dynamic_cast<ConstantExpr *>( castExpr->get_arg() ) ) {
-                                                if ( Constant *cnst = dynamic_cast<Constant *>( cnstexpr->get_constant() ) ) {
-                                                        if ( unify( formalType, cnst->get_type(), resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
-                                                                *out++ = cnstexpr->clone();
-                                                                return true;
-                                                        } // if
-                                                } // if
-                                        } // if
+                                }
-                        } // if
-                        return false;
-                } // if
-                return true;
+        }
-        bool AlternativeFinder::instantiateFunction( std::list< DeclarationWithType* >& formals, const AltList &actuals, bool isVarArgs, OpenVarSet& openVars, TypeEnvironment &resultEnv, AssertionSet &resultNeed, AssertionSet &resultHave, AltList & out ) {
-                simpleCombineEnvironments( actuals.begin(), actuals.end(), resultEnv );
-                // make sure we don't widen any existing bindings
-                for ( TypeEnvironment::iterator i = resultEnv.begin(); i != resultEnv.end(); ++i ) {
-                        i->allowWidening = false;
+                }
-                resultEnv.extractOpenVars( openVars );
-                // flatten actuals so that each actual has an atomic (non-tuple) type
-                AltList exploded;
-                Tuples::explode( actuals, indexer, back_inserter( exploded ) );
-                AltList::iterator actualExpr = exploded.begin();
-                AltList::iterator actualEnd = exploded.end();
-                for ( DeclarationWithType * formal : formals ) {
-                        // match flattened actuals with formal parameters - actuals will be grouped to match
-                        // with formals as appropriate
-                        Cost cost = Cost::zero;
-                        std::list< Expression * > newExprs;
-                        ObjectDecl * obj = strict_dynamic_cast< ObjectDecl * >( formal );
-                        if ( ! instantiateArgument( obj->get_type(), obj->get_init(), actualExpr, actualEnd, openVars, resultEnv, resultNeed, resultHave, indexer, cost, back_inserter( newExprs ) ) ) {
-                                deleteAll( newExprs );
-                                return false;
+                        }
-                        // success - produce argument as a new alternative
-                        assert( newExprs.size() == 1 );
-                        out.push_back( Alternative( newExprs.front(), resultEnv, cost ) );
+                }
-                if ( actualExpr != actualEnd ) {
-                        // there are still actuals remaining, but we've run out of formal parameters to match against
-                        // this is okay only if the function is variadic
-                        if ( ! isVarArgs ) {
-                                return false;
+                        }
-                        out.splice( out.end(), exploded, actualExpr, actualEnd );
+                }
-                return true;
+        }
 …
+        }
+        template< typename OutputIterator >
+        void AlternativeFinder::makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const AltList &actualAlt, OutputIterator out ) {
+                OpenVarSet openVars;
+                AssertionSet resultNeed, resultHave;
+                TypeEnvironment resultEnv( func.env );
+                makeUnifiableVars( funcType, openVars, resultNeed );
+                resultEnv.add( funcType->get_forall() ); // add all type variables as open variables now so that those not used in the parameter list are still considered open
+                AltList instantiatedActuals; // filled by instantiate function
+        /// Gets a default value from an initializer, nullptr if not present
+        ConstantExpr* getDefaultValue( Initializer* init ) {
+                if ( SingleInit* si = dynamic_cast<SingleInit*>( init ) ) {
+                        if ( CastExpr* ce = dynamic_cast<CastExpr*>( si->get_value() ) ) {
+                                return dynamic_cast<ConstantExpr*>( ce->get_arg() );
+                        }
+                }
+                return nullptr;
+        }
+        /// State to iteratively build a match of parameter expressions to arguments
+        struct ArgPack {
+                std::size_t parent;                ///< Index of parent pack
+                std::unique_ptr<Expression> expr;  ///< The argument stored here
+                Cost cost;                         ///< The cost of this argument
+                TypeEnvironment env;               ///< Environment for this pack
+                AssertionSet need;                 ///< Assertions outstanding for this pack
+                AssertionSet have;                 ///< Assertions found for this pack
+                OpenVarSet openVars;               ///< Open variables for this pack
+                unsigned nextArg;                  ///< Index of next argument in arguments list
+                unsigned tupleStart;               ///< Number of tuples that start at this index
+                unsigned nextExpl;                 ///< Index of next exploded element
+                unsigned explAlt;                  ///< Index of alternative for nextExpl > 0
+                ArgPack()
+                        : parent(0), expr(), cost(Cost::zero), env(), need(), have(), openVars(), nextArg(0),
+                          tupleStart(0), nextExpl(0), explAlt(0) {}
+                ArgPack(const TypeEnvironment& env, const AssertionSet& need, const AssertionSet& have,
+                                const OpenVarSet& openVars)
+                        : parent(0), expr(), cost(Cost::zero), env(env), need(need), have(have),
+                          openVars(openVars), nextArg(0), tupleStart(0), nextExpl(0), explAlt(0) {}
+                ArgPack(std::size_t parent, Expression* expr, TypeEnvironment&& env, AssertionSet&& need,
+                                AssertionSet&& have, OpenVarSet&& openVars, unsigned nextArg,
+                                unsigned tupleStart = 0, Cost cost = Cost::zero, unsigned nextExpl = 0,
+                                unsigned explAlt = 0 )
+                        : parent(parent), expr(expr->clone()), cost(cost), env(move(env)), need(move(need)),
+                          have(move(have)), openVars(move(openVars)), nextArg(nextArg), tupleStart(tupleStart),
+                          nextExpl(nextExpl), explAlt(explAlt) {}
+                ArgPack(const ArgPack& o, TypeEnvironment&& env, AssertionSet&& need, AssertionSet&& have,
+                                OpenVarSet&& openVars, unsigned nextArg, Cost added )
+                        : parent(o.parent), expr(o.expr ? o.expr->clone() : nullptr), cost(o.cost + added),
+                          env(move(env)), need(move(need)), have(move(have)), openVars(move(openVars)),
+                          nextArg(nextArg), tupleStart(o.tupleStart), nextExpl(0), explAlt(0) {}
+                /// true iff this pack is in the middle of an exploded argument
+                bool hasExpl() const { return nextExpl > 0; }
+                /// Gets the list of exploded alternatives for this pack
+                const ExplodedActual& getExpl( const ExplodedArgs& args ) const {
+                        return args[nextArg-1][explAlt];
+                }
+                /// Ends a tuple expression, consolidating the appropriate actuals
+                void endTuple( const std::vector<ArgPack>& packs ) {
+                        // add all expressions in tuple to list, summing cost
+                        std::list<Expression*> exprs;
+                        const ArgPack* pack = this;
+                        if ( expr ) { exprs.push_front( expr.release() ); }
+                        while ( pack->tupleStart == 0 ) {
+                                pack = &packs[pack->parent];
+                                exprs.push_front( pack->expr->clone() );
+                                cost += pack->cost;
+                        }
+                        // reset pack to appropriate tuple
+                        expr.reset( new TupleExpr( exprs ) );
+                        tupleStart = pack->tupleStart - 1;
+                        parent = pack->parent;
+                }
+        };
+        /// Instantiates an argument to match a formal, returns false if no results left
+        bool instantiateArgument( Type* formalType, Initializer* initializer,
+                        const ExplodedArgs& args, std::vector<ArgPack>& results, std::size_t& genStart,
+                        const SymTab::Indexer& indexer, unsigned nTuples = 0 ) {
+                if ( TupleType* tupleType = dynamic_cast<TupleType*>( formalType ) ) {
+                        // formalType is a TupleType - group actuals into a TupleExpr
+                        ++nTuples;
+                        for ( Type* type : *tupleType ) {
+                                // xxx - dropping initializer changes behaviour from previous, but seems correct
+                                if ( ! instantiateArgument(
+                                                type, nullptr, args, results, genStart, indexer, nTuples ) )
+                                        return false;
+                                nTuples = 0;
+                        }
+                        // re-consititute tuples for final generation
+                        for ( auto i = genStart; i < results.size(); ++i ) {
+                                results[i].endTuple( results );
+                        }
+                        return true;
+                } else if ( TypeInstType* ttype = Tuples::isTtype( formalType ) ) {
+                        // formalType is a ttype, consumes all remaining arguments
+                        // xxx - mixing default arguments with variadic??
+                        // completed tuples; will be spliced to end of results to finish
+                        std::vector<ArgPack> finalResults{};
+                        // iterate until all results completed
+                        std::size_t genEnd;
+                        ++nTuples;
+                        do {
+                                genEnd = results.size();
+                                // add another argument to results
+                                for ( std::size_t i = genStart; i < genEnd; ++i ) {
+                                        auto nextArg = results[i].nextArg;
+                                        // use next element of exploded tuple if present
+                                        if ( results[i].hasExpl() ) {
+                                                const ExplodedActual& expl = results[i].getExpl( args );
+                                                unsigned nextExpl = results[i].nextExpl + 1;
+                                                if ( nextExpl == expl.exprs.size() ) {
+                                                        nextExpl = 0;
+                                                }
+                                                results.emplace_back(
+                                                        i, expl.exprs[results[i].nextExpl].get(), copy(results[i].env),
+                                                        copy(results[i].need), copy(results[i].have),
+                                                        copy(results[i].openVars), nextArg, nTuples, Cost::zero, nextExpl,
+                                                        results[i].explAlt );
+                                                continue;
+                                        }
+                                        // finish result when out of arguments
+                                        if ( nextArg >= args.size() ) {
+                                                ArgPack newResult{
+                                                        results[i].env, results[i].need, results[i].have,
+                                                        results[i].openVars };
+                                                newResult.nextArg = nextArg;
+                                                Type* argType;
+                                                if ( nTuples > 0 ) {
+                                                        // first iteration, push empty tuple expression
+                                                        newResult.parent = i;
+                                                        std::list<Expression*> emptyList;
+                                                        newResult.expr.reset( new TupleExpr( emptyList ) );
+                                                        argType = newResult.expr->get_result();
+                                                } else {
+                                                        // clone result to collect tuple
+                                                        newResult.parent = results[i].parent;
+                                                        newResult.cost = results[i].cost;
+                                                        newResult.tupleStart = results[i].tupleStart;
+                                                        newResult.expr.reset( results[i].expr->clone() );
+                                                        argType = newResult.expr->get_result();
+                                                        if ( results[i].tupleStart > 0 && Tuples::isTtype( argType ) ) {
+                                                                // the case where a ttype value is passed directly is special,
+                                                                // e.g. for argument forwarding purposes
+                                                                // xxx - what if passing multiple arguments, last of which is
+                                                                //       ttype?
+                                                                // xxx - what would happen if unify was changed so that unifying
+                                                                //       tuple
+                                                                // types flattened both before unifying lists? then pass in
+                                                                // TupleType (ttype) below.
+                                                                --newResult.tupleStart;
+                                                        } else {
+                                                                // collapse leftover arguments into tuple
+                                                                newResult.endTuple( results );
+                                                                argType = newResult.expr->get_result();
+                                                        }
+                                                }
+                                                // check unification for ttype before adding to final
+                                                if ( unify( ttype, argType, newResult.env, newResult.need, newResult.have,
+                                                                newResult.openVars, indexer ) ) {
+                                                        finalResults.push_back( move(newResult) );
+                                                }
+                                                continue;
+                                        }
+                                        // add each possible next argument
+                                        for ( std::size_t j = 0; j < args[nextArg].size(); ++j ) {
+                                                const ExplodedActual& expl = args[nextArg][j];
+                                                // fresh copies of parent parameters for this iteration
+                                                TypeEnvironment env = results[i].env;
+                                                OpenVarSet openVars = results[i].openVars;
+                                                env.addActual( expl.env, openVars );
+                                                // skip empty tuple arguments by (near-)cloning parent into next gen
+                                                if ( expl.exprs.empty() ) {
+                                                        results.emplace_back(
+                                                                results[i], move(env), copy(results[i].need),
+                                                                copy(results[i].have), move(openVars), nextArg + 1, expl.cost );
+                                                        continue;
+                                                }
+                                                // add new result
+                                                results.emplace_back(
+                                                        i, expl.exprs.front().get(), move(env), copy(results[i].need),
+                                                        copy(results[i].have), move(openVars), nextArg + 1,
+                                                        nTuples, expl.cost, expl.exprs.size() == 1 ? 0 : 1, j );
+                                        }
+                                }
+                                // reset for next round
+                                genStart = genEnd;
+                                nTuples = 0;
+                        } while ( genEnd != results.size() );
+                        // splice final results onto results
+                        for ( std::size_t i = 0; i < finalResults.size(); ++i ) {
+                                results.push_back( move(finalResults[i]) );
+                        }
+                        return ! finalResults.empty();
+                }
+                // iterate each current subresult
+                std::size_t genEnd = results.size();
+                for ( std::size_t i = genStart; i < genEnd; ++i ) {
+                        auto nextArg = results[i].nextArg;
+                        // use remainder of exploded tuple if present
+                        if ( results[i].hasExpl() ) {
+                                const ExplodedActual& expl = results[i].getExpl( args );
+                                Expression* expr = expl.exprs[results[i].nextExpl].get();
+                                TypeEnvironment env = results[i].env;
+                                AssertionSet need = results[i].need, have = results[i].have;
+                                OpenVarSet openVars = results[i].openVars;
+                                Type* actualType = expr->get_result();
+                                PRINT(
+                                        std::cerr << "formal type is ";
+                                        formalType->print( std::cerr );
+                                        std::cerr << std::endl << "actual type is ";
+                                        actualType->print( std::cerr );
+                                        std::cerr << std::endl;
+                                )
+                                if ( unify( formalType, actualType, env, need, have, openVars, indexer ) ) {
+                                        unsigned nextExpl = results[i].nextExpl + 1;
+                                        if ( nextExpl == expl.exprs.size() ) {
+                                                nextExpl = 0;
+                                        }
+                                        results.emplace_back(
+                                                i, expr, move(env), move(need), move(have), move(openVars), nextArg,
+                                                nTuples, Cost::zero, nextExpl, results[i].explAlt );
+                                }
+                                continue;
+                        }
+                        // use default initializers if out of arguments
+                        if ( nextArg >= args.size() ) {
+                                if ( ConstantExpr* cnstExpr = getDefaultValue( initializer ) ) {
+                                        if ( Constant* cnst = dynamic_cast<Constant*>( cnstExpr->get_constant() ) ) {
+                                                TypeEnvironment env = results[i].env;
+                                                AssertionSet need = results[i].need, have = results[i].have;
+                                                OpenVarSet openVars = results[i].openVars;
+                                                if ( unify( formalType, cnst->get_type(), env, need, have, openVars,
+                                                                indexer ) ) {
+                                                        results.emplace_back(
+                                                                i, cnstExpr, move(env), move(need), move(have),
+                                                                move(openVars), nextArg, nTuples );
+                                                }
+                                        }
+                                }
+                                continue;
+                        }
+                        // Check each possible next argument
+                        for ( std::size_t j = 0; j < args[nextArg].size(); ++j ) {
+                                const ExplodedActual& expl = args[nextArg][j];
+                                // fresh copies of parent parameters for this iteration
+                                TypeEnvironment env = results[i].env;
+                                AssertionSet need = results[i].need, have = results[i].have;
+                                OpenVarSet openVars = results[i].openVars;
+                                env.addActual( expl.env, openVars );
+                                // skip empty tuple arguments by (near-)cloning parent into next gen
+                                if ( expl.exprs.empty() ) {
+                                        results.emplace_back(
+                                                results[i], move(env), move(need), move(have), move(openVars),
+                                                nextArg + 1, expl.cost );
+                                        continue;
+                                }
+                                // consider only first exploded actual
+                                Expression* expr = expl.exprs.front().get();
+                                Type* actualType = expr->get_result()->clone();
+                                PRINT(
+                                        std::cerr << "formal type is ";
+                                        formalType->print( std::cerr );
+                                        std::cerr << std::endl << "actual type is ";
+                                        actualType->print( std::cerr );
+                                        std::cerr << std::endl;
+                                )
+                                // attempt to unify types
+                                if ( unify( formalType, actualType, env, need, have, openVars, indexer ) ) {
+                                        // add new result
+                                        results.emplace_back(
+                                                i, expr, move(env), move(need), move(have), move(openVars), nextArg + 1,
+                                                nTuples, expl.cost, expl.exprs.size() == 1 ? 0 : 1, j );
+                                }
+                        }
+                }
+                // reset for next parameter
+                genStart = genEnd;
+                return genEnd != results.size();
+        }
+        template<typename OutputIterator>
+        void AlternativeFinder::validateFunctionAlternative( const Alternative &func, ArgPack& result,
+                        const std::vector<ArgPack>& results, OutputIterator out ) {
+                ApplicationExpr *appExpr = new ApplicationExpr( func.expr->clone() );
+                // sum cost and accumulate actuals
+                std::list<Expression*>& args = appExpr->get_args();
+                Cost cost = Cost::zero;
+                const ArgPack* pack = &result;
+                while ( pack->expr ) {
+                        args.push_front( pack->expr->clone() );
+                        cost += pack->cost;
+                        pack = &results[pack->parent];
+                }
+                // build and validate new alternative
+                Alternative newAlt( appExpr, result.env, cost );
+                PRINT(
+                        std::cerr << "instantiate function success: " << appExpr << std::endl;
+                        std::cerr << "need assertions:" << std::endl;
+                        printAssertionSet( result.need, std::cerr, 8 );
+                )
+                inferParameters( result.need, result.have, newAlt, result.openVars, out );
+        }
+        template<typename OutputIterator>
         /// Generates the alternatives that apply `func` (of type `funcType`) to the pre-exploded
         /// arguments `args`, passing each complete match to validateFunctionAlternative with `out`.
         ///
         /// Steps visible below: open the function's type variables, unify the first return type
         /// with `targetType` when one is set and non-void, match formals one at a time with
         /// instantiateArgument, then either append leftover arguments (varargs functions) or keep
         /// only the matches that consumed every argument.
         ///
         /// NOTE(review): this span of the changeset appears to interleave lines of the previous
         /// implementation (those using resultEnv, actualAlt, instantiatedActuals) with the new
         /// one, so the braces do not balance as shown - confirm against the real source file.
+        void AlternativeFinder::makeFunctionAlternatives( const Alternative &func,
+                        FunctionType *funcType, const ExplodedArgs &args, OutputIterator out ) {
+                OpenVarSet funcOpenVars;
+                AssertionSet funcNeed, funcHave;
+                TypeEnvironment funcEnv( func.env );
+                makeUnifiableVars( funcType, funcOpenVars, funcNeed );
+                // add all type variables as open variables now so that those not used in the parameter
+                // list are still considered open.
+                funcEnv.add( funcType->get_forall() );
                 if ( targetType && ! targetType->isVoid() && ! funcType->get_returnVals().empty() ) {
                         // attempt to narrow based on expected target type
                         Type * returnType = funcType->get_returnVals().front()->get_type();
+                        if ( ! unify( returnType, targetType, resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
+                                // unification failed, don't pursue this alternative
+                        if ( ! unify( returnType, targetType, funcEnv, funcNeed, funcHave, funcOpenVars,
+                                        indexer ) ) {
+                                // unification failed, don't pursue this function alternative
                                 return;
+                        }
+                }
+                if ( instantiateFunction( funcType->get_parameters(), actualAlt, funcType->get_isVarArgs(), openVars, resultEnv, resultNeed, resultHave, instantiatedActuals ) ) {
+                        ApplicationExpr *appExpr = new ApplicationExpr( func.expr->clone() );
+                        Alternative newAlt( appExpr, resultEnv, sumCost( instantiatedActuals ) );
+                        makeExprList( instantiatedActuals, appExpr->get_args() );
+                        PRINT(
+                                std::cerr << "instantiate function success: " << appExpr << std::endl;
+                                std::cerr << "need assertions:" << std::endl;
+                                printAssertionSet( resultNeed, std::cerr, 8 );
+                        )
+                        inferParameters( resultNeed, resultHave, newAlt, openVars, out );
+                // iteratively build matches, one parameter at a time
+                std::vector<ArgPack> results;
                 // results[0] is the root pack: it has no expression and holds the function's state
+                results.push_back( ArgPack{ funcEnv, funcNeed, funcHave, funcOpenVars } );
+                std::size_t genStart = 0;
+                for ( DeclarationWithType* formal : funcType->get_parameters() ) {
+                        ObjectDecl* obj = strict_dynamic_cast< ObjectDecl* >( formal );
                         // give up on this function as soon as one formal cannot be matched
+                        if ( ! instantiateArgument(
+                                        obj->get_type(), obj->get_init(), args, results, genStart, indexer ) )
+                                return;
+                }
+                if ( funcType->get_isVarArgs() ) {
+                        // append any unused arguments to vararg pack
+                        std::size_t genEnd;
+                        do {
+                                genEnd = results.size();
+                                // iterate results
+                                for ( std::size_t i = genStart; i < genEnd; ++i ) {
+                                        auto nextArg = results[i].nextArg;
+                                        // use remainder of exploded tuple if present
+                                        if ( results[i].hasExpl() ) {
+                                                const ExplodedActual& expl = results[i].getExpl( args );
+                                                unsigned nextExpl = results[i].nextExpl + 1;
+                                                if ( nextExpl == expl.exprs.size() ) {
+                                                        nextExpl = 0;
+                                                }
+                                                results.emplace_back(
+                                                        i, expl.exprs[results[i].nextExpl].get(), copy(results[i].env),
+                                                        copy(results[i].need), copy(results[i].have),
+                                                        copy(results[i].openVars), nextArg, 0, Cost::zero, nextExpl,
+                                                        results[i].explAlt );
+                                                continue;
+                                        }
+                                        // finish result when out of arguments
+                                        if ( nextArg >= args.size() ) {
+                                                validateFunctionAlternative( func, results[i], results, out );
+                                                continue;
+                                        }
+                                        // add each possible next argument
+                                        for ( std::size_t j = 0; j < args[nextArg].size(); ++j ) {
+                                                const ExplodedActual& expl = args[nextArg][j];
+                                                // fresh copies of parent parameters for this iteration
+                                                TypeEnvironment env = results[i].env;
+                                                OpenVarSet openVars = results[i].openVars;
+                                                env.addActual( expl.env, openVars );
+                                                // skip empty tuple arguments by (near-)cloning parent into next gen
+                                                if ( expl.exprs.empty() ) {
+                                                        results.emplace_back(
+                                                                results[i], move(env), copy(results[i].need),
+                                                                copy(results[i].have), move(openVars), nextArg + 1, expl.cost );
+                                                        continue;
+                                                }
+                                                // add new result
+                                                results.emplace_back(
+                                                        i, expl.exprs.front().get(), move(env), copy(results[i].need),
+                                                        copy(results[i].have), move(openVars), nextArg + 1, 0,
+                                                        expl.cost, expl.exprs.size() == 1 ? 0 : 1, j );
+                                        }
+                                }
                                 // the next round only extends the packs added during this one
+                                genStart = genEnd;
+                        } while ( genEnd != results.size() );
+                } else {
+                        // filter out results that don't use all the arguments
+                        for ( std::size_t i = genStart; i < results.size(); ++i ) {
+                                ArgPack& result = results[i];
+                                if ( ! result.hasExpl() && result.nextArg >= args.size() ) {
+                                        validateFunctionAlternative( func, result, results, out );
+                                }
+                        }
+                }
+        }
 …
                 if ( funcFinder.alternatives.empty() ) return;
+                std::list< AlternativeFinder > argAlternatives;
+                findSubExprs( untypedExpr->begin_args(), untypedExpr->end_args(), back_inserter( argAlternatives ) );
+                std::list< AltList > possibilities;
+                combos( argAlternatives.begin(), argAlternatives.end(), back_inserter( possibilities ) );
+                std::vector< AlternativeFinder > argAlternatives;
+                findSubExprs( untypedExpr->begin_args(), untypedExpr->end_args(),
+                        back_inserter( argAlternatives ) );
                 // take care of possible tuple assignments
                 // if not tuple assignment, assignment is taken care of as a normal function call
                 Tuples::handleTupleAssignment( *this, untypedExpr, possibilities );
+                Tuples::handleTupleAssignment( *this, untypedExpr, argAlternatives );
                 // find function operators
 …
                         printAlts( funcOpFinder.alternatives, std::cerr, 1 );
+                )
+                // pre-explode arguments
+                ExplodedArgs argExpansions;
+                argExpansions.reserve( argAlternatives.size() );
+                for ( const AlternativeFinder& arg : argAlternatives ) {
+                        argExpansions.emplace_back();
+                        auto& argE = argExpansions.back();
+                        argE.reserve( arg.alternatives.size() );
+                        for ( const Alternative& actual : arg ) {
+                                argE.emplace_back( actual, indexer );
+                        }
+                }
                 AltList candidates;
 …
                                                 Alternative newFunc( *func );
                                                 referenceToRvalueConversion( newFunc.expr );
+                                                for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
+                                                        // XXX
+                                                        //Designators::check_alternative( function, *actualAlt );
+                                                        makeFunctionAlternatives( newFunc, function, *actualAlt, std::back_inserter( candidates ) );
+                                                }
+                                                makeFunctionAlternatives( newFunc, function, argExpansions,
+                                                        std::back_inserter( candidates ) );
+                                        }
                                 } else if ( TypeInstType *typeInst = dynamic_cast< TypeInstType* >( func->expr->get_result()->stripReferences() ) ) { // handle ftype (e.g. *? on function pointer)
 …
                                                         Alternative newFunc( *func );
                                                         referenceToRvalueConversion( newFunc.expr );
+                                                        for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
+                                                                makeFunctionAlternatives( newFunc, function, *actualAlt, std::back_inserter( candidates ) );
+                                                        } // for
+                                                        makeFunctionAlternatives( newFunc, function, argExpansions,
+                                                                std::back_inserter( candidates ) );
                                                 } // if
                                         } // if
+                                }
+                                // try each function operator ?() with the current function alternative and each of the argument combinations
+                                for ( AltList::iterator funcOp = funcOpFinder.alternatives.begin(); funcOp != funcOpFinder.alternatives.end(); ++funcOp ) {
+                                        // check if the type is pointer to function
+                                        if ( PointerType *pointer = dynamic_cast< PointerType* >( funcOp->expr->get_result()->stripReferences() ) ) {
+                                                if ( FunctionType *function = dynamic_cast< FunctionType* >( pointer->get_base() ) ) {
+                        } catch ( SemanticError &e ) {
+                                errors.append( e );
+                        }
+                } // for
+                // try each function operator ?() with each function alternative
+                if ( ! funcOpFinder.alternatives.empty() ) {
+                        // add exploded function alternatives to front of argument list
+                        std::vector<ExplodedActual> funcE;
+                        funcE.reserve( funcFinder.alternatives.size() );
+                        for ( const Alternative& actual : funcFinder ) {
+                                funcE.emplace_back( actual, indexer );
+                        }
+                        argExpansions.insert( argExpansions.begin(), move(funcE) );
+                        for ( AltList::iterator funcOp = funcOpFinder.alternatives.begin();
+                                        funcOp != funcOpFinder.alternatives.end(); ++funcOp ) {
+                                try {
+                                        // check if type is a pointer to function
+                                        if ( PointerType* pointer = dynamic_cast<PointerType*>(
+                                                        funcOp->expr->get_result()->stripReferences() ) ) {
+                                                if ( FunctionType* function =
+                                                                dynamic_cast<FunctionType*>( pointer->get_base() ) ) {
                                                         Alternative newFunc( *funcOp );
                                                         referenceToRvalueConversion( newFunc.expr );
+                                                        for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
+                                                                AltList currentAlt;
+                                                                currentAlt.push_back( *func );
+                                                                currentAlt.insert( currentAlt.end(), actualAlt->begin(), actualAlt->end() );
+                                                                makeFunctionAlternatives( newFunc, function, currentAlt, std::back_inserter( candidates ) );
+                                                        } // for
+                                                } // if
+                                        } // if
+                                } // for
+                        } catch ( SemanticError &e ) {
+                                errors.append( e );
+                        }
+                } // for
+                                                        makeFunctionAlternatives( newFunc, function, argExpansions,
+                                                                std::back_inserter( candidates ) );
+                                                }
+                                        }
+                                } catch ( SemanticError &e ) {
+                                        errors.append( e );
+                                }
+                        }
+                }
                 // Implement SFINAE; resolution errors are only errors if there aren't any non-erroneous resolutions
 …
                 // compute conversion costs
                 for ( AltList::iterator withFunc = candidates.begin(); withFunc != candidates.end(); ++withFunc ) {
                         Cost cvtCost = computeApplicationConversionCost( *withFunc, indexer );
+                for ( Alternative& withFunc : candidates ) {
+                        Cost cvtCost = computeApplicationConversionCost( withFunc, indexer );
                         PRINT(
                                 ApplicationExpr *appExpr = strict_dynamic_cast< ApplicationExpr* >( withFunc->expr );
+                                ApplicationExpr *appExpr = strict_dynamic_cast< ApplicationExpr* >( withFunc.expr );
                                 PointerType *pointer = strict_dynamic_cast< PointerType* >( appExpr->get_function()->get_result() );
                                 FunctionType *function = strict_dynamic_cast< FunctionType* >( pointer->get_base() );
 …
                                 printAll( appExpr->get_args(), std::cerr, 8 );
                                 std::cerr << "bindings are:" << std::endl;
                                 withFunc->env.print( std::cerr, 8 );
+                                withFunc.env.print( std::cerr, 8 );
                                 std::cerr << "cost of conversion is:" << cvtCost << std::endl;
+                        )
                         if ( cvtCost != Cost::infinity ) {
                                 withFunc->cvtCost = cvtCost;
                                 alternatives.push_back( *withFunc );
+                                withFunc.cvtCost = cvtCost;
+                                alternatives.push_back( withFunc );
                         } // if
                 } // for
+                candidates.clear();
+                candidates.splice( candidates.end(), alternatives );
+                findMinCost( candidates.begin(), candidates.end(), std::back_inserter( alternatives ) );
+                // function may return struct or union value, in which case we need to add alternatives for implicit
+                // conversions to each of the anonymous members, must happen after findMinCost since anon conversions
+                // are never the cheapest expression
+                for ( const Alternative & alt : alternatives ) {
+                candidates = move(alternatives);
+                // use a new list so that alternatives are not examined by addAnonConversions twice.
+                AltList winners;
+                findMinCost( candidates.begin(), candidates.end(), std::back_inserter( winners ) );
+                // function may return struct or union value, in which case we need to add alternatives
+                // for implicit conversions to each of the anonymous members, must happen after findMinCost
+                // since anon conversions are never the cheapest expression
+                for ( const Alternative & alt : winners ) {
                         addAnonConversions( alt );
+                }
+                spliceBegin( alternatives, winners );
                 if ( alternatives.empty() && targetType && ! targetType->isVoid() ) {
 …
                 AlternativeFinder finder( indexer, env );
                 finder.find( addressExpr->get_arg() );
+                for ( std::list< Alternative >::iterator i = finder.alternatives.begin(); i != finder.alternatives.end(); ++i ) {
+                        if ( isLvalue( i->expr ) ) {
+                                alternatives.push_back( Alternative( new AddressExpr( i->expr->clone() ), i->env, i->cost ) );
+                for ( Alternative& alt : finder.alternatives ) {
+                        if ( isLvalue( alt.expr ) ) {
+                                alternatives.push_back(
+                                        Alternative{ new AddressExpr( alt.expr->clone() ), alt.env, alt.cost } );
                         } // if
                 } // for
 …
         void AlternativeFinder::visit( LabelAddressExpr * expr ) {
                 alternatives.push_back( Alternative( expr->clone(), env, Cost::zero) );
+                alternatives.push_back( Alternative{ expr->clone(), env, Cost::zero } );
+        }
 …
                 AltList candidates;
                 for ( std::list< Alternative >::iterator i = finder.alternatives.begin(); i != finder.alternatives.end(); ++i ) {
+                for ( Alternative & alt : finder.alternatives ) {
                         AssertionSet needAssertions, haveAssertions;
                         OpenVarSet openVars;
 …
                         // that are cast directly.  The candidate is invalid if it has fewer results than there are types to cast
                         // to.
                         int discardedValues = i->expr->get_result()->size() - castExpr->get_result()->size();
+                        int discardedValues = alt.expr->get_result()->size() - castExpr->get_result()->size();
                         if ( discardedValues < 0 ) continue;
                         // xxx - may need to go into tuple types and extract relevant types and use unifyList. Note that currently, this does not
                         // allow casting a tuple to an atomic type (e.g. (int)([1, 2, 3]))
                         // unification run for side-effects
+                        unify( castExpr->get_result(), i->expr->get_result(), i->env, needAssertions, haveAssertions, openVars, indexer );
+                        Cost thisCost = castCost( i->expr->get_result(), castExpr->get_result(), indexer, i->env );
+                        unify( castExpr->get_result(), alt.expr->get_result(), alt.env, needAssertions,
+                                haveAssertions, openVars, indexer );
+                        Cost thisCost = castCost( alt.expr->get_result(), castExpr->get_result(), indexer,
+                                alt.env );
+                        PRINT(
+                                std::cerr << "working on cast with result: " << castExpr->result << std::endl;
+                                std::cerr << "and expr type: " << alt.expr->result << std::endl;
+                                std::cerr << "env: " << alt.env << std::endl;
+                        )
                         if ( thisCost != Cost::infinity ) {
+                                PRINT(
+                                        std::cerr << "has finite cost." << std::endl;
+                                )
                                 // count one safe conversion for each value that is thrown away
                                 thisCost.incSafe( discardedValues );
+                                Alternative newAlt( restructureCast( i->expr->clone(), toType ), i->env, i->cost, thisCost );
+                                inferParameters( needAssertions, haveAssertions, newAlt, openVars, back_inserter( candidates ) );
+                                Alternative newAlt( restructureCast( alt.expr->clone(), toType ), alt.env,
+                                        alt.cost, thisCost );
+                                inferParameters( needAssertions, haveAssertions, newAlt, openVars,
+                                        back_inserter( candidates ) );
                         } // if
                 } // for
 …
         void AlternativeFinder::visit( UntypedTupleExpr *tupleExpr ) {
+                std::list< AlternativeFinder > subExprAlternatives;
+                findSubExprs( tupleExpr->get_exprs().begin(), tupleExpr->get_exprs().end(), back_inserter( subExprAlternatives ) );
+                std::list< AltList > possibilities;
+                combos( subExprAlternatives.begin(), subExprAlternatives.end(), back_inserter( possibilities ) );
+                for ( std::list< AltList >::const_iterator i = possibilities.begin(); i != possibilities.end(); ++i ) {
+                std::vector< AlternativeFinder > subExprAlternatives;
+                findSubExprs( tupleExpr->get_exprs().begin(), tupleExpr->get_exprs().end(),
+                        back_inserter( subExprAlternatives ) );
+                std::vector< AltList > possibilities;
+                combos( subExprAlternatives.begin(), subExprAlternatives.end(),
+                        back_inserter( possibilities ) );
+                for ( const AltList& alts : possibilities ) {
                         std::list< Expression * > exprs;
                         makeExprList( *i, exprs );
+                        makeExprList( alts, exprs );
                         TypeEnvironment compositeEnv;
+                        simpleCombineEnvironments( i->begin(), i->end(), compositeEnv );
+                        alternatives.push_back( Alternative( new TupleExpr( exprs ) , compositeEnv, sumCost( *i ) ) );
+                        simpleCombineEnvironments( alts.begin(), alts.end(), compositeEnv );
+                        alternatives.push_back(
+                                Alternative{ new TupleExpr( exprs ), compositeEnv, sumCost( alts ) } );
                 } // for
+        }

src/ResolvExpr/AlternativeFinder.h

-              r9d06142
+              rc0d00b6
 #include "Alternative.h"                 // for AltList, Alternative
+#include "ExplodedActual.h"              // for ExplodedActual
 #include "ResolvExpr/Cost.h"             // for Cost, Cost::infinity
 #include "ResolvExpr/TypeEnvironment.h"  // for AssertionSet, OpenVarSet
 …
 namespace ResolvExpr {
+        struct ArgPack;
+        /// First index is which argument, second index is which alternative for that argument,
+        /// third index is which exploded element of that alternative
+        using ExplodedArgs = std::vector< std::vector< ExplodedActual > >;
         class AlternativeFinder : public Visitor {
           public:
                 AlternativeFinder( const SymTab::Indexer &indexer, const TypeEnvironment &env );
+                AlternativeFinder( const AlternativeFinder& o )  // copy constructor: member-wise initialization of indexer, alternatives, env and targetType from o
+                        : indexer(o.indexer), alternatives(o.alternatives), env(o.env),  // alternatives is copied, so o keeps its own list
+                          targetType(o.targetType) {}
+                AlternativeFinder( AlternativeFinder&& o )  // move constructor: only alternatives is moved out of o
+                        : indexer(o.indexer), alternatives(std::move(o.alternatives)), env(o.env),  // indexer and env are initialized directly from o's members, not moved
+                          targetType(o.targetType) {}
+                AlternativeFinder& operator= ( const AlternativeFinder& o ) {  // copy assignment, implemented as destroy-then-reconstruct in place
+                        if (&o == this) return *this;  // self-assignment: return before the member below is destroyed
+                        // horrific nasty hack to rebind references... (presumably indexer/env are reference members that ordinary assignment cannot reseat -- TODO confirm against the member declarations)
+                        alternatives.~AltList();  // explicitly destroy the alternatives member; it is the only member destroyed here
+                        new(this) AlternativeFinder(o);  // placement-new: copy-construct a fresh AlternativeFinder from o in this object's storage
+                        return *this;
+                }
+                AlternativeFinder& operator= ( AlternativeFinder&& o ) {  // move assignment, implemented as destroy-then-reconstruct in place
+                        if (&o == this) return *this;  // self-assignment: return before the member below is destroyed
+                        // horrific nasty hack to rebind references... (presumably indexer/env are reference members that ordinary assignment cannot reseat -- TODO confirm against the member declarations)
+                        alternatives.~AltList();  // explicitly destroy the alternatives member; it is the only member destroyed here
+                        new(this) AlternativeFinder(std::move(o));  // placement-new: move-construct a fresh AlternativeFinder from o in this object's storage
+                        return *this;
+                }
                 void find( Expression *expr, bool adjust = false, bool prune = true, bool failFast = true );
                 /// Calls find with the adjust flag set; adjustment turns array and function types into equivalent pointer types
 …
                 /// Adds alternatives for offsetof expressions, given the base type and name of the member
                 template< typename StructOrUnionType > void addOffsetof( StructOrUnionType *aggInst, const std::string &name );
+                bool instantiateFunction( std::list< DeclarationWithType* >& formals, const AltList &actuals, bool isVarArgs, OpenVarSet& openVars, TypeEnvironment &resultEnv, AssertionSet &resultNeed, AssertionSet &resultHave, AltList & out );
+                template< typename OutputIterator >
+                void makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const AltList &actualAlt, OutputIterator out );
+                /// Takes a final result and checks if its assertions can be satisfied
+                template<typename OutputIterator>
+                void validateFunctionAlternative( const Alternative &func, ArgPack& result, const std::vector<ArgPack>& results, OutputIterator out );
+                /// Finds matching alternatives for a function, given a set of arguments
+                template<typename OutputIterator>
+                void makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const ExplodedArgs& args, OutputIterator out );
+                /// Checks if assertion parameters match for a new alternative
                 template< typename OutputIterator >
                 void inferParameters( const AssertionSet &need, AssertionSet &have, const Alternative &newAlt, OpenVarSet &openVars, OutputIterator out );

src/ResolvExpr/PtrsAssignable.cc

-              r9d06142
+              rc0d00b6
         void PtrsAssignable::visit( __attribute((unused)) VoidType *voidType ) {
+                if ( ! dynamic_cast< FunctionType* >( dest ) ) {
+                        // T * = void * is safe for any T that is not a function type.
+                        // xxx - this should be unsafe...
+                        result = 1;
+                } // if
+                // T * = void * is disallowed - this is a change from C, where any
+                // void * can be assigned or passed to a non-void pointer without a cast.
+        }

src/ResolvExpr/RenameVars.cc

-              r9d06142
+              rc0d00b6
         RenameVars global_renamer;
         RenameVars::RenameVars() : level( 0 ) {
+        RenameVars::RenameVars() : level( 0 ), resetCount( 0 ) {
                 mapStack.push_front( std::map< std::string, std::string >() );
+        }
 …
         void RenameVars::reset() {
                 level = 0;
+                resetCount++;
+        }
 …
                         for ( Type::ForallList::iterator i = type->get_forall().begin(); i != type->get_forall().end(); ++i ) {
                                 std::ostringstream output;
                                 output << "_" << level << "_" << (*i)->get_name();
+                                output << "_" << resetCount << "_" << level << "_" << (*i)->get_name();
                                 std::string newname( output.str() );
                                 mapStack.front()[ (*i)->get_name() ] = newname;

src/ResolvExpr/RenameVars.h

r9d06142	rc0d00b6
48	48	void typeBefore( Type *type );
49	49	void typeAfter( Type *type );
50		int level;
	50	int level, resetCount;
51	51	std::list< std::map< std::string, std::string > > mapStack;
52	52	};

src/ResolvExpr/Resolver.cc

-              r9d06142
+              rc0d00b6
 #include <memory>                        // for allocator, allocator_traits<...
 #include <tuple>                         // for get
+#include <vector>
 #include "Alternative.h"                 // for Alternative, AltList
 …
                         // Find all alternatives for all arguments in canonical form
                         std::list< AlternativeFinder > argAlternatives;
+                        std::vector< AlternativeFinder > argAlternatives;
                         funcFinder.findSubExprs( clause.target.arguments.begin(), clause.target.arguments.end(), back_inserter( argAlternatives ) );
                         // List all combinations of arguments
                         std::list< AltList > possibilities;
+                        std::vector< AltList > possibilities;
                         combos( argAlternatives.begin(), argAlternatives.end(), back_inserter( possibilities ) );

src/ResolvExpr/TypeEnvironment.cc

-              r9d06142
+              rc0d00b6
+        }
+        void TypeEnvironment::addActual( const TypeEnvironment& actualEnv, OpenVarSet& openVars ) {  // appends a modified copy of every class in actualEnv to this environment and records each class's variables in openVars
+                for ( const EqvClass& c : actualEnv ) {
+                        EqvClass c2 = c;  // work on a copy so actualEnv itself is left unmodified
+                        c2.allowWidening = false;  // the copy stored in this environment has widening turned off
+                        for ( const std::string& var : c2.vars ) {
+                                openVars[ var ] = c2.data;  // each variable name in the class is mapped to the class's data
+                        }
+                        env.push_back( std::move(c2) );  // appended at the end of env; c2 is not used again, so it is moved
+                }
+        }
+        std::ostream & operator<<( std::ostream & out, const TypeEnvironment & env ) {  // stream insertion for TypeEnvironment: delegates to env.print( out )
+                env.print( out );
+                return out;  // hand the stream back so insertions can be chained
+        }
 } // namespace ResolvExpr

src/ResolvExpr/TypeEnvironment.h

-              r9d06142
+              rc0d00b6
                 TypeEnvironment *clone() const { return new TypeEnvironment( *this ); }
+                /// Iteratively adds the environment of a new actual (with allowWidening = false),
+                /// and extracts open variables.
+                void addActual( const TypeEnvironment& actualEnv, OpenVarSet& openVars );
                 typedef std::list< EqvClass >::iterator iterator;
                 iterator begin() { return env.begin(); }
 …
                 return sub.applyFree( type );
+        }
+        std::ostream & operator<<( std::ostream & out, const TypeEnvironment & env );
 } // namespace ResolvExpr

src/ResolvExpr/module.mk

r9d06142	rc0d00b6
32	32	ResolvExpr/Occurs.cc \
33	33	ResolvExpr/TypeEnvironment.cc \
34		ResolvExpr/CurrentObject.cc
	34	ResolvExpr/CurrentObject.cc \
	35	ResolvExpr/ExplodedActual.cc

src/ResolvExpr/typeops.h

-              r9d06142
+              rc0d00b6
 #pragma once
+#include <vector>
 #include "SynTree/SynTree.h"
 #include "SynTree/Type.h"
 …
         void combos( InputIterator begin, InputIterator end, OutputIterator out ) {
                 typedef typename InputIterator::value_type SetType;
                 typedef typename std::list< typename SetType::value_type > ListType;
+                typedef typename std::vector< typename SetType::value_type > ListType;
                 if ( begin == end )     {
 …
                 begin++;
                 std::list< ListType > recursiveResult;
+                std::vector< ListType > recursiveResult;
                 combos( begin, end, back_inserter( recursiveResult ) );
+                for ( typename std::list< ListType >::const_iterator i = recursiveResult.begin(); i != recursiveResult.end(); ++i ) {
+                        for ( typename ListType::const_iterator j = current->begin(); j != current->end(); ++j ) {
+                                ListType result;
+                                std::back_insert_iterator< ListType > inserter = back_inserter( result );
+                                *inserter++ = *j;
+                                std::copy( i->begin(), i->end(), inserter );
+                                *out++ = result;
+                        } // for
+                } // for
+                for ( const auto& i : recursiveResult ) for ( const auto& j : *current ) {
+                        ListType result;
+                        std::back_insert_iterator< ListType > inserter = back_inserter( result );
+                        *inserter++ = j;
+                        std::copy( i.begin(), i.end(), inserter );
+                        *out++ = result;
+                }
+        }

src/SymTab/Autogen.cc

-              r9d06142
+              rc0d00b6
                 void previsit( FunctionDecl * functionDecl );
-                void previsit( FunctionType * ftype );
-                void previsit( PointerType * ptype );
                 void previsit( CompoundStmt * compoundStmt );
 …
                 unsigned int functionNesting = 0;     // current level of nested functions
-                InitTweak::ManagedTypes managedTypes;
                 std::vector< FuncData > data;
         };
 …
         // generate ctor/dtors/assign for typedecls, e.g., otype T = int *;
         void AutogenerateRoutines::previsit( TypeDecl * typeDecl ) {
-                visit_children = false;
                 if ( ! typeDecl->base ) return;
 …
                 TypeFuncGenerator gen( typeDecl, &refType, data, functionNesting, indexer );
                 generateFunctions( gen, declsToAddAfter );
+        }
+        void AutogenerateRoutines::previsit( FunctionType *) {
+                // ensure that we don't add assignment ops for types defined as part of the function
+                visit_children = false;
+        }
+        void AutogenerateRoutines::previsit( PointerType *) {
+                // ensure that we don't add assignment ops for types defined as part of the pointer
+                visit_children = false;
+        }
 …
+        }
+        void AutogenerateRoutines::previsit( FunctionDecl * functionDecl ) {
+                visit_children = false;
+                // record the existence of this function as appropriate
+                managedTypes.handleDWT( functionDecl );
+                maybeAccept( functionDecl->type, *visitor );
+        void AutogenerateRoutines::previsit( FunctionDecl * ) {
+                // Track whether we're currently in a function.
+                // Can ignore function type idiosyncrasies, because function type can never
+                // declare a new type.
                 functionNesting += 1;
+                maybeAccept( functionDecl->statements, *visitor );
+                functionNesting -= 1;
+                GuardAction( [this]()  { functionNesting -= 1; } );
+        }
         void AutogenerateRoutines::previsit( CompoundStmt * ) {
-                GuardScope( managedTypes );
                 GuardScope( structsDone );
+        }

src/SymTab/Autogen.h

-              r9d06142
+              rc0d00b6
         /// inserts into out a generated call expression to function fname with arguments dstParam and srcParam. Intended to be used with generated ?=?, ?{}, and ^?{} calls.
         template< typename OutputIterator >
         Statement * genCall( InitTweak::InitExpander & srcParam, Expression * dstParam, const std::string & fname, OutputIterator out, Type * type, bool addCast = false, bool forward = true );
+        Statement * genCall( InitTweak::InitExpander & srcParam, Expression * dstParam, const std::string & fname, OutputIterator out, Type * type, Type * addCast = nullptr, bool forward = true );
         /// inserts into out a generated call expression to function fname with arguments dstParam and srcParam. Should only be called with non-array types.
         /// optionally returns a statement which must be inserted prior to the containing loop, if there is one
         template< typename OutputIterator >
         Statement * genScalarCall( InitTweak::InitExpander & srcParam, Expression * dstParam, std::string fname, OutputIterator out, Type * type, bool addCast = false ) {
+        Statement * genScalarCall( InitTweak::InitExpander & srcParam, Expression * dstParam, std::string fname, OutputIterator out, Type * type, Type * addCast = nullptr ) {
                 bool isReferenceCtorDtor = false;
                 if ( dynamic_cast< ReferenceType * >( type ) && CodeGen::isCtorDtor( fname ) ) {
 …
                         fname = "?=?";
                         dstParam = new AddressExpr( dstParam );
                         addCast = false;
+                        addCast = nullptr;
                         isReferenceCtorDtor = true;
+                }
 …
                         // remove lvalue as a qualifier, this can change to
                         //   type->get_qualifiers() = Type::Qualifiers();
+                        assert( type );
+                        Type * castType = type->clone();
+                        Type * castType = addCast->clone();
                         castType->get_qualifiers() -= Type::Qualifiers( Type::Lvalue | Type::Const | Type::Volatile | Type::Restrict | Type::Atomic );
                         // castType->set_lvalue( true ); // xxx - might not need this
 …
         /// If forward is true, loop goes from 0 to N-1, else N-1 to 0
         template< typename OutputIterator >
         void genArrayCall( InitTweak::InitExpander & srcParam, Expression *dstParam, const std::string & fname, OutputIterator out, ArrayType *array, bool addCast = false, bool forward = true ) {
+        void genArrayCall( InitTweak::InitExpander & srcParam, Expression *dstParam, const std::string & fname, OutputIterator out, ArrayType *array, Type * addCast = nullptr, bool forward = true ) {
                 static UniqueName indexName( "_index" );
                 // for a flexible array member nothing is done -- user must define own assignment
+                if ( ! array->get_dimension() ) return ;
+                if ( ! array->get_dimension() ) return;
+                if ( addCast ) {
+                        // peel off array layer from cast
+                        ArrayType * at = strict_dynamic_cast< ArrayType * >( addCast );
+                        addCast = at->base;
+                }
                 Expression * begin, * end, * update, * cmp;
 …
         template< typename OutputIterator >
         Statement * genCall( InitTweak::InitExpander & srcParam, Expression * dstParam, const std::string & fname, OutputIterator out, Type * type, bool addCast, bool forward ) {
+        Statement * genCall( InitTweak::InitExpander & srcParam, Expression * dstParam, const std::string & fname, OutputIterator out, Type * type, Type * addCast, bool forward ) {
                 if ( ArrayType * at = dynamic_cast< ArrayType * >( type ) ) {
                         genArrayCall( srcParam, dstParam, fname, out, at, addCast, forward );
 …
                 if ( isUnnamedBitfield( obj ) ) return;
+                bool addCast = (fname == "?{}" || fname == "^?{}") && ( !obj || ( obj && ! obj->get_bitfieldWidth() ) );
+                Type * addCast = nullptr;
+                if ( (fname == "?{}" || fname == "^?{}") && ( !obj || ( obj && ! obj->get_bitfieldWidth() ) ) ) {
+                        assert( dstParam->result );
+                        addCast = dstParam->result;
+                }
                 std::list< Statement * > stmts;
                 genCall( srcParam, dstParam, fname, back_inserter( stmts ), obj->type, addCast, forward );

src/SymTab/Indexer.cc

-              r9d06142
+              rc0d00b6
+        }
+        void Indexer::addIds( const std::list< DeclarationWithType * > & decls ) {  // calls addId on every declaration in decls, in list order
+                for ( auto d : decls ) {
+                        addId( d );
+                }
+        }
+        void Indexer::addTypes( const std::list< TypeDecl * > & tds ) {  // for each type declaration in tds: adds the type, then adds its assertions as ids
+                for ( auto td : tds ) {
+                        addType( td );
+                        addIds( td->assertions );  // the assertions attached to the type declaration are added via addIds
+                }
+        }
+        void Indexer::addFunctionType( FunctionType * ftype ) {  // adds the declarations of a function type in this order: forall parameters, return values, parameters
+                addTypes( ftype->forall );  // forall type parameters, including their assertions (handled inside addTypes)
+                addIds( ftype->returnVals );
+                addIds( ftype->parameters );
+        }
         void Indexer::enterScope() {
                 ++scope;

src/SymTab/Indexer.h

-              r9d06142
+              rc0d00b6
                 void addTrait( TraitDecl *decl );
+                /// convenience function for adding a list of Ids to the indexer
+                void addIds( const std::list< DeclarationWithType * > & decls );
+                /// convenience function for adding a list of forall parameters to the indexer
+                void addTypes( const std::list< TypeDecl * > & tds );
+                /// convenience function for adding all of the declarations in a function type to the indexer
+                void addFunctionType( FunctionType * ftype );
                 bool doDebug = false; ///< Display debugging trace?
           private:

src/SymTab/Validate.cc

-              r9d06142
+              rc0d00b6
         /// Associates forward declarations of aggregates with their definitions
         struct LinkReferenceToTypes final : public WithIndexer {
+        struct LinkReferenceToTypes final : public WithIndexer, public WithGuards {
                 LinkReferenceToTypes( const Indexer *indexer );
                 void postvisit( TypeInstType *typeInst );
 …
                 void postvisit( UnionDecl *unionDecl );
                 void postvisit( TraitDecl * traitDecl );
+                void previsit( StructDecl *structDecl );
+                void previsit( UnionDecl *unionDecl );
+                void renameGenericParams( std::list< TypeDecl * > & params );
           private:
 …
                 ForwardStructsType forwardStructs;
                 ForwardUnionsType forwardUnions;
+                /// true if currently in a generic type body, so that type parameter instances can be renamed appropriately
+                bool inGeneric = false;
         };
 …
+        }
+        void checkGenericParameters( ReferenceToType * inst ) {  // throws SemanticError if any parameter of inst is not a TypeExpr; otherwise does nothing
+                for ( Expression * param : inst->parameters ) {
+                        if ( ! dynamic_cast< TypeExpr * >( param ) ) {  // a failed cast means this parameter is some other kind of expression
+                                throw SemanticError( "Expression parameters for generic types are currently unsupported: ", inst );  // thrown at the first offending parameter, with the whole instance passed alongside the message
+                        }
+                }
+        }
         void LinkReferenceToTypes::postvisit( StructInstType *structInst ) {
                 StructDecl *st = local_indexer->lookupStruct( structInst->get_name() );
 …
                         forwardStructs[ structInst->get_name() ].push_back( structInst );
                 } // if
+                checkGenericParameters( structInst );
+        }
 …
                         forwardUnions[ unionInst->get_name() ].push_back( unionInst );
                 } // if
+                checkGenericParameters( unionInst );
+        }
 …
                 // need to carry over the 'sized' status of each decl in the instance
                 for ( auto p : group_iterate( traitDecl->get_parameters(), traitInst->get_parameters() ) ) {
+                        TypeExpr * expr = strict_dynamic_cast< TypeExpr * >( std::get<1>(p) );
+                        TypeExpr * expr = dynamic_cast< TypeExpr * >( std::get<1>(p) );
+                        if ( ! expr ) {
+                                throw SemanticError( "Expression parameters for trait instances are currently unsupported: ", std::get<1>(p) );
+                        }
                         if ( TypeInstType * inst = dynamic_cast< TypeInstType * >( expr->get_type() ) ) {
                                 TypeDecl * formalDecl = std::get<0>(p);
 …
                         } // if
                 } // if
+        }
+        void LinkReferenceToTypes::renameGenericParams( std::list< TypeDecl * > & params ) {
+                // rename generic type parameters uniquely so that they do not conflict with user-defined function forall parameters, e.g.
+                //   forall(otype T)
+                //   struct Box {
+                //     T x;
+                //   };
+                //   forall(otype T)
+                //   void f(Box(T) b) {
+                //     ...
+                //   }
+                // The T in Box and the T in f are different, so internally the naming must reflect that.
                 // NOTE(review): GuardValue is not defined in this view (presumably inherited from WithGuards);
                 // it looks like it saves inGeneric so the old value can be restored later -- confirm.
+                GuardValue( inGeneric );
                 // the flag is raised only when the parameter list is non-empty
+                inGeneric = ! params.empty();
                 // each parameter name N becomes "__N_generic_" (modified in place)
+                for ( TypeDecl * td : params ) {
+                        td->name = "__" + td->name + "_generic_";
+                }
+        }
+        void LinkReferenceToTypes::previsit( StructDecl * structDecl ) {
                 // hand the struct's parameter list to renameGenericParams, which renames the entries in place
+                renameGenericParams( structDecl->parameters );
+        }
+        void LinkReferenceToTypes::previsit( UnionDecl * unionDecl ) {
                 // hand the union's parameter list to renameGenericParams, which renames the entries in place
+                renameGenericParams( unionDecl->parameters );
+        }
 …
         void LinkReferenceToTypes::postvisit( TypeInstType *typeInst ) {
+                // ensure generic parameter instances are renamed like the base type
+                if ( inGeneric && typeInst->baseType ) typeInst->name = typeInst->baseType->name;
                 if ( NamedTypeDecl *namedTypeDecl = local_indexer->lookupType( typeInst->get_name() ) ) {
                         if ( TypeDecl *typeDecl = dynamic_cast< TypeDecl * >( namedTypeDecl ) ) {

src/SynTree/Expression.cc

-              r9d06142
+              rc0d00b6
         Type * type = var->get_type()->clone();
         type->set_lvalue( true );
+        // xxx - doesn't quite work yet - get different alternatives with the same cost
+        // // enumerators are not lvalues
+        // if ( EnumInstType * inst = dynamic_cast< EnumInstType * >( var->get_type() ) ) {
+        //      assert( inst->baseEnum );
+        //      EnumDecl * decl = inst->baseEnum;
+        //      for ( Declaration * member : decl->members ) {
+        //              if ( member == _var ) {
+        //                      type->set_lvalue( false );
+        //              }
+        //      }
+        // }
         set_result( type );
+}
 …
                         return makeSub( refType->get_base() );
                 } else if ( StructInstType * aggInst = dynamic_cast< StructInstType * >( t ) ) {
                         return TypeSubstitution( aggInst->get_baseParameters()->begin(), aggInst->get_baseParameters()->end(), aggInst->get_parameters().begin() );
+                        return TypeSubstitution( aggInst->get_baseParameters()->begin(), aggInst->get_baseParameters()->end(), aggInst->parameters.begin() );
                 } else if ( UnionInstType * aggInst = dynamic_cast< UnionInstType * >( t ) ) {
                         return TypeSubstitution( aggInst->get_baseParameters()->begin(), aggInst->get_baseParameters()->end(), aggInst->get_parameters().begin() );
+                        return TypeSubstitution( aggInst->get_baseParameters()->begin(), aggInst->get_baseParameters()->end(), aggInst->parameters.begin() );
                 } else {
                         assertf( false, "makeSub expects struct or union type for aggregate, but got: %s", toString( t ).c_str() );

src/Tuples/Explode.h

-              r9d06142
+              rc0d00b6
 #pragma once
+#include <iterator>                  // for back_inserter, back_insert_iterator
+#include <iterator>                     // for back_inserter, back_insert_iterator
+#include <utility>                      // for forward
+#include "ResolvExpr/Alternative.h"  // for Alternative, AltList
+#include "SynTree/Expression.h"      // for Expression, UniqueExpr, AddressExpr
+#include "SynTree/Type.h"            // for TupleType, Type
+#include "Tuples.h"                  // for maybeImpure
+#include "ResolvExpr/Alternative.h"     // for Alternative, AltList
+#include "ResolvExpr/ExplodedActual.h"  // for ExplodedActual
+#include "SynTree/Expression.h"         // for Expression, UniqueExpr, AddressExpr
+#include "SynTree/Type.h"               // for TupleType, Type
+#include "Tuples.h"                     // for maybeImpure
 namespace SymTab {
 …
+        }
+        /// Append alternative to an OutputIterator of Alternatives
         /// A ResolvExpr::Alternative is brace-initialised from expr, env, cost and cvtCost and
         /// assigned through `out`. `out` is taken by value, so the post-increment only affects
         /// this function's copy of the iterator.
+        template<typename OutputIterator>
+        void append( OutputIterator out, Expression* expr, const ResolvExpr::TypeEnvironment& env,
+                        const ResolvExpr::Cost& cost, const ResolvExpr::Cost& cvtCost ) {
+                *out++ = ResolvExpr::Alternative{ expr, env, cost, cvtCost };
+        }
+        /// Append alternative to an ExplodedActual
         /// Only `expr` is stored (appended to ea.exprs). The environment and the two cost
         /// arguments are unnamed and ignored by this overload.
+        static inline void append( ResolvExpr::ExplodedActual& ea, Expression* expr,
+                        const ResolvExpr::TypeEnvironment&, const ResolvExpr::Cost&, const ResolvExpr::Cost& ) {
+                ea.exprs.emplace_back( expr );
+                /// xxx -- open question: should the environment and cost be merged into `ea` as well?
+        }
         /// helper function used by explode
+        template< typename OutputIterator >
+        void explodeUnique( Expression * expr, const ResolvExpr::Alternative & alt, const SymTab::Indexer & indexer, OutputIterator out, bool isTupleAssign ) {
+        template< typename Output >
+        void explodeUnique( Expression * expr, const ResolvExpr::Alternative & alt,
+                        const SymTab::Indexer & indexer, Output&& out, bool isTupleAssign ) {
                 if ( isTupleAssign ) {
                         // tuple assignment needs CastExprs to be recursively exploded to easily get at all of the components
                         if ( CastExpr * castExpr = isReferenceCast( expr ) ) {
                                 ResolvExpr::AltList alts;
+                                explodeUnique( castExpr->get_arg(), alt, indexer, back_inserter( alts ), isTupleAssign );
+                                explodeUnique(
+                                        castExpr->get_arg(), alt, indexer, back_inserter( alts ), isTupleAssign );
                                 for ( ResolvExpr::Alternative & alt : alts ) {
                                         // distribute reference cast over all components
                                         alt.expr = distributeReference( alt.expr );
                                         *out++ = alt;
+                                        append( std::forward<Output>(out), distributeReference( alt.release_expr() ),
+                                                alt.env, alt.cost, alt.cvtCost );
+                                }
                                 // in tuple assignment, still need to handle the other cases, but only if not already handled here (don't want to output too many alternatives)
 …
                                 // can open tuple expr and dump its exploded components
                                 for ( Expression * expr : tupleExpr->get_exprs() ) {
                                         explodeUnique( expr, alt, indexer, out, isTupleAssign );
+                                        explodeUnique( expr, alt, indexer, std::forward<Output>(out), isTupleAssign );
+                                }
                         } else {
 …
                                 for ( unsigned int i = 0; i < tupleType->size(); i++ ) {
                                         TupleIndexExpr * idx = new TupleIndexExpr( arg->clone(), i );
                                         explodeUnique( idx, alt, indexer, out, isTupleAssign );
+                                        explodeUnique( idx, alt, indexer, std::forward<Output>(out), isTupleAssign );
                                         delete idx;
+                                }
 …
                 } else {
                         // atomic (non-tuple) type - output a clone of the expression in a new alternative
                         *out++ = ResolvExpr::Alternative( expr->clone(), alt.env, alt.cost, alt.cvtCost );
+                        append( std::forward<Output>(out), expr->clone(), alt.env, alt.cost, alt.cvtCost );
+                }
+        }
         /// expands a tuple-valued alternative into multiple alternatives, each with a non-tuple-type
+        template< typename OutputIterator >
+        void explode( const ResolvExpr::Alternative &alt, const SymTab::Indexer & indexer, OutputIterator out, bool isTupleAssign = false ) {
+                explodeUnique( alt.expr, alt, indexer, out, isTupleAssign );
+        template< typename Output >
+        void explode( const ResolvExpr::Alternative &alt, const SymTab::Indexer & indexer,
+                        Output&& out, bool isTupleAssign = false ) {
+                explodeUnique( alt.expr, alt, indexer, std::forward<Output>(out), isTupleAssign );
+        }
         // explode list of alternatives
+        template< typename AltIterator, typename OutputIterator >
+        void explode( AltIterator altBegin, AltIterator altEnd, const SymTab::Indexer & indexer, OutputIterator out, bool isTupleAssign = false ) {
+        template< typename AltIterator, typename Output >
+        void explode( AltIterator altBegin, AltIterator altEnd, const SymTab::Indexer & indexer,
+                        Output&& out, bool isTupleAssign = false ) {
                 for ( ; altBegin != altEnd; ++altBegin ) {
                         explode( *altBegin, indexer, out, isTupleAssign );
+                        explode( *altBegin, indexer, std::forward<Output>(out), isTupleAssign );
+                }
+        }
+        template< typename OutputIterator >
+        void explode( const ResolvExpr::AltList & alts, const SymTab::Indexer & indexer, OutputIterator out, bool isTupleAssign = false ) {
+                explode( alts.begin(), alts.end(), indexer, out, isTupleAssign );
+        template< typename Output >
+        void explode( const ResolvExpr::AltList & alts, const SymTab::Indexer & indexer, Output&& out,
+                        bool isTupleAssign = false ) {
+                explode( alts.begin(), alts.end(), indexer, std::forward<Output>(out), isTupleAssign );
+        }
 } // namespace Tuples

src/Tuples/TupleAssignment.cc

-              r9d06142
+              rc0d00b6
 #include <memory>                          // for unique_ptr, allocator_trai...
 #include <string>                          // for string
+#include <vector>
 #include "CodeGen/OperatorTable.h"
 …
 #include "ResolvExpr/Resolver.h"           // for resolveCtorInit
 #include "ResolvExpr/TypeEnvironment.h"    // for TypeEnvironment
+#include "ResolvExpr/typeops.h"            // for combos
 #include "SynTree/Declaration.h"           // for ObjectDecl
 #include "SynTree/Expression.h"            // for Expression, CastExpr, Name...
 …
                 // dispatcher for Tuple (multiple and mass) assignment operations
                 TupleAssignSpotter( ResolvExpr::AlternativeFinder & );
                 void spot( UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities );
+                void spot( UntypedExpr * expr, std::vector<ResolvExpr::AlternativeFinder> &args );
           private:
 …
                 struct Matcher {
                   public:
+                        Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts );
+                        Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs, const
+                                ResolvExpr::AltList& rhs );
                         virtual ~Matcher() {}
                         virtual void match( std::list< Expression * > &out ) = 0;
 …
                 struct MassAssignMatcher : public Matcher {
                   public:
+                        MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts );
+                        MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs,
+                                const ResolvExpr::AltList& rhs ) : Matcher(spotter, lhs, rhs) {}
                         virtual void match( std::list< Expression * > &out );
                 };
 …
                 struct MultipleAssignMatcher : public Matcher {
                   public:
+                        MultipleAssignMatcher( TupleAssignSpotter &spot, const ResolvExpr::AltList & alts );
+                        MultipleAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs,
+                                const ResolvExpr::AltList& rhs ) : Matcher(spotter, lhs, rhs) {}
                         virtual void match( std::list< Expression * > &out );
                 };
 …
+        }
+        void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities ) {
+        void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * expr,
+                                std::vector<ResolvExpr::AlternativeFinder> &args ) {
                 TupleAssignSpotter spotter( currentFinder );
                 spotter.spot( expr, possibilities );
+                spotter.spot( expr, args );
+        }
 …
                 : currentFinder(f) {}
+        void TupleAssignSpotter::spot( UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities ) {
+        void TupleAssignSpotter::spot( UntypedExpr * expr,
+                        std::vector<ResolvExpr::AlternativeFinder> &args ) {
                 if (  NameExpr *op = dynamic_cast< NameExpr * >(expr->get_function()) ) {
                         if ( CodeGen::isCtorDtorAssign( op->get_name() ) ) {
+                               fname = op->get_name();
+                                PRINT( std::cerr << "TupleAssignment: " << fname << std::endl; )
+                                for ( std::list<ResolvExpr::AltList>::const_iterator ali = possibilities.begin(); ali != possibilities.end(); ++ali ) {
+                                        if ( ali->size() == 0 ) continue; // AlternativeFinder will natrually handle this case, if it's legal
+                                        if ( ali->size() <= 1 && CodeGen::isAssignment( op->get_name() ) ) {
+                                                // what does it mean if an assignment takes 1 argument? maybe someone defined such a function, in which case AlternativeFinder will naturally handle it
+                                                continue;
+                                fname = op->get_name();
+                                // AlternativeFinder will naturally handle this case, if it's legal
+                                if ( args.size() == 0 ) return;
+                                // if an assignment only takes 1 argument, that's odd, but maybe someone wrote
+                                // the function, in which case AlternativeFinder will handle it normally
+                                if ( args.size() == 1 && CodeGen::isAssignment( fname ) ) return;
+                                // look over all possible left-hand-sides
+                                for ( ResolvExpr::Alternative& lhsAlt : args[0] ) {
+                                        // skip non-tuple LHS
+                                        if ( ! refToTuple(lhsAlt.expr) ) continue;
+                                        // explode is aware of casts - ensure every LHS expression is sent into explode
+                                        // with a reference cast
+                                        // xxx - this seems to change the alternatives before the normal
+                                        //  AlternativeFinder flow; maybe this is desired?
+                                        if ( ! dynamic_cast<CastExpr*>( lhsAlt.expr ) ) {
+                                                lhsAlt.expr = new CastExpr( lhsAlt.expr,
+                                                                new ReferenceType( Type::Qualifiers(),
+                                                                        lhsAlt.expr->get_result()->clone() ) );
+                                        }
+                                        assert( ! ali->empty() );
+                                        // grab args 2-N and group into a TupleExpr
+                                        const ResolvExpr::Alternative & alt1 = ali->front();
+                                        auto begin = std::next(ali->begin(), 1), end = ali->end();
+                                        PRINT( std::cerr << "alt1 is " << alt1.expr << std::endl; )
+                                        if ( refToTuple(alt1.expr) ) {
+                                                PRINT( std::cerr << "and is reference to tuple" << std::endl; )
+                                                if ( isMultAssign( begin, end ) ) {
+                                                        PRINT( std::cerr << "possible multiple assignment" << std::endl; )
+                                                        matcher.reset( new MultipleAssignMatcher( *this, *ali ) );
+                                                } else {
+                                                        // mass assignment
+                                                        PRINT( std::cerr << "possible mass assignment" << std::endl; )
+                                                        matcher.reset( new MassAssignMatcher( *this,  *ali ) );
+                                        // explode the LHS so that each field of a tuple-valued-expr is assigned
+                                        ResolvExpr::AltList lhs;
+                                        explode( lhsAlt, currentFinder.get_indexer(), back_inserter(lhs), true );
+                                        for ( ResolvExpr::Alternative& alt : lhs ) {
+                                                // each LHS value must be a reference - some come in with a cast expression,
+                                                // if not just cast to reference here
+                                                if ( ! dynamic_cast<ReferenceType*>( alt.expr->get_result() ) ) {
+                                                        alt.expr = new CastExpr( alt.expr,
+                                                                new ReferenceType( Type::Qualifiers(),
+                                                                        alt.expr->get_result()->clone() ) );
+                                                }
+                                        }
+                                        if ( args.size() == 1 ) {
+                                                // mass default-initialization/destruction
+                                                ResolvExpr::AltList rhs{};
+                                                matcher.reset( new MassAssignMatcher( *this, lhs, rhs ) );
                                                 match();
+                                        } else if ( args.size() > 2 ) {
+                                                // expand all possible RHS possibilities
+                                                // TODO build iterative version of this instead of using combos
+                                                std::vector< ResolvExpr::AltList > rhsAlts;
+                                                combos( std::next(args.begin(), 1), args.end(),
+                                                        std::back_inserter( rhsAlts ) );
+                                                for ( const ResolvExpr::AltList& rhsAlt : rhsAlts ) {
+                                                        // multiple assignment
+                                                        ResolvExpr::AltList rhs;
+                                                        explode( rhsAlt, currentFinder.get_indexer(),
+                                                                std::back_inserter(rhs), true );
+                                                        matcher.reset( new MultipleAssignMatcher( *this, lhs, rhs ) );
+                                                        match();
+                                                }
+                                        } else {
+                                                for ( const ResolvExpr::Alternative& rhsAlt : args[1] ) {
+                                                        ResolvExpr::AltList rhs;
+                                                        if ( isTuple(rhsAlt.expr) ) {
+                                                                // multiple assignment
+                                                                explode( rhsAlt, currentFinder.get_indexer(),
+                                                                        std::back_inserter(rhs), true );
+                                                                matcher.reset( new MultipleAssignMatcher( *this, lhs, rhs ) );
+                                                        } else {
+                                                                // mass assignment
+                                                                rhs.push_back( rhsAlt );
+                                                                matcher.reset( new MassAssignMatcher( *this, lhs, rhs ) );
+                                                        }
+                                                        match();
+                                                }
+                                        }
+                                }
 …
                 ResolvExpr::AltList current;
                 // now resolve new assignments
+                for ( std::list< Expression * >::iterator i = new_assigns.begin(); i != new_assigns.end(); ++i ) {
+                for ( std::list< Expression * >::iterator i = new_assigns.begin();
+                                i != new_assigns.end(); ++i ) {
                         PRINT(
                                 std::cerr << "== resolving tuple assign ==" << std::endl;
 …
+                        )
+                        ResolvExpr::AlternativeFinder finder( currentFinder.get_indexer(), currentFinder.get_environ() );
+                        ResolvExpr::AlternativeFinder finder{ currentFinder.get_indexer(),
+                                currentFinder.get_environ() };
                         try {
                                 finder.findWithAdjustment(*i);
 …
                 // combine assignment environments into combined expression environment
                 simpleCombineEnvironments( current.begin(), current.end(), matcher->compositeEnv );
+                currentFinder.get_alternatives().push_front( ResolvExpr::Alternative(new TupleAssignExpr(solved_assigns, matcher->tmpDecls), matcher->compositeEnv, ResolvExpr::sumCost( current  ) + matcher->baseCost ) );
+        }
+        TupleAssignSpotter::Matcher::Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList &alts ) : spotter(spotter), baseCost( ResolvExpr::sumCost( alts ) ) {
                 // Single-list form of the constructor: alts.front() is used as the left-hand side.
                 // baseCost is the summed cost of every alternative in alts.
+                assert( ! alts.empty() );
+                // combine argument environments into combined expression environment
+                simpleCombineEnvironments( alts.begin(), alts.end(), compositeEnv );
                 // local copy of the first alternative (alts itself is const)
+                ResolvExpr::Alternative lhsAlt = alts.front();
+                // explode is aware of casts - ensure every LHS expression is sent into explode with a reference cast
+                if ( ! dynamic_cast< CastExpr * >( lhsAlt.expr ) ) {
+                        lhsAlt.expr = new CastExpr( lhsAlt.expr, new ReferenceType( Type::Qualifiers(), lhsAlt.expr->get_result()->clone() ) );
+                }
+                // explode the lhs so that each field of the tuple-valued-expr is assigned.
                 // the exploded alternatives are appended to the member `lhs`; the final `true` is explode's isTupleAssign argument
+                explode( lhsAlt, spotter.currentFinder.get_indexer(), back_inserter(lhs), true );
+                for ( ResolvExpr::Alternative & alt : lhs ) {
+                        // every LHS value must be a reference - some come in with a cast expression, if it doesn't just cast to reference here.
+                        if ( ! dynamic_cast< ReferenceType * >( alt.expr->get_result() ) ) {
+                                alt.expr = new CastExpr( alt.expr, new ReferenceType( Type::Qualifiers(), alt.expr->get_result()->clone() ) );
+                        }
+                }
+        }
+        TupleAssignSpotter::MassAssignMatcher::MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts ) : Matcher( spotter, alts ) {
                 // alts must hold exactly one or two alternatives
+                assert( alts.size() == 1 || alts.size() == 2 );
+                if ( alts.size() == 2 ) {
                         // with two alternatives, the last one becomes the only entry of rhs
+                        rhs.push_back( alts.back() );
+                }
+        }
+        TupleAssignSpotter::MultipleAssignMatcher::MultipleAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts ) : Matcher( spotter, alts ) {
+                // explode the rhs so that each field of the tuple-valued-expr is assigned.
+                explode( std::next(alts.begin(), 1), alts.end(), spotter.currentFinder.get_indexer(), back_inserter(rhs), true );
+                // xxx -- was push_front
+                currentFinder.get_alternatives().push_back( ResolvExpr::Alternative(
+                        new TupleAssignExpr(solved_assigns, matcher->tmpDecls), matcher->compositeEnv,
+                        ResolvExpr::sumCost( current ) + matcher->baseCost ) );
+        }
+        TupleAssignSpotter::Matcher::Matcher( TupleAssignSpotter &spotter,
+                const ResolvExpr::AltList &lhs, const ResolvExpr::AltList &rhs )
+        : lhs(lhs), rhs(rhs), spotter(spotter),
+          baseCost( ResolvExpr::sumCost( lhs ) + ResolvExpr::sumCost( rhs ) ) {
                 // Two-list form: the lhs and rhs members are initialised directly from the arguments, and
                 // baseCost is the summed cost of both lists.
                 // The environments of both sides are then combined into compositeEnv, lhs first.
+                simpleCombineEnvironments( lhs.begin(), lhs.end(), compositeEnv );
+                simpleCombineEnvironments( rhs.begin(), rhs.end(), compositeEnv );
+        }

src/Tuples/Tuples.h

-              r9d06142
+              rc0d00b6
 #include <string>
+#include <vector>
 #include "SynTree/Expression.h"
 …
 namespace Tuples {
         // TupleAssignment.cc
+        void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * assign, const std::list<ResolvExpr::AltList> & possibilities );
+        void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * assign,
+                std::vector< ResolvExpr::AlternativeFinder >& args );
         // TupleExpansion.cc
         /// expands z.[a, b.[x, y], c] into [z.a, z.b.x, z.b.y, z.c], inserting UniqueExprs as appropriate

src/benchmark/Makefile.am

-              r9d06142
+              rc0d00b6
 STATS    = ${TOOLSDIR}stat.py
 repeats  = 30
+TIME_FORMAT = "%E"
+PRINT_FORMAT = %20s: #Comments needed for spacing
 .NOTPARALLEL:
 …
 all : ctxswitch$(EXEEXT) mutex$(EXEEXT) signal$(EXEEXT) waitfor$(EXEEXT) creation$(EXEEXT)
-bench$(EXEEXT) :
-        @for ccflags in "-debug" "-nodebug"; do \
-                echo ${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -lrt bench.c;\
-                ${CC} ${AM_CFLAGS} ${CFLAGS} $${ccflags} -lrt bench.c;\
-                ./a.out ; \
-        done ; \
-        rm -f ./a.out ;
-csv-data$(EXEEXT):
-        @${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -nodebug -lrt -quiet -DN=50000000 csv-data.c
-        @./a.out
-        @rm -f ./a.out
-## =========================================================================================================
-ctxswitch$(EXEEXT): \
-        ctxswitch-pthread.run           \
-        ctxswitch-cfa_coroutine.run     \
-        ctxswitch-cfa_thread.run        \
-        ctxswitch-upp_coroutine.run     \
-        ctxswitch-upp_thread.run
-ctxswitch-cfa_coroutine$(EXEEXT):
-        ${CC}        ctxswitch/cfa_cor.c   -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-ctxswitch-cfa_thread$(EXEEXT):
-        ${CC}        ctxswitch/cfa_thrd.c  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-ctxswitch-upp_coroutine$(EXEEXT):
-        u++          ctxswitch/upp_cor.cc  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-ctxswitch-upp_thread$(EXEEXT):
-        u++          ctxswitch/upp_thrd.cc -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-ctxswitch-pthread$(EXEEXT):
-        @BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-## =========================================================================================================
-mutex$(EXEEXT) :\
-        mutex-function.run      \
-        mutex-pthread_lock.run  \
-        mutex-upp.run           \
-        mutex-cfa1.run          \
-        mutex-cfa2.run          \
-        mutex-cfa4.run
-mutex-function$(EXEEXT):
-        @BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-mutex-pthread_lock$(EXEEXT):
-        @BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-mutex-upp$(EXEEXT):
-        u++          mutex/upp.cc        -DBENCH_N=50000000    -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-mutex-cfa1$(EXEEXT):
-        ${CC}        mutex/cfa1.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-mutex-cfa2$(EXEEXT):
-        ${CC}        mutex/cfa2.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-mutex-cfa4$(EXEEXT):
-        ${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-## =========================================================================================================
-signal$(EXEEXT) :\
-        signal-upp.run          \
-        signal-cfa1.run         \
-        signal-cfa2.run         \
-        signal-cfa4.run
-signal-upp$(EXEEXT):
-        u++          schedint/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-signal-cfa1$(EXEEXT):
-        ${CC}        schedint/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-signal-cfa2$(EXEEXT):
-        ${CC}        schedint/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-signal-cfa4$(EXEEXT):
-        ${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-## =========================================================================================================
-waitfor$(EXEEXT) :\
-        waitfor-upp.run         \
-        waitfor-cfa1.run                \
-        waitfor-cfa2.run                \
-        waitfor-cfa4.run
-waitfor-upp$(EXEEXT):
-        u++          schedext/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-waitfor-cfa1$(EXEEXT):
-        ${CC}        schedext/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-waitfor-cfa2$(EXEEXT):
-        ${CC}        schedext/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-waitfor-cfa4$(EXEEXT):
-        ${CC}        schedext/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-## =========================================================================================================
-creation$(EXEEXT) :\
-        creation-pthread.run            \
-        creation-cfa_coroutine.run      \
-        creation-cfa_thread.run         \
-        creation-upp_coroutine.run      \
-        creation-upp_thread.run
-creation-cfa_coroutine$(EXEEXT):
-        ${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-creation-cfa_thread$(EXEEXT):
-        ${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-creation-upp_coroutine$(EXEEXT):
-        u++          creation/upp_cor.cc  -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-creation-upp_thread$(EXEEXT):
-        u++          creation/upp_thrd.cc -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-creation-pthread$(EXEEXT):
-        @BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-## =========================================================================================================
 %.run : %$(EXEEXT) ${REPEAT}
 …
         @rm -f a.out .result.log
+## Build the benchmark named by the target stem, execute the produced ./a.out once
+## (no repetition), then delete the binary, e.g. 'make mutex-cfa1.runquiet'.
+## $(MAKE) is used for every recursive call so the sub-make is the same make program
+## that is running this file.
+%.runquiet :
+        @+$(MAKE) $(basename $@)
+        @./a.out
+        @rm -f a.out
+## Print the target name (with any 'compile-' text removed) through PRINT_FORMAT, then build
+## the target under /usr/bin/time so its timing is reported using TIME_FORMAT.
+## '2>&1' merges stderr into stdout so the timing follows the printed name.
+%.make :
+        @printf "${PRINT_FORMAT}" $(basename $(subst compile-,,$@))
+        @+/usr/bin/time -f ${TIME_FORMAT} $(MAKE) $(basename $@) 2>&1
 ${REPEAT} :
         @+$(MAKE) -C ${TOOLSDIR} repeat
+## =========================================================================================================
+## Emit a single JSON object on stdout holding the benchmark results of one revision.
+## 'githash' and 'arch' are expected as variables on the make command line.
+## printf replaces 'echo -e' / 'echo -en': those echo options are not portable between
+## /bin/sh implementations and may be printed literally into the JSON.
+## ${githash} and ${arch} are substituted directly into the quoted value, so no padding
+## spaces end up inside the JSON strings.
+## The "dummy" member only absorbs the trailing comma that TIME_FORMAT='%e,' leaves after
+## the last compile timing.
+jenkins$(EXEEXT):
+        @echo "{"
+        @printf '\t"githash": "%s",\n' '${githash}'
+        @printf '\t"arch": "%s",\n' '${arch}'
+        @printf '\t"compile": {\n'
+        @+$(MAKE) compile TIME_FORMAT='%e,' PRINT_FORMAT='\t\t\"%s\" :'
+        @printf '\t\t"dummy" : {}\n'
+        @printf '\t},\n'
+        @printf '\t"ctxswitch": {\n'
+        @printf '\t\t"coroutine":'
+        @+$(MAKE) ctxswitch-cfa_coroutine.runquiet
+        @printf '\t\t,"thread":'
+        @+$(MAKE) ctxswitch-cfa_thread.runquiet
+        @printf '\t},\n'
+        @printf '\t"mutex": [\n'
+        @printf '\t\t'
+        @+$(MAKE) mutex-cfa1.runquiet
+        @printf '\t\t,'
+        @+$(MAKE) mutex-cfa2.runquiet
+        @printf '\t],\n'
+        @printf '\t"scheduling": [\n'
+        @printf '\t\t'
+        @+$(MAKE) signal-cfa1.runquiet
+        @printf '\t\t,'
+        @+$(MAKE) signal-cfa2.runquiet
+        @printf '\t\t,'
+        @+$(MAKE) waitfor-cfa1.runquiet
+        @printf '\t\t,'
+        @+$(MAKE) waitfor-cfa2.runquiet
+        @printf '\n\t],\n'
+        @printf '\t"epoch": %s\n' $(shell date +%s)
+        @echo "}"
+## =========================================================================================================
+## Aggregate target: requests the '.run' target of every ctxswitch benchmark variant.
+ctxswitch$(EXEEXT): $(addsuffix .run, $(addprefix ctxswitch-, \
+        pthread cfa_coroutine cfa_thread upp_coroutine upp_thread))
+## One compile rule per variant; BENCH_N is handed to the source as a preprocessor macro.
+ctxswitch-cfa_coroutine$(EXEEXT):
+        @${CC} ctxswitch/cfa_cor.c -DBENCH_N=50000000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+ctxswitch-cfa_thread$(EXEEXT):
+        @${CC} ctxswitch/cfa_thrd.c -DBENCH_N=50000000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+ctxswitch-upp_coroutine$(EXEEXT):
+        @u++ ctxswitch/upp_cor.cc -DBENCH_N=50000000 -I. -nodebug -lrt -quiet ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+ctxswitch-upp_thread$(EXEEXT):
+        @u++ ctxswitch/upp_thrd.cc -DBENCH_N=50000000 -I. -nodebug -lrt -quiet ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+ctxswitch-pthread$(EXEEXT):
+        @@BACKEND_CC@ ctxswitch/pthreads.c -DBENCH_N=50000000 -I. -lrt -pthread ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+## =========================================================================================================
+## Aggregate target: requests the '.run' target of every mutex benchmark variant.
+mutex$(EXEEXT) : $(addsuffix .run, $(addprefix mutex-, \
+        function pthread_lock upp cfa1 cfa2 cfa4))
+## One compile rule per variant; the iteration count BENCH_N differs between variants.
+mutex-function$(EXEEXT):
+        @@BACKEND_CC@ mutex/function.c -DBENCH_N=500000000 -I. -lrt -pthread ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+mutex-pthread_lock$(EXEEXT):
+        @@BACKEND_CC@ mutex/pthreads.c -DBENCH_N=50000000 -I. -lrt -pthread ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+mutex-upp$(EXEEXT):
+        @u++ mutex/upp.cc -DBENCH_N=50000000 -I. -nodebug -lrt -quiet ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+mutex-cfa1$(EXEEXT):
+        @${CC} mutex/cfa1.c -DBENCH_N=5000000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+mutex-cfa2$(EXEEXT):
+        @${CC} mutex/cfa2.c -DBENCH_N=5000000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+mutex-cfa4$(EXEEXT):
+        @${CC} mutex/cfa4.c -DBENCH_N=5000000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+## =========================================================================================================
+## Aggregate target: requests the '.run' target of every signal benchmark variant.
+## The sources for these variants live in the schedint/ directory.
+signal$(EXEEXT) : $(addsuffix .run, $(addprefix signal-, upp cfa1 cfa2 cfa4))
+signal-upp$(EXEEXT):
+        @u++ schedint/upp.cc -DBENCH_N=5000000 -I. -nodebug -lrt -quiet ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+signal-cfa1$(EXEEXT):
+        @${CC} schedint/cfa1.c -DBENCH_N=500000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+signal-cfa2$(EXEEXT):
+        @${CC} schedint/cfa2.c -DBENCH_N=500000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+signal-cfa4$(EXEEXT):
+        @${CC} schedint/cfa4.c -DBENCH_N=500000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+## =========================================================================================================
+## Aggregate target: requests the '.run' target of every waitfor benchmark variant.
+## The sources for these variants live in the schedext/ directory.
+waitfor$(EXEEXT) : $(addsuffix .run, $(addprefix waitfor-, upp cfa1 cfa2 cfa4))
+waitfor-upp$(EXEEXT):
+        @u++ schedext/upp.cc -DBENCH_N=5000000 -I. -nodebug -lrt -quiet ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+waitfor-cfa1$(EXEEXT):
+        @${CC} schedext/cfa1.c -DBENCH_N=500000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+waitfor-cfa2$(EXEEXT):
+        @${CC} schedext/cfa2.c -DBENCH_N=500000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+waitfor-cfa4$(EXEEXT):
+        @${CC} schedext/cfa4.c -DBENCH_N=500000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+## =========================================================================================================
+## Aggregate target: requests the '.run' target of every creation benchmark variant.
+creation$(EXEEXT) : $(addsuffix .run, $(addprefix creation-, \
+        pthread cfa_coroutine cfa_coroutine_eager cfa_thread upp_coroutine upp_thread))
+## cfa_coroutine and cfa_coroutine_eager compile the same source; the eager variant only
+## adds -DEAGER at the end of the command line.
+creation-cfa_coroutine$(EXEEXT):
+        @${CC} creation/cfa_cor.c -DBENCH_N=10000000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+creation-cfa_coroutine_eager$(EXEEXT):
+        @${CC} creation/cfa_cor.c -DBENCH_N=10000000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags} -DEAGER
+creation-cfa_thread$(EXEEXT):
+        @${CC} creation/cfa_thrd.c -DBENCH_N=10000000 -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+creation-upp_coroutine$(EXEEXT):
+        @u++ creation/upp_cor.cc -DBENCH_N=50000000 -I. -nodebug -lrt -quiet ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+creation-upp_thread$(EXEEXT):
+        @u++ creation/upp_thrd.cc -DBENCH_N=50000000 -I. -nodebug -lrt -quiet ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+creation-pthread$(EXEEXT):
+        @@BACKEND_CC@ creation/pthreads.c -DBENCH_N=250000 -I. -lrt -pthread ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+## =========================================================================================================
+## Aggregate target: requests the '.make' target of each listed compile test.
+## NOTE(review): compile-thread has a rule below but is not listed here - confirm whether
+## that omission is intentional.
+compile$(EXEEXT) : $(addsuffix .make, $(addprefix compile-, \
+        array attributes empty expression io monitor operators typeof))
+## Each rule passes -fsyntax-only and -w together with the one source file being tested.
+compile-array$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/array.c @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-attributes$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/attributes.c @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-empty$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w compile/empty.c @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-expression$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/expression.c @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-io$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/io.c @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-monitor$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/monitor.c @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-operators$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/operators.c @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-thread$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/thread.c @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-typeof$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/typeof.c @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}

src/benchmark/Makefile.in

-              r9d06142
+              rc0d00b6
   esac
 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
 am__DIST_COMMON = $(srcdir)/Makefile.in
+am__DIST_COMMON = $(srcdir)/Makefile.in compile
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
 ACLOCAL = @ACLOCAL@
 …
 STATS = ${TOOLSDIR}stat.py
 repeats = 30
+TIME_FORMAT = "%E"
+PRINT_FORMAT = %20s: #Comments needed for spacing
 all: all-am
 …
 all : ctxswitch$(EXEEXT) mutex$(EXEEXT) signal$(EXEEXT) waitfor$(EXEEXT) creation$(EXEEXT)
-bench$(EXEEXT) :
-        @for ccflags in "-debug" "-nodebug"; do \
-                echo ${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -lrt bench.c;\
-                ${CC} ${AM_CFLAGS} ${CFLAGS} $${ccflags} -lrt bench.c;\
-                ./a.out ; \
-        done ; \
-        rm -f ./a.out ;
-csv-data$(EXEEXT):
-        @${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -nodebug -lrt -quiet -DN=50000000 csv-data.c
-        @./a.out
-        @rm -f ./a.out
-ctxswitch$(EXEEXT): \
-        ctxswitch-pthread.run           \
-        ctxswitch-cfa_coroutine.run     \
-        ctxswitch-cfa_thread.run        \
-        ctxswitch-upp_coroutine.run     \
-        ctxswitch-upp_thread.run
-ctxswitch-cfa_coroutine$(EXEEXT):
-        ${CC}        ctxswitch/cfa_cor.c   -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-ctxswitch-cfa_thread$(EXEEXT):
-        ${CC}        ctxswitch/cfa_thrd.c  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-ctxswitch-upp_coroutine$(EXEEXT):
-        u++          ctxswitch/upp_cor.cc  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-ctxswitch-upp_thread$(EXEEXT):
-        u++          ctxswitch/upp_thrd.cc -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-ctxswitch-pthread$(EXEEXT):
-        @BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-mutex$(EXEEXT) :\
-        mutex-function.run      \
-        mutex-pthread_lock.run  \
-        mutex-upp.run           \
-        mutex-cfa1.run          \
-        mutex-cfa2.run          \
-        mutex-cfa4.run
-mutex-function$(EXEEXT):
-        @BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-mutex-pthread_lock$(EXEEXT):
-        @BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-mutex-upp$(EXEEXT):
-        u++          mutex/upp.cc        -DBENCH_N=50000000    -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-mutex-cfa1$(EXEEXT):
-        ${CC}        mutex/cfa1.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-mutex-cfa2$(EXEEXT):
-        ${CC}        mutex/cfa2.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-mutex-cfa4$(EXEEXT):
-        ${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-signal$(EXEEXT) :\
-        signal-upp.run          \
-        signal-cfa1.run         \
-        signal-cfa2.run         \
-        signal-cfa4.run
-signal-upp$(EXEEXT):
-        u++          schedint/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-signal-cfa1$(EXEEXT):
-        ${CC}        schedint/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-signal-cfa2$(EXEEXT):
-        ${CC}        schedint/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-signal-cfa4$(EXEEXT):
-        ${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-waitfor$(EXEEXT) :\
-        waitfor-upp.run         \
-        waitfor-cfa1.run                \
-        waitfor-cfa2.run                \
-        waitfor-cfa4.run
-waitfor-upp$(EXEEXT):
-        u++          schedext/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-waitfor-cfa1$(EXEEXT):
-        ${CC}        schedext/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-waitfor-cfa2$(EXEEXT):
-        ${CC}        schedext/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-waitfor-cfa4$(EXEEXT):
-        ${CC}        schedext/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-creation$(EXEEXT) :\
-        creation-pthread.run            \
-        creation-cfa_coroutine.run      \
-        creation-cfa_thread.run         \
-        creation-upp_coroutine.run      \
-        creation-upp_thread.run
-creation-cfa_coroutine$(EXEEXT):
-        ${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-creation-cfa_thread$(EXEEXT):
-        ${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-creation-upp_coroutine$(EXEEXT):
-        u++          creation/upp_cor.cc  -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-creation-upp_thread$(EXEEXT):
-        u++          creation/upp_thrd.cc -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
-creation-pthread$(EXEEXT):
-        @BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
 %.run : %$(EXEEXT) ${REPEAT}
         @rm -f .result.log
 …
         @rm -f a.out .result.log
+%.runquiet :
+        @+make $(basename $@)
+        @./a.out
+        @rm -f a.out
+%.make :
+        @printf "${PRINT_FORMAT}" $(basename $(subst compile-,,$@))
+        @+/usr/bin/time -f ${TIME_FORMAT} make $(basename $@) 2>&1
 ${REPEAT} :
         @+make -C ${TOOLSDIR} repeat
+jenkins$(EXEEXT):
+        @echo "{"
+        @echo -e '\t"githash": "'${githash}'",'
+        @echo -e '\t"arch": "'   ${arch}   '",'
+        @echo -e '\t"compile": {'
+        @+make compile TIME_FORMAT='%e,' PRINT_FORMAT='\t\t\"%s\" :'
+        @echo -e '\t\t"dummy" : {}'
+        @echo -e '\t},'
+        @echo -e '\t"ctxswitch": {'
+        @echo -en '\t\t"coroutine":'
+        @+make ctxswitch-cfa_coroutine.runquiet
+        @echo -en '\t\t,"thread":'
+        @+make ctxswitch-cfa_thread.runquiet
+        @echo -e '\t},'
+        @echo -e '\t"mutex": ['
+        @echo -en '\t\t'
+        @+make mutex-cfa1.runquiet
+        @echo -en '\t\t,'
+        @+make mutex-cfa2.runquiet
+        @echo -e '\t],'
+        @echo -e '\t"scheduling": ['
+        @echo -en '\t\t'
+        @+make signal-cfa1.runquiet
+        @echo -en '\t\t,'
+        @+make signal-cfa2.runquiet
+        @echo -en '\t\t,'
+        @+make waitfor-cfa1.runquiet
+        @echo -en '\t\t,'
+        @+make waitfor-cfa2.runquiet
+        @echo -e '\n\t],'
+        @echo -e '\t"epoch": ' $(shell date +%s)
+        @echo "}"
+ctxswitch$(EXEEXT): \
+        ctxswitch-pthread.run           \
+        ctxswitch-cfa_coroutine.run     \
+        ctxswitch-cfa_thread.run        \
+        ctxswitch-upp_coroutine.run     \
+        ctxswitch-upp_thread.run
+ctxswitch-cfa_coroutine$(EXEEXT):
+        @${CC}        ctxswitch/cfa_cor.c   -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+ctxswitch-cfa_thread$(EXEEXT):
+        @${CC}        ctxswitch/cfa_thrd.c  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+ctxswitch-upp_coroutine$(EXEEXT):
+        @u++          ctxswitch/upp_cor.cc  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+ctxswitch-upp_thread$(EXEEXT):
+        @u++          ctxswitch/upp_thrd.cc -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+ctxswitch-pthread$(EXEEXT):
+        @@BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+mutex$(EXEEXT) :\
+        mutex-function.run      \
+        mutex-pthread_lock.run  \
+        mutex-upp.run           \
+        mutex-cfa1.run          \
+        mutex-cfa2.run          \
+        mutex-cfa4.run
+mutex-function$(EXEEXT):
+        @@BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+mutex-pthread_lock$(EXEEXT):
+        @@BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+mutex-upp$(EXEEXT):
+        @u++          mutex/upp.cc        -DBENCH_N=50000000    -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+mutex-cfa1$(EXEEXT):
+        @${CC}        mutex/cfa1.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+mutex-cfa2$(EXEEXT):
+        @${CC}        mutex/cfa2.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+mutex-cfa4$(EXEEXT):
+        @${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+signal$(EXEEXT) :\
+        signal-upp.run          \
+        signal-cfa1.run         \
+        signal-cfa2.run         \
+        signal-cfa4.run
+signal-upp$(EXEEXT):
+        @u++          schedint/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+signal-cfa1$(EXEEXT):
+        @${CC}        schedint/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+signal-cfa2$(EXEEXT):
+        @${CC}        schedint/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+signal-cfa4$(EXEEXT):
+        @${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+waitfor$(EXEEXT) :\
+        waitfor-upp.run         \
+        waitfor-cfa1.run                \
+        waitfor-cfa2.run                \
+        waitfor-cfa4.run
+waitfor-upp$(EXEEXT):
+        @u++          schedext/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+waitfor-cfa1$(EXEEXT):
+        @${CC}        schedext/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+waitfor-cfa2$(EXEEXT):
+        @${CC}        schedext/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+waitfor-cfa4$(EXEEXT):
+        @${CC}        schedext/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+creation$(EXEEXT) :\
+        creation-pthread.run                    \
+        creation-cfa_coroutine.run              \
+        creation-cfa_coroutine_eager.run        \
+        creation-cfa_thread.run                 \
+        creation-upp_coroutine.run              \
+        creation-upp_thread.run
+creation-cfa_coroutine$(EXEEXT):
+        @${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+creation-cfa_coroutine_eager$(EXEEXT):
+        @${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags} -DEAGER
+creation-cfa_thread$(EXEEXT):
+        @${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+creation-upp_coroutine$(EXEEXT):
+        @u++          creation/upp_cor.cc  -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+creation-upp_thread$(EXEEXT):
+        @u++          creation/upp_thrd.cc -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+creation-pthread$(EXEEXT):
+        @@BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile$(EXEEXT) :\
+        compile-array.make      \
+        compile-attributes.make \
+        compile-empty.make      \
+        compile-expression.make \
+        compile-io.make         \
+        compile-monitor.make    \
+        compile-operators.make  \
+        compile-typeof.make
+compile-array$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/array.c                @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-attributes$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/attributes.c   @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-empty$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w compile/empty.c         @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-expression$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/expression.c   @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-io$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/io.c                   @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-monitor$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/monitor.c              @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-operators$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/operators.c    @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-thread$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/thread.c               @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
+compile-typeof$(EXEEXT):
+        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/typeof.c               @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
 # Tell versions [3.59,3.63) of GNU make to not export all variables.

src/benchmark/creation/cfa_cor.c

-              r9d06142
+              rc0d00b6
 coroutine MyCoroutine {};
+void ?{} (MyCoroutine & this) { prime(this); }
+void ?{} (MyCoroutine & this) {
+#ifdef EAGER
+        prime(this);
+#endif
+}
 void main(MyCoroutine & this) {}

src/libcfa/Makefile.am

-              r9d06142
+              rc0d00b6
 cfa_includedir = $(CFA_INCDIR)
+nobase_cfa_include_HEADERS = ${headers} ${stdhdr} math gmp concurrency/invoke.h
+nobase_cfa_include_HEADERS =    \
+        ${headers}                      \
+        ${stdhdr}                       \
+        math                            \
+        gmp                             \
+        bits/defs.h             \
+        bits/locks.h            \
+        concurrency/invoke.h    \
+        libhdr.h                        \
+        libhdr/libalign.h       \
+        libhdr/libdebug.h       \
+        libhdr/libtools.h
 CLEANFILES = libcfa-prelude.c

src/libcfa/Makefile.in

-              r9d06142
+              rc0d00b6
         containers/result containers/vector concurrency/coroutine \
         concurrency/thread concurrency/kernel concurrency/monitor \
+        ${shell echo stdhdr/*} math gmp concurrency/invoke.h
+        ${shell echo stdhdr/*} math gmp bits/defs.h bits/locks.h \
+        concurrency/invoke.h libhdr.h libhdr/libalign.h \
+        libhdr/libdebug.h libhdr/libtools.h
 HEADERS = $(nobase_cfa_include_HEADERS)
 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
 …
 stdhdr = ${shell echo stdhdr/*}
 cfa_includedir = $(CFA_INCDIR)
+nobase_cfa_include_HEADERS = ${headers} ${stdhdr} math gmp concurrency/invoke.h
+nobase_cfa_include_HEADERS = \
+        ${headers}                      \
+        ${stdhdr}                       \
+        math                            \
+        gmp                             \
+        bits/defs.h             \
+        bits/locks.h            \
+        concurrency/invoke.h    \
+        libhdr.h                        \
+        libhdr/libalign.h       \
+        libhdr/libdebug.h       \
+        libhdr/libtools.h
 CLEANFILES = libcfa-prelude.c
 all: all-am

src/libcfa/concurrency/alarm.c

-              r9d06142
+              rc0d00b6
         disable_interrupts();
         lock( &event_kernel->lock DEBUG_CTX2 );
+        lock( event_kernel->lock DEBUG_CTX2 );
+        {
                 verify( validate( alarms ) );
 …
+                }
+        }
         unlock( &event_kernel->lock );
+        unlock( event_kernel->lock );
         this->set = true;
         enable_interrupts( DEBUG_CTX );
 …
 void unregister_self( alarm_node_t * this ) {
         disable_interrupts();
         lock( &event_kernel->lock DEBUG_CTX2 );
+        lock( event_kernel->lock DEBUG_CTX2 );
+        {
                 verify( validate( &event_kernel->alarms ) );
                 remove( &event_kernel->alarms, this );
+        }
         unlock( &event_kernel->lock );
+        unlock( event_kernel->lock );
         enable_interrupts( DEBUG_CTX );
         this->set = false;

src/libcfa/concurrency/invoke.h

-              r9d06142
+              rc0d00b6
 //
 #include <stdbool.h>
 #include <stdint.h>
+#include "bits/defs.h"
+#include "bits/locks.h"
 #ifdef __CFORALL__
 …
 #define _INVOKE_H_
-        #define unlikely(x)    __builtin_expect(!!(x), 0)
-        #define thread_local _Thread_local
         typedef void (*fptr_t)();
         typedef int_fast16_t __lock_size_t;
-        struct spinlock {
-                volatile int lock;
-                #ifdef __CFA_DEBUG__
-                        const char * prev_name;
-                        void* prev_thrd;
-                #endif
-        };
         struct __thread_queue_t {
 …
                 void push( struct __condition_stack_t &, struct __condition_criterion_t * );
                 struct __condition_criterion_t * pop( struct __condition_stack_t & );
-                void  ?{}(spinlock & this);
-                void ^?{}(spinlock & this);
+        }
         #endif
 …
         struct monitor_desc {
                 // spinlock to protect internal data
                 struct spinlock lock;
+                struct __spinlock_t lock;
                 // current owner of the monitor

src/libcfa/concurrency/kernel

-              r9d06142
+              rc0d00b6
 //-----------------------------------------------------------------------------
 // Locks
 // Lock the spinlock, spin if already acquired
 void lock      ( spinlock * DEBUG_CTX_PARAM2 );
+// // Lock the spinlock, spin if already acquired
+// void lock      ( spinlock * DEBUG_CTX_PARAM2 );
 // Lock the spinlock, yield repeatedly if already acquired
 void lock_yield( spinlock * DEBUG_CTX_PARAM2 );
+// // Lock the spinlock, yield repeatedly if already acquired
+// void lock_yield( spinlock * DEBUG_CTX_PARAM2 );
 // Lock the spinlock, return false if already acquired
 bool try_lock  ( spinlock * DEBUG_CTX_PARAM2 );
+// // Lock the spinlock, return false if already acquired
+// bool try_lock  ( spinlock * DEBUG_CTX_PARAM2 );
 // Unlock the spinlock
 void unlock    ( spinlock * );
+// // Unlock the spinlock
+// void unlock    ( spinlock * );
 struct semaphore {
         spinlock lock;
+        __spinlock_t lock;
         int count;
         __thread_queue_t waiting;
 …
 struct cluster {
         // Ready queue locks
         spinlock ready_queue_lock;
+        __spinlock_t ready_queue_lock;
         // Ready queue for threads
 …
         FinishOpCode action_code;
         thread_desc * thrd;
         spinlock * lock;
         spinlock ** locks;
+        __spinlock_t * lock;
+        __spinlock_t ** locks;
         unsigned short lock_count;
         thread_desc ** thrds;
 …
 #ifdef __CFA_DEBUG__
         // Last function to enable preemption on this processor
         char * last_enable;
+        const char * last_enable;
 #endif
 };

src/libcfa/concurrency/kernel.c

-              r9d06142
+              rc0d00b6
 void finishRunning(processor * this) {
         if( this->finish.action_code == Release ) {
                 unlock( this->finish.lock );
+                unlock( *this->finish.lock );
+        }
         else if( this->finish.action_code == Schedule ) {
 …
+        }
         else if( this->finish.action_code == Release_Schedule ) {
                 unlock( this->finish.lock );
+                unlock( *this->finish.lock );
                 ScheduleThread( this->finish.thrd );
+        }
         else if( this->finish.action_code == Release_Multi ) {
                 for(int i = 0; i < this->finish.lock_count; i++) {
                         unlock( this->finish.locks[i] );
+                        unlock( *this->finish.locks[i] );
+                }
+        }
         else if( this->finish.action_code == Release_Multi_Schedule ) {
                 for(int i = 0; i < this->finish.lock_count; i++) {
                         unlock( this->finish.locks[i] );
+                        unlock( *this->finish.locks[i] );
+                }
                 for(int i = 0; i < this->finish.thrd_count; i++) {
 …
         verifyf( thrd->next == NULL, "Expected null got %p", thrd->next );
         lock(   &this_processor->cltr->ready_queue_lock DEBUG_CTX2 );
+        lock(   this_processor->cltr->ready_queue_lock DEBUG_CTX2 );
         append( this_processor->cltr->ready_queue, thrd );
         unlock( &this_processor->cltr->ready_queue_lock );
+        unlock( this_processor->cltr->ready_queue_lock );
         verify( disable_preempt_count > 0 );
 …
 thread_desc * nextThread(cluster * this) {
         verify( disable_preempt_count > 0 );
         lock( &this->ready_queue_lock DEBUG_CTX2 );
+        lock( this->ready_queue_lock DEBUG_CTX2 );
         thread_desc * head = pop_head( this->ready_queue );
         unlock( &this->ready_queue_lock );
+        unlock( this->ready_queue_lock );
         verify( disable_preempt_count > 0 );
         return head;
 …
+}
 void BlockInternal( spinlock * lock ) {
+void BlockInternal( __spinlock_t * lock ) {
         disable_interrupts();
         this_processor->finish.action_code = Release;
 …
+}
 void BlockInternal( spinlock * lock, thread_desc * thrd ) {
+void BlockInternal( __spinlock_t * lock, thread_desc * thrd ) {
         assert(thrd);
         disable_interrupts();
 …
+}
 void BlockInternal(spinlock * locks [], unsigned short count) {
+void BlockInternal(__spinlock_t * locks [], unsigned short count) {
         disable_interrupts();
         this_processor->finish.action_code = Release_Multi;
 …
+}
 void BlockInternal(spinlock * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
+void BlockInternal(__spinlock_t * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
         disable_interrupts();
         this_processor->finish.action_code = Release_Multi_Schedule;
 …
+}
 void LeaveThread(spinlock * lock, thread_desc * thrd) {
+void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
         verify( disable_preempt_count > 0 );
         this_processor->finish.action_code = thrd ? Release_Schedule : Release;
 …
+}
 static spinlock kernel_abort_lock;
 static spinlock kernel_debug_lock;
+static __spinlock_t kernel_abort_lock;
+static __spinlock_t kernel_debug_lock;
 static bool kernel_abort_called = false;
 …
         // abort cannot be recursively entered by the same or different processors because all signal handlers return when
         // the globalAbort flag is true.
         lock( &kernel_abort_lock DEBUG_CTX2 );
+        lock( kernel_abort_lock DEBUG_CTX2 );
         // first task to abort ?
         if ( !kernel_abort_called ) {                   // not first task to abort ?
                 kernel_abort_called = true;
                 unlock( &kernel_abort_lock );
+                unlock( kernel_abort_lock );
+        }
         else {
                 unlock( &kernel_abort_lock );
+                unlock( kernel_abort_lock );
                 sigset_t mask;
 …
 extern "C" {
         void __lib_debug_acquire() {
                 lock( &kernel_debug_lock DEBUG_CTX2 );
+                lock( kernel_debug_lock DEBUG_CTX2 );
+        }
         void __lib_debug_release() {
                 unlock( &kernel_debug_lock );
+                unlock( kernel_debug_lock );
+        }
+}
 …
 //-----------------------------------------------------------------------------
 // Locks
-void ?{}( spinlock & this ) {
-        this.lock = 0;
+}
-void ^?{}( spinlock & this ) {
+}
-bool try_lock( spinlock * this DEBUG_CTX_PARAM2 ) {
-        return this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0;
+}
-void lock( spinlock * this DEBUG_CTX_PARAM2 ) {
-        for ( unsigned int i = 1;; i += 1 ) {
-                if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }
+        }
-        LIB_DEBUG_DO(
-                this->prev_name = caller;
-                this->prev_thrd = this_thread;
+        )
+}
-void lock_yield( spinlock * this DEBUG_CTX_PARAM2 ) {
-        for ( unsigned int i = 1;; i += 1 ) {
-                if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }
-                yield();
+        }
-        LIB_DEBUG_DO(
-                this->prev_name = caller;
-                this->prev_thrd = this_thread;
+        )
+}
-void unlock( spinlock * this ) {
-        __sync_lock_release_4( &this->lock );
+}
 void  ?{}( semaphore & this, int count = 1 ) {
         (this.lock){};
 …
 void P(semaphore & this) {
         lock( &this.lock DEBUG_CTX2 );
+        lock( this.lock DEBUG_CTX2 );
         this.count -= 1;
         if ( this.count < 0 ) {
 …
+        }
         else {
             unlock( &this.lock );
+            unlock( this.lock );
+        }
+}
 …
 void V(semaphore & this) {
         thread_desc * thrd = NULL;
         lock( &this.lock DEBUG_CTX2 );
+        lock( this.lock DEBUG_CTX2 );
         this.count += 1;
         if ( this.count <= 0 ) {
 …
+        }
         unlock( &this.lock );
+        unlock( this.lock );
         // make new owner

src/libcfa/concurrency/kernel_private.h

-              r9d06142
+              rc0d00b6
 //Block current thread and release/wake-up the following resources
 void BlockInternal(void);
 void BlockInternal(spinlock * lock);
+void BlockInternal(__spinlock_t * lock);
 void BlockInternal(thread_desc * thrd);
 void BlockInternal(spinlock * lock, thread_desc * thrd);
 void BlockInternal(spinlock * locks [], unsigned short count);
 void BlockInternal(spinlock * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
 void LeaveThread(spinlock * lock, thread_desc * thrd);
+void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
+void BlockInternal(__spinlock_t * locks [], unsigned short count);
+void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
+void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
 //-----------------------------------------------------------------------------
 …
 struct event_kernel_t {
         alarm_list_t alarms;
         spinlock lock;
+        __spinlock_t lock;
 };

src/libcfa/concurrency/monitor.c

-              r9d06142
+              rc0d00b6
 static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & monitors );
 static inline void lock_all  ( spinlock * locks [], __lock_size_t count );
 static inline void lock_all  ( monitor_desc * source [], spinlock * /*out*/ locks [], __lock_size_t count );
 static inline void unlock_all( spinlock * locks [], __lock_size_t count );
+static inline void lock_all  ( __spinlock_t * locks [], __lock_size_t count );
+static inline void lock_all  ( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count );
+static inline void unlock_all( __spinlock_t * locks [], __lock_size_t count );
 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count );
 static inline void save   ( monitor_desc * ctx [], __lock_size_t count, spinlock * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
 static inline void restore( monitor_desc * ctx [], __lock_size_t count, spinlock * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
+static inline void save   ( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
+static inline void restore( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
 static inline void init     ( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
 …
 static inline __lock_size_t count_max    ( const __waitfor_mask_t & mask );
 static inline __lock_size_t aggregate    ( monitor_desc * storage [], const __waitfor_mask_t & mask );
+#ifndef __CFA_LOCK_NO_YIELD
+#define DO_LOCK lock_yield
+#else
+#define DO_LOCK lock
+#endif
 //-----------------------------------------------------------------------------
 …
         unsigned int recursions[ count ];                         /* Save the current recursion levels to restore them later                             */ \
         __waitfor_mask_t masks [ count ];                         /* Save the current waitfor masks to restore them later                                */ \
         spinlock *   locks     [ count ];                         /* We need to pass-in an array of locks to BlockInternal                               */ \
+        __spinlock_t *   locks [ count ];                         /* We need to pass-in an array of locks to BlockInternal                               */ \
 #define monitor_save    save   ( monitors, count, locks, recursions, masks )
 …
         // Enter single monitor
         static void __enter_monitor_desc( monitor_desc * this, const __monitor_group_t & group ) {
                 // Lock the monitor spinlock, lock_yield to reduce contention
                 lock_yield( &this->lock DEBUG_CTX2 );
+                // Lock the monitor spinlock
+                DO_LOCK( this->lock DEBUG_CTX2 );
                 thread_desc * thrd = this_thread;
 …
                 // Release the lock and leave
                 unlock( &this->lock );
+                unlock( this->lock );
                 return;
+        }
         static void __enter_monitor_dtor( monitor_desc * this, fptr_t func ) {
                 // Lock the monitor spinlock, lock_yield to reduce contention
                 lock_yield( &this->lock DEBUG_CTX2 );
+                // Lock the monitor spinlock
+                DO_LOCK( this->lock DEBUG_CTX2 );
                 thread_desc * thrd = this_thread;
 …
                         set_owner( this, thrd );
                         unlock( &this->lock );
+                        unlock( this->lock );
                         return;
+                }
 …
         // Leave single monitor
         void __leave_monitor_desc( monitor_desc * this ) {
                 // Lock the monitor spinlock, lock_yield to reduce contention
                 lock_yield( &this->lock DEBUG_CTX2 );
+                // Lock the monitor spinlock, DO_LOCK to reduce contention
+                DO_LOCK( this->lock DEBUG_CTX2 );
                 LIB_DEBUG_PRINT_SAFE("Kernel : %10p Leaving mon %p (%p)\n", this_thread, this, this->owner);
 …
                 if( this->recursion != 0) {
                         LIB_DEBUG_PRINT_SAFE("Kernel :  recursion still %d\n", this->recursion);
                         unlock( &this->lock );
+                        unlock( this->lock );
                         return;
+                }
 …
                 // We can now let other threads in safely
                 unlock( &this->lock );
+                unlock( this->lock );
                 //We need to wake-up the thread
 …
                 // Lock the monitor now
                 lock_yield( &this->lock DEBUG_CTX2 );
+                DO_LOCK( this->lock DEBUG_CTX2 );
                 disable_interrupts();
 …
+}
 static inline void lock_all( spinlock * locks [], __lock_size_t count ) {
+static inline void lock_all( __spinlock_t * locks [], __lock_size_t count ) {
         for( __lock_size_t i = 0; i < count; i++ ) {
                 lock_yield( locks[i] DEBUG_CTX2 );
+        }
+}
 static inline void lock_all( monitor_desc * source [], spinlock * /*out*/ locks [], __lock_size_t count ) {
+                DO_LOCK( *locks[i] DEBUG_CTX2 );
+        }
+}
+static inline void lock_all( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count ) {
         for( __lock_size_t i = 0; i < count; i++ ) {
                 spinlock * l = &source[i]->lock;
                 lock_yield( l DEBUG_CTX2 );
+                __spinlock_t * l = &source[i]->lock;
+                DO_LOCK( *l DEBUG_CTX2 );
                 if(locks) locks[i] = l;
+        }
+}
 static inline void unlock_all( spinlock * locks [], __lock_size_t count ) {
+static inline void unlock_all( __spinlock_t * locks [], __lock_size_t count ) {
         for( __lock_size_t i = 0; i < count; i++ ) {
                 unlock( locks[i] );
+                unlock( *locks[i] );
+        }
+}
 …
 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count ) {
         for( __lock_size_t i = 0; i < count; i++ ) {
                 unlock( &locks[i]->lock );
+                unlock( locks[i]->lock );
+        }
+}
 …
         monitor_desc * ctx [],
         __lock_size_t count,
         __attribute((unused)) spinlock * locks [],
+        __attribute((unused)) __spinlock_t * locks [],
         unsigned int /*out*/ recursions [],
         __waitfor_mask_t /*out*/ masks []
 …
         monitor_desc * ctx [],
         __lock_size_t count,
         spinlock * locks [],
+        __spinlock_t * locks [],
         unsigned int /*out*/ recursions [],
         __waitfor_mask_t /*out*/ masks []
 …
                 this.monitor_count = thrd->monitors.size;
                 this.monitors = malloc( this.monitor_count * sizeof( *this.monitors ) );
+                this.monitors = (monitor_desc **)malloc( this.monitor_count * sizeof( *this.monitors ) );
                 for( int i = 0; i < this.monitor_count; i++ ) {
                         this.monitors[i] = thrd->monitors.list[i];

src/libcfa/concurrency/preemption.c

-              r9d06142
+              rc0d00b6
                 case SI_KERNEL:
                         // LIB_DEBUG_PRINT_SAFE("Kernel : Preemption thread tick\n");
                         lock( &event_kernel->lock DEBUG_CTX2 );
+                        lock( event_kernel->lock DEBUG_CTX2 );
                         tick_preemption();
                         unlock( &event_kernel->lock );
+                        unlock( event_kernel->lock );
                         break;
                 // Signal was not sent by the kernel but by an other thread

src/libcfa/stdhdr/stddef.h

-              r9d06142
+              rc0d00b6
 // The contents of this file are covered under the licence agreement in the
 // file "LICENCE" distributed with Cforall.
 //
 // stddef.h --
 //
+//
+// stddef.h --
+//
 // Author           : Peter A. Buhr
 // Created On       : Mon Jul  4 23:25:26 2016
 …
 // Last Modified On : Tue Jul  5 20:40:01 2016
 // Update Count     : 12
 //
+//
 extern "C" {
+#include_next <stddef.h>                                                                // has internal check for multiple expansion
+#include_next <stddef.h>                // has internal check for multiple expansion
+#undef NULL
+#define NULL 0                          // define NULL as 0 rather than (void*)0 to take advantage of zero_t
 } // extern "C"

src/libcfa/stdlib

-              r9d06142
+              rc0d00b6
         //printf( "X8\n" );
         T * ptr = (T *)(void *)malloc( (size_t)sizeof(T) );     // C malloc
     return memset( ptr, (int)fill, sizeof(T) );                 // initial with fill value
+    return (T *)memset( ptr, (int)fill, sizeof(T) );                    // initial with fill value
 } // alloc
 …
         //printf( "X10\n" );
         T * ptr = (T *)(void *)malloc( dim * (size_t)sizeof(T) ); // C malloc
     return memset( ptr, (int)fill, dim * sizeof(T) );
+    return (T *)memset( ptr, (int)fill, dim * sizeof(T) );
 } // alloc
 static inline forall( dtype T | sized(T) ) T * alloc( T ptr[], size_t dim ) {
         //printf( "X11\n" );
         return (void *)realloc( (void *)ptr, dim * (size_t)sizeof(T) ); // C realloc
+        return (T *)(void *)realloc( (void *)ptr, dim * (size_t)sizeof(T) ); // C realloc
 } // alloc
 forall( dtype T | sized(T) ) T * alloc( T ptr[], size_t dim, char fill );
 …
         //printf( "X14\n" );
     T * ptr = (T *)memalign( align, sizeof(T) );
     return memset( ptr, (int)fill, sizeof(T) );
+    return (T *)memset( ptr, (int)fill, sizeof(T) );
 } // align_alloc
 …
         //printf( "X16\n" );
     T * ptr = (T *)memalign( align, dim * sizeof(T) );
     return memset( ptr, (int)fill, dim * sizeof(T) );
+    return (T *)memset( ptr, (int)fill, dim * sizeof(T) );
 } // align_alloc
 …
 static inline forall( dtype T | sized(T) ) T * memset( T * dest, char c ) {
         //printf( "X17\n" );
         return memset( dest, c, sizeof(T) );
+        return (T *)memset( dest, c, sizeof(T) );
 } // memset
 extern "C" { void * memcpy( void * dest, const void * src, size_t size ); } // use default C routine for void *
 static inline forall( dtype T | sized(T) ) T * memcpy( T * dest, const T * src ) {
         //printf( "X18\n" );
         return memcpy( dest, src, sizeof(T) );
+        return (T *)memcpy( dest, src, sizeof(T) );
 } // memcpy
 …
 static inline forall( dtype T | sized(T) ) T * memset( T dest[], size_t dim, char c ) {
         //printf( "X19\n" );
         return (void *)memset( dest, c, dim * sizeof(T) );      // C memset
+        return (T *)(void *)memset( dest, c, dim * sizeof(T) ); // C memset
 } // memset
 static inline forall( dtype T | sized(T) ) T * memcpy( T dest[], const T src[], size_t dim ) {
         //printf( "X20\n" );
         return (void *)memcpy( dest, src, dim * sizeof(T) ); // C memcpy
+        return (T *)(void *)memcpy( dest, src, dim * sizeof(T) ); // C memcpy
 } // memcpy

src/main.cc

r9d06142	rc0d00b6
206	206	FILE * extras = fopen( libcfap \| treep ? "../prelude/extras.cf" : CFA_LIBDIR "/extras.cf", "r" );
207	207	assertf( extras, "cannot open extras.cf\n" );
208		parse( extras, LinkageSpec::C );
	208	parse( extras, LinkageSpec::BuiltinC );
209	209
210	210	if ( ! libcfap ) {

src/prelude/builtins.c

-              r9d06142
+              rc0d00b6
 } // ?\?
+static inline forall( otype T | { void ?{}( T & this, one_t ); T ?*?( T, T ); double ?/?( double, T ); } )
+double ?\?( T x, signed long int y ) {
+    if ( y >=  0 ) return (double)(x \ (unsigned long int)y);
+    else return 1.0 / x \ (unsigned long int)(-y);
+} // ?\?
+// FIXME (x \ (unsigned long int)y) relies on X ?\?(T, unsigned long) a function that is neither
+// defined, nor passed as an assertion parameter. Without user-defined conversions, cannot specify
+// X as a type that casts to double, yet it doesn't make sense to write functions with that type
+// signature where X is double.
+// static inline forall( otype T | { void ?{}( T & this, one_t ); T ?*?( T, T ); double ?/?( double, T ); } )
+// double ?\?( T x, signed long int y ) {
+//     if ( y >=  0 ) return (double)(x \ (unsigned long int)y);
+//     else return 1.0 / x \ (unsigned long int)(-y);
+// } // ?\?
 static inline long int ?\=?( long int & x, unsigned long int y ) { x = x \ y; return x; }

src/prelude/prelude.cf

-              r9d06142
+              rc0d00b6
 forall( dtype DT ) const volatile DT *  ?=?( const volatile  DT * volatile &, const volatile    DT * );
-forall( dtype DT ) DT *                 ?=?(                 DT *          &,                   void * );
-forall( dtype DT ) DT *                 ?=?(                 DT * volatile &,                   void * );
-forall( dtype DT ) const DT *           ?=?( const           DT *          &,                   void * );
-forall( dtype DT ) const DT *           ?=?( const           DT * volatile &,                   void * );
-forall( dtype DT ) const DT *           ?=?( const           DT *          &, const             void * );
-forall( dtype DT ) const DT *           ?=?( const           DT * volatile &, const             void * );
-forall( dtype DT ) volatile DT *        ?=?(       volatile  DT *          &,                   void * );
-forall( dtype DT ) volatile DT *        ?=?(       volatile  DT * volatile &,                   void * );
-forall( dtype DT ) volatile DT *        ?=?(       volatile  DT *          &,       volatile    void * );
-forall( dtype DT ) volatile DT *        ?=?(       volatile  DT * volatile &,       volatile    void * );
-forall( dtype DT ) const volatile DT *  ?=?( const volatile  DT *          &,                   void * );
-forall( dtype DT ) const volatile DT *  ?=?( const volatile  DT * volatile &,                   void * );
-forall( dtype DT ) const volatile DT *  ?=?( const volatile  DT *          &, const             void * );
-forall( dtype DT ) const volatile DT *  ?=?( const volatile  DT * volatile &, const             void * );
-forall( dtype DT ) const volatile DT *  ?=?( const volatile  DT *          &,       volatile    void * );
-forall( dtype DT ) const volatile DT *  ?=?( const volatile  DT * volatile &,       volatile    void * );
-forall( dtype DT ) const volatile DT *  ?=?( const volatile  DT *          &, const volatile    void * );
-forall( dtype DT ) const volatile DT *  ?=?( const volatile  DT * volatile &, const volatile    void * );
 forall( dtype DT ) void *                ?=?(                void *          &,                 DT * );
 forall( dtype DT ) void *                ?=?(                void * volatile &,                 DT * );
 …
 forall( dtype DT ) const volatile void * ?=?( const volatile void *          &, const volatile  DT * );
 forall( dtype DT ) const volatile void * ?=?( const volatile void * volatile &, const volatile  DT * );
-void *                  ?=?(                void *          &,                void * );
-void *                  ?=?(                void * volatile &,                void * );
-const void *            ?=?( const          void *          &,                void * );
-const void *            ?=?( const          void * volatile &,                void * );
-const void *            ?=?( const          void *          &, const          void * );
-const void *            ?=?( const          void * volatile &, const          void * );
-volatile void *         ?=?(       volatile void *          &,                void * );
-volatile void *         ?=?(       volatile void * volatile &,                void * );
-volatile void *         ?=?(       volatile void *          &,       volatile void * );
-volatile void *         ?=?(       volatile void * volatile &,       volatile void * );
-const volatile void *   ?=?( const volatile void *          &,                void * );
-const volatile void *   ?=?( const volatile void * volatile &,                void * );
-const volatile void *   ?=?( const volatile void *          &, const          void * );
-const volatile void *   ?=?( const volatile void * volatile &, const          void * );
-const volatile void *   ?=?( const volatile void *          &,       volatile void * );
-const volatile void *   ?=?( const volatile void * volatile &,       volatile void * );
-const volatile void *   ?=?( const volatile void *          &, const volatile void * );
-const volatile void *   ?=?( const volatile void * volatile &, const volatile void * );
 //forall( dtype DT ) DT *                       ?=?(                DT *          &, zero_t );
 …
 forall( dtype DT ) void ?{}( const volatile  DT *          &, const volatile    DT * );
-forall( dtype DT ) void ?{}(                 DT *          &,                   void * );
-forall( dtype DT ) void ?{}( const           DT *          &,                   void * );
-forall( dtype DT ) void ?{}( const           DT *          &, const             void * );
-forall( dtype DT ) void ?{}(       volatile  DT *          &,                   void * );
-forall( dtype DT ) void ?{}(       volatile  DT *          &,       volatile    void * );
-forall( dtype DT ) void ?{}( const volatile  DT *          &,                   void * );
-forall( dtype DT ) void ?{}( const volatile  DT *          &, const             void * );
-forall( dtype DT ) void ?{}( const volatile  DT *          &,       volatile    void * );
-forall( dtype DT ) void ?{}( const volatile  DT *          &, const volatile    void * );
 forall( dtype DT ) void ?{}(                 void *          &,                 DT * );
 forall( dtype DT ) void ?{}( const           void *          &,                 DT * );
 …
 forall( dtype DT ) void ?{}( const volatile void *           &, const volatile  DT * );
-void    ?{}(                void *          &,                void * );
-void    ?{}( const          void *          &,                void * );
-void    ?{}( const          void *          &, const          void * );
-void    ?{}(       volatile void *          &,                void * );
-void    ?{}(       volatile void *          &,       volatile void * );
-void    ?{}( const volatile void *          &,                void * );
-void    ?{}( const volatile void *          &, const          void * );
-void    ?{}( const volatile void *          &,       volatile void * );
-void    ?{}( const volatile void *          &, const volatile void * );
 //forall( dtype DT ) void ?{}(              DT *          &, zero_t );
 //forall( dtype DT ) void ?{}(              DT * volatile &, zero_t );

src/tests/.expect/32/KRfunctions.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 signed int __f0__Fi_iPCii__1(signed int __a__i_1, const signed int *__b__PCi_1, signed int __c__i_1){
     __attribute__ ((unused)) signed int ___retval_f0__i_1;

src/tests/.expect/32/attributes.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 signed int __la__Fi___1(){
     __attribute__ ((unused)) signed int ___retval_la__i_1;

src/tests/.expect/32/declarationSpecifier.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 volatile const signed short int __x1__CVs_1;
 static volatile const signed short int __x2__CVs_1;
 …
+}
 static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi_iPPCc__1(argc, argv); }
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 static inline signed int invoke_main(signed int argc, char **argv, char **envp);
 signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){

src/tests/.expect/32/extension.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 __extension__ signed int __a__i_1;
 __extension__ signed int __b__i_1;

src/tests/.expect/32/gccExtensions.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 extern signed int __x__i_1 asm ( "xx" );
 signed int __main__Fi_iPPCc__1(signed int __argc__i_1, const char **__argv__PPCc_1){
 …
+}
 static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi_iPPCc__1(argc, argv); }
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 static inline signed int invoke_main(signed int argc, char **argv, char **envp);
 signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){

src/tests/.expect/32/literals.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 void __for_each__A0_2_0_0____operator_assign__PFd0_Rd0d0____constructor__PF_Rd0____constructor__PF_Rd0d0____destructor__PF_Rd0____operator_assign__PFd1_Rd1d1____constructor__PF_Rd1____constructor__PF_Rd1d1____destructor__PF_Rd1____operator_preincr__PFd0_Rd0____operator_predecr__PFd0_Rd0____operator_equal__PFi_d0d0____operator_notequal__PFi_d0d0____operator_deref__PFRd1_d0__F_d0d0PF_d1___1(__attribute__ ((unused)) void (*_adapterF_9telt_type__P)(void (*__anonymous_object0)(), void *__anonymous_object1), __attribute__ ((unused)) void *(*_adapterFP9telt_type_14titerator_type_M_P)(void (*__anonymous_object2)(), void *__anonymous_object3), __attribute__ ((unused)) signed int (*_adapterFi_14titerator_type14titerator_type_M_PP)(void (*__anonymous_object4)(), void *__anonymous_object5, void *__anonymous_object6), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type_P_M)(void (*__anonymous_object7)(), __attribute__ ((unused)) void *___retval__operator_preincr__14titerator_type_1, void *__anonymous_object8), __attribute__ ((unused)) void (*_adapterF_P9telt_type9telt_type__MP)(void (*__anonymous_object9)(), void *__anonymous_object10, void *__anonymous_object11), __attribute__ ((unused)) void (*_adapterF9telt_type_P9telt_type9telt_type_P_MP)(void (*__anonymous_object12)(), __attribute__ ((unused)) void *___retval__operator_assign__9telt_type_1, void *__anonymous_object13, void *__anonymous_object14), __attribute__ ((unused)) void (*_adapterF_P14titerator_type14titerator_type__MP)(void (*__anonymous_object15)(), void *__anonymous_object16, void *__anonymous_object17), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type14titerator_type_P_MP)(void (*__anonymous_object18)(), __attribute__ ((unused)) void *___retval__operator_assign__14titerator_type_1, void *__anonymous_object19, void *__anonymous_object20), __attribute__ ((unused)) unsigned long int _sizeof_14titerator_type, __attribute__ ((unused)) unsigned long int 
_alignof_14titerator_type, __attribute__ ((unused)) unsigned long int _sizeof_9telt_type, __attribute__ ((unused)) unsigned long int _alignof_9telt_type, __attribute__ ((unused)) void *(*___operator_assign__PF14titerator_type_R14titerator_type14titerator_type__1)(void *__anonymous_object21, void *__anonymous_object22), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type__1)(void *__anonymous_object23), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type14titerator_type__1)(void *__anonymous_object24, void *__anonymous_object25), __attribute__ ((unused)) void (*___destructor__PF_R14titerator_type__1)(void *__anonymous_object26), __attribute__ ((unused)) void *(*___operator_assign__PF9telt_type_R9telt_type9telt_type__1)(void *__anonymous_object27, void *__anonymous_object28), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type__1)(void *__anonymous_object29), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type9telt_type__1)(void *__anonymous_object30, void *__anonymous_object31), __attribute__ ((unused)) void (*___destructor__PF_R9telt_type__1)(void *__anonymous_object32), __attribute__ ((unused)) void *(*___operator_preincr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object33), __attribute__ ((unused)) void *(*___operator_predecr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object34), __attribute__ ((unused)) signed int (*___operator_equal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object35, void *__anonymous_object36), __attribute__ ((unused)) signed int (*___operator_notequal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object37, void *__anonymous_object38), __attribute__ ((unused)) void *(*___operator_deref__PFR9telt_type_14titerator_type__1)(void *__anonymous_object39), void *__begin__14titerator_type_1, void *__end__14titerator_type_1, void (*__func__PF_9telt_type__1)(void *__anonymous_object40));
 void __for_each_reverse__A0_2_0_0____operator_assign__PFd0_Rd0d0____constructor__PF_Rd0____constructor__PF_Rd0d0____destructor__PF_Rd0____operator_assign__PFd1_Rd1d1____constructor__PF_Rd1____constructor__PF_Rd1d1____destructor__PF_Rd1____operator_preincr__PFd0_Rd0____operator_predecr__PFd0_Rd0____operator_equal__PFi_d0d0____operator_notequal__PFi_d0d0____operator_deref__PFRd1_d0__F_d0d0PF_d1___1(__attribute__ ((unused)) void (*_adapterF_9telt_type__P)(void (*__anonymous_object41)(), void *__anonymous_object42), __attribute__ ((unused)) void *(*_adapterFP9telt_type_14titerator_type_M_P)(void (*__anonymous_object43)(), void *__anonymous_object44), __attribute__ ((unused)) signed int (*_adapterFi_14titerator_type14titerator_type_M_PP)(void (*__anonymous_object45)(), void *__anonymous_object46, void *__anonymous_object47), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type_P_M)(void (*__anonymous_object48)(), __attribute__ ((unused)) void *___retval__operator_preincr__14titerator_type_1, void *__anonymous_object49), __attribute__ ((unused)) void (*_adapterF_P9telt_type9telt_type__MP)(void (*__anonymous_object50)(), void *__anonymous_object51, void *__anonymous_object52), __attribute__ ((unused)) void (*_adapterF9telt_type_P9telt_type9telt_type_P_MP)(void (*__anonymous_object53)(), __attribute__ ((unused)) void *___retval__operator_assign__9telt_type_1, void *__anonymous_object54, void *__anonymous_object55), __attribute__ ((unused)) void (*_adapterF_P14titerator_type14titerator_type__MP)(void (*__anonymous_object56)(), void *__anonymous_object57, void *__anonymous_object58), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type14titerator_type_P_MP)(void (*__anonymous_object59)(), __attribute__ ((unused)) void *___retval__operator_assign__14titerator_type_1, void *__anonymous_object60, void *__anonymous_object61), __attribute__ ((unused)) unsigned long int _sizeof_14titerator_type, __attribute__ ((unused)) unsigned long 
int _alignof_14titerator_type, __attribute__ ((unused)) unsigned long int _sizeof_9telt_type, __attribute__ ((unused)) unsigned long int _alignof_9telt_type, __attribute__ ((unused)) void *(*___operator_assign__PF14titerator_type_R14titerator_type14titerator_type__1)(void *__anonymous_object62, void *__anonymous_object63), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type__1)(void *__anonymous_object64), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type14titerator_type__1)(void *__anonymous_object65, void *__anonymous_object66), __attribute__ ((unused)) void (*___destructor__PF_R14titerator_type__1)(void *__anonymous_object67), __attribute__ ((unused)) void *(*___operator_assign__PF9telt_type_R9telt_type9telt_type__1)(void *__anonymous_object68, void *__anonymous_object69), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type__1)(void *__anonymous_object70), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type9telt_type__1)(void *__anonymous_object71, void *__anonymous_object72), __attribute__ ((unused)) void (*___destructor__PF_R9telt_type__1)(void *__anonymous_object73), __attribute__ ((unused)) void *(*___operator_preincr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object74), __attribute__ ((unused)) void *(*___operator_predecr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object75), __attribute__ ((unused)) signed int (*___operator_equal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object76, void *__anonymous_object77), __attribute__ ((unused)) signed int (*___operator_notequal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object78, void *__anonymous_object79), __attribute__ ((unused)) void *(*___operator_deref__PFR9telt_type_14titerator_type__1)(void *__anonymous_object80), void *__begin__14titerator_type_1, void *__end__14titerator_type_1, void (*__func__PF_9telt_type__1)(void *__anonymous_object81));
 …
+}
 static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi___1(); }
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 static inline signed int invoke_main(signed int argc, char **argv, char **envp);
 signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){

src/tests/.expect/64/KRfunctions.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 signed int __f0__Fi_iPCii__1(signed int __a__i_1, const signed int *__b__PCi_1, signed int __c__i_1){
     __attribute__ ((unused)) signed int ___retval_f0__i_1;

src/tests/.expect/64/attributes.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 signed int __la__Fi___1(){
     __attribute__ ((unused)) signed int ___retval_la__i_1;

src/tests/.expect/64/declarationSpecifier.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 volatile const signed short int __x1__CVs_1;
 static volatile const signed short int __x2__CVs_1;
 …
+}
 static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi_iPPCc__1(argc, argv); }
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 static inline signed int invoke_main(signed int argc, char **argv, char **envp);
 signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){

src/tests/.expect/64/extension.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 __extension__ signed int __a__i_1;
 __extension__ signed int __b__i_1;

src/tests/.expect/64/gccExtensions.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 extern signed int __x__i_1 asm ( "xx" );
 signed int __main__Fi_iPPCc__1(signed int __argc__i_1, const char **__argv__PPCc_1){
 …
+}
 static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi_iPPCc__1(argc, argv); }
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 static inline signed int invoke_main(signed int argc, char **argv, char **envp);
 signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){

src/tests/.expect/64/literals.txt

-              r9d06142
+              rc0d00b6
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 void __for_each__A0_2_0_0____operator_assign__PFd0_Rd0d0____constructor__PF_Rd0____constructor__PF_Rd0d0____destructor__PF_Rd0____operator_assign__PFd1_Rd1d1____constructor__PF_Rd1____constructor__PF_Rd1d1____destructor__PF_Rd1____operator_preincr__PFd0_Rd0____operator_predecr__PFd0_Rd0____operator_equal__PFi_d0d0____operator_notequal__PFi_d0d0____operator_deref__PFRd1_d0__F_d0d0PF_d1___1(__attribute__ ((unused)) void (*_adapterF_9telt_type__P)(void (*__anonymous_object0)(), void *__anonymous_object1), __attribute__ ((unused)) void *(*_adapterFP9telt_type_14titerator_type_M_P)(void (*__anonymous_object2)(), void *__anonymous_object3), __attribute__ ((unused)) signed int (*_adapterFi_14titerator_type14titerator_type_M_PP)(void (*__anonymous_object4)(), void *__anonymous_object5, void *__anonymous_object6), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type_P_M)(void (*__anonymous_object7)(), __attribute__ ((unused)) void *___retval__operator_preincr__14titerator_type_1, void *__anonymous_object8), __attribute__ ((unused)) void (*_adapterF_P9telt_type9telt_type__MP)(void (*__anonymous_object9)(), void *__anonymous_object10, void *__anonymous_object11), __attribute__ ((unused)) void (*_adapterF9telt_type_P9telt_type9telt_type_P_MP)(void (*__anonymous_object12)(), __attribute__ ((unused)) void *___retval__operator_assign__9telt_type_1, void *__anonymous_object13, void *__anonymous_object14), __attribute__ ((unused)) void (*_adapterF_P14titerator_type14titerator_type__MP)(void (*__anonymous_object15)(), void *__anonymous_object16, void *__anonymous_object17), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type14titerator_type_P_MP)(void (*__anonymous_object18)(), __attribute__ ((unused)) void *___retval__operator_assign__14titerator_type_1, void *__anonymous_object19, void *__anonymous_object20), __attribute__ ((unused)) unsigned long int _sizeof_14titerator_type, __attribute__ ((unused)) unsigned long int 
_alignof_14titerator_type, __attribute__ ((unused)) unsigned long int _sizeof_9telt_type, __attribute__ ((unused)) unsigned long int _alignof_9telt_type, __attribute__ ((unused)) void *(*___operator_assign__PF14titerator_type_R14titerator_type14titerator_type__1)(void *__anonymous_object21, void *__anonymous_object22), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type__1)(void *__anonymous_object23), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type14titerator_type__1)(void *__anonymous_object24, void *__anonymous_object25), __attribute__ ((unused)) void (*___destructor__PF_R14titerator_type__1)(void *__anonymous_object26), __attribute__ ((unused)) void *(*___operator_assign__PF9telt_type_R9telt_type9telt_type__1)(void *__anonymous_object27, void *__anonymous_object28), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type__1)(void *__anonymous_object29), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type9telt_type__1)(void *__anonymous_object30, void *__anonymous_object31), __attribute__ ((unused)) void (*___destructor__PF_R9telt_type__1)(void *__anonymous_object32), __attribute__ ((unused)) void *(*___operator_preincr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object33), __attribute__ ((unused)) void *(*___operator_predecr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object34), __attribute__ ((unused)) signed int (*___operator_equal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object35, void *__anonymous_object36), __attribute__ ((unused)) signed int (*___operator_notequal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object37, void *__anonymous_object38), __attribute__ ((unused)) void *(*___operator_deref__PFR9telt_type_14titerator_type__1)(void *__anonymous_object39), void *__begin__14titerator_type_1, void *__end__14titerator_type_1, void (*__func__PF_9telt_type__1)(void *__anonymous_object40));
 void __for_each_reverse__A0_2_0_0____operator_assign__PFd0_Rd0d0____constructor__PF_Rd0____constructor__PF_Rd0d0____destructor__PF_Rd0____operator_assign__PFd1_Rd1d1____constructor__PF_Rd1____constructor__PF_Rd1d1____destructor__PF_Rd1____operator_preincr__PFd0_Rd0____operator_predecr__PFd0_Rd0____operator_equal__PFi_d0d0____operator_notequal__PFi_d0d0____operator_deref__PFRd1_d0__F_d0d0PF_d1___1(__attribute__ ((unused)) void (*_adapterF_9telt_type__P)(void (*__anonymous_object41)(), void *__anonymous_object42), __attribute__ ((unused)) void *(*_adapterFP9telt_type_14titerator_type_M_P)(void (*__anonymous_object43)(), void *__anonymous_object44), __attribute__ ((unused)) signed int (*_adapterFi_14titerator_type14titerator_type_M_PP)(void (*__anonymous_object45)(), void *__anonymous_object46, void *__anonymous_object47), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type_P_M)(void (*__anonymous_object48)(), __attribute__ ((unused)) void *___retval__operator_preincr__14titerator_type_1, void *__anonymous_object49), __attribute__ ((unused)) void (*_adapterF_P9telt_type9telt_type__MP)(void (*__anonymous_object50)(), void *__anonymous_object51, void *__anonymous_object52), __attribute__ ((unused)) void (*_adapterF9telt_type_P9telt_type9telt_type_P_MP)(void (*__anonymous_object53)(), __attribute__ ((unused)) void *___retval__operator_assign__9telt_type_1, void *__anonymous_object54, void *__anonymous_object55), __attribute__ ((unused)) void (*_adapterF_P14titerator_type14titerator_type__MP)(void (*__anonymous_object56)(), void *__anonymous_object57, void *__anonymous_object58), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type14titerator_type_P_MP)(void (*__anonymous_object59)(), __attribute__ ((unused)) void *___retval__operator_assign__14titerator_type_1, void *__anonymous_object60, void *__anonymous_object61), __attribute__ ((unused)) unsigned long int _sizeof_14titerator_type, __attribute__ ((unused)) unsigned long 
int _alignof_14titerator_type, __attribute__ ((unused)) unsigned long int _sizeof_9telt_type, __attribute__ ((unused)) unsigned long int _alignof_9telt_type, __attribute__ ((unused)) void *(*___operator_assign__PF14titerator_type_R14titerator_type14titerator_type__1)(void *__anonymous_object62, void *__anonymous_object63), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type__1)(void *__anonymous_object64), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type14titerator_type__1)(void *__anonymous_object65, void *__anonymous_object66), __attribute__ ((unused)) void (*___destructor__PF_R14titerator_type__1)(void *__anonymous_object67), __attribute__ ((unused)) void *(*___operator_assign__PF9telt_type_R9telt_type9telt_type__1)(void *__anonymous_object68, void *__anonymous_object69), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type__1)(void *__anonymous_object70), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type9telt_type__1)(void *__anonymous_object71, void *__anonymous_object72), __attribute__ ((unused)) void (*___destructor__PF_R9telt_type__1)(void *__anonymous_object73), __attribute__ ((unused)) void *(*___operator_preincr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object74), __attribute__ ((unused)) void *(*___operator_predecr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object75), __attribute__ ((unused)) signed int (*___operator_equal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object76, void *__anonymous_object77), __attribute__ ((unused)) signed int (*___operator_notequal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object78, void *__anonymous_object79), __attribute__ ((unused)) void *(*___operator_deref__PFR9telt_type_14titerator_type__1)(void *__anonymous_object80), void *__begin__14titerator_type_1, void *__end__14titerator_type_1, void (*__func__PF_9telt_type__1)(void *__anonymous_object81));
 …
+}
 static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi___1(); }
-__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
-__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
-__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
-__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
-extern signed int printf(const char *__restrict __format, ...);
 static inline signed int invoke_main(signed int argc, char **argv, char **envp);
 signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){

src/tests/.expect/castError.txt

-              r9d06142
+              rc0d00b6
   charAlternatives are:
 Cost ( 1, 0, 0, 0 ): Cast of:
+     Variable Expression: f: signed int
+     Variable Expression: f: function
+       accepting unspecified arguments
+     ... returning nothing
    ... to:
      char
 …
 Cost ( 1, 0, 0, 0 ): Cast of:
+     Variable Expression: f: function
+       accepting unspecified arguments
+     ... returning nothing
+     Variable Expression: f: signed int
    ... to:
      char

src/tests/.expect/completeTypeError.txt

-              r9d06142
+              rc0d00b6
 completeTypeError.c:34:1 error: No reasonable alternatives for expression Applying untyped:
+completeTypeError.c:33:1 error: No reasonable alternatives for expression Applying untyped:
   Name: *?
 ...to:
   Name: v
+completeTypeError.c:34:1 error: No reasonable alternatives for expression Applying untyped:
+  Name: *?
+...to:
+  Name: y
+completeTypeError.c:35:1 error: No reasonable alternatives for expression Applying untyped:
+  Name: foo
+...to:
+  Name: v
 completeTypeError.c:36:1 error: No reasonable alternatives for expression Applying untyped:
 …
   Name: v
 completeTypeError.c:37:1 error: No reasonable alternatives for expression Applying untyped:
   Name: quux
 ...to:
   Name: v
 completeTypeError.c:58:1 error: No reasonable alternatives for expression Applying untyped:
 …
   Name: y
 completeTypeError.c:59:1 error: No reasonable alternatives for expression Applying untyped:
   Name: quux
 ...to:
   Name: y
 completeTypeError.c:60:1 error: No reasonable alternatives for expression Applying untyped:
 …
   Name: y
 completeTypeError.c:72:1 error: No reasonable alternatives for expression Applying untyped:
   Name: baz
 …
   Name: z

src/tests/Makefile.am

r9d06142	rc0d00b6
141	141	typedefRedef-ERR1: typedefRedef.c @CFA_BINDIR@/@CFA_NAME@
142	142	${CC} ${AM_CFLAGS} ${CFLAGS} -DERR1 ${<} -o ${@}
	143
	144	alloc-ERROR: alloc.c @CFA_BINDIR@/@CFA_NAME@
	145	${CC} ${AM_CFLAGS} ${CFLAGS} -DERR1 ${<} -o ${@}

src/tests/Makefile.in

-              r9d06142
+              rc0d00b6
         ${CC} ${AM_CFLAGS} ${CFLAGS} -DERR1 ${<} -o ${@}
+alloc-ERROR: alloc.c @CFA_BINDIR@/@CFA_NAME@
+        ${CC} ${AM_CFLAGS} ${CFLAGS} -DERR1 ${<} -o ${@}
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.

src/tests/alloc.c

-              r9d06142
+              rc0d00b6
         // allocation, non-array types
         p = (void *)malloc( sizeof(*p) );                   // C malloc, type unsafe
+        p = (int *)(void *)malloc( sizeof(*p) );                   // C malloc, type unsafe
         *p = 0xdeadbeef;
         printf( "C   malloc %#x\n", *p );
 …
         printf( "\n" );
         p = calloc( dim, sizeof( *p ) );                    // C array calloc, type unsafe
+        p = (int *)calloc( dim, sizeof( *p ) );                    // C array calloc, type unsafe
         printf( "C   array calloc, fill 0\n" );
         for ( int i = 0; i < dim; i += 1 ) { printf( "%#x ", p[i] ); }
 …
         printf( "\n" );
         p = (void *)realloc( p, dim * sizeof(*p) );         // C realloc
+        p = (int *)(void *)realloc( p, dim * sizeof(*p) );         // C realloc
         for ( int i = 0; i < dim; i += 1 ) { p[i] = 0xdeadbeef; }
         printf( "C   realloc\n" );
 …
         stp = malloc();
         printf( "\nSHOULD FAIL\n" );
+#ifdef ERR1
         p = alloc( stp, dim * sizeof(*stp) );
         p = memset( stp, 10 );
         p = memcpy( &st1, &st );
+#endif
 } // main

src/tests/completeTypeError.c

-              r9d06142
+              rc0d00b6
         void *v;
         // A * x;
         // A * y;
         // B * x;
         // B * z;
+        A * x;
+        A * y;
+        B * x;
+        B * z;
         // okay
         *i;
         // *x; // picks B
         // *z;
+        *x; // picks B
+        *z;
         foo(i);
         bar(i);
 …
         bar(v);
         qux(v);
-        foo(v); // questionable, but works at the moment for C compatibility
         // bad
         *v;
+        // *y;
+        *y;
+        foo(v);
         baz(v);
         quux(v);

src/tests/dtor-early-exit.c

r9d06142	rc0d00b6
22	22
23	23	struct A {
24		char * name;
	24	const char * name;
25	25	int * x;
26	26	};

src/tests/init_once.c

r9d06142	rc0d00b6
72	72	insert( &constructed, &x );
73	73
74		x.x = malloc(sizeof(int));
	74	x.x = (int *)malloc(sizeof(int));
75	75	}
76	76

src/tests/multiDimension.c

-              r9d06142
+              rc0d00b6
   printf("default constructing\n");
   (this.a){ 123 };
   this.ptr = malloc(sizeof(int));
+  this.ptr = (int *)malloc(sizeof(int));
+}
 …
   printf("copy constructing\n");
   (this.a){ other.a };
   this.ptr = malloc(sizeof(int));
+  this.ptr = (int *)malloc(sizeof(int));
+}
 …
   printf("constructing with %d\n", a);
   (this.a){ a };
   this.ptr = malloc(sizeof(int));
+  this.ptr = (int *)malloc(sizeof(int));
+}

src/tests/polymorphism.c

-              r9d06142
+              rc0d00b6
 //
+#include <assert.h>
+#include <inttypes.h>
 forall(otype T)
 T f(T x, T y) {
 …
+}
+forall( otype T, otype U )
+size_t struct_size( T i, U j ) {
+        struct S { T i; U j; };
+        return sizeof(S);
+}
+forall( otype T, otype U )
+size_t union_size( T i, U j ) {
+        union B { T i; U j; };
+        return sizeof(B);
+}
+// perform some simple operations on aggregates of T and U
+forall( otype T | { void print(T); int ?==?(T, T); }, otype U | { void print(U); U ?=?(U&, zero_t); } )
+U foo(T i, U j) {
+        struct S { T i; U j; };
+        union B { T i; U j; };
+        S s;
+        s.i = i;
+        assert(s.i == i);
+        B b;
+        b.j = 0;
+        b.i = s.i;
+        return b.j;
+}
 int main() {
+        // ensure that x is not changed by the invocation of a polymorphic function
+        int x = 123;
+        int y = 456;
+        int z = f(x, y);
+        printf("%d %d %d\n", x, y, z);
+        {
+                // ensure that x is not changed by the invocation of a polymorphic function
+                int x = 123;
+                int y = 456;
+                int z = f(x, y);
+                printf("%d %d %d\n", x, y, z);
+        }
+        // explicitly specialize function
+        int (*f)(int) = ident;
+        ((int(*)(int))ident);
+        printf("%d %d\n", f(5), ((int(*)(int))ident)(5));
+        {
+                // explicitly specialize function
+                int (*f)(int) = ident;
+                ((int(*)(int))ident);
+                printf("%d %d\n", f(5), ((int(*)(int))ident)(5));
+        }
+        {
+                // test aggregates with polymorphic members
+                typedef uint32_t x_type;
+                typedef uint64_t y_type;
+                x_type x = 3;
+                y_type y = 3;
+                struct S {
+                        x_type f1;
+                        y_type f2;
+                };
+                union U {
+                        x_type f1;
+                        y_type f2;
+                };
+                // ensure that the size of aggregates with polymorphic members
+                // matches the size of the aggregates in a monomorphic context
+                assert( struct_size(x, y) == sizeof(S) );
+                assert( union_size(x, y) == sizeof(U) );
+                y_type ?=?(y_type & this, zero_t) {
+                        this = (int)0;
+                        return this;
+                }
+                void print(x_type x) {
+                        printf("%"PRIu32"\n", x);
+                }
+                void print(y_type y) {
+                        printf("%"PRIu64"\n", y);
+                }
+                y_type ret = foo(x, y);
+                // duplicate logic from inside of foo to ensure the same results
+                U u;
+                u.f2 = 0;
+                u.f1 = x;
+                assert(ret == u.f2);
+        }
+}

src/tests/tupleVariadic.c

r9d06142	rc0d00b6
73	73	[a0, a1, a2, a3] = args;
74	74	a.size = 4;
75		a.data = malloc(sizeof(int)*a.size);
	75	a.data = (int *)malloc(sizeof(int)*a.size);
76	76	a.data[0] = a0;
77	77	a.data[1] = a1;

src/tests/vector/vector_int.c

-              r9d06142
+              rc0d00b6
         vec.last = -1;
         vec.capacity = reserve;
         vec.data = malloc( sizeof( int ) * reserve );
+        vec.data = (int *)malloc( sizeof( int ) * reserve );
+}
 …
         vec.last = other.last;
         vec.capacity = other.capacity;
         vec.data = malloc( sizeof( int ) * other.capacity );
+        vec.data = (int *)malloc( sizeof( int ) * other.capacity );
         for (int i = 0; i < vec.last; i++) {
                 vec.data[i] = other.data[i];
 …
 void reserve( vector_int *vec, int reserve ) {
         if ( reserve > vec->capacity ) {
                 vec->data = realloc( vec->data, sizeof( int ) * reserve );
+                vec->data = (int *)realloc( vec->data, sizeof( int ) * reserve );
                 vec->capacity = reserve;
+        }
 …
         if ( vec->last == vec->capacity ) {
                 vec->capacity *= 2;
                 vec->data = realloc( vec->data, sizeof( int ) * vec->capacity );
+                vec->data = (int *)realloc( vec->data, sizeof( int ) * vec->capacity );
+        }
         vec->data[ vec->last ] = element;

Context Navigation

Legend:

Download in other formats: