Changeset ce55a81

.gitignore

r56c44dc	rce55a81
18	18	**/Makefile
19	19	**/Makefile.in
	20	**/Makefile.dist.in
20	21	/version
21	22

Makefile.am

-              r56c44dc
+              rce55a81
 SUBDIRS = driver src . @LIBCFA_TARGET_DIRS@
+DIST_SUBDIRS = driver src . libcfa benchmark longrun_tests tests tools tools/prettyprinter
 @LIBCFA_TARGET_MAKEFILES@ : Makefile $(srcdir)/libcfa/configure
 …
         @ls $(config_file) || (echo "Missing config.data, re-run configure script again" && false)
         @$(eval config_data = $(shell cat $(config_file)))
         @echo "Configuring libcfa with '$(config_data)''"
+        @echo "Configuring libcfa ($(abs_top_srcdir)/libcfa/configure) with '$(config_data)' from $(shell pwd) / $(dir $@)"
         @cd $(dir $@) && $(abs_top_srcdir)/libcfa/configure $(config_data)
 …
 man1_MANS = doc/man/cfa.1
+EXTRA_DIST = LICENSE doc/man/cfa.1 libcfa/configure libcfa/Makefile.dist.am libcfa/Makefile.dist.in
 debug=yes

benchmark/Makefile.am

-              r56c44dc
+              rce55a81
 # Dummy hack tricks
 EXTRA_PROGRAMS = dummy # build but do not install
 dummy_SOURCES = dummyC.c dummyCXX.cpp
+nodist_dummy_SOURCES = dummyC.c dummyCXX.cpp
 dummyC.c:
 …
 ## =========================================================================================================
+all : basic$(EXEEXT) ctxswitch$(EXEEXT) mutex$(EXEEXT) schedint$(EXEEXT) schedext$(EXEEXT) creation$(EXEEXT)
+# all is used by make dist so ignore it
+all:
+all-bench : basic$(EXEEXT) ctxswitch$(EXEEXT) mutex$(EXEEXT) schedint$(EXEEXT) schedext$(EXEEXT) creation$(EXEEXT)
 basic_loop_DURATION = 15000000000
 …
 ## =========================================================================================================
 compile$(EXEEXT) :              \
+bcompile$(EXEEXT) :             \
         compile-array.make      \
         compile-attributes.make \

configure.ac

-              r56c44dc
+              rce55a81
                 \'--enable-gprofiler=*) ;;
                 \'--disable-gprofiler) ;;
+                # skip this, it only causes problems
+                \'--srcdir=*) ;;
                 # append all other arguments to the sub configure arguments
 …
         LIBCFA_TARGET_DIRS="${LIBCFA_TARGET_DIRS} ${lib_dir}"
+        LIBCFA_1TARGET_DIR="${lib_dir}"
         LIBCFA_TARGET_MAKEFILES="${LIBCFA_TARGET_MAKEFILES} ${lib_dir}/Makefile"
 …
 AC_SUBST(LIBCFA_TARGET_DIRS)
+AC_SUBST(LIBCFA_1TARGET_DIR)
 AC_SUBST(LIBCFA_TARGET_MAKEFILES)
 …
         driver/Makefile
         src/Makefile
         benchmark/Makefile
+        libcfa/Makefile:libcfa/Makefile.dist.in
         tests/Makefile
         longrun_tests/Makefile
+        benchmark/Makefile
         tools/Makefile
         tools/prettyprinter/Makefile

doc/LaTeXmacros/common.tex

-              r56c44dc
+              rce55a81
 %% Created On       : Sat Apr  9 10:06:17 2016
 %% Last Modified By : Peter A. Buhr
 %% Last Modified On : Fri May 24 07:59:54 2019
 %% Update Count     : 382
+%% Last Modified On : Fri Sep  4 13:56:52 2020
+%% Update Count     : 383
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 …
 % Names used in the document.
+\usepackage{xspace}
 \newcommand{\CFAIcon}{\textsf{C}\raisebox{\depth}{\rotatebox{180}{\textsf{A}}}\xspace} % Cforall symbolic name
 \newcommand{\CFA}{\protect\CFAIcon}             % safe for section/caption
 …
 }%
+\usepackage{listings}                                                                   % format program code
 \usepackage{lstlang}

doc/man/cfa.1

-              r56c44dc
+              rce55a81
 .\" Created On       : Wed Jul 26 22:34:47 2017
 .\" Last Modified By : Peter A. Buhr
 .\" Last Modified On : Thu Jul 27 10:29:29 2017
 .\" Update Count     : 44
+.\" Last Modified On : Wed Sep  2 17:59:53 2020
+.\" Update Count     : 78
 .\"
 .\" nroff -man cfa.1
 …
 .ds Cf "Cforall
 .\"
 .TH cfa 1 2017-07-27 cfa-\*(Mg
+.TH CFA 1 "2020-09-02" cfa-\*(Mg "\*(Cf Project"
 .SH NAME
 cfa \- \*(Cf Translator and Runtime Library
+cfa \- \*(Cf project translator and runtime library to enhance C
 .SH SYNOPSIS
+cfa [gcc-options] [C/\*(Cf source-files] [assembler/loader files]
+cfa [cfa/gcc-options]
+    [cfa/c source-files]
+    [assembler/loader files]
 .SH DESCRIPTION
+\*(Cf (C-for-all) is an open-source project extending ISO C with modern safety and productivity features, while still ensuring backwards compatibility with C and its programmers.
 The cfa command compiles C and \*(Cf source files and links C/\*(Cf object
 files named on the command line.
 …
 The cfa command introduces a translator pass over the specified source files
 after the C preprocessor but before the C compilation.  The translator converts
+new \*(Cf constructs into C statements.  The cfa command also provides the
+runtime library, which is linked with each \*(Cf application.
+new \*(Cf constructs into C statements.  The cfa command also provides a fully
+concurrent (user-level threads) runtime library, which is linked with the
+\*(Cf application.
 The command line options depend on the particular C compiler used (gcc/clang
 supported).  As with most C compilers, the output is sent to the file a.out(5)
 unless the -o option is present on the command line.  See the reference pages
 for gcc(1) for more information.
+for gcc(1) for more information on command line options.
 .SH OPTIONS
 When multiple conflicting options appear on the command line, e.g.,
 …
 All of the options available to the gcc compiler are available to the cfa
 translator.  The following gcc flags are implicitly turned on:
+.IP -std=gnu99 3
+The 1999 C standard plus GNU extensions.
+.IP -fgnu89-inline
+Use the traditional GNU semantics for inline routines in C99 mode, which allows inline routines in header files.
+.IP "-std=gnu11" 3
+The 2011 C standard plus GNU extensions.
+.IP "-fgnu89-inline"
+Use the traditional GNU semantics for inline routines in C11 mode, which allows inline routines in header files.
+.IP "-imacros stdbool.h"
+Include stdbool.h to get defines for bool/true/false.
+.IP "-latomic -lm"
+Provide access to double-wide CAS instruction and math library.
 .LP
 The following additional options are available:
 .IP -CFA 3
+.IP "-CFA" 3
 Only the C preprocessor and the \*(Cf translator steps are performed and the transformed program is written to standard output, which makes it possible to examine the code generated by the \*(Cf translator.
 The generated code starts with the standard \*(Cf prelude.
 .IP -debug
+.IP "-debug"
 The program is linked with the debugging version of the runtime system.
 The debug version performs runtime checks to help during the debugging phase of a \*(Cf program, but can substantially slow program execution.
 The runtime checks should only be removed after the program is completely debugged.
 .B This option is the default.
 .IP -nodebug
+.IP "-nodebug"
 The program is linked with the non-debugging version of the runtime system, so the execution of the program is faster.
 .I However, no runtime checks or asserts are performed so errors usually result in abnormal program behaviour or termination.
 .IP -help
+.IP "-help"
 Information about the set of \*(Cf compilation flags is printed.
 .IP -nohelp
+.IP "-nohelp"
 Information about the set of \*(Cf compilation flags is not printed.
 .B This option is the default.
 .IP -quiet
+.IP "-quiet"
 The \*(Cf compilation message is not printed at the beginning of a compilation.
 .IP -noquiet
+.IP "-noquiet"
 The \*(Cf compilation message is printed at the beginning of a compilation.
 .B This option is the default.
 …
 available.  These variables allow conditional compilation of programs that must
 work differently in these situations.
 .IP __CFA_MAJOR__ 3
+.IP "__CFA_MAJOR__" 3
 is available during preprocessing and its value is the major version number of \*(Cf.
 .IP __CFA_MINOR__
+.IP "__CFA_MINOR__"
 is available during preprocessing and its value is the minor version number of \*(Cf.
 .IP __CFA_PATCH__
+.IP "__CFA_PATCH__"
 is available during preprocessing and its value is the patch level number of \*(Cf.
 .IP "__CFA__, __CFORALL__, and __cforall"
 are always available during preprocessing and have no value.
 .IP __CFA_DEBUG__
+.IP "__CFA_DEBUG__"
 is available during preprocessing if the -debug compilation option is
 specified.
 …
 .SH REFERENCES
 .HP 3
+\*(Cf Reference and Rationale Manual
+.I \*(Cf Home Page
 .br
 http://plg.uwaterloo.ca/~cforall/refrat.pdf
+https://cforall.uwaterloo.ca
 .HP
 .I \*(Cf User Manual
 .br
+http://plg.uwaterloo.ca/~cforall/user.pdf
+https://cforall.uwaterloo.ca/doc/user.pdf
+.SH BUILDS
+Nightly builds are available here https://cforall.uwaterloo.ca/jenkins
 .SH BUGS
 Bugs should be reported to trac@plg.cs.uwaterloo.ca.
+Bug reports are available here https://cforall.uwaterloo.ca/trac
 .SH COPYRIGHT
 \*(Cf is covered under the licence agreement in the distribution.
 .SH AUTHORS
 Andrew Beach, Richard Bilson, Peter A. Buhr, Thierry Delisle, Glen Ditchfield,
 Rodolfo G. Esteves, Aaron Moss, Rob Schluntz
+Rodolfo G. Esteves, Aaron Moss, Rob Schluntz, Mubeen Zulfiqar

doc/theses/thierry_delisle_PhD/comp_II/Makefile

-              r56c44dc
+              rce55a81
 ## Define the text source files.
-SOURCES = ${addsuffix .tex, \
-comp_II \
+}
 FIGURES = ${addsuffix .tex, \
-        base \
-        empty \
         emptybit \
         emptytree \
         emptytls \
         resize \
-        system \
+}
 PICTURES = ${addsuffix .pstex, \
+        base \
+        empty \
+        system \
+}
 …
 ## Define the documents that need to be made.
+all: comp_II.pdf presentation.pdf
+comp_II.pdf: ${FIGURES} ${PICTURES}
+presentation.pdf: presentationstyle.sty base.dark.pstex empty.dark.pstex system.dark.pstex
 DOCUMENT = comp_II.pdf
+DOCUMENT = comp_II.pdf presentation.pdf
 BASE = ${basename ${DOCUMENT}}
 …
 # File Dependencies #
+${DOCUMENT} : ${BASE}.ps
+%.pdf : build/%.ps | ${Build}
         ps2pdf $<
+${BASE}.ps : ${BASE}.dvi
         dvips ${Build}/$< -o $@
+build/%.ps : build/%.dvi | ${Build}
+        dvips $< -o $@
+${BASE}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} \
+                ${Macros}/common.tex ${Macros}/indexstyle ../../../bibliography/pl.bib \
+                local.bib glossary.tex | ${Build}
+build/%.dvi : %.tex Makefile | ${Build}
         # Must have *.aux file containing citations for bibtex
         if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} ${basename $@}.tex ; fi
         -${BibTeX} ${Build}/${basename $@}
+        if [ ! -r ${basename $@}.aux ] ; then ${LaTeX} $< ; fi
+        -${BibTeX} ${basename $@}
         # Some citations reference others so run again to resolve these citations
         ${LaTeX} ${basename $@}.tex
         -${BibTeX} ${Build}/${basename $@}
+        ${LaTeX} $<
+        -${BibTeX} ${basename $@}
         # Make index from *.aux entries and input index at end of document
         makeglossaries -q -s ${Build}/${basename $@}.ist ${Build}/${basename $@}
+        -makeglossaries -q -s ${basename $@}.ist ${basename $@}
         # Run again to finish citations
         ${LaTeX} ${basename $@}.tex
+        ${LaTeX} $<
 ## Define the default recipes.
 …
         mkdir -p ${Build}
 %.tex : img/%.fig ${Build}
+%.tex : img/%.fig | ${Build}
         fig2dev -L eepic $< > ${Build}/$@
 …
         fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
+## pstex with inverted colors
+%.dark.pstex : img/%.fig Makefile | ${Build}
+        fig2dev -L pstex $< > ${Build}/$@
+        sed -i 's/\/col-1 {0 setgray} bind def/\/col-1 {1 setgray} bind def/g' ${Build}/$@
+        sed -i 's/\/col0 {0.000 0.000 0.000 srgb} bind def/\/col0 {1.000 1.000 1.000 srgb} bind def/g' ${Build}/$@
+        sed -i 's/\/col7 {1.000 1.000 1.000 srgb} bind def/\/col7 {0.000 0.000 0.000 srgb} bind def/g' ${Build}/$@
+        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
 # Local Variables: #
 # compile-command: "make" #

doc/theses/thierry_delisle_PhD/comp_II/comp_II.tex

-              r56c44dc
+              rce55a81
 It aims to add high-productivity features while maintaining the predictable performance of C.
 As such, concurrency in \CFA\cite{Delisle19} aims to offer simple and safe high-level tools while still allowing performant code.
 \CFA concurrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} in order to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programing.
+\CFA concurrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} in order to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programming.
 As such, the \CFA \newterm{scheduler} is a preemptive user-level scheduler that maps \glspl{uthrd} onto \glspl{kthrd}.
 …
 and the cost of scheduling, \ie deciding which thread to run next among all the threads ready to run.
 \end{enumerate}
+The first cost is generally constant and fixed\footnote{Affecting the constant context-switch cost is whether it is done in one step, after the scheduling, or in two steps, context-switching to a fixed third-thread before scheduling.}, while the scheduling cost can vary based on the system state.
+Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements, it simply adds new correctness requirements, \ie \newterm{linearizability}\footnote{Meaning however fast the CPU threads run, there is an equivalent sequential order that gives the same result.}, and a new dimension to performance: scalability, where scheduling cost now also depends on contention.
+The first cost is generally constant and fixed\footnote{Affecting the constant context-switch cost is whether it is done in one step, after the scheduling, or in two steps, context-switching to a third fixed thread before scheduling.}, while the scheduling cost can vary based on the system state.
+Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements, it simply adds new correctness requirements, \ie \newterm{linearizability}\footnote{Meaning, however fast the CPU threads run, there is an equivalent sequential order that gives the same result.}, and a new dimension to performance: scalability, where scheduling cost now also depends on contention.
 The more threads switch, the more the administration cost of scheduling becomes noticeable.
 It is therefore important to build a scheduler with the lowest possible cost and latency.
 …
 While the illusion of simultaneity is easier to reason about, it can break down if the scheduler allows too much unfairness.
 Therefore, the scheduler should offer as much fairness as needed to guarantee eventual progress, but use unfairness to help performance.
 In practice, threads must wait in turn but there can be advantages to unfair scheduling, similar to the the express cash-register at a grocery store.
+In practice, threads must wait in turn but there can be advantages to unfair scheduling, similar to the express cash register at a grocery store.
 The goal of this research is to produce a scheduler that is simple for programmers to understand and offers good performance.
 …
 \end{quote}
 For a general purpose scheduler, it is impossible to produce an optimal algorithm as it would require knowledge of the future behaviour of threads.
 As such, scheduling performance is generally either defined by the best case scenario, \ie a workload to which the scheduler is tailored, or the worst case scenario, \ie the scheduler behaves no worst than \emph{X}.
+For a general-purpose scheduler, it is impossible to produce an optimal algorithm as it would require knowledge of the future behaviour of threads.
+As such, scheduling performance is generally either defined by the best-case scenario, \ie a workload to which the scheduler is tailored, or the worst-case scenario, \ie the scheduler behaves no worse than \emph{X}.
 For this proposal, the performance is evaluated using the second approach to allow \CFA programmers to rely on scheduling performance.
 Because there is no optimal scheduler, ultimately \CFA may allow programmers to write their own scheduler; but that is not the subject of this proposal, which considers only the default scheduler.
 …
 Achieving the \CFA scheduling goal includes:
 \begin{enumerate}
+\item
+producing a scheduling strategy with sufficient fairness guarantees,
+\item
+creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily,
+\item
+scheduling blocking I/O operations,
+\item
+and writing sufficient library tools to allow developers to indirectly use the scheduler, either through tuning knobs or replacing the default scheduler.
+        \item producing a scheduling strategy with sufficient fairness guarantees,
+        \item creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily,
+        \item scheduling blocking I/O operations,
+        \item and writing sufficient library tools to allow developers to indirectly use the scheduler, either through tuning knobs or replacing the default scheduler.
 \end{enumerate}
 …
 \paragraph{Correctness} As with any other concurrent data structure or algorithm, the correctness requirement is paramount.
 The scheduler cannot allow threads to be dropped from the ready queue, \ie scheduled but never run, or be executed multiple times when only being scheduled once.
 Since \CFA concurrency has no spurious wakeup, this definition of correctness also means the scheduler should have no spurious wakeup.
+Since \CFA concurrency has no spurious wake up, this definition of correctness also means the scheduler should have no spurious wake up.
 The \CFA scheduler must be correct.
 …
 The \CFA scheduler should offer good performance for all three metrics.
 \paragraph{Fairness} Like performance, this requirement has several aspect : eventual progress, predictability and performance reliability.
+\paragraph{Fairness} Like performance, this requirement has several aspects : eventual progress, predictability and performance reliability.
 \newterm{Eventual progress} guarantees every scheduled thread is eventually run, \ie prevent starvation.
 As a hard requirement, the \CFA scheduler must guarantee eventual progress, otherwise the above mentioned illusion of simultaneous execution is broken and the scheduler becomes much more complex to reason about.
 \newterm{Predictability} and \newterm{reliability} means similar workloads achieve similar performance and programmer execution intuition is respected.
 For example, a thread that yields aggressively should not run more often then other tasks.
+As a hard requirement, the \CFA scheduler must guarantee eventual progress, otherwise the above-mentioned illusion of simultaneous execution is broken and the scheduler becomes much more complex to reason about.
+\newterm{Predictability} and \newterm{reliability} mean similar workloads achieve similar performance and programmer execution intuition is respected.
+For example, a thread that yields aggressively should not run more often than other tasks.
 While this is intuitive, it does not hold true for many work-stealing or feedback-based schedulers.
 The \CFA scheduler must guarantee eventual progress and should be predictable and offer reliable performance.
 …
 \begin{enumerate}
         \item Threads live long enough for useful feedback information to be to gathered.
+        \item Threads live long enough for useful feedback information to be gathered.
         \item Threads belong to multiple users so fairness across threads is insufficient.
 \end{enumerate}
 …
 Since \CFA has the explicit goal of allowing many smaller threads, this can naturally lead to threads with much shorter lifetimes that are only scheduled a few times.
 Scheduling strategies based on feedback cannot be effective in these cases because there is no opportunity to measure the metrics that underlie the algorithm.
 Note, the problem of \newterm{feedback convergence} (reacting too slowly to scheduling events) is not specific to short lived threads but can also occur with threads that show drastic changes in scheduling, \eg threads running for long periods of time and then suddenly blocking and unblocking quickly and repeatedly.
+Note, the problem of \newterm{feedback convergence} (reacting too slowly to scheduling events) is not specific to short-lived threads but can also occur with threads that show drastic changes in scheduling, \eg threads running for long periods of time and then suddenly blocking and unblocking quickly and repeatedly.
 In the context of operating systems, these concerns can be overshadowed by a more pressing concern : security.
 …
 In the case of the \CFA scheduler, every thread runs in the same user space and is controlled by the same user.
 Fairness across users is therefore a given and it is then possible to safely ignore the possibility that threads are malevolent.
 This approach allows for a much simpler fairness metric and in this proposal \emph{fairness} is defined as: when multiple threads are cycling through the system, the total ordering of threads being scheduled, \ie pushed onto the ready-queue, should not differ much from the total ordering of threads being executed, \ie popped from the ready-queue.
+This approach allows for a much simpler fairness metric and in this proposal \emph{fairness} is defined as: when multiple threads are cycling through the system, the total ordering of threads being scheduled, \ie pushed onto the ready queue, should not differ much from the total ordering of threads being executed, \ie popped from the ready queue.
 Since feedback is not necessarily feasible within the lifetime of all threads and a simple fairness metric can be used, the scheduling strategy proposed for the \CFA runtime does not use per-thread feedback.
 …
 Another broad category of schedulers are priority schedulers.
 In these scheduling strategies, threads have priorities and the runtime schedules the threads with the highest priority before scheduling other threads.
 Threads with equal priority are scheduled using a secondary strategy, often something simple like round-robin or FIFO.
+Threads with equal priority are scheduled using a secondary strategy, often something simple like round robin or FIFO.
 A consequence of priority is that, as long as there is a thread with a higher priority that desires to run, a thread with a lower priority does not run.
 This possible starving of threads can dramatically increase programming complexity since starving threads and priority inversion (prioritizing a lower priority thread) can both lead to serious problems.
 An important observation is that threads do not need to have explicit priorities for problems to occur.
 Indeed, any system with multiple ready-queues that attempts to exhaust one queue before accessing the other queues, essentially provide implicit priority, which can encounter starvation problems.
+Indeed, any system with multiple ready queues that attempts to exhaust one queue before accessing the other queues essentially provides implicit priority, which can encounter starvation problems.
 For example, a popular scheduling strategy that suffers from implicit priorities is work stealing.
 \newterm{Work stealing} is generally presented as follows:
 …
 \subsection{Schedulers without feedback or priorities}
 This proposal conjectures that is is possible to construct a default scheduler for the \CFA runtime that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about.
+This proposal conjectures that it is possible to construct a default scheduler for the \CFA runtime that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about.
 The simplest fairness guarantee is FIFO ordering, \ie threads scheduled first run first.
 However, enforcing FIFO ordering generally conflicts with scalability across multiple processors because of the additional synchronization.
 …
 Pushing new data is done by selecting one of these underlying queues at random, recording a timestamp for the operation and pushing to the selected queue.
 Popping is done by selecting two queues at random and popping from the queue with the oldest timestamp.
 A higher number of underlying queues leads to less contention on each queue and therefore better performance.
+A higher number of underlying queues leads to less contention on each queue and therefore better performance.
 In a loaded system, it is highly likely the queues are non-empty, \ie several tasks are on each of the underlying queues.
 This means that selecting a queue at random to pop from is highly likely to yield a queue with available items.
 …
 \begin{figure}
         \begin{center}
                 \input{base}
+                \input{base.pstex_t}
         \end{center}
         \caption{Relaxed FIFO list at the base of the scheduler: an array of strictly FIFO lists.
 The timestamp is in all nodes and cell arrays.}
+        The timestamp is in all nodes and cell arrays.}
         \label{fig:base}
 \end{figure}
 …
 \begin{figure}
         \begin{center}
                 \input{empty}
+                \input{empty.pstex_t}
         \end{center}
         \caption{``More empty'' state of the queue: the array contains many empty cells.}
 …
 Overall performance is therefore influenced by the contention on the underlying queues and pop performance is influenced by the item density.
 This leads to four performance cases for the centralized ready-queue, as depicted in Table~\ref{tab:perfcases}.
+This leads to four performance cases for the centralized ready queue, as depicted in Table~\ref{tab:perfcases}.
 The number of processors (many or few) refers to the number of kernel threads \emph{actively} attempting to pop user threads from the queues, not the total number of kernel threads.
 The number of threads (many or few) refers to the number of user threads ready to be run.
 …
 Sparse global information helps high-contention cases but increases latency in zero-contention cases, to read and ``aggregate'' the information\footnote{Hierarchical structures, \eg binary search tree, effectively aggregate information but follow pointer chains, learning information at each node.
 Similarly, other sparse schemes need to read multiple cachelines to acquire all the information needed.}.
 Finally, dense local information has both the advantages of low latency in zero-contention cases and scalability in high-contention cases, however the information can become stale making it difficult to use to ensure correctness.
 The fact that these solutions have these fundamental limits suggest to me a better solution that attempts to combine these properties in an interesting ways.
+Finally, dense local information has both the advantages of low latency in zero-contention cases and scalability in high-contention cases. However, the information can become stale, making it difficult to use to ensure correctness.
+The fact that these solutions have these fundamental limits suggests to me a better solution that attempts to combine these properties in an interesting way.
 Also, the lock discussed in Section~\ref{sec:resize} allows for solutions that adapt to the number of processors, which could also prove useful.
 …
 How much scalability is actually needed is highly debatable.
 \emph{libfibre}\cite{libfibre} has compared favorably to other schedulers in webserver tests\cite{Karsten20} and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask.
+\emph{libfibre}\cite{libfibre} has compared favourably to other schedulers in webserver tests\cite{Karsten20} and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask.
 As such, the single atomic instruction on a shared cacheline may be sufficiently performant.
 I have built a prototype of this ready queue in the shape of a data queue, \ie nodes on the queue are structures with a single int representing a thread and intrusive data fields.
 Using this prototype I ran preliminary performance experiments that confirm the expected performance in Table~\ref{tab:perfcases}.
+Using this prototype, I ran preliminary performance experiments that confirm the expected performance in Table~\ref{tab:perfcases}.
 However, these experiments only offer a hint at the actual performance of the scheduler since threads form more complex operations than simple integer nodes, \eg threads are not independent of each other, when a thread blocks some other thread must intervene to wake it.
 …
 \begin{figure}
         \begin{center}
                 \input{system}
+                \input{system.pstex_t}
         \end{center}
         \caption{Global structure of the \CFA runtime system.}
 …
 This assumption is made both in the design of the proposed scheduler as well as in the original design of the \CFA runtime system.
 As such, the proposed scheduler must honour the correctness of this behaviour but does not have any performance objectives with regard to resizing a cluster.
 How long adding or removing processors take and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long period of times.
+How long adding or removing processors takes and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long periods of time.
 However, as mentioned in Section~\ref{sec:queue}, contention on the underlying queues can have a direct impact on performance.
 The number of underlying queues must therefore be adjusted as the number of processors grows or shrinks.
 …
 This description effectively matches with the description of a reader-writer lock, infrequent but invasive updates among frequent read operations.
 In the case of the ready queue described above, read operations are operations that push or pop from the ready queue but do not invalidate any references to the ready queue data structures.
 Writes on the other hand would add or remove inner queues, invalidating references to the array of inner queues in a process.
+Writes, on the other hand, would add or remove inner queues, invalidating references to the array of inner queues in a process.
 Therefore, the current proposed approach to this problem is to add a per-cluster reader-writer lock around the ready queue to prevent restructuring of the ready-queue data-structure while threads are being pushed or popped.
 …
 \paragraph{Objectives and Existing Work}
 The lock must offer scalability and performance on par with the actual ready-queue in order not to introduce a new bottleneck.
+The lock must offer scalability and performance on par with the actual ready queue in order not to introduce a new bottleneck.
 I have already built a lock that fits the desired requirements and preliminary testing shows scalability and performance that exceed the target.
 As such, I do not consider this lock to be a risk for this project.
 …
 give back unneeded CPU time associated with a process to other user processors executing on the computer,
 \item
 and reduce energy consumption in cases where more idle kernel-threads translate to idle CPUs, which can cycle down.
+and reduce energy consumption in cases where more idle kernel-threads translate into idle CPUs, which can cycle down.
 \end{enumerate}
 Support for idle sleep broadly involves calling the operating system to block the kernel thread and handling the race between a blocking thread and the waking thread, and handling which kernel thread should sleep or wake up.
 …
 This operation is equivalent to the classic problem of missing signals when using condition variables: the ``sleepy'' processor indicates its intention to block but has not yet gone to sleep when another processor attempts to wake it up.
 The waking-up operation sees the blocked process and signals it, but the blocking process is racing to sleep so the signal is missed.
 In cases where kernel threads are managed as processors on the current cluster, loosing signals is not necessarily critical, because at least some processors on the cluster are awake and may check for more processors eventually.
+In cases where kernel threads are managed as processors on the current cluster, losing signals is not necessarily critical, because at least some processors on the cluster are awake and may check for more processors eventually.
 Individual processors always finish scheduling user threads before looking for new work, which means that the last processor to go to sleep cannot miss threads scheduled from inside the cluster (if they do, that demonstrates the ready queue is not linearizable).
 However, this guarantee does not hold if threads are scheduled from outside the cluster, either due to an external event like timers and I/O, or due to a user (or kernel) thread migrating from a different cluster.
 …
 Another important issue is avoiding kernel threads sleeping and waking frequently because there is a significant operating-system cost.
 This scenario happens when a program oscillates between high and low activity, needing most and then less processors.
+This scenario happens when a program oscillates between high and low activity, needing most and then fewer processors.
 A possible partial solution is to order the processors so that the one which most recently went to sleep is woken up.
 This allows other sleeping processors to reach deeper sleep state (when these are available) while keeping ``hot'' processors warmer.
 …
 A final important aspect of idle sleep is when should processors make the decision to sleep and when is it appropriate for sleeping processors to be woken up.
 Processors that are unnecessarily unblocked lead to unnecessary contention, CPU usage, and power consumption, while too many sleeping processors can lead to sub-optimal throughput.
 Furthermore, transitions from sleeping to awake and vice-versa also add unnecessary latency.
+Processors that are unnecessarily unblocked lead to unnecessary contention, CPU usage, and power consumption, while too many sleeping processors can lead to suboptimal throughput.
+Furthermore, transitions from sleeping to awake and vice versa also add unnecessary latency.
 There is already a wealth of research on the subject\cite{schillings1996engineering, wiki:thunderherd} and I may use an existing approach for the idle-sleep heuristic in this project, \eg\cite{Karsten20}.
 …
 It is preferable to block the user thread performing the I/O and reuse the underlying kernel-thread to run other ready user threads.
 This approach requires intercepting user-thread calls to I/O operations, redirecting them to an asynchronous I/O interface, and handling the multiplexing/demultiplexing between the synchronous and asynchronous API.
 As such, there are three components needed to implemented support for asynchronous I/O:
+As such, there are three components needed to implement support for asynchronous I/O:
 \begin{enumerate}
 \item
 …
 It is sufficient to make one work in the complex context of the \CFA runtime.
 \uC uses the $select$\cite{select} as its interface, which handles ttys, pipes and sockets, but not disk.
 $select$ entails significant complexity and is being replaced in UNIX operating-systems, which make it a less interesting alternative.
+$select$ entails significant complexity and is being replaced in UNIX operating systems, which makes it a less interesting alternative.
 Another popular interface is $epoll$\cite{epoll}, which is supposed to be cheaper than $select$.
 However, $epoll$ also does not handle the file system and anectodal evidence suggest it has problem with linux pipes and $TTY$s.
+However, $epoll$ also does not handle the file system and anecdotal evidence suggests it has problems with Linux pipes and $TTY$s.
 A popular cross-platform alternative is $libuv$\cite{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features).
 However, as a full-featured library it includes much more than I need and could conflict with other features of \CFA unless significant effort is made to merge them together.
 A very recent alternative that I am investigating is $io_uring$\cite{io_uring}.
 It claims to address some of the issues with $epoll$ and my early investigating suggest that the claim is accurate.
 $io_uring$ uses a much more general approach where system calls are register to a queue and later executed by the kernel, rather than relying on system calls to return an error instead of blocking and subsequently waiting for changes on file descriptors.
+It claims to address some of the issues with $epoll$ and my early investigation suggests that the claim is accurate.
+$io_uring$ uses a much more general approach where system calls are registered to a queue and later executed by the kernel, rather than relying on system calls to return an error instead of blocking and subsequently waiting for changes on file descriptors.
 I believe this approach allows for fewer problems, \eg the manpage for $open$\cite{open} states:
 \begin{quote}
         Note that [the $O_NONBLOCK$ flag] has no effect for regular files and block devices;
         that is, I/O operations will (briefly) block when device activity is required, regardless of whether $O_NONBLOCK$ is set.
         Since $O_NONBLOCK$ semantics might eventually be implemented, applications should not depend upon blocking behavior when specifying this flag for regular files and block devices.
+Note that [the $O_NONBLOCK$ flag] has no effect for regular files and block devices;
+that is, I/O operations will (briefly) block when device activity is required, regardless of whether $O_NONBLOCK$ is set.
+Since $O_NONBLOCK$ semantics might eventually be implemented, applications should not depend upon blocking behaviour when specifying this flag for regular files and block devices.
 \end{quote}
 This makes approaches based on $epoll$/$select$ less reliable since they may not work for every file descriptor.
 For this reason, I plan to use $io_uring$ as the OS abstraction for the \CFA runtime, unless further work shows problems I haven't encountered yet.
+For this reason, I plan to use $io_uring$ as the OS abstraction for the \CFA runtime unless further work shows problems I haven't encountered yet.
 However, only a small subset of the features are available in Ubuntu as of April 2020\cite{wiki:ubuntu-linux}, which will limit performance comparisons.
 I do not believe this will affect the comparison result.
 …
 \section{Discussion}
 I believe that runtime system and scheduling are still open topics.
 Many ``state of the art'' production frameworks still use single threaded event-loops because of performance considerations, \eg \cite{nginx-design}, and, to my knowledge, no wideyl available system language offers modern threading facilities.
+Many ``state of the art'' production frameworks still use single-threaded event loops because of performance considerations, \eg \cite{nginx-design}, and, to my knowledge, no widely available system language offers modern threading facilities.
 I believe the proposed work offers a novel runtime and scheduling package, where existing work only offers fragments that users must assemble themselves when possible.
 …
 \hline November 2020 & March 2021   & Completion of the implementation. \\
 \hline March 2021 & April 2021  & Final Performance experiments. \\
 \hline May 2021 & August 2021 & Thesis writing and defense. \\
+\hline May 2021 & August 2021 & Thesis writing and defence. \\
 \hline
 \end{tabular}

doc/theses/thierry_delisle_PhD/comp_II/img/base.fig

-              r56c44dc
+              rce55a81
 #FIG 3.2  Produced by xfig version 3.2.5c
+#FIG 3.2  Produced by xfig version 3.2.7b
 Landscape
 Center
 Inches
 Letter
+Letter
 .00
 Single
 …
 3 0 1 0 0 50 -1 20 0.000 1 0.0000 6975 4200 20 20 6975 4200 6995 4200
 -6
-2400 2100 3000 2700
-3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400
-1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-2475 3000 2475
-1 0 50 -1 0 11 0.0000 2 120 210 2700 2650 TS\001
--6
-2400 3000 3000 3600
-3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300
-1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
-3375 3000 3375
-1 0 50 -1 0 11 0.0000 2 120 210 2700 3550 TS\001
--6
 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 3900 2400 300 300 3900 2400 4200 2400
 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 3900 3300 300 300 3900 3300 4200 3300
 …
 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 6300 3300 300 300 6300 3300 6600 3300
 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4509 3302 300 300 4509 3302 4809 3302
+3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 3300 300 300 2700 3300 3000 3300
+3 0 1 0 7 50 -1 -1 0.000 1 0.0000 2700 2400 300 300 2700 2400 3000 2400
 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
 3900 3000 4500
 …
 1 1.00 45.00 90.00
 4200 6300 3600
 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2
+1 0 1 -1 7 50 -1 -1 0.000 0 0 -1 1 0 2
 1 1.00 45.00 90.00
 4200 2700 3600
 …
 1 1.00 45.00 90.00
 4200 4500 3600
+2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001
+2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001
+2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001
+2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001
+1 0 50 -1 0 11 0.0000 2 120 210 2700 4450 TS\001
+1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+3375 3000 3375
+1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+2475 3000 2475
+2 -1 50 -1 0 12 0.0000 2 135 630 2100 3075 Threads\001
+2 -1 50 -1 0 12 0.0000 2 165 450 2100 2850 Ready\001
+1 -1 50 -1 0 11 0.0000 2 135 180 2700 4450 TS\001
+2 -1 50 -1 0 12 0.0000 2 165 720 2100 4200 Array of\001
+2 -1 50 -1 0 12 0.0000 2 150 540 2100 4425 Queues\001
+1 -1 50 -1 0 11 0.0000 2 135 180 2700 3550 TS\001
+1 -1 50 -1 0 11 0.0000 2 135 180 2700 2650 TS\001

doc/theses/thierry_delisle_PhD/comp_II/img/empty.fig

-              r56c44dc
+              rce55a81
 #FIG 3.2  Produced by xfig version 3.2.5c
+#FIG 3.2  Produced by xfig version 3.2.7b
 Landscape
 Center
 Inches
 Letter
+Letter
 .00
 Single
 …
 1 1.00 45.00 90.00
 4200 2700 3600
 2 0 50 -1 0 12 0.0000 2 180 660 2100 4200 Array of\001
 2 0 50 -1 0 12 0.0000 2 165 600 2100 4425 Queues\001
 2 0 50 -1 0 12 0.0000 2 135 645 2100 3075 Threads\001
 2 0 50 -1 0 12 0.0000 2 180 525 2100 2850 Ready\001
+2 -1 50 -1 0 12 0.0000 2 165 720 2100 4200 Array of\001
+2 -1 50 -1 0 12 0.0000 2 150 540 2100 4425 Queues\001
+2 -1 50 -1 0 12 0.0000 2 135 630 2100 3075 Threads\001
+2 -1 50 -1 0 12 0.0000 2 165 450 2100 2850 Ready\001

doc/theses/thierry_delisle_PhD/comp_II/img/system.fig

-              r56c44dc
+              rce55a81
 Center
 Inches
 Letter
+Letter
 .00
 Single
 …
 1 -1 0 0 0 10 0.0000 2 105 990 2175 3525 Discrete-event\001
 1 -1 0 0 0 10 0.0000 2 135 795 2175 4350 preemption\001
 0 -1 0 0 0 10 0.0000 2 150 1290 2325 4875 genrator/coroutine\001
+0 -1 0 0 0 10 0.0000 2 150 1290 2325 4875 generator/coroutine\001
 0 -1 0 0 0 10 0.0000 2 120 270 4050 4875 task\001
 0 -1 0 0 0 10 0.0000 2 105 450 7050 4875 cluster\001

driver/cfa.cc

-              r56c44dc
+              rce55a81
 // Created On       : Tue Aug 20 13:44:49 2002
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Thu Aug 20 23:43:59 2020
 // Update Count     : 436
+// Last Modified On : Wed Sep  2 17:59:20 2020
+// Update Count     : 438
 //
 …
                 #endif // __x86_64__
                 args[nargs++] = "-ldl";
-                args[nargs++] = "-lrt";
                 args[nargs++] = "-lm";
         } // if

libcfa/prelude/Makefile.am

r56c44dc	rce55a81
22	22	cfalibdir = ${CFA_LIBDIR}
23	23	cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c defines.hfa
	24
	25	EXTRA_DIST = bootloader.cf builtins.c builtins.def extras.c extras.regx extras.regx2 prelude-gen.cc prototypes.awk prototypes.c prototypes.sed sync-builtins.cf
24	26
25	27	CC = @LOCAL_CFACC@

libcfa/src/Makefile.am

-              r56c44dc
+              rce55a81
 # AM_CFAFLAGS for only cfa source
 # use -no-include-stdhdr to prevent rebuild cycles
 # The built sources must not depend on the installed headers
 AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
+# The built sources must not depend on the installed inst_headers_src
+AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr -I$(srcdir)/concurrency $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
 AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@
 …
 #----------------------------------------------------------------------------------------------------------------
 if BUILDLIB
+headers_nosrc = bitmanip.hfa exception.hfa math.hfa gmp.hfa time_t.hfa clock.hfa \
+                bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa \
+                containers/list.hfa containers/stackLockFree.hfa concurrency/iofwd.hfa
+inst_headers_nosrc = \
+        bitmanip.hfa \
+        clock.hfa \
+        exception.hfa \
+        gmp.hfa \
+        math.hfa \
+        time_t.hfa \
+        bits/align.hfa \
+        bits/containers.hfa \
+        bits/debug.hfa \
+        bits/defs.hfa \
+        bits/locks.hfa \
+        concurrency/iofwd.hfa \
+        containers/list.hfa \
+        containers/stackLockFree.hfa
+headers = common.hfa fstream.hfa heap.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa \
+                time.hfa stdlib.hfa parseargs.hfa \
+                containers/maybe.hfa containers/pair.hfa containers/result.hfa containers/vector.hfa
+inst_headers_src = \
+        common.hfa \
+        fstream.hfa \
+        heap.hfa \
+        iostream.hfa \
+        iterator.hfa \
+        limits.hfa \
+        parseargs.hfa \
+        rational.hfa \
+        stdlib.hfa \
+        time.hfa \
+        containers/maybe.hfa \
+        containers/pair.hfa \
+        containers/result.hfa \
+        containers/vector.hfa
+libsrc = startup.cfa interpose.cfa bits/debug.cfa assert.cfa exception.c virtual.c ${headers:.hfa=.cfa}
+libsrc = ${inst_headers_src} ${inst_headers_src:.hfa=.cfa} \
+        assert.cfa \
+        bits/algorithm.hfa \
+        bits/debug.cfa \
+        exception.c \
+        exception.h \
+        interpose.cfa \
+        lsda.h \
+        startup.cfa \
+        startup.hfa \
+        virtual.c \
+        virtual.h
 # not all platforms support concurrency, add option do disable it
+thread_headers_nosrc = bits/random.hfa concurrency/invoke.h concurrency/kernel/fwd.hfa
+inst_thread_headers_nosrc = \
+        bits/random.hfa \
+        concurrency/invoke.h \
+        concurrency/kernel/fwd.hfa
+thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa \
+                concurrency/monitor.hfa concurrency/mutex.hfa concurrency/exception.hfa
+inst_thread_headers_src = \
+        concurrency/coroutine.hfa \
+        concurrency/exception.hfa \
+        concurrency/kernel.hfa \
+        concurrency/monitor.hfa \
+        concurrency/mutex.hfa \
+        concurrency/thread.hfa
+thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \
+                concurrency/invoke.c concurrency/io.cfa concurrency/iocall.cfa \
+                concurrency/io/setup.cfa \
+                concurrency/kernel/startup.cfa concurrency/preemption.cfa \
+                concurrency/ready_queue.cfa concurrency/stats.cfa \
+                ${thread_headers:.hfa=.cfa}
+# Sources of the threading runtime: every installed thread header and its matching .cfa, plus the private headers and sources below (each listed once).
+thread_libsrc = ${inst_thread_headers_src} ${inst_thread_headers_src:.hfa=.cfa} \
+        bits/signal.hfa \
+        concurrency/alarm.cfa \
+        concurrency/alarm.hfa \
+        concurrency/CtxSwitch-@ARCHITECTURE@.S \
+        concurrency/invoke.c \
+        concurrency/io.cfa \
+        concurrency/io/setup.cfa \
+        concurrency/io/types.hfa \
+        concurrency/iocall.cfa \
+        concurrency/iofwd.hfa \
+        concurrency/kernel_private.hfa \
+        concurrency/kernel/startup.cfa \
+        concurrency/preemption.cfa \
+        concurrency/preemption.hfa \
+        concurrency/ready_queue.cfa \
+        concurrency/ready_subqueue.hfa \
+        concurrency/snzi.hfa \
+        concurrency/stats.cfa \
+        concurrency/stats.hfa
 else
 headers =
 thread_headers =
 headers_nosrc =
 thread_headers_nosrc =
+inst_headers_src =
+inst_thread_headers_src =
+inst_headers_nosrc =
+inst_thread_headers_nosrc =
 libsrc =
 endif
 …
 #----------------------------------------------------------------------------------------------------------------
+libcfa_la_SOURCES = prelude.cfa ${libsrc}
+libcfa_la_SOURCES = ${libsrc}
+nodist_libcfa_la_SOURCES = prelude.cfa
 libcfa_la_LDFLAGS = -version-info @CFA_VERSION@
 …
 cfa_includedir = $(CFA_INCDIR)
+nobase_cfa_include_HEADERS = ${stdhdr} ${headers} ${headers_nosrc} ${thread_headers} ${thread_headers_nosrc}
+nobase_cfa_include_HEADERS = ${stdhdr} ${inst_headers_src} ${inst_headers_nosrc} ${inst_thread_headers_src} ${inst_thread_headers_nosrc}
+EXTRA_DIST = stdhdr
 #----------------------------------------------------------------------------------------------------------------

libcfa/src/bits/defs.hfa

-              r56c44dc
+              rce55a81
 // Created On       : Thu Nov  9 13:24:10 2017
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Thu Aug 13 22:00:23 2020
 // Update Count     : 19
+// Last Modified On : Wed Aug 26 16:22:32 2020
+// Update Count     : 20
 //
 …
 static inline long long int rdtscl(void) {
+        #if defined( __aarch64__ )
+        #if defined( __i386 ) || defined( __x86_64 )
+        unsigned int lo, hi;
+        __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+        return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
+        #elif defined( __aarch64__ ) || defined( __arm__ )
         // https://github.com/google/benchmark/blob/v1.1.0/src/cycleclock.h#L116
         long long int virtual_timer_value;
 …
         return virtual_timer_value;
         #else
+        unsigned int lo, hi;
+        __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+        return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
+                #error unsupported hardware architecture
         #endif // __ARM_ARCH
+}

libcfa/src/bits/locks.hfa

-              r56c44dc
+              rce55a81
+        }
+        // Semaphore which only supports a single thread and one post
+        // Semaphore which only supports a single thread
+        // Synchronization primitive which only supports a single thread and one post
+        // Similar to a binary semaphore with a 'one shot' semantic
+        // is expected to be discarded after each party calls its side
         struct oneshot {
+                // Internal state :
+                //     0p     : is initial state (wait will block)
+                //     1p     : fulfilled (wait won't block)
+                // any thread : a thread is currently waiting
                 struct $thread * volatile ptr;
         };
 …
                 void ^?{}(oneshot & this) {}
+                // Wait for the post, return immediately if it already happened.
+                // return true if the thread was parked
                 bool wait(oneshot & this) {
                         for() {
 …
+                }
+                // Mark as fulfilled, wake thread if needed
+                // return true if a thread was unparked
                 bool post(oneshot & this) {
                         struct $thread * got = __atomic_exchange_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
 …
+                }
+        }
+        // Base type for futures to build upon
+        // It is based on the 'oneshot' type to allow multiple futures
+        // to block on the same instance, permitting users to block a single
+        // thread on "any of" [a given set of] futures.
+        // Does not support multiple threads waiting on the same future
+        struct future_t {
+                // Internal state :
+                //     0p      : is initial state (wait will block)
+                //     1p      : fulfilled (wait won't block)
+                //     2p      : in progress (fulfil has claimed the wait context and is posting to it)
+                //     3p      : abandoned, server should delete
+                // any oneshot : a context has been setup to wait, a thread could wait on it
+                struct oneshot * volatile ptr;
+        };
+        static inline {
+                void  ?{}(future_t & this) { // constructor
+                        this.ptr = 0p; // start in the initial state: not fulfilled, no wait context
+                }
+                void ^?{}(future_t & this) {} // destructor: empty body
+                // Check if the future is available, i.e. it is in the fulfilled state (1p)
+                bool available( future_t & this ) {
+                        return this.ptr == 1p;
+                }
+                // Prepare the future to be waited on: returns true if wait_ctx was installed, false if the future was already fulfilled
+                // intended to be used by wait, wait_any, waitfor, etc. rather than used directly
+                bool setup( future_t & this, oneshot & wait_ctx ) {
+                        /* paranoid */ verify( wait_ctx.ptr == 0p );
+                        // The future needs to set the wait context
+                        for() {
+                                struct oneshot * expected = this.ptr;
+                                // Is the future already fulfilled?
+                                if(expected == 1p) return false; // Yes, just return false (didn't block)
+                                // The future is not fulfilled, try to setup the wait context
+                                /* paranoid */ verify( expected == 0p );
+                                if(__atomic_compare_exchange_n(&this.ptr, &expected, &wait_ctx, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { // on failure, loop and re-read the state
+                                        return true;
+                                }
+                        }
+                }
+                // Stop waiting on a future
+                // When multiple futures are waited for together in "any of" pattern
+                // futures that weren't fulfilled before the thread woke up
+                // should retract the wait ctx
+                // intended to be used by wait, wait_any, waitfor, etc. rather than used directly
+                void retract( future_t & this, oneshot & wait_ctx ) {
+                        // Remove the wait context (resets the state to 0p and returns what was there)
+                        struct oneshot * got = __atomic_exchange_n( &this.ptr, 0p, __ATOMIC_SEQ_CST);
+                        // got == 0p: future was never actually setup, just return
+                        if( got == 0p ) return;
+                        // got == wait_ctx: since fulfil claims the context with an atomic compare-and-swap,
+                        // if we got back the original then no one else saw the context
+                        // It is safe to delete (which could happen after the return)
+                        if( got == &wait_ctx ) return;
+                        // got == 1p: the future is ready and the context was fully consumed
+                        // the server won't use the pointer again
+                        // It is safe to delete (which could happen after the return)
+                        if( got == 1p ) return;
+                        // got == 2p: the future is ready but the context hasn't fully been consumed
+                        // spin until it is safe to move on
+                        if( got == 2p ) {
+                                while( this.ptr != 1p ) Pause();
+                                return;
+                        }
+                        // got == anything else: something went wrong here, abort
+                        abort("Future in unexpected state");
+                }
+                // Mark the future as abandoned (state 3p), meaning it will be deleted by the server: fulfil frees a future it finds in state 3p
+                void abandon( future_t & this ) {
+                        struct oneshot * got = __atomic_exchange_n( &this.ptr, 3p, __ATOMIC_SEQ_CST);
+                        // got == 2p: the future is ready but the context hasn't fully been consumed
+                        // spin until it is safe to move on
+                        if( got == 2p ) {
+                                while( this.ptr != 1p ) Pause();
+                        }
+                        return;
+                }
+                // From the server side, mark the future as fulfilled
+                // delete it if needed; returns the result of post on the wait context, or false if there was no wait context or the future was abandoned
+                bool fulfil( future_t & this ) {
+                        for() {
+                                struct oneshot * expected = this.ptr;
+                                // was this abandoned?
+                                if( expected == 3p ) { free( &this ); return false; }
+                                /* paranoid */ verify( expected != 1p ); // Future is already fulfilled, should not happen
+                                /* paranoid */ verify( expected != 2p ); // Future is being fulfilled by someone else, this is even less supported than the previous case.
+                                // If there is a wait context, we need to consume it and mark it as consumed after
+                                // If there is no context then we can skip the in progress phase
+                                struct oneshot * want = expected == 0p ? 1p : 2p;
+                                if(__atomic_compare_exchange_n(&this.ptr, &expected, want, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
+                                        if( expected == 0p ) { /* paranoid */ verify( this.ptr == 1p); return false; }
+                                        bool ret = post( *expected ); // expected is the wait context that was installed
+                                        __atomic_store_n( &this.ptr, 1p, __ATOMIC_SEQ_CST); // leave the in-progress state: the context is consumed
+                                        return ret;
+                                }
+                        }
+                }
+                // Wait for the future to be fulfilled; returns false if setup reports it was already fulfilled, otherwise the result of wait on the temporary oneshot
+                bool wait( future_t & this ) {
+                        oneshot temp;
+                        if( !setup(this, temp) ) return false;
+                        // Wait context is setup, just wait on it
+                        bool ret = wait( temp );
+                        // Spin while the future is still in the in-progress state (2p)
+                        while( this.ptr == 2p ) Pause();
+                        // Make sure the state makes sense
+                        // Should be fulfilled, could be in progress but it's out of date if so
+                        // since if that is the case, the oneshot was fulfilled (unparking this thread)
+                        // and the oneshot should not be needed any more
+                        __attribute__((unused)) struct oneshot * was = this.ptr;
+                        /* paranoid */ verifyf( was == 1p, "Expected this.ptr to be 1p, was %p\n", was );
+                        // Mark the future as fulfilled, to be consistent
+                        // with potential calls to available
+                        // this.ptr = 1p;
+                        return ret;
+                }
+        }
 #endif

libcfa/src/concurrency/CtxSwitch-arm64.S

-              r56c44dc
+              rce55a81
 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
 //
 // CtxSwitch-arm.S --
+// CtxSwitch-arm64.S --
 //
 // Author           : Peter A. Buhr
 // Created On       : Sun Aug 16 07:50:13 2020
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Thu Aug 20 18:43:51 2020
 // Update Count     : 24
+// Last Modified On : Wed Aug 26 16:24:59 2020
+// Update Count     : 25
 //
 …
 #define SAVE            20 * 8
         .file "CtxSwitch-arm.S"
+        .file "CtxSwitch-arm64.S"
         .text
         .align 2
         .global __cfactx_switch
         .type __cfactx_switch, @function
+        .type __cfactx_switch, %function
 __cfactx_switch:
 …
         .align 2
         .global __cfactx_invoke_stub
         .type __cfactx_invoke_stub, @function
+        .type __cfactx_invoke_stub, %function
 __cfactx_invoke_stub:
         mov x0, x19                                                     // load main as parameter 0

libcfa/src/concurrency/kernel.cfa

-              r56c44dc
+              rce55a81
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Fri Aug 14 15:23:00 2020
 // Update Count     : 69
+// Last Modified On : Mon Aug 31 07:08:20 2020
+// Update Count     : 71
 //
 …
+                )
+#elif defined( __ARM_ARCH )
+#elif defined( __arm__ )
+        #define __x87_store
+        #define __x87_load
+#elif defined( __aarch64__ )
         #define __x87_store              \
                 uint32_t __fpcntl[2];    \
 …
 #else
         #error unknown hardware architecture
+        #error unsupported hardware architecture
 #endif

libcfa/src/concurrency/preemption.cfa

-              r56c44dc
+              rce55a81
 // Created On       : Mon Jun 5 14:20:42 2017
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Fri Aug 21 13:45:32 2020
 // Update Count     : 52
+// Last Modified On : Wed Aug 26 16:46:03 2020
+// Update Count     : 53
 //
 …
 #elif defined( __x86_64 )
 #define CFA_REG_IP gregs[REG_RIP]
+#elif defined( __ARM_ARCH )
+#elif defined( __arm__ )
+#define CFA_REG_IP arm_pc
+#elif defined( __aarch64__ )
 #define CFA_REG_IP pc
 #else
 #error unknown hardware architecture
+#error unsupported hardware architecture
 #endif

libcfa/src/exception.c

-              r56c44dc
+              rce55a81
 // Created On       : Mon Jun 26 15:13:00 2017
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Fri Aug 21 11:27:56 2020
 // Update Count     : 29
+// Last Modified On : Sat Aug 29 15:52:22 2020
+// Update Count     : 34
 //
 …
 #include "exception.h"
-// Implementation of the secret header is hardware dependent.
-#if defined( __x86_64 ) || defined( __i386 )
-#elif defined( __ARM_ARCH )
-#warning FIX ME: check if anything needed for ARM
-#else
-#warning Exception Handling: No known architecture detected.
-#endif
 #include <stdlib.h>
 …
 #include "stdhdr/assert.h"
+// FIX ME: temporary hack to keep ARM build working
+#if defined( __ARM_ARCH )
+#warning FIX ME: temporary hack to keep ARM build working
 #ifndef _URC_FATAL_PHASE1_ERROR
 #define _URC_FATAL_PHASE1_ERROR 3
 …
 #define _URC_FATAL_PHASE2_ERROR 2
 #endif // ! _URC_FATAL_PHASE2_ERROR
+#endif // __ARM_ARCH
 #include "lsda.h"
 …
 #pragma GCC pop_options
+#elif defined( __ARM_ARCH )
+_Unwind_Reason_Code __gcfa_personality_v0(
+                int version,
+                _Unwind_Action actions,
+                unsigned long long exception_class,
+                struct _Unwind_Exception * unwind_exception,
+                struct _Unwind_Context * unwind_context) {
+        return _URC_CONTINUE_UNWIND;
+}
+__attribute__((noinline))
+void __cfaehm_try_terminate(void (*try_block)(),
+                void (*catch_block)(int index, exception_t * except),
+                __attribute__((unused)) int (*match_block)(exception_t * except)) {
+}
+#else
+        #error unsupported hardware architecture
 #endif // __x86_64 || __i386

libcfa/src/heap.cfa

-              r56c44dc
+              rce55a81
 // Created On       : Tue Dec 19 21:58:35 2017
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Mon Aug 24 20:29:24 2020
 // Update Count     : 926
+// Last Modified On : Thu Sep  3 16:22:54 2020
+// Update Count     : 943
 //
 …
 #include "math.hfa"                                                                             // ceiling
 #include "bitmanip.hfa"                                                                 // is_pow2, ceiling2
-#define MIN(x, y) (y > x ? x : y)
 static bool traceHeap = false;
 …
                 headers( "realloc", naddr, header, freeElem, bsize, oalign );
                 memcpy( naddr, oaddr, MIN( osize, size ) );             // copy bytes
+                memcpy( naddr, oaddr, min( osize, size ) );             // copy bytes
                 free( oaddr );
 …
         #endif // __STATISTICS__
-        // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-  if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases
-  if ( unlikely( oaddr == 0p ) ) {
-                #ifdef __STATISTICS__
-                __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
-                #endif // __STATISTICS__
-                return memalignNoStats( nalign, size );
-        } // if
         if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum
         #ifdef __CFA_DEBUG__
 …
         #endif // __CFA_DEBUG__
+        HeapManager.Storage.Header * header;
+        HeapManager.FreeHeader * freeElem;
+        size_t bsize, oalign;
+        headers( "resize", oaddr, header, freeElem, bsize, oalign );
+        size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+        if ( oalign <= nalign && (uintptr_t)oaddr % nalign == 0 ) { // <= alignment and new alignment happens to match
+                if ( oalign > libAlign() ) {                                    // fake header ?
+        // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
+  if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases
+  if ( unlikely( oaddr == 0p ) ) {
+                #ifdef __STATISTICS__
+                __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
+                #endif // __STATISTICS__
+                return memalignNoStats( nalign, size );
+        } // if
+        // Attempt to reuse existing storage.
+        HeapManager.Storage.Header * header = headerAddr( oaddr );
+        if ( unlikely ( ( header->kind.fake.alignment & 1 == 1 &&       // old fake header ?
+                                 (uintptr_t)oaddr % nalign == 0 &&                              // lucky match ?
+                                 header->kind.fake.alignment <= nalign &&               // ok to leave LSB at 1
+                                 nalign <= 128 )                                                                // not too much alignment storage wasted ?
+                        ||   ( header->kind.fake.alignment & 1 != 1 &&          // old real header ( aligned on libAlign ) ?
+                                 nalign == libAlign() ) ) ) {                                   // new alignment also on libAlign
+                HeapManager.FreeHeader * freeElem;
+                size_t bsize, oalign;
+                headers( "resize", oaddr, header, freeElem, bsize, oalign );
+                size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+                if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage
                         headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
+                } // if
+                if ( size <= odsize && odsize <= size * 2 ) {   // allow 50% wasted storage for smaller size
+                        header->kind.real.blockSize &= -2;                      // turn off 0 fill
+                        header->kind.real.size = size;                          // reset allocation size
+                        header->kind.real.blockSize &= -2;              // turn off 0 fill
+                        header->kind.real.size = size;                  // reset allocation size
                         return oaddr;
                 } // if
 …
         #endif // __CFA_DEBUG__
+        // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
+  if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases
+  if ( unlikely( oaddr == 0p ) ) {
+                #ifdef __STATISTICS__
+                __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
+                __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
+                #endif // __STATISTICS__
+                return memalignNoStats( nalign, size );
+        } // if
         HeapManager.Storage.Header * header;
         HeapManager.FreeHeader * freeElem;
 …
         headers( "realloc", oaddr, header, freeElem, bsize, oalign );
+        if ( oalign <= nalign && (uintptr_t)oaddr % nalign == 0 ) { // <= alignment and new alignment happens to match
+                if ( oalign > libAlign() ) {                                    // fake header ?
+                        headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
+                } // if
+        // Attempt to reuse existing storage.
+        if ( unlikely ( ( header->kind.fake.alignment & 1 == 1 &&       // old fake header ?
+                                 (uintptr_t)oaddr % nalign == 0 &&                              // lucky match ?
+                                 header->kind.fake.alignment <= nalign &&               // ok to leave LSB at 1
+                                 nalign <= 128 )                                                                // not too much alignment storage wasted ?
+                        ||   ( header->kind.fake.alignment & 1 != 1 &&          // old real header ( aligned on libAlign ) ?
+                                 nalign == libAlign() ) ) ) {                                   // new alignment also on libAlign
+                headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
                 return realloc( oaddr, size );
         } // if
 …
         #endif // __STATISTICS__
-        // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-  if ( unlikely( size == 0 ) ) { free( oaddr ); return 0p; } // special cases
-  if ( unlikely( oaddr == 0p ) ) return memalignNoStats( nalign, size );
         size_t osize = header->kind.real.size;                          // old allocation size
         bool ozfill = (header->kind.real.blockSize & 2) != 0; // old allocation zero filled
 …
         headers( "realloc", naddr, header, freeElem, bsize, oalign );
         memcpy( naddr, oaddr, MIN( osize, size ) );                     // copy bytes
+        memcpy( naddr, oaddr, min( osize, size ) );                     // copy bytes
         free( oaddr );

libcfa/src/stdlib.hfa

-              r56c44dc
+              rce55a81
 // Created On       : Thu Jan 28 17:12:35 2016
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Fri Aug 14 23:38:50 2020
 // Update Count     : 504
+// Last Modified On : Tue Sep  1 20:32:34 2020
+// Update Count     : 505
 //
 …
 // Macro because of returns
-#define $VAR_ALLOC( allocation, alignment ) \
-        if ( _Alignof(T) <= libAlign() ) return (T *)(void *)allocation( (size_t)sizeof(T) ); /* C allocation */ \
-        else return (T *)alignment( _Alignof(T), sizeof(T) )
 #define $ARRAY_ALLOC( allocation, alignment, dim ) \
         if ( _Alignof(T) <= libAlign() ) return (T *)(void *)allocation( dim, (size_t)sizeof(T) ); /* C allocation */ \
         else return (T *)alignment( _Alignof(T), dim, sizeof(T) )
-#define $RE_SPECIALS( ptr, size, allocation, alignment ) \
-        if ( unlikely( size == 0 ) || unlikely( ptr == 0p ) ) { \
-                if ( unlikely( size == 0 ) ) free( ptr ); \
-                $VAR_ALLOC( malloc, memalign ); \
-        } /* if */
 static inline forall( dtype T | sized(T) ) {
         // Cforall safe equivalents, i.e., implicit size specification
         T * malloc( void ) {
+                $VAR_ALLOC( malloc, memalign );
+                if ( _Alignof(T) <= libAlign() ) return (T *)(void *)malloc( (size_t)sizeof(T) ); // C allocation
+                else return (T *)memalign( _Alignof(T), sizeof(T) );
         } // malloc
 …
         T * resize( T * ptr, size_t size ) {                            // CFA resize, eliminate return-type cast
-                $RE_SPECIALS( ptr, size, malloc, memalign );
                 if ( _Alignof(T) <= libAlign() ) return (T *)(void *)resize( (void *)ptr, size ); // CFA resize
                 else return (T *)(void *)resize( (void *)ptr, _Alignof(T), size ); // CFA resize
 …
         T * realloc( T * ptr, size_t size ) {                           // CFA realloc, eliminate return-type cast
-                $RE_SPECIALS( ptr, size, malloc, memalign );
                 if ( _Alignof(T) <= libAlign() ) return (T *)(void *)realloc( (void *)ptr, size ); // C realloc
                 else return (T *)(void *)realloc( (void *)ptr, _Alignof(T), size ); // CFA realloc
 …
                 size_t copy_end = 0;
                 if(Resize) {
                         ptr = (T*) (void *) resize( (int *)Resize, Align, Dim * size );
                 } else if (Realloc) {
+                if ( Resize ) {
+                        ptr = (T*) (void *) resize( (void *)Resize, Align, Dim * size );
+                } else if ( Realloc ) {
                         if (Fill.tag != '0') copy_end = min(malloc_size( Realloc ), Dim * size);
                         ptr = (T*) (void *) realloc( (int *)Realloc, Align, Dim * size );
+                        ptr = (T*) (void *) realloc( (void *)Realloc, Align, Dim * size );
                 } else {
                         ptr = (T*) (void *) memalign( Align, Dim * size );
 …
                         memset( (char *)ptr + copy_end, (int)Fill.c, Dim * size - copy_end );
                 } else if(Fill.tag == 't') {
                         for ( int i = copy_end; i <= Dim * size - size ; i += size ) {
+                        for ( int i = copy_end; i < Dim * size; i += size ) {
                                 memcpy( (char *)ptr + i, &Fill.t, size );
+                        }
 …
                         memcpy( (char *)ptr + copy_end, Fill.at, min(Dim * size - copy_end, Fill.size) );
                 } else if(Fill.tag == 'T') {
                         for ( int i = copy_end; i <= Dim * size - size ; i += size ) {
+                        for ( int i = copy_end; i < Dim * size; i += size ) {
                                 memcpy( (char *)ptr + i, Fill.at, size );
+                        }

src/AST/Convert.cpp

-              r56c44dc
+              rce55a81
                         new KeywordCastExpr(
                                 get<Expression>().accept1(node->arg),
+                                castTarget
+                                castTarget,
+                                {node->concrete_target.field, node->concrete_target.getter}
+                        )
                 );
 …
                                 old->location,
                                 GET_ACCEPT_1(arg, Expr),
+                                castTarget
+                                castTarget,
+                                {old->concrete_target.field, old->concrete_target.getter}
+                        )
                 );

src/AST/Copy.hpp

r56c44dc	rce55a81
21	21	#include "Stmt.hpp"
22	22	#include "Type.hpp"
	23	#include <unordered_set>
	24	#include <unordered_map>
23	25
24	26	namespace ast {

src/AST/Expr.hpp

-              r56c44dc
+              rce55a81
 public:
         ptr<Expr> arg;
+        struct Concrete {
+                std::string field;
+                std::string getter;
+                Concrete() = default;
+                Concrete(const Concrete &) = default;
+        };
         ast::AggregateDecl::Aggregate target;
+        Concrete concrete_target;
         KeywordCastExpr( const CodeLocation & loc, const Expr * a, ast::AggregateDecl::Aggregate t )
         : Expr( loc ), arg( a ), target( t ) {}
+        KeywordCastExpr( const CodeLocation & loc, const Expr * a, ast::AggregateDecl::Aggregate t, const Concrete & ct )
+        : Expr( loc ), arg( a ), target( t ), concrete_target( ct ) {}
         /// Get a name for the target type

src/AST/Pass.hpp

-              r56c44dc
+              rce55a81
 //
 // Several additional features are available through inheritance
+// | PureVisitor           - makes the visitor pure, it never modifies nodes in place and always
+//                           clones nodes it needs to make changes to
 // | WithTypeSubstitution  - provides polymorphic const TypeSubstitution * env for the
 //                           current expression
 …
 /// Keep track of the polymorphic const TypeSubstitution * env for the current expression
+/// If used the visitor will always clone nodes.
+struct PureVisitor {};
 struct WithConstTypeSubstitution {
         const TypeSubstitution * env = nullptr;

src/AST/Pass.impl.hpp

-              r56c44dc
+              rce55a81
 namespace ast {
+        template<typename node_t>
+        node_t * shallowCopy( const node_t * node );
         namespace __pass {
                 // Check if this is either a null pointer or a pointer to an empty container
 …
                 static inline bool empty( T * ptr ) {
                         return !ptr || ptr->empty();
+                }
+                template< typename core_t, typename node_t >
+                static inline node_t* mutate(const node_t *node) {
+                        return std::is_base_of<PureVisitor, core_t>::value ? ::ast::shallowCopy(node) : ::ast::mutate(node);
+                }
 …
                 if( __pass::differs(old_val, new_val) ) {
                         auto new_parent = mutate(parent);
+                        auto new_parent = __pass::mutate<core_t>(parent);
                         new_parent->*child = new_val;
                         parent = new_parent;
 …
                         if ( node->forall.empty() ) return;
                         node_t * mut = mutate( node );
+                        node_t * mut = __pass::mutate<core_t>( node );
                         mut->forall = subs->clone( node->forall, *this );
                         node = mut;
 …
                 if(mutated) {
                         auto n = mutate(node);
+                        auto n = __pass::mutate<core_t>(node);
                         n->clauses = std::move( new_clauses );
                         node = n;
 …
                         auto nval = call_accept( node->field ); \
                         if(nval != node->field ) { \
                                 auto nparent = mutate(node); \
+                                auto nparent = __pass::mutate<core_t>(node); \
                                 nparent->field = nval; \
                                 node = nparent; \
 …
                 if(mutated) {
                         auto n = mutate(node);
+                        auto n = __pass::mutate<core_t>(node);
                         n->associations = std::move( new_kids );
                         node = n;
 …
+                        }
                         if (mutated) {
                                 auto new_node = mutate( node );
+                                auto new_node = __pass::mutate<core_t>( node );
                                 new_node->typeEnv.swap( new_map );
                                 node = new_node;
 …
+                        }
                         if (mutated) {
                                 auto new_node = mutate( node );
+                                auto new_node = __pass::mutate<core_t>( node );
                                 new_node->varEnv.swap( new_map );
                                 node = new_node;

src/AST/Pass.proto.hpp

r56c44dc	rce55a81
22	22	template<typename core_t>
23	23	class Pass;
	24
	25	struct PureVisitor;
24	26
25	27	namespace __pass {

src/AST/Print.cpp

-              r56c44dc
+              rce55a81
 #include "Type.hpp"
 #include "TypeSubstitution.hpp"
+#include "CompilationState.h"
 #include "Common/utility.h" // for group_iterate
 …
                 if ( node->result ) {
+                        os << endl << indent << "... with resolved type:" << endl;
+                        ++indent;
+                        os << indent;
+                        node->result->accept( *this );
+                        --indent;
+                        if (!deterministic_output) {
+                                os << endl << indent << "... with resolved type:" << endl;
+                                ++indent;
+                                os << indent;
+                                node->result->accept( *this );
+                                --indent;
+                        }
+                }

src/AST/TypeSubstitution.hpp

-              r56c44dc
+              rce55a81
         template< typename SynTreeClass >
         struct ApplyResult {
-                // const SynTreeClass * node;
                 ast::ptr<SynTreeClass> node;
                 int count;
 …
 // definitition must happen after PassVisitor is included so that WithGuards can be used
 struct TypeSubstitution::Substituter : public WithGuards, public WithVisitorRef<Substituter> {
+struct TypeSubstitution::Substituter : public WithGuards, public WithVisitorRef<Substituter>, public PureVisitor {
                 static size_t traceId;
 …
         assert( input );
         Pass<Substituter> sub( *this, false );
         input = strict_dynamic_cast< const SynTreeClass * >( deepCopy(input)->accept( sub ) );
+        input = strict_dynamic_cast< const SynTreeClass * >( input->accept( sub ) );
         return { input, sub.core.subCount };
+}
 …
         assert( input );
         Pass<Substituter> sub( *this, true );
         input = strict_dynamic_cast< const SynTreeClass * >( deepCopy(input)->accept( sub ) );
+        input = strict_dynamic_cast< const SynTreeClass * >( input->accept( sub ) );
         return { input, sub.core.subCount };
+}

src/AST/module.mk

-              r56c44dc
+              rce55a81
 SRC_AST = \
         AST/AssertAcyclic.cpp \
+        AST/AssertAcyclic.hpp \
         AST/Attribute.cpp \
+        AST/Attribute.hpp \
+        AST/Bitfield.hpp \
+        AST/Chain.hpp \
         AST/Convert.cpp \
+        AST/Convert.hpp \
+        AST/Copy.hpp \
+        AST/CVQualifiers.hpp \
         AST/Decl.cpp \
+        AST/Decl.hpp \
         AST/DeclReplacer.cpp \
+        AST/DeclReplacer.hpp \
+        AST/Eval.hpp \
         AST/Expr.cpp \
+        AST/Expr.hpp \
         AST/ForallSubstitutionTable.cpp \
+        AST/ForallSubstitutionTable.hpp \
+        AST/ForallSubstitutor.hpp \
+        AST/FunctionSpec.hpp \
+        AST/Fwd.hpp \
         AST/GenericSubstitution.cpp \
+        AST/GenericSubstitution.hpp \
         AST/Init.cpp \
+        AST/Init.hpp \
+        AST/Label.hpp \
         AST/LinkageSpec.cpp \
+        AST/LinkageSpec.hpp \
         AST/Node.cpp \
+        AST/Node.hpp \
+        AST/ParseNode.hpp \
         AST/Pass.cpp \
+        AST/Pass.hpp \
+        AST/Pass.impl.hpp \
+        AST/Pass.proto.hpp \
         AST/Print.cpp \
+        AST/Print.hpp \
         AST/Stmt.cpp \
+        AST/Stmt.hpp \
+        AST/StorageClasses.hpp \
         AST/SymbolTable.cpp \
+        AST/SymbolTable.hpp \
         AST/Type.cpp \
+        AST/Type.hpp \
         AST/TypeEnvironment.cpp \
+        AST/TypeSubstitution.cpp
+        AST/TypeEnvironment.hpp \
+        AST/TypeSubstitution.cpp \
+        AST/TypeSubstitution.hpp \
+        AST/Visitor.hpp
 SRC += $(SRC_AST)

src/CodeGen/module.mk

-              r56c44dc
+              rce55a81
 SRC_CODEGEN = \
         CodeGen/CodeGenerator.cc \
+        CodeGen/CodeGenerator.h \
         CodeGen/FixMain.cc \
+        CodeGen/FixMain.h \
         CodeGen/GenType.cc \
+        CodeGen/OperatorTable.cc
+        CodeGen/GenType.h \
+        CodeGen/OperatorTable.cc \
+        CodeGen/OperatorTable.h \
+        CodeGen/Options.h
 SRC += $(SRC_CODEGEN) CodeGen/Generate.cc CodeGen/FixNames.cc
+SRC += $(SRC_CODEGEN) CodeGen/Generate.cc CodeGen/Generate.h CodeGen/FixNames.cc CodeGen/FixNames.h
 SRCDEMANGLE += $(SRC_CODEGEN)

src/CodeTools/module.mk

-              r56c44dc
+              rce55a81
 ###############################################################################
+SRC += CodeTools/DeclStats.cc \
+SRC += \
+        CodeTools/DeclStats.cc \
+        CodeTools/DeclStats.h \
         CodeTools/ResolvProtoDump.cc \
+        CodeTools/TrackLoc.cc
+        CodeTools/ResolvProtoDump.h \
+        CodeTools/TrackLoc.cc \
+        CodeTools/TrackLoc.h

src/Common/module.mk

-              r56c44dc
+              rce55a81
 SRC_COMMON = \
       Common/Assert.cc \
+      Common/CodeLocation.h \
+      Common/CompilerError.h \
+      Common/Debug.h \
+      Common/ErrorObjects.h \
       Common/Eval.cc \
+      Common/FilterCombos.h \
+      Common/Indenter.h \
       Common/PassVisitor.cc \
+      Common/PassVisitor.h \
+      Common/PassVisitor.impl.h \
+      Common/PassVisitor.proto.h \
+      Common/PersistentMap.h \
+      Common/ScopedMap.h \
       Common/SemanticError.cc \
+      Common/SemanticError.h \
+      Common/Stats.h \
+      Common/Stats/Base.h \
       Common/Stats/Counter.cc \
+      Common/Stats/Counter.h \
       Common/Stats/Heap.cc \
+      Common/Stats/Heap.h \
       Common/Stats/Stats.cc \
       Common/Stats/Time.cc \
+      Common/UniqueName.cc
+      Common/Stats/Time.h \
+      Common/UnimplementedError.h \
+      Common/UniqueName.cc \
+      Common/UniqueName.h \
+      Common/utility.h \
+      Common/VectorMap.h
 SRC += $(SRC_COMMON) Common/DebugMalloc.cc

src/Concurrency/module.mk

r56c44dc	rce55a81
15	15	###############################################################################
16	16
17		SRC += Concurrency/Keywords.cc Concurrency/~~Waitfor.cc~~
	17	SRC += Concurrency/Keywords.cc Concurrency/Keywords.h Concurrency/Waitfor.cc Concurrency/Waitfor.h
18	18	SRCDEMANGLE += Concurrency/Keywords.cc
19	19

src/ControlStruct/module.mk

-              r56c44dc
+              rce55a81
 SRC_CONTROLSTRUCT = \
         ControlStruct/ForExprMutator.cc \
+        ControlStruct/ForExprMutator.h \
         ControlStruct/LabelFixer.cc \
+        ControlStruct/LabelFixer.h \
         ControlStruct/LabelGenerator.cc \
+        ControlStruct/LabelGenerator.h \
         ControlStruct/MLEMutator.cc \
+        ControlStruct/Mutate.cc
+        ControlStruct/MLEMutator.h \
+        ControlStruct/Mutate.cc \
+        ControlStruct/Mutate.h
 SRC += $(SRC_CONTROLSTRUCT) ControlStruct/ExceptTranslate.cc
+SRC += $(SRC_CONTROLSTRUCT) ControlStruct/ExceptTranslate.cc ControlStruct/ExceptTranslate.h
 SRCDEMANGLE += $(SRC_CONTROLSTRUCT)

src/GenPoly/module.mk

-              r56c44dc
+              rce55a81
 SRC += GenPoly/Box.cc \
+       GenPoly/Box.h \
+       GenPoly/ErasableScopedMap.h \
+       GenPoly/FindFunction.cc \
+       GenPoly/FindFunction.h \
        GenPoly/GenPoly.cc \
+       GenPoly/GenPoly.h \
+       GenPoly/InstantiateGeneric.cc \
+       GenPoly/InstantiateGeneric.h \
+       GenPoly/Lvalue.cc \
+       GenPoly/Lvalue.h \
+       GenPoly/ScopedSet.h \
        GenPoly/ScrubTyVars.cc \
        GenPoly/Lvalue.cc \
+       GenPoly/ScrubTyVars.h \
        GenPoly/Specialize.cc \
+       GenPoly/FindFunction.cc \
+       GenPoly/InstantiateGeneric.cc
+       GenPoly/Specialize.h
 SRCDEMANGLE += GenPoly/GenPoly.cc GenPoly/Lvalue.cc
+SRCDEMANGLE += GenPoly/GenPoly.cc GenPoly/GenPoly.h GenPoly/Lvalue.cc GenPoly/Lvalue.h

src/InitTweak/module.mk

-              r56c44dc
+              rce55a81
 ###############################################################################
+SRC += InitTweak/GenInit.cc \
+SRC += \
+        InitTweak/FixGlobalInit.cc \
+        InitTweak/FixGlobalInit.h \
         InitTweak/FixInit.cc \
+        InitTweak/FixGlobalInit.cc \
+        InitTweak/InitTweak.cc
+        InitTweak/FixInit.h \
+        InitTweak/GenInit.cc \
+        InitTweak/GenInit.h \
+        InitTweak/InitTweak.cc \
+        InitTweak/InitTweak.h
+SRCDEMANGLE += InitTweak/GenInit.cc \
+        InitTweak/InitTweak.cc
+SRCDEMANGLE += \
+        InitTweak/GenInit.cc \
+        InitTweak/GenInit.h \
+        InitTweak/InitTweak.cc \
+        InitTweak/InitTweak.h

src/Makefile.am

-              r56c44dc
+              rce55a81
 SRC = main.cc \
+      CompilationState.cc \
+      CompilationState.h \
       MakeLibCfa.cc \
+      CompilationState.cc
+        MakeLibCfa.h
 SRCDEMANGLE = CompilationState.cc
 …
 ___driver_cfa_cpp_SOURCES = $(SRC)
 ___driver_cfa_cpp_LDADD = -ldl $(LIBPROFILER) $(LIBTCMALLOC)
+EXTRA_DIST = include/cassert include/optional BasicTypes-gen.cc
 AM_CXXFLAGS = @HOST_FLAGS@ -Wno-deprecated -Wall -Wextra -DDEBUG_ALL -I./Parser -I$(srcdir)/Parser -I$(srcdir)/include -DYY_NO_INPUT -O3 -g -std=c++14 $(TCMALLOCFLAG)

src/Parser/module.mk

-              r56c44dc
+              rce55a81
        Parser/ExpressionNode.cc \
        Parser/InitializerNode.cc \
+       Parser/lex.ll \
        Parser/ParseNode.cc \
+       Parser/ParseNode.h \
+       Parser/parser.yy \
+       Parser/ParserTypes.h \
+       Parser/parserutility.cc \
+       Parser/parserutility.h \
        Parser/StatementNode.cc \
        Parser/TypeData.cc \
+       Parser/TypeData.h \
        Parser/TypedefTable.cc \
+       Parser/lex.ll \
+       Parser/parser.yy \
+       Parser/parserutility.cc
+       Parser/TypedefTable.h
 MOSTLYCLEANFILES += Parser/lex.cc Parser/parser.cc Parser/parser.hh Parser/parser.output

src/ResolvExpr/CandidateFinder.cpp

-              r56c44dc
+              rce55a81
+                }
+                void postvisit( const ast::KeywordCastExpr * castExpr ) {
+                        const auto & loc = castExpr->location;
+                        assertf( castExpr->result, "Cast target should have been set in Validate." );
+                        auto ref = castExpr->result.strict_as<ast::ReferenceType>();
+                        auto inst = ref->base.strict_as<ast::StructInstType>();
+                        auto target = inst->base.get();
+                        CandidateFinder finder{ symtab, tenv };
+                        auto pick_alternatives = [target, this](CandidateList & found, bool expect_ref) {
+                                for(auto & cand : found) {
+                                        const ast::Type * expr = cand->expr->result.get();
+                                        if(expect_ref) {
+                                                auto res = dynamic_cast<const ast::ReferenceType*>(expr);
+                                                if(!res) { continue; }
+                                                expr = res->base.get();
+                                        }
+                                        if(auto insttype = dynamic_cast<const ast::TypeInstType*>(expr)) {
+                                                auto td = cand->env.lookup(insttype->name);
+                                                if(!td) { continue; }
+                                                expr = td->bound.get();
+                                        }
+                                        if(auto base = dynamic_cast<const ast::StructInstType*>(expr)) {
+                                                if(base->base == target) {
+                                                        candidates.push_back( std::move(cand) );
+                                                        reason.code = NoReason;
+                                                }
+                                        }
+                                }
+                        };
+                        try {
+                                // Attempt 1 : turn (thread&)X into ($thread&)X.__thrd
+                                // Clone is purely for memory management
+                                std::unique_ptr<const ast::Expr> tech1 { new ast::UntypedMemberExpr(loc, new ast::NameExpr(loc, castExpr->concrete_target.field), castExpr->arg) };
+                                // don't prune here, since it's guaranteed all alternatives will have the same type
+                                finder.find( tech1.get(), ResolvMode::withoutPrune() );
+                                pick_alternatives(finder.candidates, false);
+                                return;
+                        } catch(SemanticErrorException & ) {}
+                        // Fallback : turn (thread&)X into ($thread&)get_thread(X)
+                        std::unique_ptr<const ast::Expr> fallback { ast::UntypedExpr::createDeref(loc,  new ast::UntypedExpr(loc, new ast::NameExpr(loc, castExpr->concrete_target.getter), { castExpr->arg })) };
+                        // don't prune here, since it's guaranteed all alternatives will have the same type
+                        finder.find( fallback.get(), ResolvMode::withoutPrune() );
+                        pick_alternatives(finder.candidates, true);
+                        // Whatever happens here, we have no more fallbacks
+                }
                 void postvisit( const ast::UntypedMemberExpr * memberExpr ) {
                         CandidateFinder aggFinder{ symtab, tenv };

src/ResolvExpr/Unify.cc

-              r56c44dc
+              rce55a81
                 /// If this isn't done when satifying ttype assertions, then argument lists can have
                 /// different size and structure when they should be compatible.
                 struct TtypeExpander_new : public ast::WithShortCircuiting {
+                struct TtypeExpander_new : public ast::WithShortCircuiting, public ast::PureVisitor {
                         ast::TypeEnvironment & tenv;
 …
                                 // TtypeExpander pass is impure (may mutate nodes in place)
                                 // need to make nodes shared to prevent accidental mutation
+                                ast::ptr<ast::DeclWithType> dc = d;
+                                dc = dc->accept( expander );
+                                ast::ptr<ast::DeclWithType> dc = d->accept(expander);
                                 auto types = flatten( dc->get_type() );
                                 for ( ast::ptr< ast::Type > & t : types ) {
 …
                         ast::Pass<TtypeExpander_new> expander{ tenv };
+                        ast::ptr<ast::TupleType> tuplec = tuple;
+                        ast::ptr<ast::TupleType> tuple2c = tuple2;
+                        const ast::Type * flat = tuplec->accept( expander );
+                        const ast::Type * flat2 = tuple2c->accept( expander );
+                        const ast::Type * flat = tuple->accept( expander );
+                        const ast::Type * flat2 = tuple2->accept( expander );
                         auto types = flatten( flat );

src/ResolvExpr/module.mk

-              r56c44dc
+              rce55a81
       ResolvExpr/Alternative.cc \
       ResolvExpr/AlternativeFinder.cc \
+      ResolvExpr/AlternativeFinder.h \
+      ResolvExpr/Alternative.h \
       ResolvExpr/Candidate.cpp \
       ResolvExpr/CandidateFinder.cpp \
+      ResolvExpr/CandidateFinder.hpp \
+      ResolvExpr/Candidate.hpp \
       ResolvExpr/CastCost.cc \
       ResolvExpr/CommonType.cc \
       ResolvExpr/ConversionCost.cc \
+      ResolvExpr/ConversionCost.h \
+      ResolvExpr/Cost.h \
       ResolvExpr/CurrentObject.cc \
+      ResolvExpr/CurrentObject.h \
       ResolvExpr/ExplodedActual.cc \
+      ResolvExpr/ExplodedActual.h \
       ResolvExpr/ExplodedArg.cpp \
+      ResolvExpr/ExplodedArg.hpp \
       ResolvExpr/FindOpenVars.cc \
+      ResolvExpr/FindOpenVars.h \
       ResolvExpr/Occurs.cc \
       ResolvExpr/PolyCost.cc \
 …
       ResolvExpr/PtrsCastable.cc \
       ResolvExpr/RenameVars.cc \
+      ResolvExpr/RenameVars.h \
       ResolvExpr/ResolveAssertions.cc \
+      ResolvExpr/ResolveAssertions.h \
       ResolvExpr/Resolver.cc \
+      ResolvExpr/Resolver.h \
       ResolvExpr/ResolveTypeof.cc \
+      ResolvExpr/ResolveTypeof.h \
+      ResolvExpr/ResolvMode.h \
       ResolvExpr/SatisfyAssertions.cpp \
+      ResolvExpr/SatisfyAssertions.hpp \
       ResolvExpr/SpecCost.cc \
       ResolvExpr/TypeEnvironment.cc \
+      ResolvExpr/Unify.cc
+      ResolvExpr/TypeEnvironment.h \
+      ResolvExpr/typeops.h \
+      ResolvExpr/Unify.cc \
+      ResolvExpr/Unify.h \
+      ResolvExpr/WidenMode.h
+SRC += $(SRC_RESOLVEXPR) ResolvExpr/AlternativePrinter.cc
+SRC += $(SRC_RESOLVEXPR) ResolvExpr/AlternativePrinter.cc ResolvExpr/AlternativePrinter.h
 SRCDEMANGLE += $(SRC_RESOLVEXPR)

src/SymTab/module.mk

-              r56c44dc
+              rce55a81
 SRC_SYMTAB = \
       SymTab/Autogen.cc \
+      SymTab/Autogen.h \
       SymTab/FixFunction.cc \
+      SymTab/FixFunction.h \
       SymTab/Indexer.cc \
+      SymTab/Indexer.h \
       SymTab/Mangler.cc \
       SymTab/ManglerCommon.cc \
+      SymTab/Validate.cc
+      SymTab/Mangler.h \
+      SymTab/Validate.cc \
+      SymTab/Validate.h
 SRC += $(SRC_SYMTAB)

src/SynTree/Expression.cc

-              r56c44dc
+              rce55a81
 #include "Type.h"                    // for Type, BasicType, Type::Qualifiers
 #include "TypeSubstitution.h"        // for TypeSubstitution
+#include "CompilationState.h"        // for deterministic_output
 #include "GenPoly/Lvalue.h"
 …
         if ( result ) {
+                os << std::endl << indent << "with resolved type:" << std::endl;
+                os << (indent+1);
+                result->print( os, indent+1 );
+                if (!deterministic_output) {
+                        os << std::endl << indent << "with resolved type:" << std::endl;
+                        os << (indent+1);
+                        result->print( os, indent+1 );
+                }
+        }
 …
+}
+KeywordCastExpr::KeywordCastExpr( Expression * arg, AggregateDecl::Aggregate target ) : Expression(), arg(arg), target( target ) {
+}
+KeywordCastExpr::KeywordCastExpr( const KeywordCastExpr & other ) : Expression( other ), arg( maybeClone( other.arg ) ), target( other.target ) {
+}
+KeywordCastExpr::KeywordCastExpr( Expression * arg, AggregateDecl::Aggregate target ) : Expression(), arg(arg), target( target ) {}
+KeywordCastExpr::KeywordCastExpr( Expression * arg, AggregateDecl::Aggregate target, const KeywordCastExpr::Concrete & concrete_target ) : Expression(), arg(arg), target( target ), concrete_target(concrete_target) {}
+KeywordCastExpr::KeywordCastExpr( const KeywordCastExpr & other ) : Expression( other ), arg( maybeClone( other.arg ) ), target( other.target ) {}
 KeywordCastExpr::~KeywordCastExpr() {

src/SynTree/Expression.h

r56c44dc	rce55a81
248	248
249	249	KeywordCastExpr( Expression * arg, AggregateDecl::Aggregate target );
	250	KeywordCastExpr( Expression * arg, AggregateDecl::Aggregate target, const Concrete & concrete_target );
250	251	KeywordCastExpr( const KeywordCastExpr & other );
251	252	virtual ~KeywordCastExpr();

src/SynTree/module.mk

-              r56c44dc
+              rce55a81
       SynTree/ApplicationExpr.cc \
       SynTree/ArrayType.cc \
+      SynTree/Attribute.cc \
+      SynTree/Attribute.h \
       SynTree/AttrType.cc \
       SynTree/Attribute.cc \
+      SynTree/BaseSyntaxNode.h \
       SynTree/BasicType.cc \
       SynTree/CommaExpr.cc \
       SynTree/CompoundStmt.cc \
       SynTree/Constant.cc \
+      SynTree/Constant.h \
+      SynTree/Declaration.cc \
+      SynTree/Declaration.h \
+      SynTree/DeclarationWithType.cc \
       SynTree/DeclReplacer.cc \
+      SynTree/DeclReplacer.h \
       SynTree/DeclStmt.cc \
-      SynTree/Declaration.cc \
-      SynTree/DeclarationWithType.cc \
       SynTree/Expression.cc \
+      SynTree/Expression.h \
       SynTree/FunctionDecl.cc \
       SynTree/FunctionType.cc \
       SynTree/Initializer.cc \
+      SynTree/Initializer.h \
+      SynTree/Label.h \
       SynTree/LinkageSpec.cc \
+      SynTree/LinkageSpec.h \
+      SynTree/Mutator.h \
       SynTree/NamedTypeDecl.cc \
       SynTree/ObjectDecl.cc \
 …
       SynTree/ReferenceType.cc \
       SynTree/Statement.cc \
+      SynTree/Statement.h \
+      SynTree/SynTree.h \
       SynTree/TupleExpr.cc \
       SynTree/TupleType.cc \
 …
       SynTree/TypeDecl.cc \
       SynTree/TypeExpr.cc \
+      SynTree/Type.h \
+      SynTree/TypeofType.cc \
       SynTree/TypeSubstitution.cc \
       SynTree/TypeofType.cc \
+      SynTree/TypeSubstitution.h \
       SynTree/VarArgsType.cc \
+      SynTree/Visitor.h \
       SynTree/VoidType.cc \
       SynTree/ZeroOneType.cc

src/Tuples/module.mk

-              r56c44dc
+              rce55a81
 ###############################################################################
+SRC += Tuples/TupleAssignment.cc Tuples/TupleExpansion.cc Tuples/Explode.cc \
+        Tuples/Tuples.cc
+SRCDEMANGLE += Tuples/TupleAssignment.cc Tuples/TupleExpansion.cc Tuples/Explode.cc \
+        Tuples/Tuples.cc
+SRC_TUPLES = \
+        Tuples/Explode.cc \
+        Tuples/Explode.h \
+        Tuples/TupleAssignment.cc \
+        Tuples/TupleExpansion.cc \
+        Tuples/Tuples.cc \
+        Tuples/Tuples.h
+SRC += $(SRC_TUPLES)
+SRCDEMANGLE += $(SRC_TUPLES)

src/Validate/module.mk

r56c44dc	rce55a81
15	15	###############################################################################
16	16
17		SRC += Validate/HandleAttributes.cc Validate/~~FindSpecialDecls.cc~~
18		SRCDEMANGLE += Validate/HandleAttributes.cc Validate/~~FindSpecialDecls.cc~~
	17	SRC += Validate/HandleAttributes.cc Validate/HandleAttributes.h Validate/FindSpecialDecls.cc Validate/FindSpecialDecls.h
	18	SRCDEMANGLE += Validate/HandleAttributes.cc Validate/HandleAttributes.h Validate/FindSpecialDecls.cc Validate/FindSpecialDecls.h

src/Virtual/module.mk

r56c44dc	rce55a81
15	15	###############################################################################
16	16
17		SRC += Virtual/ExpandCasts.cc
	17	SRC += Virtual/ExpandCasts.cc Virtual/ExpandCasts.h

tests/.expect/alloc-ERROR.txt

-              r56c44dc
+              rce55a81
 alloc.cfa:362:1 error: No reasonable alternatives for expression Applying untyped:
+alloc.cfa:361:1 error: No reasonable alternatives for expression Applying untyped:
   Name: ?=?
 ...to:
 …
           Name: stp
-      with resolved type:
-        unsigned long int
 alloc.cfa:363:1 error: No reasonable alternatives for expression Applying untyped:
+alloc.cfa:362:1 error: No reasonable alternatives for expression Applying untyped:
   Name: ?=?
 ...to:
 …
     Name: stp
     constant expression (10 10: signed int)
-    with resolved type:
-      signed int
 alloc.cfa:364:1 error: No reasonable alternatives for expression Applying untyped:
+alloc.cfa:363:1 error: No reasonable alternatives for expression Applying untyped:
   Name: ?=?
 ...to:

tests/.expect/alloc.txt

-              r56c44dc
+              rce55a81
 xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede
 CFA array alloc, fill 0xef
 xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef
+xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef
 CFA array alloc, fill from array
 xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef, 0xefefefef 0xefefefef,
+xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef, 0xdeadbeef 0xdeadbeef,
 C realloc
 xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef
+xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef
 CFA realloc
 xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101
+xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101
 CFA realloc array alloc

tests/.expect/castError.txt

-              r56c44dc
+              rce55a81
   Name: f
 ... to:
-  char
-with resolved type:
   char Alternatives are:
 Cost ( 1, 0, 0, 0, 0, 0, 0 ): Explicit Cast of:
 …
       ... returning nothing
-      with resolved type:
-        pointer to function
-          accepting unspecified arguments
-        ... returning nothing
     ... to:
-      char
-    with resolved type:
       char
   (types:
 …
 Cost ( 1, 0, 0, 0, 0, 0, 0 ): Explicit Cast of:
       Variable Expression: f: double
-      with resolved type:
-        double
     ... to:
-      char
-    with resolved type:
       char
   (types:
 …
 Cost ( 1, 0, 0, 0, 0, 0, 0 ): Explicit Cast of:
       Variable Expression: f: signed int
-      with resolved type:
-        signed int
     ... to:
-      char
-    with resolved type:
       char
   (types:
 …
   Comma Expression:
     constant expression (3 3: signed int)
-    with resolved type:
-      signed int
     Name: v
+... to: nothing
+with resolved type:
+  void  Alternatives are:
+... to: nothing Alternatives are:
 Cost ( 0, 0, 2, 0, 0, 0, 0 ): Generated Cast of:
       Comma Expression:
         constant expression (3 3: signed int)
-        with resolved type:
-          signed int
         Variable Expression: v: unsigned char
-        with resolved type:
-          unsigned char
-      with resolved type:
-        unsigned char
     ... to: nothing
-    with resolved type:
-      void
   (types:
     void
 …
       Comma Expression:
         constant expression (3 3: signed int)
-        with resolved type:
-          signed int
         Variable Expression: v: signed short int
-        with resolved type:
-          signed short int
-      with resolved type:
-        signed short int
     ... to: nothing
-    with resolved type:
-      void
   (types:
     void
 …
     char
-with resolved type:
-  instance of struct S with body 1
-  ... with parameters
-    char

tests/.expect/init1.txt

-              r56c44dc
+              rce55a81
 ... to:
   reference to signed int
-with resolved type:
-  reference to signed int
 init1.cfa:97:1 error: No reasonable alternatives for expression Applying untyped:
   Name: ?{}
 …
   Generated Cast of:
     Variable Expression: _retval_f_py: pointer to signed int
-    with resolved type:
-      pointer to signed int
   ... to:
-    reference to pointer to signed int
-  with resolved type:
     reference to pointer to signed int
   Name: px
 …
 ... to:
   reference to float
-with resolved type:
-  reference to float
 init1.cfa:107:1 error: No reasonable alternatives for expression Applying untyped:
   Name: ?{}
 …
   Generated Cast of:
     Variable Expression: _retval_f_py2: pointer to float
-    with resolved type:
-      pointer to float
   ... to:
-    reference to pointer to float
-  with resolved type:
     reference to pointer to float
   Name: cpx
 …
 ... to:
   reference to instance of type T (not function type)
-with resolved type:
-  reference to instance of type T (not function type)
 init1.cfa:118:1 error: No reasonable alternatives for expression Applying untyped:
   Name: ?{}
 …
   Generated Cast of:
     Variable Expression: _retval_anycvt: pointer to instance of type T (not function type)
-    with resolved type:
-      pointer to instance of type T (not function type)
   ... to:
-    reference to pointer to instance of type T (not function type)
-  with resolved type:
     reference to pointer to instance of type T (not function type)
   Name: s

tests/Makefile.am

r56c44dc	rce55a81
67	67	avl_test_SOURCES = avltree/avl_test.cfa avltree/avl0.cfa avltree/avl1.cfa avltree/avl2.cfa avltree/avl3.cfa avltree/avl4.cfa avltree/avl-private.cfa
68	68	# automake doesn't know we still need C/CPP rules so pretend like we have a C program
69		_dummy_hack_SOURCES = .dummy_hack.c .dummy_hackxx.cpp
	69	nodist__dummy_hack_SOURCES = .dummy_hack.c .dummy_hackxx.cpp
70	70
71	71	#----------------------------------------------------------------------------------------------------------------

tests/alloc.cfa

-              r56c44dc
+              rce55a81
         free( ip );
         ip = alloc_set( fill );                                                         // CFA alloc, fill
+        ip = alloc( fill`fill );                                                                // CFA alloc, fill
         printf( "CFA alloc, fill %08x\n", *ip );
         free( ip );
         ip = alloc_set( 3 );                                                            // CFA alloc, fill
+        ip = alloc( 3`fill );                                                           // CFA alloc, fill
         printf( "CFA alloc, fill %d\n", *ip );
         free( ip );
 …
         free( ip );
         ip = alloc_set( 2 * dim, fill );                                        // CFA array alloc, fill
+        ip = alloc( 2 * dim, fill`fill );                                       // CFA array alloc, fill
         printf( "CFA array alloc, fill %#hhx\n", fill );
         for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
 …
         free( ip );
         ip = alloc_set( 2 * dim, 0xdeadbeef );                          // CFA array alloc, fill
+        ip = alloc( 2 * dim, ((int)0xdeadbeef)`fill );                          // CFA array alloc, fill
         printf( "CFA array alloc, fill %#hhx\n", 0xdeadbeef );
         for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
 …
         // do not free
         ip1 = alloc_set( 2 * dim, ip, 2 * dim );                                // CFA array alloc, fill
+        ip1 = alloc( 2 * dim, [ip, 2 * dim]`fill );                             // CFA array alloc, fill
         printf( "CFA array alloc, fill from array\n" );
         for ( i; 2 * dim ) { printf( "%#x %#x, ", ip[i], ip1[i] ); }
 …
         printf( "\n" );
         ip = alloc( ip, dim );                                                          // CFA realloc array alloc
+        ip = alloc( dim, ip`realloc );                                                          // CFA realloc array alloc
         for ( i; dim ) { ip[i] = 0xdeadbeef; }
         printf( "CFA realloc array alloc\n" );
 …
         // do not free
         ip = alloc( ip, 2 * dim );                                                      // CFA realloc array alloc
+        ip = alloc( 2 * dim, ip`realloc );                                                      // CFA realloc array alloc
         for ( i; dim ~ 2 * dim ) { ip[i] = 0x1010101; }         // fill upper part
         printf( "CFA realloc array alloc\n" );
 …
         // do not free
         ip = alloc( ip, dim );                                                          // CFA realloc array alloc
+        ip = alloc( dim, ip`realloc );                                                          // CFA realloc array alloc
         printf( "CFA realloc array alloc\n" );
         for ( i; dim ) { printf( "%#x ", ip[i] ); }
 …
         // do not free
         ip = alloc_set( ip, 3 * dim, fill );                            // CFA realloc array alloc, fill
+        ip = alloc( 3 * dim, ip`realloc, fill`fill );                           // CFA realloc array alloc, fill
         printf( "CFA realloc array alloc, fill\n" );
         for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); }
 …
         // do not free
         ip = alloc_set( ip, dim, fill );                                        // CFA realloc array alloc, fill
+        ip = alloc( dim, ip`realloc, fill`fill );                                       // CFA realloc array alloc, fill
         printf( "CFA realloc array alloc, fill\n" );
         for ( i; dim ) { printf( "%#x ", ip[i] ); }
 …
         // do not free
         ip = alloc_set( ip, 3 * dim, fill );                            // CFA realloc array alloc, fill
+        ip = alloc( 3 * dim, ip`realloc, fill`fill );                           // CFA realloc array alloc, fill
         printf( "CFA realloc array alloc, fill\n" );
         for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); }
 …
         // do not free
 #if 0 // FIX ME
         ip = alloc_set( ip, 5 * dim, 5 );                                       // CFA realloc array alloc, 5
+        ip = alloc( 5 * dim, ip`realloc, 5`fill );                                      // CFA realloc array alloc, 5
         printf( "CFA realloc array alloc, 5\n" );
         for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); }
 …
         // do not free
         ip = alloc_set( ip, dim, 5 );                                           // CFA realloc array alloc, 5
+        ip = alloc( dim, ip`realloc, 5`fill );                                          // CFA realloc array alloc, 5
         printf( "CFA realloc array alloc, 5\n" );
         for ( i; dim ) { printf( "%#x ", ip[i] ); }
 …
         // do not free
         ip = alloc_set( ip, 5 * dim, 5 );                                       // CFA realloc array alloc, 5
+        ip = alloc( 5 * dim, ip`realloc, 5`fill );                                      // CFA realloc array alloc, 5
         printf( "CFA realloc array alloc, 5\n" );
         for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); }
 …
     ip = alloc();
         *ip = 5;
     double * dp = alloc( ip );
+    double * dp = alloc( ip`resize );
         *dp = 5.5;
     S * sp = alloc( dp );
+    S * sp = alloc( dp`resize );
         *sp = (S){ {0, 1, 2, 3, 4} };
     ip = alloc( sp );
+    ip = alloc( sp`resize );
         *ip = 3;
     free( ip );
 …
     ip = alloc( 5 );
         for ( i; 5 ) { ip[i] = 5; }
     dp = alloc( ip, 5 );
+    dp = alloc( 5, ip`resize );
         for ( i; 5 ) { dp[i] = 5.5; }
     sp = alloc( dp, 5 );
+    sp = alloc( 5, dp`resize );
         for ( i; 5 ) { sp[i] = (S){ {0, 1, 2, 3, 4} }; }
     ip = alloc( sp, 3 );
+    ip = alloc( 3, sp`resize );
         for ( i; 3 ) { ip[i] = 3; }
     ip = alloc( ip, 7 );
+    ip = alloc( 7, ip`realloc );
         for ( i; 7 ) { ip[i] = 7; }
     ip = alloc( ip, 7, false );
+    ip = alloc( 7, ip`resize );
         for ( i; 7 ) { ip[i] = 7; }
     free( ip );
 …
         free( stp );
         stp = &(*alloc_align( Alignment)){ 42, 42.5 };          // CFA alloc_align
+        stp = &(*alloc( Alignment`align)){ 42, 42.5 };          // CFA alloc_align
         assert( (uintptr_t)stp % Alignment == 0 );
         printf( "CFA alloc_align %d %g\n", stp->x, stp->y );
         free( stp );
         stp = &(*alloc_align( Alignment )){ 42, 42.5 };         // CFA alloc_align
+        stp = &(*alloc( Alignment`align )){ 42, 42.5 };         // CFA alloc_align
         assert( (uintptr_t)stp % Alignment == 0 );
         printf( "CFA alloc_align %d %g\n", stp->x, stp->y );
         free( stp );
         stp = alloc_align_set( Alignment, fill );                       // CFA memalign, fill
+        stp = alloc( Alignment`align, fill`fill );                      // CFA memalign, fill
         assert( (uintptr_t)stp % Alignment == 0 );
         printf( "CFA alloc_align fill %#x %a\n", stp->x, stp->y );
         free( stp );
         stp = alloc_align_set( Alignment, (Struct){ 42, 42.5 } ); // CFA memalign, fill
+        stp = alloc( Alignment`align, (Struct){ 42, 42.5 }`fill ); // CFA memalign, fill
         assert( (uintptr_t)stp % Alignment == 0 );
         printf( "CFA alloc_align fill %d %g\n", stp->x, stp->y );
         // do not free
         stp = &(*alloc_align( stp, 4096 )){ 42, 42.5 };         // CFA realign
+        stp = &(*alloc( stp`realloc, 4096`align )){ 42, 42.5 };         // CFA realign
         assert( (uintptr_t)stp % 4096 == 0 );
         printf( "CFA alloc_align %d %g\n", stp->x, stp->y );
 …
         printf( "\n" );
         stp = alloc_align( Alignment, dim );                // CFA array memalign
+        stp = alloc( dim, Alignment`align );                // CFA array memalign
         assert( (uintptr_t)stp % Alignment == 0 );
         for ( i; dim ) { stp[i] = (Struct){ 42, 42.5 }; }
 …
         free( stp );
         stp = alloc_align_set( Alignment, dim, fill );          // CFA array memalign, fill
+        stp = alloc( dim, Alignment`align, fill`fill );         // CFA array memalign, fill
         assert( (uintptr_t)stp % Alignment == 0 );
         printf( "CFA array alloc_align, fill\n" );
 …
         free( stp );
         stp = alloc_align_set( Alignment, dim, (Struct){ 42, 42.5 } ); // CFA array memalign, fill
+        stp = alloc( dim, Alignment`align, ((Struct){ 42, 42.5 })`fill ); // CFA array memalign, fill
         assert( (uintptr_t)stp % Alignment == 0 );
         printf( "CFA array alloc_align, fill\n" );
 …
         // do not free
         stp1 = alloc_align_set( Alignment, dim, stp, dim );     // CFA array memalign, fill
+        stp1 = alloc( dim, Alignment`align, [stp, dim]`fill );  // CFA array memalign, fill
         assert( (uintptr_t)stp % Alignment == 0 );
         printf( "CFA array alloc_align, fill array\n" );
 …
         free( stp1 );
         stp = alloc_align( stp, 4096, dim );                            // CFA aligned realloc array
+        stp = alloc( dim, stp`realloc, 4096`align );                            // CFA aligned realloc array
         assert( (uintptr_t)stp % 4096 == 0 );
         for ( i; dim ) { stp[i] = (Struct){ 42, 42.5 }; }
 …
         for ( i; dim ) { printf( "%#x %a, ", sta1[i].x, sta1[i].y ); }
         printf( "\n" );
         // new, non-array types

tests/concurrent/examples/boundedBufferEXT.cfa

-              r56c44dc
+              rce55a81
 //
 // Cforall Version 1.0.0 Copyright (C) 2018 University of Waterloo
 //
+//
 // The contents of this file are covered under the licence agreement in the
 // file "LICENCE" distributed with Cforall.
 …
+}
+enum { Prods = 4, Cons = 5 };
+Producer * prods[Prods];
+Consumer * cons[Cons];
 int main() {
         Buffer(int) buffer;
-        enum { Prods = 4, Cons = 5 };
-        Producer * prods[Prods];
-        Consumer * cons[Cons];
         int sums[Cons];
         int i;

tests/errors/.expect/completeType.x64.txt

-              r56c44dc
+              rce55a81
     Name: x
+... to: nothing
+with resolved type:
+  void  Alternatives are:
+... to: nothing Alternatives are:
 Cost ( 0, 1, 2, 0, 1, -1, 0 ): Generated Cast of:
       Application of
 …
-        with resolved type:
-          pointer to forall
-            _90_4_DT: data type
-            function
-          ... with parameters
-            intrinsic pointer to instance of type _90_4_DT (not function type)
-          ... returning
-            _retval__operator_deref: reference to instance of type _90_4_DT (not function type)
-            ... with attributes:
-              Attribute with name: unused
       ... to arguments
         Variable Expression: x: pointer to instance of struct A with body 0
-        with resolved type:
-          pointer to instance of struct A with body 0
-      with resolved type:
-        reference to instance of struct A with body 0
     ... to: nothing
-    with resolved type:
-      void
   (types:
     void
 …
-        with resolved type:
-          pointer to forall
-            _90_4_DT: data type
-            function
-          ... with parameters
-            intrinsic pointer to instance of type _90_4_DT (not function type)
-          ... returning
-            _retval__operator_deref: reference to instance of type _90_4_DT (not function type)
-            ... with attributes:
-              Attribute with name: unused
       ... to arguments
         Variable Expression: x: pointer to instance of struct B with body 1
-        with resolved type:
-          pointer to instance of struct B with body 1
-      with resolved type:
-        reference to instance of struct B with body 1
     ... to: nothing
-    with resolved type:
-      void
   (types:
     void
 …
             ... returning nothing
-            with resolved type:
-              pointer to forall
-                _109_0_T: sized data type
-                ... with assertions
-                  ?=?: pointer to function
-                  ... with parameters
-                    reference to instance of type _109_0_T (not function type)
-                    instance of type _109_0_T (not function type)
-                  ... returning
-                    _retval__operator_assign: instance of type _109_0_T (not function type)
-                    ... with attributes:
-                      Attribute with name: unused
-                  ?{}: pointer to function
-                  ... with parameters
-                    reference to instance of type _109_0_T (not function type)
-                  ... returning nothing
-                  ?{}: pointer to function
-                  ... with parameters
-                    reference to instance of type _109_0_T (not function type)
-                    instance of type _109_0_T (not function type)
-                  ... returning nothing
-                  ^?{}: pointer to function
-                  ... with parameters
-                    reference to instance of type _109_0_T (not function type)
-                  ... returning nothing
-                function
-              ... with parameters
-                pointer to instance of type _109_0_T (not function type)
-              ... returning nothing
           ... to arguments
             Variable Expression: z: pointer to instance of type T (not function type)
-            with resolved type:
-              pointer to instance of type T (not function type)
-          with resolved type:
-            void
         (types:
           void

tests/errors/.expect/completeType.x86.txt

-              r56c44dc
+              rce55a81
     Name: x
+... to: nothing
+with resolved type:
+  void  Alternatives are:
+... to: nothing Alternatives are:
 Cost ( 0, 1, 2, 0, 1, -1, 0 ): Generated Cast of:
       Application of
 …
-        with resolved type:
-          pointer to forall
-            _89_4_DT: data type
-            function
-          ... with parameters
-            intrinsic pointer to instance of type _89_4_DT (not function type)
-          ... returning
-            _retval__operator_deref: reference to instance of type _89_4_DT (not function type)
-            ... with attributes:
-              Attribute with name: unused
       ... to arguments
         Variable Expression: x: pointer to instance of struct A with body 0
-        with resolved type:
-          pointer to instance of struct A with body 0
-      with resolved type:
-        reference to instance of struct A with body 0
     ... to: nothing
-    with resolved type:
-      void
   (types:
     void
 …
-        with resolved type:
-          pointer to forall
-            _89_4_DT: data type
-            function
-          ... with parameters
-            intrinsic pointer to instance of type _89_4_DT (not function type)
-          ... returning
-            _retval__operator_deref: reference to instance of type _89_4_DT (not function type)
-            ... with attributes:
-              Attribute with name: unused
       ... to arguments
         Variable Expression: x: pointer to instance of struct B with body 1
-        with resolved type:
-          pointer to instance of struct B with body 1
-      with resolved type:
-        reference to instance of struct B with body 1
     ... to: nothing
-    with resolved type:
-      void
   (types:
     void
 …
             ... returning nothing
-            with resolved type:
-              pointer to forall
-                _108_0_T: sized data type
-                ... with assertions
-                  ?=?: pointer to function
-                  ... with parameters
-                    reference to instance of type _108_0_T (not function type)
-                    instance of type _108_0_T (not function type)
-                  ... returning
-                    _retval__operator_assign: instance of type _108_0_T (not function type)
-                    ... with attributes:
-                      Attribute with name: unused
-                  ?{}: pointer to function
-                  ... with parameters
-                    reference to instance of type _108_0_T (not function type)
-                  ... returning nothing
-                  ?{}: pointer to function
-                  ... with parameters
-                    reference to instance of type _108_0_T (not function type)
-                    instance of type _108_0_T (not function type)
-                  ... returning nothing
-                  ^?{}: pointer to function
-                  ... with parameters
-                    reference to instance of type _108_0_T (not function type)
-                  ... returning nothing
-                function
-              ... with parameters
-                pointer to instance of type _108_0_T (not function type)
-              ... returning nothing
           ... to arguments
             Variable Expression: z: pointer to instance of type T (not function type)
-            with resolved type:
-              pointer to instance of type T (not function type)
-          with resolved type:
-            void
         (types:
           void

tests/literals.cfa

-              r56c44dc
+              rce55a81
 // Created On       : Sat Sep  9 16:34:38 2017
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Thu Aug 20 13:51:12 2020
 // Update Count     : 225
+// Last Modified On : Sat Aug 29 10:57:56 2020
+// Update Count     : 226
 //
 …
         -0X0123456789.0123456789P-09;  -0X0123456789.0123456789P-09f;  -0X0123456789.0123456789P-09l;  -0X0123456789.0123456789P-09F;  -0X0123456789.0123456789P-09L;
+#if defined( __i386 ) || defined( __x86_64 )
 #if defined(__GNUC__) && __GNUC_PREREQ(7,0)                             // gcc version >= 7
 // floating with length, gcc f16/f128x unsupported and no prelude code for any _FloatXXx, so they work by conversion to long double
 …
         /* -0x123456789.0123456789P-09F16; */  -0x123456789.0123456789P-09F32;  -0x123456789.0123456789P-09F32x;  -0x123456789.0123456789P-09F64;  -0x123456789.0123456789P-09F64x;  -0x123456789.0123456789P-09W;  -0x123456789.0123456789P-09F128;  -0x123456789.0123456789P-09q;  /* -0x123456789.0123456789P-09q; */
 #endif // __GNUC_PREREQ(7,0)
+#endif // __i386 ) || __x86_64
 #ifdef __CFA__

tests/pybin/tools.py

r56c44dc	rce55a81
238	238	# helper function to check if a files contains only a specific string
239	239	def file_contains_only(file, text) :
240		with open(file~~) as f:~~
	240	with open(file, encoding="latin-1") as f: # use latin-1 so all chars mean something.
241	241	ff = f.read().strip()
242	242	result = ff == text.strip()

tests/raii/.expect/ctor-autogen-ERR1.txt

-              r56c44dc
+              rce55a81
         x: signed int
       ... returning nothing
-      with resolved type:
-        function
-        ... with parameters
-          _dst: reference to instance of struct Managed with body 1
-          x: signed int
-        ... returning nothing
       ... deleted by: ?{}: function
 …
-              with resolved type:
-                pointer to function
-                ... with parameters
-                  intrinsic reference to signed int
-                  intrinsic signed int
-                ... returning
-                  _retval__operator_assign: signed int
-                  ... with attributes:
-                    Attribute with name: unused
             ... to arguments
               Generated Cast of:
 …
                   Generated Cast of:
                     Variable Expression: m: reference to instance of struct Managed with body 1
-                    with resolved type:
-                      reference to instance of struct Managed with body 1
                   ... to:
                     instance of struct Managed with body 1
-                  with resolved type:
-                    instance of struct Managed with body 1
-                with resolved type:
-                  signed int
               ... to:
-                reference to signed int
-              with resolved type:
                 reference to signed int
               Generated Cast of:
                 constant expression (0 0: zero_t)
-                with resolved type:
-                  zero_t
               ... to:
                 signed int
-              with resolved type:
-                signed int
-            with resolved type:
-              signed int
             ... with environment:
               Types:
 …
     Generated Cast of:
       Variable Expression: x: instance of struct Managed with body 1
-      with resolved type:
-        instance of struct Managed with body 1
     ... to:
       reference to instance of struct Managed with body 1
-    with resolved type:
-      reference to instance of struct Managed with body 1
     constant expression (123 123: signed int)
-    with resolved type:
-      signed int
-  with resolved type:
-    void
 ... to: nothing
-with resolved type:
-  void

tests/test.py

-              r56c44dc
+              rce55a81
         parser.add_argument('--arch', help='Test for specific architecture', type=comma_separated(str), default=None)
         parser.add_argument('--continue', help='When multiple specifications are passed (debug/install/arch), sets whether or not to continue if the last specification failed', type=yes_no, default='yes', dest='continue_')
         parser.add_argument('--timeout', help='Maximum duration in seconds after a single test is considered to have timed out', type=int, default=60)
+        parser.add_argument('--timeout', help='Maximum duration in seconds after a single test is considered to have timed out', type=int, default=120)
         parser.add_argument('--global-timeout', help='Maximum cumulative duration in seconds after the ALL tests are considered to have timed out', type=int, default=7200)
         parser.add_argument('--timeout-with-gdb', help='Instead of killing the command when it times out, orphan it and print process id to allow gdb to attach', type=yes_no, default="no")
 …
                 if success(retcode):
                         if settings.generating :
                                 # if we are ounly generating the output we still need to check that the test actually exists
+                                # if we are only generating the output we still need to check that the test actually exists
                                 if no_rule(out_file, test.target()) :
                                         retcode = 1

tests/warnings/.expect/self-assignment.txt

-              r56c44dc
+              rce55a81
 warnings/self-assignment.cfa:29:1 warning: self assignment of expression: Generated Cast of:
   Variable Expression: j: signed int
-  with resolved type:
-    signed int
 ... to:
-  reference to signed int
-with resolved type:
   reference to signed int
 warnings/self-assignment.cfa:30:1 warning: self assignment of expression: Generated Cast of:
   Variable Expression: s: instance of struct S with body 1
-  with resolved type:
-    instance of struct S with body 1
 ... to:
-  reference to instance of struct S with body 1
-with resolved type:
   reference to instance of struct S with body 1
 warnings/self-assignment.cfa:31:1 warning: self assignment of expression: Generated Cast of:
 …
   ... from aggregate:
     Variable Expression: s: instance of struct S with body 1
-    with resolved type:
-      instance of struct S with body 1
-  with resolved type:
-    signed int
 ... to:
-  reference to signed int
-with resolved type:
   reference to signed int
 warnings/self-assignment.cfa:32:1 warning: self assignment of expression: Generated Cast of:
 …
     ... from aggregate:
       Variable Expression: t: instance of struct T with body 1
-      with resolved type:
-        instance of struct T with body 1
-    with resolved type:
-      instance of struct S with body 1
-  with resolved type:
-    signed int
 ... to:
   reference to signed int
-with resolved type:
-  reference to signed int

tools/Makefile.am

-              r56c44dc
+              rce55a81
 ACLOCAL_AMFLAGS  = -I automake
+AM_CFLAGS = -Wall -Wextra -O2 -g
+EXTRA_DIST = build/distcc_hash build/push2dist.sh
 noinst_PROGRAMS = busy catchsig repeat watchdog
+AM_CFLAGS = -Wall -Wextra -O2 -g
+busy_LDFLAGS     = -pthread
+busy_SOURCES     = busy.c
+busy_LDFLAGS     = -pthread
+catchsig_SOURCES = catchsig.c
+repeat_SOURCES   = repeat.c
+watchdog_SOURCES = watchdog.c
+nodist_busy_SOURCES     = busy.c
+nodist_catchsig_SOURCES = catchsig.c
+nodist_repeat_SOURCES   = repeat.c
+nodist_watchdog_SOURCES = watchdog.c

tools/prettyprinter/Makefile.am

r56c44dc	rce55a81
30	30	tools_prettyprinter_PROGRAMS = pretty
31	31	tools_prettyprinterdir = ../
32		pretty_SOURCES = ${SRC}
	32	nodist_pretty_SOURCES = ${SRC}
33	33	pretty_LDADD = ${LEXLIB} -ldl # yywrap
34	34	pretty_CXXFLAGS = -Wno-deprecated -Wall -DYY_NO_INPUT -O2 -g -std=c++14

Context Navigation

Legend:

Download in other formats: