Index: Jenkins/FullBuild
===================================================================
--- Jenkins/FullBuild	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ Jenkins/FullBuild	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -17,6 +17,6 @@
 
 				parallel (
-					clang_x86: { trigger_build( 'gcc-8',   'x86' ) },
-					gcc_5_x86: { trigger_build( 'gcc-7',   'x86' ) },
+					gcc_8_x86: { trigger_build( 'gcc-8',   'x86' ) },
+					gcc_7_x86: { trigger_build( 'gcc-7',   'x86' ) },
 					gcc_6_x86: { trigger_build( 'gcc-6',   'x86' ) },
 					gcc_9_x64: { trigger_build( 'gcc-9',   'x64' ) },
Index: Jenkinsfile
===================================================================
--- Jenkinsfile	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ Jenkinsfile	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -392,5 +392,5 @@
 			break
 			case 'clang':
-				this.Compiler = new CC_Desc('clang', 'clang++-6.0', 'gcc-6', '-flto=thin -flto-jobs=0')
+				this.Compiler = new CC_Desc('clang', 'clang++-10', 'gcc-9', '-flto=thin -flto-jobs=0')
 			break
 			default :
Index: benchmark/Makefile.am
===================================================================
--- benchmark/Makefile.am	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ benchmark/Makefile.am	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -113,6 +113,4 @@
 creation_cfa_generator_DURATION = 1000000000
 creation_upp_coroutine_DURATION = ${creation_cfa_coroutine_eager_DURATION}
-creation_cfa_thread_DURATION = 10000000
-creation_upp_thread_DURATION = ${creation_cfa_thread_DURATION}
 creation_DURATION = 10000000
 
@@ -148,5 +146,5 @@
 
 cleancsv:
-	rm -f compile.csv basic.csv ctxswitch.csv mutex.csv scheduling.csv
+	rm -f compile.csv basic.csv ctxswitch.csv mutex.csv schedint.csv
 
 jenkins$(EXEEXT): cleancsv
@@ -159,6 +157,6 @@
 	+make mutex.csv
 	-+make mutex.diff.csv
-	+make scheduling.csv
-	-+make scheduling.diff.csv
+	+make schedint.csv
+	-+make schedint.diff.csv
 @DOifskipcompile@
 	cat compile.csv
@@ -169,6 +167,6 @@
 	cat mutex.csv
 	-cat mutex.diff.csv
-	cat scheduling.csv
-	-cat scheduling.diff.csv
+	cat schedint.csv
+	-cat schedint.diff.csv
 
 compile.csv:
@@ -200,5 +198,5 @@
 	$(srcdir)/fixcsv.sh $@
 
-scheduling.csv:
+schedint.csv:
 	echo "building $@"
 	echo "schedint-1,schedint-2,schedext-1,schedext-2" > $@
@@ -291,15 +289,15 @@
 ctxswitch-python_coroutine$(EXEEXT):
 	$(BENCH_V_PY)echo "#!/bin/sh" > a.out
-	echo "python3.7 $(srcdir)/ctxswitch/python_cor.py" >> a.out
+	echo "python3 $(srcdir)/ctxswitch/python_cor.py \"$$""@\"" >> a.out
 	chmod a+x a.out
 
 ctxswitch-nodejs_coroutine$(EXEEXT):
 	$(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
-	echo "nodejs $(srcdir)/ctxswitch/node_cor.js" >> a.out
+	echo "nodejs $(srcdir)/ctxswitch/node_cor.js \"$$""@\"" >> a.out
 	chmod a+x a.out
 
 ctxswitch-nodejs_await$(EXEEXT):
 	$(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
-	echo "nodejs $(srcdir)/ctxswitch/node_await.js" >> a.out
+	echo "nodejs $(srcdir)/ctxswitch/node_await.js \"$$""@\"" >> a.out
 	chmod a+x a.out
 
@@ -313,5 +311,5 @@
 	$(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/ctxswitch/JavaThread.java
 	echo "#!/bin/sh" > a.out
-	echo "java JavaThread" >> a.out
+	echo "java JavaThread \"$$""@\"" >> a.out
 	chmod a+x a.out
 
@@ -355,5 +353,5 @@
 	$(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/mutex/JavaThread.java
 	echo "#!/bin/sh" > a.out
-	echo "java JavaThread" >> a.out
+	echo "java JavaThread \"$$""@\"" >> a.out
 	chmod a+x a.out
 
@@ -387,5 +385,5 @@
 	$(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/schedint/JavaThread.java
 	echo "#!/bin/sh" > a.out
-	echo "java JavaThread" >> a.out
+	echo "java JavaThread \"$$""@\"" >> a.out
 	chmod a+x a.out
 
@@ -454,10 +452,10 @@
 creation-python_coroutine$(EXEEXT):
 	$(BENCH_V_PY)echo "#!/bin/sh" > a.out
-	echo "python3.7 $(srcdir)/creation/python_cor.py" >> a.out
+	echo "python3 $(srcdir)/creation/python_cor.py \"$$""@\"" >> a.out
 	chmod a+x a.out
 
 creation-nodejs_coroutine$(EXEEXT):
 	$(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
-	echo "nodejs $(srcdir)/creation/node_cor.js" >> a.out
+	echo "nodejs $(srcdir)/creation/node_cor.js \"$$""@\"" >> a.out
 	chmod a+x a.out
 
@@ -471,5 +469,5 @@
 	$(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/creation/JavaThread.java
 	echo "#!/bin/sh" > a.out
-	echo "java JavaThread" >> a.out
+	echo "java JavaThread \"$$""@\"" >> a.out
 	chmod a+x a.out
 
@@ -492,29 +490,29 @@
 
 compile-array$(EXEEXT):
-	$(CFACOMPILE) -fsyntax-only -w $(testdir)/array.cfa
+	$(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/array.cfa
 
 compile-attributes$(EXEEXT):
-	$(CFACOMPILE) -fsyntax-only -w $(testdir)/attributes.cfa
+	$(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/attributes.cfa
 
 compile-empty$(EXEEXT):
-	$(CFACOMPILE) -fsyntax-only -w $(srcdir)/compile/empty.cfa
+	$(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(srcdir)/compile/empty.cfa
 
 compile-expression$(EXEEXT):
-	$(CFACOMPILE) -fsyntax-only -w $(testdir)/expression.cfa
+	$(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/expression.cfa
 
 compile-io$(EXEEXT):
-	$(CFACOMPILE) -fsyntax-only -w $(testdir)/io1.cfa
+	$(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/io1.cfa
 
 compile-monitor$(EXEEXT):
-	$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
+	$(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
 
 compile-operators$(EXEEXT):
-	$(CFACOMPILE) -fsyntax-only -w $(testdir)/operators.cfa
+	$(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/operators.cfa
 
 compile-thread$(EXEEXT):
-	$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/thread.cfa
+	$(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/concurrent/thread.cfa
 
 compile-typeof$(EXEEXT):
-	$(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
+	$(CFACOMPILE) -DNO_COMPILED_PRAGMA -fsyntax-only -w $(testdir)/typeof.cfa
 
 ## =========================================================================================================
Index: benchmark/creation/JavaThread.java
===================================================================
--- benchmark/creation/JavaThread.java	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ benchmark/creation/JavaThread.java	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -47,6 +47,6 @@
 	}
 	public static void main(String[] args) throws InterruptedException {
-		if ( args.length > 2 ) System.exit( 1 );
-		if ( args.length == 2 ) { times = Long.parseLong(args[1]); }
+		if ( args.length > 1 ) System.exit( 1 );
+		if ( args.length == 1 ) { times = Long.parseLong(args[0]); }
 
 		for (int i = Integer.parseInt("5"); --i >= 0 ; ) {
Index: benchmark/ctxswitch/JavaThread.java
===================================================================
--- benchmark/ctxswitch/JavaThread.java	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ benchmark/ctxswitch/JavaThread.java	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -40,6 +40,6 @@
 	}
 	public static void main(String[] args) throws InterruptedException {
-		if ( args.length > 2 ) System.exit( 1 );
-		if ( args.length == 2 ) { times = Long.parseLong(args[1]); }
+		if ( args.length > 1 ) System.exit( 1 );
+		if ( args.length == 1 ) { times = Long.parseLong(args[0]); }
 
 		for (int i = Integer.parseInt("5"); --i >= 0 ; ) {
Index: benchmark/io/http/main.cfa
===================================================================
--- benchmark/io/http/main.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ benchmark/io/http/main.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -125,5 +125,5 @@
 						workers[i].flags   = 0;
 					}
-					unpark( workers[i] __cfaabi_dbg_ctx2 );
+					unpark( workers[i] );
 				}
 				printf("%d workers started on %d processors\n", options.clopts.nworkers, options.clopts.nprocs);
Index: benchmark/io/http/worker.cfa
===================================================================
--- benchmark/io/http/worker.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ benchmark/io/http/worker.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -22,5 +22,5 @@
 
 void main( Worker & this ) {
-	park( __cfaabi_dbg_ctx );
+	park();
 	/* paranoid */ assert( this.pipe[0] != -1 );
 	/* paranoid */ assert( this.pipe[1] != -1 );
Index: benchmark/io/readv.cfa
===================================================================
--- benchmark/io/readv.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ benchmark/io/readv.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -54,5 +54,5 @@
 
 void main( Reader & ) {
-	park( __cfaabi_dbg_ctx );
+	park();
 	/* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
 
@@ -151,5 +151,5 @@
 
 				for(i; nthreads) {
-					unpark( threads[i] __cfaabi_dbg_ctx2 );
+					unpark( threads[i] );
 				}
 				wait(duration, start, end, is_tty);
Index: benchmark/mutex/JavaThread.java
===================================================================
--- benchmark/mutex/JavaThread.java	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ benchmark/mutex/JavaThread.java	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -47,6 +47,6 @@
 	}
 	public static void main(String[] args) throws InterruptedException {
-		if ( args.length > 2 ) System.exit( 1 );
-		if ( args.length == 2 ) { times = Long.parseLong(args[1]); }
+		if ( args.length > 1 ) System.exit( 1 );
+		if ( args.length == 1 ) { times = Long.parseLong(args[0]); }
 
 		for (int n = Integer.parseInt("5"); --n >= 0 ; ) {
Index: benchmark/readyQ/yield.cfa
===================================================================
--- benchmark/readyQ/yield.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ benchmark/readyQ/yield.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -32,5 +32,5 @@
 
 void main( Yielder & this ) {
-	park( __cfaabi_dbg_ctx );
+	park();
 	/* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
 
@@ -70,5 +70,5 @@
 
 				for(i; nthreads) {
-					unpark( threads[i] __cfaabi_dbg_ctx2 );
+					unpark( threads[i] );
 				}
 				wait(duration, start, end, is_tty);
Index: benchmark/schedint/JavaThread.java
===================================================================
--- benchmark/schedint/JavaThread.java	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ benchmark/schedint/JavaThread.java	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -75,6 +75,6 @@
 	}
 	public static void main(String[] args) throws InterruptedException {
-		if ( args.length > 2 ) System.exit( 1 );
-		if ( args.length == 2 ) { times = Long.parseLong(args[1]); }
+		if ( args.length > 1 ) System.exit( 1 );
+		if ( args.length == 1 ) { times = Long.parseLong(args[0]); }
 
 		for (int n = Integer.parseInt("5"); --n >= 0 ; ) {
Index: doc/LaTeXmacros/common.tex
===================================================================
--- doc/LaTeXmacros/common.tex	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/LaTeXmacros/common.tex	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -11,6 +11,6 @@
 %% Created On       : Sat Apr  9 10:06:17 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Fri Sep  4 13:56:52 2020
-%% Update Count     : 383
+%% Last Modified On : Mon Oct  5 09:34:46 2020
+%% Update Count     : 464
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -55,19 +55,4 @@
 \newlength{\parindentlnth}
 \setlength{\parindentlnth}{\parindent}
-
-\newcommand{\LstBasicStyle}[1]{{\lst@basicstyle{#1}}}
-\newcommand{\LstKeywordStyle}[1]{{\lst@basicstyle{\lst@keywordstyle{#1}}}}
-\newcommand{\LstCommentStyle}[1]{{\lst@basicstyle{\lst@commentstyle{#1}}}}
-
-\newlength{\gcolumnposn}				% temporary hack because lstlisting does not handle tabs correctly
-\newlength{\columnposn}
-\setlength{\gcolumnposn}{2.5in}
-\setlength{\columnposn}{\gcolumnposn}
-\newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\lst@basicstyle{\LstCommentStyle{#2}}}}
-\newcommand{\CRT}{\global\columnposn=\gcolumnposn}
-
-% allow escape sequence in lstinline
-%\usepackage{etoolbox}
-%\patchcmd{\lsthk@TextStyle}{\let\lst@DefEsc\@empty}{}{}{\errmessage{failed to patch}}
 
 \usepackage{pslatex}					% reduce size of san serif font
@@ -244,8 +229,28 @@
 \usepackage{listings}									% format program code
 \usepackage{lstlang}
-
-\newcommand{\CFADefaults}{%
+\makeatletter
+
+\newcommand{\LstBasicStyle}[1]{{\lst@basicstyle{#1}}}
+\newcommand{\LstKeywordStyle}[1]{{\lst@basicstyle{\lst@keywordstyle{#1}}}}
+\newcommand{\LstCommentStyle}[1]{{\lst@basicstyle{\lst@commentstyle{#1}}}}
+
+\newlength{\gcolumnposn}				% temporary hack because lstlisting does not handle tabs correctly
+\newlength{\columnposn}
+\setlength{\gcolumnposn}{2.75in}
+\setlength{\columnposn}{\gcolumnposn}
+\newcommand{\C}[2][\@empty]{\ifx#1\@empty\else\global\setlength{\columnposn}{#1}\global\columnposn=\columnposn\fi\hfill\makebox[\textwidth-\columnposn][l]{\lst@basicstyle{\LstCommentStyle{#2}}}}
+\newcommand{\CRT}{\global\columnposn=\gcolumnposn}
+
+% allow escape sequence in lstinline
+%\usepackage{etoolbox}
+%\patchcmd{\lsthk@TextStyle}{\let\lst@DefEsc\@empty}{}{}{\errmessage{failed to patch}}
+
+% allow adding to lst literate
+\def\addToLiterate#1{\protect\edef\lst@literate{\unexpanded\expandafter{\lst@literate}\unexpanded{#1}}}
+\lst@Key{add to literate}{}{\addToLiterate{#1}}
+\makeatother
+
+\newcommand{\CFAStyle}{%
 \lstset{
-language=CFA,
 columns=fullflexible,
 basicstyle=\linespread{0.9}\sf,			% reduce line spacing and use sanserif font
@@ -262,22 +267,37 @@
 belowskip=3pt,
 % replace/adjust listing characters that look bad in sanserif
-literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1
+literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.75ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptscriptstyle\land\,$}}1
 	{~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
 	{<-}{$\leftarrow$}2 {=>}{$\Rightarrow$}2 {->}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.075ex}}}\kern-0.2ex\textgreater}2,
-moredelim=**[is][\color{red}]{?}{?},	% red highlighting ?...? (registered trademark symbol) emacs: C-q M-.
+}% lstset
+}% CFAStyle
+
+\ifdefined\CFALatin% extra Latin-1 escape characters
+\lstnewenvironment{cfa}[1][]{
+\lstset{
+language=CFA,
+moredelim=**[is][\color{red}]{®}{®},	% red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
 moredelim=**[is][\color{blue}]{ß}{ß},	% blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
 moredelim=**[is][\color{OliveGreen}]{¢}{¢}, % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
 moredelim=[is][\lstset{keywords={}}]{¶}{¶}, % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
+% replace/adjust listing characters that look bad in sanserif
+add to literate={`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
 }% lstset
-}% CFADefaults
-\newcommand{\CFAStyle}{%
-\CFADefaults
+\lstset{#1}
+}{}
 % inline code ©...© (copyright symbol) emacs: C-q M-)
 \lstMakeShortInline©					% single-character for \lstinline
-}% CFAStyle
-
-\lstnewenvironment{cfa}[1][]
-{\CFADefaults\lstset{#1}}
-{}
+\else% regular ASCI characters
+\lstnewenvironment{cfa}[1][]{
+\lstset{
+language=CFA,
+escapechar=\$,							% LaTeX escape in CFA code
+moredelim=**[is][\color{red}]{@}{@},	% red highlighting @...@
+}% lstset
+\lstset{#1}
+}{}
+% inline code @...@ (at symbol)
+\lstMakeShortInline@					% single-character for \lstinline
+\fi%
 
 % Local Variables: %
Index: doc/LaTeXmacros/lstlang.sty
===================================================================
--- doc/LaTeXmacros/lstlang.sty	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/LaTeXmacros/lstlang.sty	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -8,6 +8,6 @@
 %% Created On       : Sat May 13 16:34:42 2017
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Tue Jan  8 14:40:33 2019
-%% Update Count     : 21
+%% Last Modified On : Wed Sep 23 22:40:04 2020
+%% Update Count     : 24
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -115,7 +115,7 @@
 		auto, _Bool, catch, catchResume, choose, _Complex, __complex, __complex__, __const, __const__,
 		coroutine, disable, dtype, enable, exception, __extension__, fallthrough, fallthru, finally,
-		__float80, float80, __float128, float128, forall, ftype, _Generic, _Imaginary, __imag, __imag__,
+		__float80, float80, __float128, float128, forall, ftype, generator, _Generic, _Imaginary, __imag, __imag__,
 		inline, __inline, __inline__, __int128, int128, __label__, monitor, mutex, _Noreturn, one_t, or,
-		otype, restrict, __restrict, __restrict__, __signed, __signed__, _Static_assert, thread,
+		otype, restrict, __restrict, __restrict__, __signed, __signed__, _Static_assert, suspend, thread,
 		_Thread_local, throw, throwResume, timeout, trait, try, ttype, typeof, __typeof, __typeof__,
 		virtual, __volatile, __volatile__, waitfor, when, with, zero_t,
@@ -125,5 +125,7 @@
 
 % C++ programming language
-\lstdefinelanguage{C++}[ANSI]{C++}{}
+\lstdefinelanguage{C++}[ANSI]{C++}{
+	morekeywords={nullptr,}
+}
 
 % uC++ programming language, based on ANSI C++
Index: doc/bibliography/pl.bib
===================================================================
--- doc/bibliography/pl.bib	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/bibliography/pl.bib	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -1005,5 +1005,5 @@
     key		= {Cforall Benchmarks},
     author	= {{\textsf{C}{$\mathbf{\forall}$} Benchmarks}},
-    howpublished= {\href{https://plg.uwaterloo.ca/~cforall/doc/CforallConcurrentBenchmarks.tar}{https://\-plg.uwaterloo.ca/\-$\sim$cforall/\-doc/\-CforallConcurrentBenchmarks.tar}},
+    howpublished= {\href{https://github.com/cforall/ConcurrentBenchmarks_SPE20}{https://\-github.com/\-cforall/\-ConcurrentBenchmarks\_SPE20}},
 }
 
@@ -1973,5 +1973,5 @@
     title	= {Cooperating Sequential Processes},
     institution	= {Technological University},
-    address	= {Eindhoven, Netherlands},
+    address	= {Eindhoven, Neth.},
     year	= 1965,
     note	= {Reprinted in \cite{Genuys68} pp. 43--112.}
Index: doc/papers/concurrency/Paper.tex
===================================================================
--- doc/papers/concurrency/Paper.tex	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/papers/concurrency/Paper.tex	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -224,17 +224,17 @@
 {}
 \lstnewenvironment{C++}[1][]                            % use C++ style
-{\lstset{language=C++,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
+{\lstset{language=C++,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}}
 {}
 \lstnewenvironment{uC++}[1][]
-{\lstset{language=uC++,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
+{\lstset{language=uC++,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}}
 {}
 \lstnewenvironment{Go}[1][]
-{\lstset{language=Golang,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
+{\lstset{language=Golang,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}}
 {}
 \lstnewenvironment{python}[1][]
-{\lstset{language=python,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
+{\lstset{language=python,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}}
 {}
 \lstnewenvironment{java}[1][]
-{\lstset{language=java,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
+{\lstset{language=java,moredelim=**[is][\protect\color{red}]{`}{`}}\lstset{#1}}
 {}
 
@@ -284,5 +284,5 @@
 
 \begin{document}
-\linenumbers				% comment out to turn off line numbering
+%\linenumbers				% comment out to turn off line numbering
 
 \maketitle
@@ -450,16 +450,16 @@
 \hline
 stateful			& thread	& \multicolumn{1}{c|}{No} & \multicolumn{1}{c}{Yes} \\
-\hline    
-\hline    
+\hline
+\hline
 No					& No		& \textbf{1}\ \ \ @struct@				& \textbf{2}\ \ \ @mutex@ @struct@		\\
-\hline    
+\hline
 Yes (stackless)		& No		& \textbf{3}\ \ \ @generator@			& \textbf{4}\ \ \ @mutex@ @generator@	\\
-\hline    
+\hline
 Yes (stackful)		& No		& \textbf{5}\ \ \ @coroutine@			& \textbf{6}\ \ \ @mutex@ @coroutine@	\\
-\hline    
+\hline
 No					& Yes		& \textbf{7}\ \ \ {\color{red}rejected}	& \textbf{8}\ \ \ {\color{red}rejected}	\\
-\hline    
+\hline
 Yes (stackless)		& Yes		& \textbf{9}\ \ \ {\color{red}rejected}	& \textbf{10}\ \ \ {\color{red}rejected} \\
-\hline    
+\hline
 Yes (stackful)		& Yes		& \textbf{11}\ \ \ @thread@				& \textbf{12}\ \ @mutex@ @thread@		\\
 \end{tabular}
@@ -2896,5 +2896,5 @@
 \label{s:RuntimeStructureCluster}
 
-A \newterm{cluster} is a collection of user and kernel threads, where the kernel threads run the user threads from the cluster's ready queue, and the operating system runs the kernel threads on the processors from its ready queue.
+A \newterm{cluster} is a collection of user and kernel threads, where the kernel threads run the user threads from the cluster's ready queue, and the operating system runs the kernel threads on the processors from its ready queue~\cite{Buhr90a}.
 The term \newterm{virtual processor} is introduced as a synonym for kernel thread to disambiguate between user and kernel thread.
 From the language perspective, a virtual processor is an actual processor (core).
@@ -2992,10 +2992,11 @@
 \end{cfa}
 where CPU time in nanoseconds is from the appropriate language clock.
-Each benchmark is performed @N@ times, where @N@ is selected so the benchmark runs in the range of 2--20 seconds for the specific programming language.
+Each benchmark is performed @N@ times, where @N@ is selected so the benchmark runs in the range of 2--20 seconds for the specific programming language;
+each @N@ appears after the experiment name in the following tables.
 The total time is divided by @N@ to obtain the average time for a benchmark.
 Each benchmark experiment is run 13 times and the average appears in the table.
+For languages with a runtime JIT (Java, Node.js, Python), a single half-hour long experiment is run to check stability;
+all long-experiment results are statistically equivalent, \ie median/average/standard-deviation correlate with the short-experiment results, indicating the short experiments reached a steady state.
 All omitted tests for other languages are functionally identical to the \CFA tests and available online~\cite{CforallConcurrentBenchmarks}.
-% tar --exclude-ignore=exclude -cvhf benchmark.tar benchmark
-% cp -p benchmark.tar /u/cforall/public_html/doc/concurrent_benchmark.tar
 
 \paragraph{Creation}
@@ -3006,7 +3007,6 @@
 
 \begin{multicols}{2}
-\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
-\begin{cfa}
-@coroutine@ MyCoroutine {};
+\begin{cfa}[xleftmargin=0pt]
+`coroutine` MyCoroutine {};
 void ?{}( MyCoroutine & this ) {
 #ifdef EAGER
@@ -3016,5 +3016,5 @@
 void main( MyCoroutine & ) {}
 int main() {
-	BENCH( for ( N ) { @MyCoroutine c;@ } )
+	BENCH( for ( N ) { `MyCoroutine c;` } )
 	sout | result;
 }
@@ -3030,18 +3030,19 @@
 
 \begin{tabular}[t]{@{}r*{3}{D{.}{.}{5.2}}@{}}
-\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
-\CFA generator			& 0.6		& 0.6		& 0.0		\\
-\CFA coroutine lazy		& 13.4		& 13.1		& 0.5		\\
-\CFA coroutine eager	& 144.7		& 143.9		& 1.5		\\
-\CFA thread				& 466.4		& 468.0		& 11.3		\\
-\uC coroutine			& 155.6		& 155.7		& 1.7		\\
-\uC thread				& 523.4		& 523.9		& 7.7		\\
-Python generator		& 123.2		& 124.3		& 4.1		\\
-Node.js generator		& 33.4		& 33.5		& 0.3		\\
-Goroutine thread		& 751.0		& 750.5		& 3.1		\\
-Rust tokio thread		& 1860.0	& 1881.1	& 37.6		\\
-Rust thread				& 53801.0	& 53896.8	& 274.9		\\
-Java thread				& 120274.0	& 120722.9	& 2356.7	\\
-Pthreads thread			& 31465.5	& 31419.5	& 140.4
+\multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
+\CFA generator (1B)			& 0.6		& 0.6		& 0.0		\\
+\CFA coroutine lazy	(100M)	& 13.4		& 13.1		& 0.5		\\
+\CFA coroutine eager (10M)	& 144.7		& 143.9		& 1.5		\\
+\CFA thread (10M)			& 466.4		& 468.0		& 11.3		\\
+\uC coroutine (10M)			& 155.6		& 155.7		& 1.7		\\
+\uC thread (10M)			& 523.4		& 523.9		& 7.7		\\
+Python generator (10M)		& 123.2		& 124.3		& 4.1		\\
+Node.js generator (10M)		& 33.4		& 33.5		& 0.3		\\
+Goroutine thread (10M)		& 751.0		& 750.5		& 3.1		\\
+Rust tokio thread (10M)		& 1860.0	& 1881.1	& 37.6		\\
+Rust thread	(250K)			& 53801.0	& 53896.8	& 274.9		\\
+Java thread (250K)			& 119256.0	& 119679.2	& 2244.0	\\
+% Java thread (1 000 000)		& 123100.0	& 123052.5	& 751.6 	\\
+Pthreads thread	(250K)		& 31465.5	& 31419.5	& 140.4
 \end{tabular}
 \end{multicols}
@@ -3052,19 +3053,20 @@
 Internal scheduling is measured using a cycle of two threads signalling and waiting.
 Figure~\ref{f:schedint} shows the code for \CFA, with results in Table~\ref{t:schedint}.
-Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
-Java scheduling is significantly greater because the benchmark explicitly creates multiple threads in order to prevent the JIT from making the program sequential, \ie removing all locking.
+Note, the \CFA incremental cost for bulk acquire is a fixed cost for small numbers of mutex objects.
+User-level threading has one kernel thread, eliminating contention between the threads (direct handoff of the kernel thread).
+Kernel-level threading has two kernel threads allowing some contention.
 
 \begin{multicols}{2}
-\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
-\begin{cfa}
+\setlength{\tabcolsep}{3pt}
+\begin{cfa}[xleftmargin=0pt]
 volatile int go = 0;
-@condition c;@
-@monitor@ M {} m1/*, m2, m3, m4*/;
-void call( M & @mutex p1/*, p2, p3, p4*/@ ) {
-	@signal( c );@
-}
-void wait( M & @mutex p1/*, p2, p3, p4*/@ ) {
+`condition c;`
+`monitor` M {} m1/*, m2, m3, m4*/;
+void call( M & `mutex p1/*, p2, p3, p4*/` ) {
+	`signal( c );`
+}
+void wait( M & `mutex p1/*, p2, p3, p4*/` ) {
 	go = 1;	// continue other thread
-	for ( N ) { @wait( c );@ } );
+	for ( N ) { `wait( c );` } );
 }
 thread T {};
@@ -3091,12 +3093,13 @@
 
 \begin{tabular}{@{}r*{3}{D{.}{.}{5.2}}@{}}
-\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
-\CFA @signal@, 1 monitor	& 364.4		& 364.2		& 4.4		\\
-\CFA @signal@, 2 monitor	& 484.4		& 483.9		& 8.8		\\
-\CFA @signal@, 4 monitor	& 709.1		& 707.7		& 15.0		\\
-\uC @signal@ monitor		& 328.3		& 327.4		& 2.4		\\
-Rust cond. variable			& 7514.0	& 7437.4	& 397.2		\\
-Java @notify@ monitor		& 9623.0	& 9654.6	& 236.2		\\
-Pthreads cond. variable		& 5553.7	& 5576.1	& 345.6
+\multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
+\CFA @signal@, 1 monitor (10M)	& 364.4		& 364.2		& 4.4		\\
+\CFA @signal@, 2 monitor (10M)	& 484.4		& 483.9		& 8.8		\\
+\CFA @signal@, 4 monitor (10M)	& 709.1		& 707.7		& 15.0		\\
+\uC @signal@ monitor (10M)		& 328.3		& 327.4		& 2.4		\\
+Rust cond. variable	(1M)		& 7514.0	& 7437.4	& 397.2		\\
+Java @notify@ monitor (1M)		& 8717.0	& 8774.1	& 471.8		\\
+% Java @notify@ monitor (100 000 000)		& 8634.0	& 8683.5	& 330.5		\\
+Pthreads cond. variable (1M)	& 5553.7	& 5576.1	& 345.6
 \end{tabular}
 \end{multicols}
@@ -3107,14 +3110,14 @@
 External scheduling is measured using a cycle of two threads calling and accepting the call using the @waitfor@ statement.
 Figure~\ref{f:schedext} shows the code for \CFA with results in Table~\ref{t:schedext}.
-Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
+Note, the \CFA incremental cost for bulk acquire is a fixed cost for small numbers of mutex objects.
 
 \begin{multicols}{2}
-\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
+\setlength{\tabcolsep}{5pt}
 \vspace*{-16pt}
-\begin{cfa}
-@monitor@ M {} m1/*, m2, m3, m4*/;
-void call( M & @mutex p1/*, p2, p3, p4*/@ ) {}
-void wait( M & @mutex p1/*, p2, p3, p4*/@ ) {
-	for ( N ) { @waitfor( call : p1/*, p2, p3, p4*/ );@ }
+\begin{cfa}[xleftmargin=0pt]
+`monitor` M {} m1/*, m2, m3, m4*/;
+void call( M & `mutex p1/*, p2, p3, p4*/` ) {}
+void wait( M & `mutex p1/*, p2, p3, p4*/` ) {
+	for ( N ) { `waitfor( call : p1/*, p2, p3, p4*/ );` }
 }
 thread T {};
@@ -3133,14 +3136,14 @@
 \columnbreak
 
-\vspace*{-16pt}
+\vspace*{-18pt}
 \captionof{table}{External-scheduling comparison (nanoseconds)}
 \label{t:schedext}
 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
-\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
-\CFA @waitfor@, 1 monitor	& 367.1	& 365.3	& 5.0	\\
-\CFA @waitfor@, 2 monitor	& 463.0	& 464.6	& 7.1	\\
-\CFA @waitfor@, 4 monitor	& 689.6	& 696.2	& 21.5	\\
-\uC \lstinline[language=uC++]|_Accept| monitor	& 328.2	& 329.1	& 3.4	\\
-Go \lstinline[language=Golang]|select| channel	& 365.0	& 365.5	& 1.2
+\multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
+\CFA @waitfor@, 1 monitor (10M)	& 367.1	& 365.3	& 5.0	\\
+\CFA @waitfor@, 2 monitor (10M)	& 463.0	& 464.6	& 7.1	\\
+\CFA @waitfor@, 4 monitor (10M)	& 689.6	& 696.2	& 21.5	\\
+\uC \lstinline[language=uC++]|_Accept| monitor (10M)	& 328.2	& 329.1	& 3.4	\\
+Go \lstinline[language=Golang]|select| channel (10M)	& 365.0	& 365.5	& 1.2
 \end{tabular}
 \end{multicols}
@@ -3155,8 +3158,8 @@
 
 \begin{multicols}{2}
-\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
-\begin{cfa}
-@monitor@ M {} m1/*, m2, m3, m4*/;
-call( M & @mutex p1/*, p2, p3, p4*/@ ) {}
+\setlength{\tabcolsep}{3pt}
+\begin{cfa}[xleftmargin=0pt]
+`monitor` M {} m1/*, m2, m3, m4*/;
+call( M & `mutex p1/*, p2, p3, p4*/` ) {}
 int main() {
 	BENCH( for( N ) call( m1/*, m2, m3, m4*/ ); )
@@ -3173,14 +3176,15 @@
 \label{t:mutex}
 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
-\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
-test-and-test-set lock			& 19.1	& 18.9	& 0.4	\\
-\CFA @mutex@ function, 1 arg.	& 48.3	& 47.8	& 0.9	\\
-\CFA @mutex@ function, 2 arg.	& 86.7	& 87.6	& 1.9	\\
-\CFA @mutex@ function, 4 arg.	& 173.4	& 169.4	& 5.9	\\
-\uC @monitor@ member rtn.		& 54.8	& 54.8	& 0.1	\\
-Goroutine mutex lock			& 34.0	& 34.0	& 0.0	\\
-Rust mutex lock					& 33.0	& 33.2	& 0.8	\\
-Java synchronized method		& 31.0	& 31.0	& 0.0	\\
-Pthreads mutex Lock				& 31.0	& 31.1	& 0.4
+\multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
+test-and-test-set lock (50M)		& 19.1	& 18.9	& 0.4	\\
+\CFA @mutex@ function, 1 arg. (50M)	& 48.3	& 47.8	& 0.9	\\
+\CFA @mutex@ function, 2 arg. (50M)	& 86.7	& 87.6	& 1.9	\\
+\CFA @mutex@ function, 4 arg. (50M)	& 173.4	& 169.4	& 5.9	\\
+\uC @monitor@ member rtn. (50M)		& 54.8	& 54.8	& 0.1	\\
+Goroutine mutex lock (50M)			& 34.0	& 34.0	& 0.0	\\
+Rust mutex lock (50M)				& 33.0	& 33.2	& 0.8	\\
+Java synchronized method (50M)		& 31.0	& 30.9	& 0.5	\\
+% Java synchronized method (10 000 000 000)		& 31.0 & 30.2 & 0.9 \\
+Pthreads mutex Lock (50M)			& 31.0	& 31.1	& 0.4
 \end{tabular}
 \end{multicols}
@@ -3201,5 +3205,5 @@
 % To: "Peter A. Buhr" <pabuhr@plg2.cs.uwaterloo.ca>
 % Date: Fri, 24 Jan 2020 13:49:18 -0500
-% 
+%
 % I can also verify that the previous version, which just tied a bunch of promises together, *does not* go back to the
 % event loop at all in the current version of Node. Presumably they're taking advantage of the fact that the ordering of
@@ -3211,15 +3215,14 @@
 
 \begin{multicols}{2}
-\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
-\begin{cfa}[aboveskip=0pt,belowskip=0pt]
-@coroutine@ C {};
-void main( C & ) { for () { @suspend;@ } }
+\begin{cfa}[xleftmargin=0pt]
+`coroutine` C {};
+void main( C & ) { for () { `suspend;` } }
 int main() { // coroutine test
 	C c;
-	BENCH( for ( N ) { @resume( c );@ } )
+	BENCH( for ( N ) { `resume( c );` } )
 	sout | result;
 }
 int main() { // thread test
-	BENCH( for ( N ) { @yield();@ } )
+	BENCH( for ( N ) { `yield();` } )
 	sout | result;
 }
@@ -3234,20 +3237,22 @@
 \label{t:ctx-switch}
 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
-\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
-C function			& 1.8		& 1.8		& 0.0	\\
-\CFA generator		& 1.8		& 2.0		& 0.3	\\
-\CFA coroutine		& 32.5		& 32.9		& 0.8	\\
-\CFA thread			& 93.8		& 93.6		& 2.2	\\
-\uC coroutine		& 50.3		& 50.3		& 0.2	\\
-\uC thread			& 97.3		& 97.4		& 1.0	\\
-Python generator	& 40.9		& 41.3		& 1.5	\\
-Node.js await		& 1852.2	& 1854.7	& 16.4	\\
-Node.js generator	& 33.3		& 33.4		& 0.3	\\
-Goroutine thread	& 143.0		& 143.3		& 1.1	\\
-Rust async await	& 32.0		& 32.0		& 0.0	\\
-Rust tokio thread	& 143.0		& 143.0		& 1.7	\\
-Rust thread			& 332.0		& 331.4		& 2.4	\\
-Java thread			& 405.0		& 415.0		& 17.6	\\
-Pthreads thread		& 334.3		& 335.2		& 3.9
+\multicolumn{1}{@{}r}{N\hspace*{10pt}} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
+C function (10B)			& 1.8		& 1.8		& 0.0	\\
+\CFA generator (5B)			& 1.8		& 2.0		& 0.3	\\
+\CFA coroutine (100M)		& 32.5		& 32.9		& 0.8	\\
+\CFA thread (100M)			& 93.8		& 93.6		& 2.2	\\
+\uC coroutine (100M)		& 50.3		& 50.3		& 0.2	\\
+\uC thread (100M)			& 97.3		& 97.4		& 1.0	\\
+Python generator (100M)		& 40.9		& 41.3		& 1.5	\\
+Node.js await (5M)			& 1852.2	& 1854.7	& 16.4	\\
+Node.js generator (100M)	& 33.3		& 33.4		& 0.3	\\
+Goroutine thread (100M)		& 143.0		& 143.3		& 1.1	\\
+Rust async await (100M)		& 32.0		& 32.0		& 0.0	\\
+Rust tokio thread (100M)	& 143.0		& 143.0		& 1.7	\\
+Rust thread (25M)			& 332.0		& 331.4		& 2.4	\\
+Java thread (100M)			& 405.0		& 415.0		& 17.6	\\
+% Java thread (  100 000 000)			& 413.0 & 414.2 & 6.2 \\
+% Java thread (5 000 000 000)			& 415.0 & 415.2 & 6.1 \\
+Pthreads thread (25M)		& 334.3		& 335.2		& 3.9
 \end{tabular}
 \end{multicols}
@@ -3258,8 +3263,11 @@
 Languages using 1:1 threading based on pthreads can at best meet or exceed, due to language overhead, the pthread results.
 Note, pthreads has a fast zero-contention mutex lock checked in user space.
-Languages with M:N threading have better performance than 1:1 because there is no operating-system interactions.
+Languages with M:N threading have better performance than 1:1 because there is no operating-system interactions (context-switching or locking).
+As well, for locking experiments, M:N threading has less contention if only one kernel thread is used.
 Languages with stackful coroutines have higher cost than stackless coroutines because of stack allocation and context switching;
 however, stackful \uC and \CFA coroutines have approximately the same performance as stackless Python and Node.js generators.
 The \CFA stackless generator is approximately 25 times faster for suspend/resume and 200 times faster for creation than stackless Python and Node.js generators.
+The Node.js context-switch is costly when asynchronous await must enter the event engine because a promise is not fulfilled.
+Finally, the benchmark results correlate across programming languages with and without JIT, indicating the JIT has completed any runtime optimizations.
 
 
@@ -3319,5 +3327,5 @@
 
 The authors recognize the design assistance of Aaron Moss, Rob Schluntz, Andrew Beach, and Michael Brooks; David Dice for commenting and helping with the Java benchmarks; and Gregor Richards for helping with the Node.js benchmarks.
-This research is funded by a grant from Waterloo-Huawei (\url{http://www.huawei.com}) Joint Innovation Lab. %, and Peter Buhr is partially funded by the Natural Sciences and Engineering Research Council of Canada.
+This research is funded by the NSERC/Waterloo-Huawei (\url{http://www.huawei.com}) Joint Innovation Lab. %, and Peter Buhr is partially funded by the Natural Sciences and Engineering Research Council of Canada.
 
 {%
Index: doc/papers/concurrency/annex/local.bib
===================================================================
--- doc/papers/concurrency/annex/local.bib	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/papers/concurrency/annex/local.bib	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -59,5 +59,5 @@
 @manual{Cpp-Transactions,
 	keywords	= {C++, Transactional Memory},
-	title		= {Technical Specification for C++ Extensions for Transactional Memory},
+	title		= {Tech. Spec. for C++ Extensions for Transactional Memory},
 	organization= {International Standard ISO/IEC TS 19841:2015 },
 	publisher   = {American National Standards Institute},
Index: doc/papers/concurrency/mail2
===================================================================
--- doc/papers/concurrency/mail2	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/papers/concurrency/mail2	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -959,2 +959,117 @@
 Software: Practice and Experience Editorial Office
 
+
+
+Date: Wed, 2 Sep 2020 20:55:34 +0000
+From: Richard Jones <onbehalfof@manuscriptcentral.com>
+Reply-To: R.E.Jones@kent.ac.uk
+To: tdelisle@uwaterloo.ca, pabuhr@uwaterloo.ca
+Subject: Software: Practice and Experience - Decision on Manuscript ID
+ SPE-19-0219.R2
+
+02-Sep-2020
+
+Dear Dr Buhr,
+
+Many thanks for submitting SPE-19-0219.R2 entitled "Advanced Control-flow and Concurrency in Cforall" to Software: Practice and Experience. The paper has now been reviewed and the comments of the referees are included at the bottom of this letter. I apologise for the length of time it has taken to get these.
+
+Both reviewers consider this paper to be close to acceptance. However, before I can accept this paper, I would like you address the comments of Reviewer 2, particularly with regard to the description of the adaptation Java harness to deal with warmup. I would expect to see a convincing argument that the computation has reached a steady state. I would also like you to provide the values for N for each benchmark run. This should be very straightforward for you to do. There are a couple of papers on steady state that you may wish to consult (though I am certainly not pushing my own work).
+
+1) Barrett, Edd; Bolz-Tereick, Carl Friedrich; Killick, Rebecca; Mount, Sarah and Tratt, Laurence. Virtual Machine Warmup Blows Hot and Cold. OOPSLA 2017. https://doi.org/10.1145/3133876
+Virtual Machines (VMs) with Just-In-Time (JIT) compilers are traditionally thought to execute programs in two phases: the initial warmup phase determines which parts of a program would most benefit from dynamic compilation, before JIT compiling those parts into machine code; subsequently the program is said to be at a steady state of peak performance. Measurement methodologies almost always discard data collected during the warmup phase such that reported measurements focus entirely on peak performance. We introduce a fully automated statistical approach, based on changepoint analysis, which allows us to determine if a program has reached a steady state and, if so, whether that represents peak performance or not. Using this, we show that even when run in the most controlled of circumstances, small, deterministic, widely studied microbenchmarks often fail to reach a steady state of peak performance on a variety of common VMs. Repeating our experiment on 3 different machines, we found that at most 43.5% of pairs consistently reach a steady state of peak performance.
+
+2) Kalibera, Tomas and Jones, Richard. Rigorous Benchmarking in Reasonable Time. ISMM  2013. https://doi.org/10.1145/2555670.2464160
+Experimental evaluation is key to systems research. Because modern systems are complex and non-deterministic, good experimental methodology demands that researchers account for uncertainty. To obtain valid results, they are expected to run many iterations of benchmarks, invoke virtual machines (VMs) several times, or even rebuild VM or benchmark binaries more than once. All this repetition costs time to complete experiments. Currently, many evaluations give up on sufficient repetition or rigorous statistical methods, or even run benchmarks only in training sizes. The results reported often lack proper variation estimates and, when a small difference between two systems is reported, some are simply unreliable.In contrast, we provide a statistically rigorous methodology for repetition and summarising results that makes efficient use of experimentation time. Time efficiency comes from two key observations. First, a given benchmark on a given platform is typically prone to much less non-determinism than the common worst-case of published corner-case studies. Second, repetition is most needed where most uncertainty arises (whether between builds, between executions or between iterations). We capture experimentation cost with a novel mathematical model, which we use to identify the number of repetitions at each level of an experiment necessary and sufficient to obtain a given level of precision.We present our methodology as a cookbook that guides researchers on the number of repetitions they should run to obtain reliable results. We also show how to present results with an effect size confidence interval. As an example, we show how to use our methodology to conduct throughput experiments with the DaCapo and SPEC CPU benchmarks on three recent platforms.
+
+You have 42 days from the date of this email to submit your revision. If you are unable to complete the revision within this time, please contact me to request a short extension.
+
+You can upload your revised manuscript and submit it through your Author Center. Log into https://mc.manuscriptcentral.com/spe and enter your Author Center, where you will find your manuscript title listed under "Manuscripts with Decisions".
+
+When submitting your revised manuscript, you will be able to respond to the comments made by the referee(s) in the space provided.  You can use this space to document any changes you make to the original manuscript.
+
+If you would like help with English language editing, or other article preparation support, Wiley Editing Services offers expert help with English Language Editing, as well as translation, manuscript formatting, and figure formatting at www.wileyauthors.com/eeo/preparation. You can also check out our resources for Preparing Your Article for general guidance about writing and preparing your manuscript at www.wileyauthors.com/eeo/prepresources.
+ 
+Once again, thank you for submitting your manuscript to Software: Practice and Experience. I look forward to receiving your revision.
+
+Sincerely,
+Richard
+
+Prof. Richard Jones
+Editor, Software: Practice and Experience
+R.E.Jones@kent.ac.uk
+
+Referee(s)' Comments to Author:
+
+Reviewing: 1
+
+Comments to the Author
+Overall, I felt that this draft was an improvement on previous drafts and I don't have further changes to request. 
+
+I appreciated the new language to clarify the relationship of external and internal scheduling, for example, as well as the new measurements of Rust tokio. Also, while I still believe that the choice between thread/generator/coroutine and so forth could be made crisper and clearer, the current draft of Section 2 did seem adequate to me in terms of specifying the considerations that users would have to take into account to make the choice.
+
+
+Reviewing: 2
+
+Comments to the Author
+First: let me apologise for the delay on this review. I'll blame the global pandemic combined with my institution's senior management's counterproductive decisions for taking up most of my time and all of my energy.
+
+At this point, reading the responses, I think we've been around the course enough times that further iteration is unlikely to really improve the paper any further, so I'm happy to recommend acceptance.    My main comments are that there were some good points in the responses to *all* the reviews and I strongly encourage the authors to incorporate those discursive responses into the final paper so they may benefit readers as well as reviewers.   I agree with the recommendations of reviewer #2 that the paper could usefully be split in to two, which I think I made to a previous revision, but I'm happy to leave that decision to the Editor. 
+
+Finally, the paper needs to describe how the Java harness was adapted to deal with warmup; why the computation has warmed up and reached a steady state - similarly for js and Python. The tables should also give the "N" chosen for each benchmark run.
+ 
+minor points
+* don't start sentences with "However"
+* most downloaded isn't an "Award"
+
+
+
+Date: Thu, 1 Oct 2020 05:34:29 +0000
+From: Richard Jones <onbehalfof@manuscriptcentral.com>
+Reply-To: R.E.Jones@kent.ac.uk
+To: pabuhr@uwaterloo.ca
+Subject: Revision reminder - SPE-19-0219.R2
+
+01-Oct-2020
+
+Dear Dr Buhr
+
+SPE-19-0219.R2
+
+This is a reminder that your opportunity to revise and re-submit your manuscript will expire 14 days from now. If you require more time please contact me directly and I may grant an extension to this deadline, otherwise the option to submit a revision online, will not be available.
+
+If your article is of potential interest to the general public, (which means it must be timely, groundbreaking, interesting and impact on everyday society) then please e-mail ejp@wiley.co.uk explaining the public interest side of the research. Wiley will then investigate the potential for undertaking a global press campaign on the article.
+
+I look forward to receiving your revision.
+
+Sincerely,
+
+Prof. Richard Jones
+Editor, Software: Practice and Experience
+
+https://mc.manuscriptcentral.com/spe
+
+
+
+Date: Tue, 6 Oct 2020 15:29:41 +0000
+From: Mayank Roy Chowdhury <onbehalfof@manuscriptcentral.com>
+Reply-To: speoffice@wiley.com
+To: tdelisle@uwaterloo.ca, pabuhr@uwaterloo.ca
+Subject: SPE-19-0219.R3 successfully submitted
+
+06-Oct-2020
+
+Dear Dr Buhr,
+
+Your manuscript entitled "Advanced Control-flow and Concurrency in Cforall" has been successfully submitted online and is presently being given full consideration for publication in Software: Practice and Experience.
+
+Your manuscript number is SPE-19-0219.R3.  Please mention this number in all future correspondence regarding this submission.
+
+You can view the status of your manuscript at any time by checking your Author Center after logging into https://mc.manuscriptcentral.com/spe.  If you have difficulty using this site, please click the 'Get Help Now' link at the top right corner of the site.
+
+
+Thank you for submitting your manuscript to Software: Practice and Experience.
+
+Sincerely,
+
+Software: Practice and Experience Editorial Office
+
Index: doc/papers/concurrency/response3
===================================================================
--- doc/papers/concurrency/response3	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/papers/concurrency/response3	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,27 @@
+        I would like you address the comments of Reviewer 2, particularly with regard to the description of the adaptation Java harness to deal with warmup. I would expect to see a convincing argument that the computation has reached a steady state.
+
+We understand referee2 and your concern about the JIT experiments, which is why we verified our experiments with two experts in JIT development for both Java and Node.js before submitting the paper. We also read the supplied papers, but most of the information is not applicable to our work for the following reasons.
+
+1. SPEC benchmarks are medium to large. In contrast, our benchmarks are 5-15 lines in length for each programming language (see code for the Cforall tests in the paper). Hence, there is no significant computations, complex control flow, or use of memory. They test one specific language features (context switch, mutex call, etc.) in isolation over and over again. These language features are fixed (e.g., acquiring and releasing a lock is a fixed cost). Therefore, unless the feature can be removed there is nothing to optimize at runtime. But these features cannot be removed without changing the meaning of the benchmark. If the feature is removed, the timing result would be 0. In fact, it was difficult to prevent the JIT from completely eliding some benchmarks because there are no side-effects.
+
+2. All of our benchmark results correlate across programming languages with and without JIT, indicating the JIT has completed any runtime optimizations (added this sentence to Section 8.1). Any large differences are explained by how a language implements a feature not by how the compiler/JIT precesses that feature. Section 8.1 discusses these points in detail.
+
+3. We also added a sentence about running all JIT-base programming language experiments for 30 minutes and there was no statistical difference, med/avg/std correlated with the short-run experiments, which seems a convincing argument that the benchmark has reached a steady state. If the JIT takes longer than 30 minutes to achieve its optimization goals, it is unlikely to be useful.
+
+4. The purpose of the performance section is not to draw conclusions about improvements. It is to contrast program-language implementation approaches. Section 8.1 talks about ramifications of certain design and implementation decisions with respect to overall performance. The only conclusion we draw about performance is:
+
+   Performance comparisons with other concurrent systems and languages show the Cforall approach is competitive across all basic operations, which translates directly into good performance in well-written applications with advanced control-flow.
+
+
+       I would also like you to provide the values for N for each benchmark run.
+
+Done.
+
+
+Referee 2 suggested
+
+   * don't start sentences with "However"
+
+However, there are numerous grammar sites on the web indicating "however" (a conjunction) at the start of a sentence is acceptable, e.g.:
+
+https://www.merriam-webster.com/words-at-play/can-you-start-a-sentence-with-however This is a stylistic choice, more than anything else, as we have a considerable body of evidence of writers using however to begin sentences, frequently with the meaning of "nevertheless."
Index: doc/proposals/ZeroCostPreemption.md
===================================================================
--- doc/proposals/ZeroCostPreemption.md	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/proposals/ZeroCostPreemption.md	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,16 @@
+## "Zero Cost" Preemption in for Cforall ##
+
+Similar to "Zero Cost" exceptions, this is a proposal to support preemption with little to no runtime cost for the book-keeping. (Other than having exceptions).
+
+Preemption stops users threads at random locations and forces a context switch using a signal handler. Since this is not safe and/or does not make sense in many contexts, the runtime needs a system to disable interrupts for certain regions of codes.
+
+Currently, Cforall uses _[kernel] thread-local storage_(TLS) to handle this, setting a flag to false when preemption should be disabled. This works on x86/x64 but only with a specific TLS model, and does not work with ARM. The problem is that if the loading of the TLS variable is not done in a single instruction, it allows a race condition, where user-threads could disable preemption for the wrong processor, i.e., be moved to a different processor and update the previous processor.
+
+The fix being worked on is to protect the specific TLS variable with a special function.
+
+## The Proposal ##
+A better approach, would be to re-use the Exception Handling Data structure to identify regions of code that do not allow preemption. These regions of code would be marked using the same mechanism which marks stack unwinding requirements.
+
+When the signal handler is called, it would search the stack similarly to how the stack is searched when an exception is thrown and do the context switch or not based on the result.
+
+This is an optimization, since signal handlers for preemption are already rare and costly but enabling/disabling interrupts is very common (1000x more common). Using the "Zero-Cost" exception mechanism, enabling/disabling interrupts should be free at runtime and the rare signal/handler become more expensive.
Index: doc/proposals/function_type_change.md
===================================================================
--- doc/proposals/function_type_change.md	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/proposals/function_type_change.md	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,34 @@
+## Eliminate Variable Declarations in Function Type ##
+
+The parameters of a function had been living in the wrong place.
+
+As the function type has no relation with the actual declarations of the variables, but only the types of them, putting declarations in FunctionType is unnecessary.
+Meanwhile, in new-ast data model, the declaration nodes should be kept as unique as possible, since they semantically denote unique objects in the source code. Shared declarations often lead to undesirable behaviors, especially when weak references exist (reminder: currently weak references only point to declarations). They also pose difficulty for implementing correct _functional_ algorithms, as copying a declaration node _should_ always mean creating a new entity.
+In the programming language and type theory model, declarations are also never a part of function type; the functions `int f(int a)` and `int f(int b)` have the exact same type (int)->(int), and representing the type as (int a)->(int b) is misleading.
+
+
+## Summary of Changes ##
+
+- `ast::FunctionDecl`
+Now owns its parameter and return variables directly.
+
+- `ast::FunctionType`
+Parameter and return types are now pure types (no more decls)
+Forall clause is part of type information so it is still kept.
+
+- Unify.cc
+Renamed some functions to reflect the changes (decl -> type)
+
+- Convert.cpp
+Drop decls in function type, unless it is directly in function decl (move them to `FunctionDecl` params and returns)
+Add dummy variable decls while converting back.
+
+## Relevant Clean-up Work ##
+
+- CurrentObject.cpp
+No longer has weak references to type nodes and replaced by raw pointers. Using weak pointers do not accomplish anything since a non in-place mutation outside invalidates current iterator anyways and an in-place mutation outside is still seen by the iterator with just a raw pointer.
+
+- Validate.cc
+`EnumAndPointerDecay` is redundant in `resolveTypeof` and therefore dropped.
+Note: this pass needs some structural change to accommodate the new function type representation.
+
Index: doc/refrat/refrat.tex
===================================================================
--- doc/refrat/refrat.tex	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/refrat/refrat.tex	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -11,6 +11,6 @@
 %% Created On       : Wed Apr  6 14:52:25 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Wed Jan 31 17:30:23 2018
-%% Update Count     : 108
+%% Last Modified On : Mon Oct  5 09:02:53 2020
+%% Update Count     : 110
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -30,36 +30,10 @@
 \usepackage{upquote}									% switch curled `'" to straight
 \usepackage{calc}
-\usepackage{xspace}
 \usepackage{varioref}									% extended references
-\usepackage{listings}									% format program code
 \usepackage[flushmargin]{footmisc}						% support label/reference in footnote
 \usepackage{latexsym}                                   % \Box glyph
 \usepackage{mathptmx}                                   % better math font with "times"
 \usepackage[usenames]{color}
-\input{common}                                          % common CFA document macros
-\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
-\usepackage{breakurl}
-\renewcommand{\UrlFont}{\small\sf}
-
-\usepackage[pagewise]{lineno}
-\renewcommand{\linenumberfont}{\scriptsize\sffamily}
-\usepackage[firstpage]{draftwatermark}
-\SetWatermarkLightness{0.9}
-
-% Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore
-% removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR
-% AFTER HYPERREF.
-\renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}
-
-\setlength{\topmargin}{-0.45in}							% move running title into header
-\setlength{\headsep}{0.25in}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\CFAStyle												% use default CFA format-style
-\lstnewenvironment{C++}[1][]                            % use C++ style
-{\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®}#1}}
-{}
-
+\newcommand{\CFALatin}{}
 % inline code ©...© (copyright symbol) emacs: C-q M-)
 % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
@@ -69,9 +43,33 @@
 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
 % math escape $...$ (dollar symbol)
+\input{common}                                          % common CFA document macros
+\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
+\usepackage{breakurl}
+\renewcommand{\UrlFont}{\small\sf}
+
+\usepackage[pagewise]{lineno}
+\renewcommand{\linenumberfont}{\scriptsize\sffamily}
+\usepackage[firstpage]{draftwatermark}
+\SetWatermarkLightness{0.9}
+
+% Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore
+% removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR
+% AFTER HYPERREF.
+\renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}
+
+\setlength{\topmargin}{-0.45in}							% move running title into header
+\setlength{\headsep}{0.25in}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
+\CFAStyle												% use default CFA format-style
+\lstnewenvironment{C++}[1][]                            % use C++ style
+{\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®},#1}}
+{}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
 % Names used in the document.
-\newcommand{\Version}{\input{../../version}}
+\newcommand{\Version}{\input{build/version}}
 \newcommand{\Textbf}[2][red]{{\color{#1}{\textbf{#2}}}}
 \newcommand{\Emph}[2][red]{{\color{#1}\textbf{\emph{#2}}}}
Index: doc/theses/andrew_beach_MMath/glossaries.tex
===================================================================
--- doc/theses/andrew_beach_MMath/glossaries.tex	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/andrew_beach_MMath/glossaries.tex	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,36 @@
+% Variaus Glossary Definitions
+
+% Main glossary entries -- definitions of relevant terminology
+\newglossaryentry{computer}
+{
+name=computer,
+description={A programmable machine that receives input data,
+               stores and manipulates the data, and provides
+               formatted output}
+}
+
+% Nomenclature glossary entries -- New definitions, or unusual terminology
+\newglossary*{nomenclature}{Nomenclature}
+\newglossaryentry{dingledorf}
+{
+type=nomenclature,
+name=dingledorf,
+description={A person of supposed average intelligence who makes
+               incredibly brainless misjudgments}
+}
+
+% List of Abbreviations (abbreviations are from the glossaries-extra package)
+\newabbreviation{aaaaz}{AAAAZ}{American Association of Amature Astronomers
+                               and Zoologists}
+
+% List of Symbols
+\newglossary*{symbols}{List of Symbols}
+\newglossaryentry{rvec}
+{
+name={$\mathbf{v}$},
+sort={label},
+type=symbols,
+description={Random vector: a location in n-dimensional Cartesian space,
+             where each dimensional component is determined by a random
+             process}
+}
Index: doc/theses/andrew_beach_MMath/thesis.tex
===================================================================
--- doc/theses/andrew_beach_MMath/thesis.tex	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/theses/andrew_beach_MMath/thesis.tex	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -34,37 +34,6 @@
 \usepackage[toc,abbreviations]{glossaries-extra}
 
-% Main glossary entries -- definitions of relevant terminology
-\newglossaryentry{computer}
-{
-name=computer,
-description={A programmable machine that receives input data,
-               stores and manipulates the data, and provides
-               formatted output}
-}
-
-% Nomenclature glossary entries -- New definitions, or unusual terminology
-\newglossary*{nomenclature}{Nomenclature}
-\newglossaryentry{dingledorf}
-{
-type=nomenclature,
-name=dingledorf,
-description={A person of supposed average intelligence who makes incredibly
-               brainless misjudgments}
-}
-
-% List of Abbreviations (abbreviations are from the glossaries-extra package)
-\newabbreviation{aaaaz}{AAAAZ}{American Association of Amature Astronomers
-               and Zoologists}
-
-% List of Symbols
-\newglossary*{symbols}{List of Symbols}
-\newglossaryentry{rvec}
-{
-name={$\mathbf{v}$},
-sort={label},
-type=symbols,
-description={Random vector: a location in n-dimensional Cartesian space, where
-               each dimensional component is determined by a random process}
-}
+% Define all the glossaries.
+\input{glossaries}
 
 % Generate the glossaries defined above.
Index: doc/theses/fangren_yu_COOP_S20/Makefile
===================================================================
--- doc/theses/fangren_yu_COOP_S20/Makefile	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/theses/fangren_yu_COOP_S20/Makefile	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -46,5 +46,4 @@
 # File Dependencies #
 
-
 ${DOCUMENT} : ${BASE}.ps
 	ps2pdf $<
Index: doc/theses/fangren_yu_COOP_S20/Report.tex
===================================================================
--- doc/theses/fangren_yu_COOP_S20/Report.tex	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/theses/fangren_yu_COOP_S20/Report.tex	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -1,3 +1,3 @@
-\documentclass[twoside,12pt]{article}
+\documentclass[twoside,11pt]{article}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -11,10 +11,17 @@
 \usepackage[labelformat=simple,aboveskip=0pt,farskip=0pt]{subfig}
 \renewcommand{\thesubfigure}{\alph{subfigure})}
+\usepackage[flushmargin]{footmisc}						% support label/reference in footnote
 \usepackage{latexsym}                                   % \Box glyph
 \usepackage{mathptmx}                                   % better math font with "times"
+\usepackage[toc]{appendix}								% article does not have appendix
 \usepackage[usenames]{color}
 \input{common}                                          % common CFA document macros
 \usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
 \usepackage{breakurl}
+\urlstyle{sf}
+
+% reduce spacing
+\setlist[itemize]{topsep=5pt,parsep=0pt}% global
+\setlist[enumerate]{topsep=5pt,parsep=0pt}% global
 
 \usepackage[pagewise]{lineno}
@@ -26,4 +33,5 @@
 \renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}
 \newcommand{\NOTE}{\textbf{NOTE}}
+\newcommand{\TODO}[1]{{\color{Purple}#1}}
 
 \setlength{\topmargin}{-0.45in}							% move running title into header
@@ -32,14 +40,10 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-\CFADefaults
+\CFAStyle												% CFA code-style for all languages
 \lstset{
-language=C++,											% make C++ the default language
-escapechar=\$,											% LaTeX escape in CFA code
-moredelim=**[is][\color{red}]{`}{`},
+language=C++,moredelim=**[is][\color{red}]{@}{@}		% make C++ the default language
 }% lstset
-\lstMakeShortInline@%
 \lstnewenvironment{C++}[1][]                            % use C++ style
-{\lstset{language=C++,moredelim=**[is][\protect\color{red}]{`}{`},#1}}
-{}
+{\lstset{language=C++,moredelim=**[is][\color{red}]{@}{@}}\lstset{#1}}{}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -84,178 +88,158 @@
 \section{Overview}
 
-cfa-cc is the reference compiler for the \CFA programming language, which is a non-
-object-oriented extension to C.
-\CFA attempts to introduce productive modern programming language features to C
-while maintaining as much backward-compatibility as possible, so that most existing C
-programs can seamlessly work with \CFA.
-
-Since the \CFA project was dated back to the early 2000s, and only restarted in the past
-few years, there is a significant amount of legacy code in the current compiler codebase,
-with little proper documentation available. This becomes a difficulty while developing new
-features based on the previous implementations, and especially while diagnosing
-problems.
-
-Currently, the \CFA team is also facing another problem: bad compiler performance. For
-the development of a new programming language, writing a standard library is an
-important part. The incompetence of the compiler causes building the library files to take
-tens of minutes, making iterative development and testing almost impossible. There is
-ongoing effort to rewrite the core data structure of the compiler to overcome the
-performance issue, but many bugs may appear during the work, and lack of documentation
-makes debugging extremely difficult.
-
-This developer's reference will be continuously improved and eventually cover the
-compiler codebase. For now, the focus is mainly on the parts being rewritten, and also the
-performance bottleneck, namely the resolution algorithm. It is aimed to provide new
-developers to the project enough guidance and clarify the purposes and behavior of certain
-functions which are not mentioned in the previous \CFA research papers.
+@cfa-cc@ is the reference compiler for the \CFA programming language, which is a non-object-oriented extension to C.
+\CFA attempts to introduce productive modern programming language features to C while maintaining as much backward-compatibility as possible, so that most existing C programs can seamlessly work with \CFA.
+
+Since the \CFA project dates back to the early 2000s, and only restarted in the past few years, there is a significant amount of legacy code in the current compiler codebase with little documentation.
+The lack of documentation makes it difficult to develop new features from the current implementation and diagnose problems.
+
+Currently, the \CFA team is also facing poor compiler performance.
+For the development of a new programming language, writing standard libraries is an important component.
+The slow compiler causes building of the library files to take tens of minutes, making iterative development and testing almost impossible.
+There is an ongoing effort to rewrite the core data-structure of the compiler to overcome the performance issue, but many bugs have appeared during this work, and lack of documentation is hampering debugging.
+
+This developer's reference manual begins the documentation and should be continuously im\-proved until it eventually covers the entire compiler codebase.
+For now, the focus is mainly on the parts being rewritten, and also the primary performance bottleneck, namely the resolution algorithm.
+Its aimed is to provide new project developers with guidance in understanding the codebase, and clarify the purpose and behaviour of certain functions that are not mentioned in the previous \CFA research papers~\cite{Bilson03,Ditchfield92,Moss19}.
 
 
 \section{Compiler Framework}
 
+\CFA source code is first transformed into an abstract syntax tree (AST) by the parser before analyzed by the compiler.
+
+
 \subsection{AST Representation}
 
-Source code input is first transformed into abstract syntax tree (AST) representation by the
-parser before analyzed by the compiler.
-
-There are 4 major categories of AST nodes used by the compiler, along with some derived
-structures.
-
-\subsubsection{Declaration nodes}
+
+There are 4 major categories of AST nodes used by the compiler, along with some derived structures.
+
+\subsubsection{Declaration Nodes}
 
 A declaration node represents either of:
 \begin{itemize}
 \item
-Type declaration: struct, union, typedef or type parameter (see Appendix A.3)
-\item
-Variable declaration
-\item
-Function declaration
+type declaration: @struct@, @union@, @typedef@ or type parameter (see \VRef[Appendix]{s:KindsTypeParameters})
+\item
+variable declaration
+\item
+function declaration
 \end{itemize}
 Declarations are introduced by standard C declarations, with the usual scoping rules.
-In addition, declarations can also be introduced by the forall clause (which is the origin
-of \CFA's name):
+In addition, declarations can also be qualified by the \lstinline[language=CFA]@forall@ clause (which is the origin of \CFA's name):
 \begin{cfa}
-forall (<$\emph{TypeParameterList}$> | <$\emph{AssertionList}$>)
+forall ( <$\emph{TypeParameterList}$> | <$\emph{AssertionList}$> )
 	$\emph{declaration}$
 \end{cfa}
-Type parameters in \CFA are similar to \CC template type parameters. The \CFA
-declaration
+Type parameters in \CFA are similar to \CC template type parameters.
+The \CFA declaration
 \begin{cfa}
 forall (dtype T) ...
 \end{cfa}
-behaves similarly as the \CC template declaration
+behaves similarly to the \CC template declaration
 \begin{C++}
 template <typename T> ...
 \end{C++}
 
-Assertions are a distinctive feature of \CFA: contrary to the \CC template where
-arbitrary functions and operators can be used in a template definition, in a \CFA
-parametric function, operations on parameterized types must be declared in assertions.
-
+Assertions are a distinctive feature of \CFA, similar to \emph{interfaces} in D and Go, and \emph{traits} in Rust.
+Contrary to the \CC template where arbitrary functions and operators can be used in a template definition, in a \CFA parametric function, operations on parameterized types must be declared in assertions.
 Consider the following \CC template:
 \begin{C++}
-template <typename T> int foo(T t) {
-	return bar(t) + baz(t);
+@template@ forall<typename T> T foo( T t ) {
+	return t + t * t;
 }
 \end{C++}
-Unless bar and baz are also parametric functions taking any argument type, they must be
-declared in the assertions, or otherwise the code will not compile:
+where there are no explicit requirements on the type @T@.
+Therefore, the \CC compiler must deduce what operators are required during textual (macro) expansion of the template at each usage.
+As a result, templates cannot be compiled.
+\CFA assertions specify restrictions on type parameters:
 \begin{cfa}
-forall (dtype T | { int bar(T); int baz(t); }) int foo (T t) {
-	return bar(t) + baz(t);
+forall( dtype T | @{ T ?+?( T, T ); T ?*?( T, T ) }@ ) int foo ( T t ) {
+	return t + t * t;
 }
 \end{cfa}
-Assertions are written using the usual function declaration syntax. The scope of type
-parameters and assertions is the following declaration.
-
-\subsubsection{Type nodes}
-
-A type node represents the type of an object or expression.
-Named types reference the corresponding type declarations. The type of a function is its
-function pointer type (same as standard C).
-With the addition of type parameters, named types may contain a list of parameter values
-(actual parameter types).
-
-\subsubsection{Statement nodes}
-
-Statement nodes represent the statements in the program, including basic expression
-statements, control flows and blocks.
+Assertions are written using the usual \CFA function declaration syntax.
+Only types with operators ``@+@'' and ``@*@'' work with this function, and the function prototype is sufficient to allow separate compilation.
+
+Type parameters and assertions are used in the following compiler data-structures.
+
+
+\subsubsection{Type Nodes}
+
+Type nodes represent the type of an object or expression.
+Named types reference the corresponding type declarations.
+The type of a function is its function pointer type (same as standard C).
+With the addition of type parameters, named types may contain a list of parameter values (actual parameter types).
+
+
+\subsubsection{Statement Nodes}
+
+Statement nodes represent the executable statements in the program, including basic expression statements, control flows and blocks.
 Local declarations (within a block statement) are represented as declaration statements.
 
-\subsubsection{Expression nodes}
-
-Some expressions are represented differently in the compiler before and after resolution
-stage:
+
+\subsubsection{Expression Nodes}
+
+Some expressions are represented differently before and after the resolution stage:
 \begin{itemize}
 \item
-Name expressions: NameExpr pre-resolution, VariableExpr post-resolution
-\item
-Member expressions: UntypedMemberExpr pre-resolution, MemberExpr post-resolution
-\item
-Function call expressions (including overloadable operators): UntypedExpr pre-resolution, ApplicationExpr post-resolution
+Name expressions: @NameExpr@ pre-resolution, @VariableExpr@ post-resolution
+\item
+Member expressions: @UntypedMemberExpr@ pre-resolution, @MemberExpr@ post-resolution
+\item
+\begin{sloppypar}
+Function call expressions (including overloadable operators): @UntypedExpr@ pre-resolution, @ApplicationExpr@ post-resolution
+\end{sloppypar}
 \end{itemize}
-The pre-resolution representations contain only the symbols. Post-resolution results link
-them to the actual variable and function declarations.
+The pre-resolution representation contains only the symbols.
+Post-resolution links them to the actual variable and function declarations.
 
 
 \subsection{Compilation Passes}
 
-Compilation steps are implemented as passes, which follows a general structural recursion
-pattern on the syntax tree.
-
-The basic work flow of compilation passes follows preorder and postorder traversal on
-tree data structure, implemented with visitor pattern, and can be loosely described with
-the following pseudocode:
-\begin{C++}
-Pass::visit (node_t node) {
-	previsit(node);
-	if (visit_children)
+Compilation steps are implemented as passes, which follows a general structural recursion pattern on the syntax tree.
+
+The basic workflow of compilation passes follows preorder and postorder traversal on the AST data-structure, implemented with visitor pattern, and can be loosely described with the following pseudocode:
+\begin{C++}
+Pass::visit( node_t node ) {
+	previsit( node );
+	if ( visit_children )
 		for each child of node:
-			child.accept(this);
-	postvisit(node);
+			child.accept( this );
+	postvisit( node );
 }
 \end{C++}
-Operations in previsit() happen in preorder (top to bottom) and operations in
-postvisit() happen in postorder (bottom to top). The precise order of recursive
-operations on child nodes can be found in @Common/PassVisitor.impl.h@ (old) and
-@AST/Pass.impl.hpp@ (new).
-Implementations of compilation passes need to follow certain conventions:
+Operations in @previsit@ happen in preorder (top to bottom) and operations in @postvisit@ happen in postorder (bottom to top).
+The precise order of recursive operations on child nodes can be found in @Common/PassVisitor.impl.h@ (old) and @AST/Pass.impl.hpp@ (new).
+
+Implementations of compilation passes follow certain conventions:
 \begin{itemize}
 \item
-Passes \textbf{should not} directly override the visit method (Non-virtual Interface
-principle); if a pass desires different recursion behavior, it should set
-@visit_children@ to false and perform recursive calls manually within previsit or
-postvisit procedures. To enable this option, inherit from @WithShortCircuiting@ mixin.
-\item
-previsit may mutate the node but \textbf{must not} change the node type or return null.
-\item
-postvisit may mutate the node, reconstruct it to a different node type, or delete it by
-returning null.
+Passes \textbf{should not} directly override the visit method (Non-virtual Interface principle);
+if a pass desires different recursion behaviour, it should set @visit_children@ to false and perform recursive calls manually within previsit or postvisit procedures.
+To enable this option, inherit from the @WithShortCircuiting@ mixin.
+\item
+previsit may mutate the node but \textbf{must not} change the node type or return @nullptr@.
+\item
+postvisit may mutate the node, reconstruct it to a different node type, or delete it by returning @nullptr@.
 \item
 If the previsit or postvisit method is not defined for a node type, the step is skipped.
-If the return type is declared as void, the original node is returned by default. These
-behaviors are controlled by template specialization rules; see
-@Common/PassVisitor.proto.h@ (old) and @AST/Pass.proto.hpp@ (new) for details.
+If the return type is declared as @void@, the original node is returned by default.
+These behaviours are controlled by template specialization rules;
+see @Common/PassVisitor.proto.h@ (old) and @AST/@ @Pass.proto.hpp@ (new) for details.
 \end{itemize}
 Other useful mixin classes for compilation passes include:
 \begin{itemize}
 \item
-WithGuards allows saving values of variables and restore automatically upon exiting
-the current node.
-\item
-WithVisitorRef creates a wrapped entity of current pass (the actual argument
-passed to recursive calls internally) for explicit recursion, usually used together
-with WithShortCircuiting.
-\item
-WithSymbolTable gives a managed symbol table with built-in scoping rule handling
-(\eg on entering and exiting a block statement)
+@WithGuards@ allows saving and restoring variable values automatically upon entering/exiting the current node.
+\item
+@WithVisitorRef@ creates a wrapped entity for the current pass (the actual argument passed to recursive calls internally) for explicit recursion, usually used together with @WithShortCircuiting@.
+\item
+@WithSymbolTable@ gives a managed symbol table with built-in scoping-rule handling (\eg on entering and exiting a block statement)
 \end{itemize}
-\NOTE: If a pass extends the functionality of another existing pass, due to \CC overloading
-resolution rules, it \textbf{must} explicitly introduce the inherited previsit and postvisit procedures
-to its own scope, or otherwise they will not be picked up by template resolution:
+\NOTE: If a pass extends the functionality of another existing pass, due to \CC overloading resolution rules, it \textbf{must} explicitly introduce the inherited previsit and postvisit procedures to its own scope, or otherwise they are not picked up by template resolution:
 \begin{C++}
 class Pass2: public Pass1 {
-	using Pass1::previsit;
-	using Pass1::postvisit;
+	@using Pass1::previsit;@
+	@using Pass1::postvisit;@
 	// new procedures
 }
@@ -263,76 +247,74 @@
 
 
-\subsection{Data Structure Change WIP (new-ast)}
-
-It has been observed that excessive copying of syntax tree structures accounts for a
-majority of computation cost and significantly slows down the compiler. In the previous
-implementation of the syntax tree, every internal node has a unique parent; therefore all
-copies are required to duplicate everything down to the bottom. A new, experimental
-re-implementation of the syntax tree (source under directory AST/ hereby referred to as
-``new-ast'') attempts to overcome this issue with a functional approach that allows sharing
-of common sub-structures and only makes copies when necessary.
-
-The core of new-ast is a customized implementation of smart pointers, similar to
-@std::shared_ptr@ and @std::weak_ptr@ in \CC standard library. Reference counting is
-used to detect sharing and allows optimization. For a purely functional (a.k.a. immutable)
-data structure, all mutations are modelled by shallow copies along the path of mutation.
+\subsection{Data Structure Change (new-ast)}
+
+It has been observed that excessive copying of syntax tree structures accounts for a majority of computation cost and significantly slows down the compiler.
+In the previous implementation of the syntax tree, every internal node has a unique parent;
+therefore all copies are required to duplicate the entire subtree.
+A new, experimental re-implementation of the syntax tree (source under directory @AST/@ hereby referred to as ``new-ast'') attempts to overcome this issue with a functional approach that allows sharing of common sub-structures and only makes copies when necessary.
+
+The core of new-ast is a customized implementation of smart pointers, similar to @std::shared_ptr@ and @std::weak_ptr@ in the \CC standard library.
+Reference counting is used to detect sharing and allowing certain optimizations.
+For a purely functional (immutable) data-structure, all mutations are modelled by shallow copies along the path of mutation.
 With reference counting optimization, unique nodes are allowed to be mutated in place.
-This however, may potentially introduce some complications and bugs; a few issues are
-discussed near the end of this section.
-
-\subsubsection{Source: AST/Node.hpp}
-
-class @ast::Node@ is the base class of all new-ast node classes, which implements
-reference counting mechanism. Two different counters are recorded: ``strong'' reference
-count for number of nodes semantically owning it; ``weak'' reference count for number of
-nodes holding a mere reference and only need to observe changes.
-class @ast::ptr_base@ is the smart pointer implementation and also takes care of
-resource management.
-
-Direct access through the smart pointer is read-only. A mutable access should be obtained
-by calling shallowCopy or mutate as below.
-
-Currently, the weak pointers are only used to reference declaration nodes from a named
-type, or a variable expression. Since declaration nodes are intended to denote unique
-entities in the program, weak pointers always point to unique (unshared) nodes. This may
-change in the future, and weak references to shared nodes may introduce some problems;
+This however, may potentially introduce some complications and bugs;
+a few issues are discussed near the end of this section.
+
+
+\subsubsection{Source: \lstinline{AST/Node.hpp}}
+
+Class @ast::Node@ is the base class of all new-ast node classes, which implements reference counting mechanism.
+Two different counters are recorded: ``strong'' reference count for number of nodes semantically owning it;
+``weak'' reference count for number of nodes holding a mere reference and only need to observe changes.
+Class @ast::ptr_base@ is the smart pointer implementation and also takes care of resource management.
+
+Direct access through the smart pointer is read-only.
+A mutable access should be obtained by calling @shallowCopy@ or mutate as below.
+
+Currently, the weak pointers are only used to reference declaration nodes from a named type, or a variable expression.
+Since declaration nodes are intended to denote unique entities in the program, weak pointers always point to unique (unshared) nodes.
+This property may change in the future, and weak references to shared nodes may introduce some problems;
 see mutate function below.
 
-All node classes should always use smart pointers in the structure and should not use raw
-pointers.
-
+All node classes should always use smart pointers in structure definitions versus raw pointers.
+Function
 \begin{C++}
 void ast::Node::increment(ref_type ref)
 \end{C++}
-Increments this node's strong or weak reference count.
+increments this node's strong or weak reference count.
+Function
 \begin{C++}
 void ast::Node::decrement(ref_type ref, bool do_delete = true)
 \end{C++}
-Decrements this node's strong or weak reference count. If strong reference count reaches
-zero, the node is deleted by default.
-\NOTE: Setting @do_delete@ to false may result in a detached node. Subsequent code should
-manually delete the node or assign it to a strong pointer to prevent memory leak.
+decrements this node's strong or weak reference count.
+If strong reference count reaches zero, the node is deleted.
+\NOTE: Setting @do_delete@ to false may result in a detached node.
+Subsequent code should manually delete the node or assign it to a strong pointer to prevent memory leak.
+
 Reference counting functions are internally called by @ast::ptr_base@.
+Function
 \begin{C++}
 template<typename node_t>
 node_t * shallowCopy(const node_t * node)
 \end{C++}
-Returns a mutable, shallow copy of node: all child pointers are pointing to the same child
-nodes.
+returns a mutable, shallow copy of node: all child pointers are pointing to the same child nodes.
+Function
 \begin{C++}
 template<typename node_t>
 node_t * mutate(const node_t * node)
 \end{C++}
-If node is unique (strong reference count is 1), returns a mutable pointer to the same node.
-Otherwise, returns shallowCopy(node).
-It is an error to mutate a shared node that is weak-referenced. Currently this does not
-happen. The problem may appear once weak pointers to shared nodes (\eg expression
-nodes) are used; special care will be needed.
-
-\NOTE: This naive uniqueness check may not be sufficient in some cases. A discussion of the
-issue is presented at the end of this section.
+returns a mutable pointer to the same node, if the node is unique (strong reference count is 1);
+otherwise, it returns @shallowCopy(node)@.
+It is an error to mutate a shared node that is weak-referenced.
+Currently this does not happen.
+A problem may appear once weak pointers to shared nodes (\eg expression nodes) are used;
+special care is needed.
+
+\NOTE: This naive uniqueness check may not be sufficient in some cases.
+A discussion of the issue is presented at the end of this section.
+Functions
 \begin{C++}
 template<typename node_t, typename parent_t, typename field_t, typename assn_t>
-const node_t * mutate_field(const node_t * node, field_t parent_t::*field, assn_t && val)
+const node_t * mutate_field(const node_t * node, field_t parent_t::* field, assn_t && val)
 \end{C++}
 \begin{C++}
@@ -342,10 +324,10 @@
 		field_t && val)
 \end{C++}
-Helpers for mutating a field on a node using pointer to member (creates shallow copy
-when necessary).
-
-\subsubsection{Issue: Undetected sharing}
-
-The @mutate@ behavior described above has a problem: deeper shared nodes may be
+are helpers for mutating a field on a node using pointer to a member function (creates shallow copy when necessary).
+
+
+\subsubsection{Issue: Undetected Sharing}
+
+The @mutate@ behaviour described above has a problem: deeper shared nodes may be
 mistakenly considered as unique. \VRef[Figure]{f:DeepNodeSharing} shows how the problem could arise:
 \begin{figure}
@@ -355,71 +337,59 @@
 \label{f:DeepNodeSharing}
 \end{figure}
-Suppose that we are working on the tree rooted at P1, which
-is logically the chain P1-A-B and P2 is irrelevant, and then
-mutate(B) is called. The algorithm considers B as unique since
-it is only directly owned by A. However, the other tree P2-A-B
-indirectly shares the node B and is therefore wrongly mutated.
-
-To partly address this problem, if the mutation is called higher up the tree, a chain
-mutation helper can be used:
-
-\subsubsection{Source: AST/Chain.hpp}
-
+Given the tree rooted at P1, which is logically the chain P1-A-B, and P2 is irrelevant, assume @mutate(B)@ is called.
+The algorithm considers B as unique since it is only directly owned by A.
+However, the other tree P2-A-B indirectly shares the node B and is therefore wrongly mutated.
+
+To partly address this problem, if the mutation is called higher up the tree, a chain mutation helper can be used.
+
+\subsubsection{Source: \lstinline{AST/Chain.hpp}}
+
+Function
 \begin{C++}
 template<typename node_t, Node::ref_type ref_t>
 auto chain_mutate(ptr_base<node_t, ref_t> & base)
 \end{C++}
-This function returns a chain mutator handle which takes pointer-to-member to go down
-the tree while creating shallow copies as necessary; see @struct _chain_mutator@ in the
-source code for details.
-
-For example, in the above diagram, if mutation of B is wanted while at P1, the call using
-@chain_mutate@ looks like the following:
+returns a chain mutator handle that takes pointer-to-member to go down the tree, while creating shallow copies as necessary;
+see @struct _chain_mutator@ in the source code for details.
+
+For example, in the above diagram, if mutation of B is wanted while at P1, the call using @chain_mutate@ looks like the following:
 \begin{C++}
 chain_mutate(P1.a)(&A.b) = new_value_of_b;
 \end{C++}
-Note that if some node in chain mutate is shared (therefore shallow copied), it implies that
-every node further down will also be copied, thus correctly executing the functional
-mutation algorithm. This example code creates copies of both A and B and performs
-mutation on the new nodes, so that the other tree P2-A-B is untouched.
-However, if a pass traverses down to node B and performs mutation, for example, in
-@postvisit(B)@, information on sharing higher up is lost. Since the new-ast structure is only in
-experimental use with the resolver algorithm, which mostly rebuilds the tree bottom-up,
-this issue does not actually happen. It should be addressed in the future when other
-compilation passes are migrated to new-ast and many of them contain procedural
-mutations, where it might cause accidental mutations to other logically independent trees
-(\eg common sub-expression) and become a bug.
-
-
-\vspace*{20pt} % FIX ME, spacing problem with this heading ???
+\NOTE: if some node in chain mutate is shared (therefore shallow copied), it implies that every node further down is also copied, thus correctly executing the functional mutation algorithm.
+This example code creates copies of both A and B and performs mutation on the new nodes, so that the other tree P2-A-B is untouched.
+However, if a pass traverses down to node B and performs mutation, for example, in @postvisit(B)@, information on sharing higher up is lost.
+Since the new-ast structure is only in experimental use with the resolver algorithm, which mostly rebuilds the tree bottom-up, this issue does not actually happen.
+It should be addressed in the future when other compilation passes are migrated to new-ast and many of them contain procedural mutations, where it might cause accidental mutations to other logically independent trees (\eg common sub-expression) and become a bug.
+
+
 \section{Compiler Algorithm Documentation}
 
-This documentation currently covers most of the resolver, data structures used in variable
-and expression resolution, and a few directly related passes. Later passes involving code
-generation is not included yet; documentation for those will be done afterwards.
+This compiler algorithm documentation covers most of the resolver, data structures used in variable and expression resolution, and a few directly related passes.
+Later passes involving code generation are not included yet;
+documentation for those will be done latter.
+
 
 \subsection{Symbol Table}
 
-\NOTE: For historical reasons, the symbol table data structure was called ``indexer'' in the
-old implementation. Hereby we will be using the name SymbolTable everywhere.
-The symbol table stores a mapping from names to declarations and implements a similar
-name space separation rule, and the same scoping rules in standard C.\footnote{ISO/IEC 9899:1999, Sections 6.2.1 and 6.2.3} The difference in
-name space rule is that typedef aliases are no longer considered ordinary identifiers.
-In addition to C tag types (struct, union, enum), \CFA introduces another tag type, trait,
-which is a named collection of assertions.
-
-\subsubsection{Source: AST/SymbolTable.hpp}
-
-\subsubsection{Source: SymTab/Indexer.h}
-
+\NOTE: For historical reasons, the symbol-table data-structure is called @indexer@ in the old implementation.
+Hereby, the name is changed to @SymbolTable@.
+The symbol table stores a mapping from names to declarations, implements a similar name-space separation rule, and provides the same scoping rules as standard C.\footnote{ISO/IEC 9899:1999, Sections 6.2.1 and 6.2.3.}
+The difference in name-space rule is that @typedef@ aliases are no longer considered ordinary identifiers.
+In addition to C tag-types (@struct@, @union@, @enum@), \CFA introduces another tag type, @trait@, which is a named collection of assertions.
+
+
+\subsubsection{Source: \lstinline{AST/SymbolTable.hpp}}
+
+Function
 \begin{C++}
 SymbolTable::addId(const DeclWithType * decl)
 \end{C++}
-Since \CFA allows overloading of variables and functions, ordinary identifier names need
-to be mangled. The mangling scheme is closely based on the Itanium \CC ABI,\footnote{\url{https://itanium-cxx-abi.github.io/cxx-abi/abi.html}, Section 5.1} while
-making adaptations to \CFA specific features, mainly assertions and overloaded variables
-by type. Naming conflicts are handled by mangled names; lookup by name returns a list of
-declarations with the same literal identifier name.
-
+provides name mangling of identifiers, since \CFA allows overloading of variables and functions.
+The mangling scheme is closely based on the Itanium \CC ABI,\footnote{\url{https://itanium-cxx-abi.github.io/cxx-abi/abi.html}, Section 5.1} while making adaptations to \CFA specific features, mainly assertions and overloaded variables by type.
+
+Naming conflicts are handled by mangled names;
+lookup by name returns a list of declarations with the same identifier name.
+Functions
 \begin{C++}
 SymbolTable::addStruct(const StructDecl * decl)
@@ -428,176 +398,175 @@
 SymbolTable::addTrait(const TraitDecl * decl)
 \end{C++}
-Adds a tag type declaration to the symbol table.
+add a tag-type declaration to the symbol table.
+Function
 \begin{C++}
 SymbolTable::addType(const NamedTypeDecl * decl)
 \end{C++}
-Adds a typedef alias to the symbol table.
-
-\textbf{C Incompatibility Note}: Since Cforall allows using struct, union and enum type names
-without the keywords, typedef names and tag type names cannot be disambiguated by
-syntax rules. Currently the compiler puts them together and disallows collision. The
-following program is valid C but not valid Cforall:
+adds a @typedef@ alias to the symbol table.
+
+\textbf{C Incompatibility Note}: Since \CFA allows using @struct@, @union@ and @enum@ type-names without a prefix keyword, as in \CC, @typedef@ names and tag-type names cannot be disambiguated by syntax rules.
+Currently the compiler puts them together and disallows collision.
+The following program is valid C but invalid \CFA (and \CC):
 \begin{C++}
 struct A {};
+typedef int A; // gcc: ok, cfa: Cannot redefine typedef A
+struct A sa; // C disambiguates via struct prefix
+A ia;
+\end{C++}
+In practices, such usage is extremely rare, and hence, this change (as in \CC) has minimal impact on existing C programs.
+The declaration
+\begin{C++}
+struct A {};
+typedef struct A A; // A is an alias for struct A
+A a;
+struct A b;
+\end{C++}
+is not an error because the alias name is identical to the original.
+Finally, the following program is allowed in \CFA:
+\begin{C++}
 typedef int A;
-// gcc: ok, cfa: Cannot redefine typedef A
-\end{C++}
-In actual practices however, such usage is extremely rare, and typedef struct A A; is
-not considered an error, but silently discarded. Therefore, we expect this change to have
-minimal impact on existing C programs.
-Meanwhile, the following program is allowed in Cforall:
-\begin{C++}
-typedef int A;
-void A();
+void A(); // name mangled
 // gcc: A redeclared as different kind of symbol, cfa: ok
 \end{C++}
+because the function name is mangled.
+
 
 \subsection{Type Environment and Unification}
 
-The core of parametric type resolution algorithm.
-Type Environment organizes type parameters in \textbf{equivalent classes} and maps them to
-actual types. Unification is the algorithm that takes two (possibly parametric) types and
-parameter mappings and attempts to produce a common type by matching the type
-environments.
+The following core ideas underlie the parametric type-resolution algorithm.
+A type environment organizes type parameters into \textbf{equivalent classes} and maps them to actual types.
+Unification is the algorithm that takes two (possibly parametric) types and parameter mappings, and attempts to produce a common type by matching information in the type environments.
 
 The unification algorithm is recursive in nature and runs in two different modes internally:
 \begin{itemize}
 \item
-\textbf{Exact} unification mode requires equivalent parameters to match perfectly;
-\item
-\textbf{Inexact} unification mode allows equivalent parameters to be converted to a
-common type.
+Exact unification mode requires equivalent parameters to match perfectly.
+\item
+Inexact unification mode allows equivalent parameters to be converted to a common type.
 \end{itemize}
-For a pair of matching parameters (actually, their equivalent classes), if either side is open
-(not bound to a concrete type yet), they are simply combined.
-
-Within inexact mode, types are allowed to differ on their cv-qualifiers; additionally, if a
-type never appear either in parameter list or as the base type of a pointer, it may also be
-widened (i.e. safely converted). As Cforall currently does not implement subclassing similar
-to object-oriented languages, widening conversions are on primitive types only, for
-example the conversion from int to long.
-
-The need for two unification modes come from the fact that parametric types are
-considered compatible only if all parameters are exactly the same (not just compatible).
-Pointer types also behaves similarly; in fact, they may be viewed as a primitive kind of
-parametric types. @int*@ and @long*@ are different types, just like @vector(int)@ and
-@vector(long)@ are, for the parametric type @vector(T)@.
-
-The resolver should use the following ``@public@'' functions:\footnote{
-Actual code also tracks assertions on type parameters; those extra arguments are omitted here for
-conciseness.}
-
-
-\subsubsection{Source: ResolvExpr/Unify.cc}
-
-\begin{C++}
-bool unify(const Type *type1, const Type *type2, TypeEnvironment &env,
-OpenVarSet &openVars, const SymbolTable &symtab, Type *&commonType)
-\end{C++}
-Attempts to unify @type1@ and @type2@ with current type environment.
-
-If operation succeeds, @env@ is modified by combining the equivalence classes of matching
-parameters in @type1@ and @type2@, and their common type is written to commonType.
-
-If operation fails, returns false.
-\begin{C++}
-bool typesCompatible(const Type * type1, const Type * type2, const
-SymbolTable &symtab, const TypeEnvironment &env)
-bool typesCompatibleIgnoreQualifiers(const Type * type1, const Type *
-type2, const SymbolTable &symtab, const TypeEnvironment &env)
-\end{C++}
-
-Determines if type1 and type2 can possibly be the same type. The second version ignores
-the outermost cv-qualifiers if present.\footnote{
-In const \lstinline@int * const@, only the second \lstinline@const@ is ignored.}
-
-The call has no side effect.
-
-\NOTE: No attempts are made to widen the types (exact unification is used), although the
-function names may suggest otherwise. E.g. @typesCompatible(int, long)@ returns false.
+For a pair of matching parameters (actually, their equivalent classes), if either side is open (not bound to a concrete type yet), they are combined.
+
+Within the inexact mode, types are allowed to differ on their cv-qualifiers (\eg @const@, @volatile@, \etc);
+additionally, if a type never appear either in a parameter list or as the base type of a pointer, it may also be widened (\ie safely converted).
+As \CFA currently does not implement subclassing as in object-oriented languages, widening conversions are only on the primitive types, \eg conversion from @int@ to @long int@.
+
+The need for two unification modes comes from the fact that parametric types are considered compatible only if all parameters are exactly the same (not just compatible).
+Pointer types also behaves similarly;
+in fact, they may be viewed as a primitive kind of parametric types.
+@int *@ and @long *@ are different types, just like @vector(int)@ and @vector(long)@ are, for the parametric type @*(T)@ / @vector(T)@, respectively.
+
+The resolver uses the following @public@ functions:\footnote{
+Actual code also tracks assertions on type parameters; those extra arguments are omitted here for conciseness.}
+
+
+\subsubsection{Source: \lstinline{ResolvExpr/Unify.cc}}
+
+Function
+\begin{C++}
+bool unify(const Type * type1, const Type * type2, TypeEnvironment & env,
+	OpenVarSet & openVars, const SymbolTable & symtab, Type *& commonType)
+\end{C++}
+returns a boolean indicating if the unification succeeds or fails after attempting to unify @type1@ and @type2@ within current type environment.
+If the unify succeeds, @env@ is modified by combining the equivalence classes of matching parameters in @type1@ and @type2@, and their common type is written to @commonType@.
+If the unify fails, nothing changes.
+Functions
+\begin{C++}
+bool typesCompatible(const Type * type1, const Type * type2, const SymbolTable & symtab,
+	const TypeEnvironment & env)
+bool typesCompatibleIgnoreQualifiers(const Type * type1, const Type * type2,
+	const SymbolTable & symtab, const TypeEnvironment & env)
+\end{C++}
+return a boolean indicating if types @type1@ and @type2@ can possibly be the same type.
+The second version ignores the outermost cv-qualifiers if present.\footnote{
+In \lstinline@const int * const@, only the second \lstinline@const@ is ignored.}
+These function have no side effects.
+
+\NOTE: No attempt is made to widen the types (exact unification is used), although the function names may suggest otherwise, \eg @typesCompatible(int, long)@ returns false.
 
 
 \subsection{Expression Resolution}
 
-The design of the current version of expression resolver is outlined in the Ph.D. Thesis from
-Aaron Moss~\cite{Moss19}.
-
+The design of the current version of expression resolver is outlined in the Ph.D.\ thesis by Aaron Moss~\cite{Moss19}.
 A summary of the resolver algorithm for each expression type is presented below.
 
-All overloadable operators are modelled as function calls. For a function call,
-interpretations of the function and arguments are found recursively. Then the following
-steps produce a filtered list of valid interpretations:
+All overloadable operators are modelled as function calls.
+For a function call, interpretations of the function and arguments are found recursively.
+Then the following steps produce a filtered list of valid interpretations:
 \begin{enumerate}
 \item
-From all possible combinations of interpretations of the function and arguments,
-those where argument types may be converted to function parameter types are
-considered valid.
+From all possible combinations of interpretations of the function and arguments, those where argument types may be converted to function parameter types are considered valid.
 \item
 Valid interpretations with the minimum sum of argument costs are kept.
 \item
-Argument costs are then discarded; the actual cost for the function call expression is
-the sum of conversion costs from the argument types to parameter types.
-\item
-For each return type, the interpretations with satisfiable assertions are then sorted
-by actual cost computed in step 3. If for a given type, the minimum cost
-interpretations are not unique, it is said that for that return type the interpretation
-is ambiguous. If the minimum cost interpretation is unique but contains an
-ambiguous argument, it is also considered ambiguous.
+\label{p:argcost}
+Argument costs are then discarded; the actual cost for the function call expression is the sum of conversion costs from the argument types to parameter types.
+\item
+\label{p:returntype}
+For each return type, the interpretations with satisfiable assertions are then sorted by actual cost computed in step~\ref{p:argcost}.
+If for a given type, the minimum cost interpretations are not unique, that return type is ambiguous.
+If the minimum cost interpretation is unique but contains an ambiguous argument, it is also ambiguous.
 \end{enumerate}
-Therefore, for each return type, the resolver produces either of:
+Therefore, for each return type, the resolver produces:
 \begin{itemize}
 \item
-No alternatives
-\item
-A single valid alternative
-\item
-An ambiguous alternative
+no alternatives
+\item
+a single valid alternative
+\item
+an ambiguous alternative
 \end{itemize}
-Note that an ambiguous alternative may be discarded at the parent expressions because a
-different return type matches better for the parent expressions.
-
-The non-overloadable expressions in Cforall are: cast expressions, address-of (unary @&@)
-expressions, short-circuiting logical expressions (@&&@, @||@) and ternary conditional
-expression (@?:@).
-
-For a cast expression, the convertible argument types are kept. Then the result is selected
-by lowest argument cost, and further by lowest conversion cost to target type. If the lowest
-cost is still not unique, or an ambiguous argument interpretation is selected, the cast
-expression is ambiguous. In an expression statement, the top level expression is implicitly
-cast to void.
+\NOTE: an ambiguous alternative may be discarded at the parent expressions because a different return type matches better for the parent expressions.
+
+The \emph{non}-overloadable expressions in \CFA are: cast expressions, address-of (unary @&@) expressions, short-circuiting logical expressions (@&&@, @||@) and ternary conditional expression (@?:@).
+
+For a cast expression, the convertible argument types are kept.
+Then the result is selected by lowest argument cost, and further by lowest conversion cost to target type.
+If the lowest cost is still not unique or an ambiguous argument interpretation is selected, the cast expression is ambiguous.
+In an expression statement, the top level expression is implicitly cast to @void@.
 
 For an address-of expression, only lvalue results are kept and the minimum cost is selected.
 
-For logical expressions @&&@ and @||@, arguments are implicitly cast to bool, and follow the rule
-of cast expression as above.
-
-For the ternary conditional expression, the condition is implicitly cast to bool, and the
-branch expressions must have compatible types. Each pair of compatible branch
-expression types produce a possible interpretation, and the cost is defined as the sum of
-expression costs plus the sum of conversion costs to the common type.
-
-TODO: Write a specification for expression costs.
+For logical expressions @&&@ and @||@, arguments are implicitly cast to @bool@, and follow the rules fr cast expression above.
+
+For the ternary conditional expression, the condition is implicitly cast to @bool@, and the branch expressions must have compatible types.
+Each pair of compatible branch expression types produce a possible interpretation, and the cost is defined as the sum of the expression costs plus the sum of conversion costs to the common type.
+
+
+\subsection{Conversion and Application Cost}
+
+There were some unclear parts in the previous documentation in the cost system, as described in the Moss thesis~\cite{Moss19}, section 4.1.2.
+Some clarification are presented in this section.
+
+\begin{enumerate}
+\item
+Conversion to a type denoted by parameter may incur additional cost if the match is not exact.
+For example, if a function is declared to accept @(T, T)@ and receives @(int, long)@, @T@ is deducted @long@ and an additional widening conversion cost is added for @int@ to @T@.
+
+\item
+The specialization level of a function is the sum of the least depth of an appearance of a type parameter (counting pointers, references and parameterized types), plus the number of assertions.
+A higher specialization level is favoured if argument conversion costs are equal.
+
+\item
+Coercion of pointer types is only allowed in explicit cast expressions;
+the only allowed implicit pointer casts are adding qualifiers to the base type and cast to @void*@, and these counts as safe conversions.
+Note that implicit cast from @void *@ to other pointer types is no longer valid, as opposed to standard C. 
+\end{enumerate}
 
 
 \subsection{Assertion Satisfaction}
 
-The resolver tries to satisfy assertions on expressions only when it is needed: either while
-selecting from multiple alternatives of a same result type for a function call (step 4 of
-resolving function calls), or upon reaching the top level of an expression statement.
-
-Unsatisfiable alternatives are discarded. Satisfiable alternatives receive \textbf{implicit
-parameters}: in Cforall, parametric functions are designed such that they can be compiled
-separately, as opposed to \CC templates which are only compiled at instantiation. Given a
-parametric function definition:
+The resolver tries to satisfy assertions on expressions only when it is needed: either while selecting from multiple alternatives of a same result type for a function call (step \ref{p:returntype} of resolving function calls) or upon reaching the top level of an expression statement.
+
+Unsatisfiable alternatives are discarded.
+Satisfiable alternatives receive \textbf{implicit parameters}: in \CFA, parametric functions may be separately compiled, as opposed to \CC templates which are only compiled at instantiation.
+Given the parametric function-definition:
 \begin{C++}
 forall (otype T | {void foo(T);})
 void bar (T t) { foo(t); }
 \end{C++}
-The function bar does not know which @foo@ to call when compiled without knowing the call
-site, so it requests a function pointer to be passed as an extra argument. At the call site,
-implicit parameters are automatically inserted by the compiler.
-
-\textbf{TODO}: Explain how recursive assertion satisfaction and polymorphic recursion work.
-
+the function @bar@ does not know which @foo@ to call when compiled without knowing the call site, so it requests a function pointer to be passed as an extra argument.
+At the call site, implicit parameters are automatically inserted by the compiler.
+
+Implementation of implicit parameters is discussed in \VRef[Appendix]{s:ImplementationParametricFunctions}.
 
 \section{Tests}
@@ -605,27 +574,26 @@
 \subsection{Test Suites}
 
-Automatic test suites are located under the @tests/@ directory. A test case consists of an
-input CFA source file (name ending with @.cfa@), and an expected output file located
-in @.expect/@ directory relative to the source file, with the same file name ending with @.txt@.
-So a test named @tuple/tupleCast@ has the following files, for example:
+Automatic test suites are located under the @tests/@ directory.
+A test case consists of an input CFA source file (suffix @.cfa@), and an expected output file located in the @tests/.expect/@ directory, with the same file name ending with suffix @.txt@.
+For example, the test named @tests/tuple/tupleCast.cfa@ has the following files, for example:
 \begin{C++}
 tests/
-..     tuple/
-......     .expect/
-..........       tupleCast.txt
-......     tupleCast.cfa
-\end{C++}
-If compilation fails, the error output is compared to the expect file. If compilation succeeds,
-the built program is run and its output compared to the expect file.
-To run the tests, execute the test script @test.py@ under the @tests/@ directory, with a list of
-test names to be run, or @--all@ to run all tests. The test script reports test cases
-fail/success, compilation time and program run time.
+	tuple/
+		.expect/
+			tupleCast.txt
+		tupleCast.cfa
+\end{C++}
+If compilation fails, the error output is compared to the expect file.
+If the compilation succeeds but does not generate an executable, the compilation output is compared to the expect file.
+If the compilation succeeds and generates an executable, the executable is run and its output is compared to the expect file.
+To run the tests, execute the test script @test.py@ under the @tests/@ directory, with a list of test names to be run, or @--all@ (or @make all-tests@) to run all tests.
+The test script reports test cases fail/success, compilation time and program run time.
+To see all the options available for @test.py@ using the @--help@ option.
 
 
 \subsection{Performance Reports}
 
-To turn on performance reports, pass @-S@ flag to the compiler.
-
-3 kinds of performance reports are available:
+To turn on performance reports, pass the @-XCFA -S@ flag to the compiler.
+Three kinds of performance reports are available:
 \begin{enumerate}
 \item
@@ -639,6 +607,124 @@
 @Common/Stats/Counter.h@.
 \end{enumerate}
-It is suggested to run performance tests with optimized build (@g++@ flag @-O3@)
-
+It is suggested to run performance tests with optimization (@g++@ flag @-O3@).
+
+
+\appendix
+\section{Appendix}
+
+\subsection{Kinds of Type Parameters}
+\label{s:KindsTypeParameters}
+
+A type parameter in a @forall@ clause has 3 kinds:
+\begin{enumerate}[listparindent=0pt]
+\item
+@dtype@: any data type (built-in or user defined) that is not a concrete type.
+
+A non-concrete type is an incomplete type such as an opaque type or pointer/reference with an implicit (pointer) size and implicitly generated reference and dereference operations.
+\item
+@otype@: any data type (built-in or user defined) that is concrete type.
+
+A concrete type is a complete type, \ie types that can be used to create a variable, which also implicitly asserts the existence of default and copy constructors, assignment, and destructor\footnote{\CFA implements the same automatic resource management (RAII) semantics as \CC.}.
+% \item
+% @ftype@: any function type.
+% 
+% @ftype@ provides two purposes:
+% \begin{itemize}
+% \item
+% Differentiate function pointer from data pointer because (in theory) some systems have different sizes for these pointers.
+% \item
+% Disallow a function pointer to match an overloaded data pointer, since variables and functions can have the same names.
+% \end{itemize}
+
+\item
+@ttype@: tuple (variadic) type.
+
+Restricted to the type for the last parameter in a function, it provides a type-safe way to implement variadic functions.
+Note however, that it has certain restrictions, as described in the implementation section below.
+\end{enumerate}
+
+
+\subsection{GNU C Nested Functions}
+
+\CFA is designed to be mostly compatible with GNU C, an extension to ISO C99 and C11 standards. The \CFA compiler also implements some language features by GCC extensions, most notably nested functions.
+
+In ISO C, function definitions are not allowed to be nested. GCC allows nested functions with full lexical scoping. The following example is taken from GCC documentation\footnote{\url{https://gcc.gnu.org/onlinedocs/gcc/Nested-Functions.html}}:
+\begin{C++}
+void bar( int * array, int offset, int size ) {
+	int access( int * array, int index ) { return array[index + offset]; }
+	int i;
+	/* ... */
+	for ( i = 0; i < size; i++ )
+		/* ... */ access (array, i) /* ... */
+}
+\end{C++}
+GCC nested functions behave identically to \CC lambda functions with default by-reference capture (stack-allocated, lifetime ends upon exiting the declared block), while also possible to be passed as arguments with standard function pointer types.
+
+
+\subsection{Implementation of Parametric Functions}
+\label{s:ImplementationParametricFunctions}
+
+\CFA implements parametric functions using the implicit parameter approach: required assertions are passed to the callee by function pointers;
+size of a parametric type must also be known if referenced directly (\ie not as a pointer). 
+
+The implementation is similar to the one from Scala\footnote{\url{https://www.scala-lang.org/files/archive/spec/2.13/07-implicits.html}}, with some notable differences in resolution:
+\begin{enumerate}
+\item
+All types, variables, and functions are candidates of implicit parameters
+\item
+The parameter (assertion) name must match the actual declarations.
+\end{enumerate}
+
+For example, the \CFA function declaration
+\begin{cfa}
+forall( otype T | { int foo( T, int ); } )
+int bar(T);
+\end{cfa}
+after implicit parameter expansion, has the actual signature\footnote{\textbf{otype} also requires the type to have constructor and destructor, which are the first two function pointers preceding the one for \textbf{foo}.}
+\begin{C++}
+int bar( T, size_t, void (*)(T&), void (*)(T&), int (*)(T, int) );
+\end{C++}
+The implicit parameter approach has an apparent issue: when the satisfying declaration is also parametric, it may require its own implicit parameters too.
+That also causes the supplied implicit parameter to have a different \textbf{actual} type than the \textbf{nominal} type, so it cannot be passed directly.
+Therefore, a wrapper with matching actual type must be created, and it is here where GCC nested functions are used internally by the compiler.
+
+Consider the following program:
+\begin{cfa}
+int assertion(int);
+
+forall( otype T | { int assertion(T); } )
+void foo(T);
+
+forall(otype T | { void foo(T); } )
+void bar(T t) {
+	foo(t);
+}
+\end{cfa}
+The \CFA compiler translates the program to non-parametric form\footnote{In the final code output, \lstinline@T@ needs to be replaced by an opaque type, and arguments must be accessed by a frame pointer offset table, due to the unknown sizes. The presented code here is simplified for better understanding.}
+\begin{C++}
+// ctor, dtor and size arguments are omitted
+void foo(T, int (*)(T));
+
+void bar(T t, void (*foo)(T)) {
+	foo(t);
+}
+\end{C++}
+However, when @bar(1)@ is called, @foo@ cannot be directly provided as an argument:
+\begin{C++}
+bar(1, foo); // WRONG: foo has different actual type
+\end{C++}
+and an additional step is required:
+\begin{C++}
+{
+	void _foo_wrapper(int t) {
+		foo( t, assertion );
+	}
+	bar( 1, _foo_wrapper );
+}
+\end{C++}
+Nested assertions and implicit parameter creation may continue indefinitely.
+This issue is a limitation of implicit parameter implementation.
+In particular, polymorphic variadic recursion must be structural (\ie the number of arguments decreases in any possible recursive calls), otherwise code generation gets into an infinite loop.
+The \CFA compiler sets a limit on assertion depth and reports an error if assertion resolution does not terminate within the limit (as for \lstinline[language=C++]@templates@ in \CC).
 
 \bibliographystyle{plain}
Index: c/theses/thierry_delisle_PhD/code/Makefile
===================================================================
--- doc/theses/thierry_delisle_PhD/code/Makefile	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,22 +1,0 @@
-
-
-CXXFLAGS = -O3 -g -Wall -Wextra -std=c++17
-LDFLAGS = -pthread -latomic
-
-push:
-	clang++ relaxed_list.cpp -g -Wall -Wextra -std=c++17 -fsyntax-only &&  rsync -av relaxed_list.cpp relaxed_list.hpp utils.hpp assert.hpp scale.sh plg7b:~/workspace/sched/.
-
-relaxed_list: $(firstword $(MAKEFILE_LIST)) | build
-	clang++ relaxed_list.cpp $(CXXFLAGS) $(LDFLAGS) -lpng -MMD -MF build/$(@).d -o $(@)
-
--include build/relaxed_list.d
-
-layout.ast: $(firstword $(MAKEFILE_LIST)) | build
-	clang++ relaxed_list_layout.cpp $(CXXFLAGS) -MMD -MF build/$(@).d -MT $(@) -E -o build/$(@).ii
-	clang++ -Xclang -fdump-record-layouts -fsyntax-only $(CXXFLAGS) build/$(@).ii > build/layout.ast.raw
-	cat build/$(@).raw > $(@)
-
--include build/layout.ast.d
-
-build:
-	mkdir -p build
Index: c/theses/thierry_delisle_PhD/code/assert.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/assert.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,22 +1,0 @@
-#pragma once
-
-#ifndef NDEBUG
-#include <cassert>
-#include <cstdlib>
-
-#define sstr(s) #s
-#define xstr(s) sstr(s)
-
-extern const char * __my_progname;
-
-#define assertf(cond, ...) ({             \
-	if(!(cond)) {                       \
-		fprintf(stderr, "%s: " __FILE__ ":" xstr(__LINE__) ": %s: Assertion '" xstr(cond) "' failed.\n", __my_progname, __PRETTY_FUNCTION__); \
-		fprintf(stderr, __VA_ARGS__); \
-		fprintf(stderr, "\n"); \
-		std::abort();                 \
-	}                                   \
-})
-#else
-#define assertf(cond, ...)
-#endif
Index: c/theses/thierry_delisle_PhD/code/bitbench/select.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/bitbench/select.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,186 +1,0 @@
-
-#include "../utils.hpp"
-
-void consume(int i, int j) __attribute__((noinline));
-void consume(int i, int j) {
-	asm volatile("":: "rm" (i), "rm" (i) );
-}
-
-static inline unsigned rand_bit_sw(unsigned rnum, size_t mask) {
-	unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0;
-	uint64_t v = mask;   // Input value to find position with rank r.
-	unsigned int r = bit + 1;// Input: bit's desired rank [1-64].
-	unsigned int s;      // Output: Resulting position of bit with rank r [1-64]
-	uint64_t a, b, c, d; // Intermediate temporaries for bit count.
-	unsigned int t;      // Bit count temporary.
-
-	// Do a normal parallel bit count for a 64-bit integer,
-	// but store all intermediate steps.
-	a =  v - ((v >> 1) & ~0UL/3);
-	b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
-	c = (b + (b >> 4)) & ~0UL/0x11;
-	d = (c + (c >> 8)) & ~0UL/0x101;
-
-
-	t = (d >> 32) + (d >> 48);
-	// Now do branchless select!
-	s  = 64;
-	s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
-	t  = (d >> (s - 16)) & 0xff;
-	s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
-	t  = (c >> (s - 8)) & 0xf;
-	s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
-	t  = (b >> (s - 4)) & 0x7;
-	s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
-	t  = (a >> (s - 2)) & 0x3;
-	s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
-	t  = (v >> (s - 1)) & 0x1;
-	s -= ((t - r) & 256) >> 8;
-	return s - 1;
-}
-
-static inline unsigned rand_bit_hw(unsigned rnum, size_t mask) {
-	unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0;
-	uint64_t picked = _pdep_u64(1ul << bit, mask);
-	return picked ? __builtin_ctzl(picked) : 0;
-}
-
-struct TLS {
-	Random rng = { 6 };
-} tls;
-
-const unsigned numLists = 64;
-
-static inline void blind() {
-	int i = tls.rng.next() % numLists;
-	int j = tls.rng.next() % numLists;
-
-	consume(i, j);
-}
-
-std::atomic_size_t list_mask[7];
-static inline void bitmask_sw() {
-	unsigned i, j;
-	{
-		// Pick two lists at random
-		unsigned num = ((numLists - 1) >> 6) + 1;
-
-		unsigned ri = tls.rng.next();
-		unsigned rj = tls.rng.next();
-
-		unsigned wdxi = (ri >> 6u) % num;
-		unsigned wdxj = (rj >> 6u) % num;
-
-		size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
-		size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
-
-		unsigned bi = rand_bit_sw(ri, maski);
-		unsigned bj = rand_bit_sw(rj, maskj);
-
-		i = bi | (wdxi << 6);
-		j = bj | (wdxj << 6);
-	}
-
-	consume(i, j);
-}
-
-static inline void bitmask_hw() {
-	#if !defined(__BMI2__)
-		#warning NO bmi2 for pdep rand_bit
-		return;
-	#endif
-	unsigned i, j;
-	{
-		// Pick two lists at random
-		unsigned num = ((numLists - 1) >> 6) + 1;
-
-		unsigned ri = tls.rng.next();
-		unsigned rj = tls.rng.next();
-
-		unsigned wdxi = (ri >> 6u) % num;
-		unsigned wdxj = (rj >> 6u) % num;
-
-		size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
-		size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
-
-		unsigned bi = rand_bit_hw(ri, maski);
-		unsigned bj = rand_bit_hw(rj, maskj);
-
-		i = bi | (wdxi << 6);
-		j = bj | (wdxj << 6);
-	}
-
-	consume(i, j);
-}
-
-struct {
-	const unsigned mask = 7;
-	const unsigned depth = 3;
-	const uint64_t indexes = 0x0706050403020100;
-	uint64_t masks( unsigned node ) {
-		return 0xff00ffff00ff;
-	}
-} snzm;
-static inline void sparsemask() {
-	#if !defined(__BMI2__)
-		#warning NO bmi2 for sparse mask
-		return;
-	#endif
-	unsigned i, j;
-	{
-		// Pick two random number
-		unsigned ri = tls.rng.next();
-		unsigned rj = tls.rng.next();
-
-		// Pick two nodes from it
-		unsigned wdxi = ri & snzm.mask;
-		unsigned wdxj = rj & snzm.mask;
-
-		// Get the masks from the nodes
-		size_t maski = snzm.masks(wdxi);
-		size_t maskj = snzm.masks(wdxj);
-
-		uint64_t idxsi = _pext_u64(snzm.indexes, maski);
-		uint64_t idxsj = _pext_u64(snzm.indexes, maskj);
-
-		auto pi = __builtin_popcountll(maski);
-		auto pj = __builtin_popcountll(maskj);
-
-		ri = pi ? ri & ((pi >> 3) - 1) : 0;
-		rj = pj ? rj & ((pj >> 3) - 1) : 0;
-
-		unsigned bi = (idxsi >> (ri << 3)) & 0xff;
-		unsigned bj = (idxsj >> (rj << 3)) & 0xff;
-
-		i = (bi << snzm.depth) | wdxi;
-		j = (bj << snzm.depth) | wdxj;
-	}
-
-	consume(i, j);
-}
-
-template<typename T>
-void benchmark( T func, const std::string & name ) {
-	std::cout << "Starting " << name << std::endl;
-	auto before = Clock::now();
-	const int N = 250'000'000;
-	for(int i = 0; i < N; i++) {
-		func();
-	}
-	auto after = Clock::now();
-	duration_t durr = after - before;
-	double duration = durr.count();
-	std::cout << "Duration(s) : " << duration << std::endl;
-	std::cout << "Ops/sec     : " << uint64_t(N / duration) << std::endl;
-	std::cout << "ns/Op       : " << double(duration * 1'000'000'000.0 / N) << std::endl;
-	std::cout << std::endl;
-}
-
-int main() {
-	std::cout.imbue(std::locale(""));
-
-	benchmark(blind, "Blind guess");
-	benchmark(bitmask_sw, "Dense bitmask");
-	benchmark(bitmask_hw, "Dense bitmask with Parallel Deposit");
-	benchmark(sparsemask, "Parallel Extract bitmask");
-}
Index: c/theses/thierry_delisle_PhD/code/bts.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/bts.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,279 +1,0 @@
-#include <array>
-#include <iomanip>
-#include <iostream>
-#include <locale>
-#include <string>
-#include <thread>
-#include <vector>
-
-#include <getopt.h>
-#include <unistd.h>
-#include <sys/sysinfo.h>
-
-#include "utils.hpp"
-
-// ================================================================================================
-//                        UTILS
-// ================================================================================================
-
-struct local_stat_t {
-	size_t cnt = 0;
-};
-
-struct global_stat_t {
-	std::atomic_size_t cnt = { 0 };
-};
-
-void atomic_max(std::atomic_size_t & target, size_t value) {
-	for(;;) {
-		size_t expect = target.load(std::memory_order_relaxed);
-		if(value <= expect) return;
-		bool success = target.compare_exchange_strong(expect, value);
-		if(success) return;
-	}
-}
-
-void atomic_min(std::atomic_size_t & target, size_t value) {
-	for(;;) {
-		size_t expect = target.load(std::memory_order_relaxed);
-		if(value >= expect) return;
-		bool success = target.compare_exchange_strong(expect, value);
-		if(success) return;
-	}
-}
-
-void tally_stats(global_stat_t & global, local_stat_t & local) {
-	global.cnt   += local.cnt;
-}
-
-void waitfor(double & duration, barrier_t & barrier, std::atomic_bool & done) {
-	std::cout << "Starting" << std::endl;
-	auto before = Clock::now();
-	barrier.wait(0);
-
-	while(true) {
-		usleep(100000);
-		auto now = Clock::now();
-		duration_t durr = now - before;
-		if( durr.count() > duration ) {
-			done = true;
-			break;
-		}
-		std::cout << "\r" << std::setprecision(4) << durr.count();
-		std::cout.flush();
-	}
-
-	barrier.wait(0);
-	auto after = Clock::now();
-	duration_t durr = after - before;
-	duration = durr.count();
-	std::cout << "\rClosing down" << std::endl;
-}
-
-void waitfor(double & duration, barrier_t & barrier, const std::atomic_size_t & count) {
-	std::cout << "Starting" << std::endl;
-	auto before = Clock::now();
-	barrier.wait(0);
-
-	while(true) {
-		usleep(100000);
-		size_t c = count.load();
-		if( c == 0 ) {
-			break;
-		}
-		std::cout << "\r" << c;
-		std::cout.flush();
-	}
-
-	barrier.wait(0);
-	auto after = Clock::now();
-	duration_t durr = after - before;
-	duration = durr.count();
-	std::cout << "\rClosing down" << std::endl;
-}
-
-void print_stats(double duration, unsigned nthread, global_stat_t & global) {
-	std::cout << "Done" << std::endl;
-
-	size_t ops = global.cnt;
-	size_t ops_sec = size_t(double(ops) / duration);
-	size_t ops_thread = ops_sec / nthread;
-	auto dur_nano = duration_cast<std::nano>(1.0);
-
-	std::cout << "Duration      : " << duration << "s\n";
-	std::cout << "ns/Op         : " << ( dur_nano / ops_thread )<< "\n";
-	std::cout << "Ops/sec/thread: " << ops_thread << "\n";
-	std::cout << "Ops/sec       : " << ops_sec << "\n";
-	std::cout << "Total ops     : " << ops << "\n";
-}
-
-static inline bool bts(std::atomic_size_t & target, size_t bit ) {
-	/*
-	int result = 0;
-	asm volatile(
-		"LOCK btsq %[bit], %[target]\n\t"
-		:"=@ccc" (result)
-		: [target] "m" (target), [bit] "r" (bit)
-	);
- 	return result != 0;
-	/*/
-	size_t mask = 1ul << bit;
-	size_t ret = target.fetch_or(mask, std::memory_order_relaxed);
-	return (ret & mask) != 0;
-	//*/
-}
-
-static inline bool btr(std::atomic_size_t & target, size_t bit ) {
-	/*
-	int result = 0;
-	asm volatile(
-		"LOCK btrq %[bit], %[target]\n\t"
-		:"=@ccc" (result)
-		: [target] "m" (target), [bit] "r" (bit)
-	);
- 	return result != 0;
-	/*/
-	size_t mask = 1ul << bit;
-	size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);
-	return (ret & mask) != 0;
-	//*/
-}
-
-// ================================================================================================
-//                        EXPERIMENTS
-// ================================================================================================
-
-// ================================================================================================
-__attribute__((noinline)) void runPingPong_body(
-	std::atomic<bool>& done,
-	local_stat_t & local,
-	std::atomic_size_t & target,
-	size_t id
-) {
-	while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) {
-
-		bool ret;
-		ret = bts(target, id);
-		assert(!ret);
-
-		// -----
-
-		ret = btr(target, id);
-		assert(ret);
-		local.cnt++;
-	}
-}
-
-void run(unsigned nthread, double duration) {
-	// Barrier for synchronization
-	barrier_t barrier(nthread + 1);
-
-	// Data to check everything is OK
-	global_stat_t global;
-
-	// Flag to signal termination
-	std::atomic_bool done  = { false };
-
-	std::cout << "Initializing ";
-	// List being tested
-	std::atomic_size_t word = { 0 };
-	{
-		std::thread * threads[nthread];
-		unsigned i = 1;
-		for(auto & t : threads) {
-			t = new std::thread([&done, &word, &barrier, &global](unsigned tid) {
-				local_stat_t local;
-
-				// affinity(tid);
-
-				barrier.wait(tid);
-
-				// EXPERIMENT START
-
-				runPingPong_body(done, local, word, tid - 1);
-
-				// EXPERIMENT END
-
-				barrier.wait(tid);
-
-				tally_stats(global, local);
-			}, i++);
-		}
-
-		waitfor(duration, barrier, done);
-
-		for(auto t : threads) {
-			t->join();
-			delete t;
-		}
-	}
-
-	print_stats(duration, nthread, global);
-}
-
-// ================================================================================================
-
-int main(int argc, char * argv[]) {
-
-	double duration   = 5.0;
-	unsigned nthreads = 2;
-
-	std::cout.imbue(std::locale(""));
-
-	for(;;) {
-		static struct option options[] = {
-			{"duration",  required_argument, 0, 'd'},
-			{"nthreads",  required_argument, 0, 't'},
-			{0, 0, 0, 0}
-		};
-
-		int idx = 0;
-		int opt = getopt_long(argc, argv, "d:t:", options, &idx);
-
-		std::string arg = optarg ? optarg : "";
-		size_t len = 0;
-		switch(opt) {
-			case -1:
-				if(optind != argc) {
-					std::cerr << "Too many arguments " << argc << " " << idx << std::endl;
-					goto usage;
-				}
-				goto run;
-			// Numeric Arguments
-			case 'd':
-				try {
-					duration = std::stod(optarg, &len);
-					if(len != arg.size()) { throw std::invalid_argument(""); }
-				} catch(std::invalid_argument &) {
-					std::cerr << "Duration must be a valid double, was " << arg << std::endl;
-					goto usage;
-				}
-				break;
-			case 't':
-				try {
-					nthreads = std::stoul(optarg, &len);
-					if(len != arg.size() || nthreads > (8 * sizeof(size_t))) { throw std::invalid_argument(""); }
-				} catch(std::invalid_argument &) {
-					std::cerr << "Number of threads must be a positive integer less than or equal to " << sizeof(size_t) * 8 << ", was " << arg << std::endl;
-					goto usage;
-				}
-				break;
-			// Other cases
-			default: /* ? */
-				std::cerr << opt << std::endl;
-			usage:
-				std::cerr << "Usage: " << argv[0] << ": [options]" << std::endl;
-				std::cerr << std::endl;
-				std::cerr << "  -d, --duration=DURATION  Duration of the experiment, in seconds" << std::endl;
-				std::cerr << "  -t, --nthreads=NTHREADS  Number of kernel threads" << std::endl;
-				std::exit(1);
-		}
-	}
-	run:
-
-	check_cache_line_size();
-
-	std::cout << "Running " << nthreads << " threads for " << duration << " seconds" << std::endl;
-	run(nthreads, duration);
-	return 0;
-}
Index: c/theses/thierry_delisle_PhD/code/bts_test.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/bts_test.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,32 +1,0 @@
-#include <cassert>
-#include <iostream>
-
-bool bts(volatile size_t & target, size_t bit ) {
-	bool result = false;
-	asm volatile(
-		"LOCK btsq %[bit], %[target]\n\t"
-		:"=c" (result)
-		: [target] "m" (target), [bit] "r" (bit)
-	);
- 	return result;
-}
-
-bool btr(volatile size_t & target, size_t bit ) {
-	bool result = false;
-	asm volatile(
-		"LOCK btrq %[bit], %[target]\n\t"
-		:"=c" (result)
-		: [target] "m" (target), [bit] "r" (bit)
-	);
- 	return result;
-}
-
-int main() {
-	volatile size_t i = 0;
-	std::cout << std::hex << i << std::endl;
-	assert(bts(i, 31));
-	std::cout << std::hex << i << std::endl;
-	assert(btr(i, 31));
-	std::cout << std::hex << i << std::endl;
-	return 0;
-}
Index: c/theses/thierry_delisle_PhD/code/links.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/links.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,122 +1,0 @@
-#pragma once
-
-#include "assert.hpp"
-#include "utils.hpp"
-
-template<typename node_t>
-struct _LinksFields_t {
-	node_t * prev = nullptr;
-	node_t * next = nullptr;
-	volatile unsigned long long ts = 0;
-	unsigned hint = (unsigned)-1;
-};
-
-template<typename node_t>
-class __attribute__((aligned(128))) intrusive_queue_t {
-public:
-	typedef spinlock_t lock_t;
-
-	struct stat {
-		ssize_t diff = 0;
-		size_t  push = 0;
-		size_t  pop  = 0;
-	};
-
-private:
-	struct sentinel_t {
-		_LinksFields_t<node_t> _links;
-	};
-
-public:
-	lock_t lock;
-
-private:
-	sentinel_t before;
-	sentinel_t after;
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Winvalid-offsetof"
-	static constexpr auto fields_offset = offsetof( node_t, _links );
-#pragma GCC diagnostic pop
-public:
-	intrusive_queue_t()
-		: before{{ nullptr, tail() }}
-		, after {{ head(), nullptr }}
-	{
-		/* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
-		/* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
-		/* paranoid */ assert(head()->_links.prev == nullptr);
-		/* paranoid */ assert(head()->_links.next == tail() );
-		/* paranoid */ assert(tail()->_links.next == nullptr);
-		/* paranoid */ assert(tail()->_links.prev == head() );
-		/* paranoid */ assert(sizeof(*this) == 128);
-		/* paranoid */ assert((intptr_t(this) % 128) == 0);
-	}
-
-	~intrusive_queue_t() = default;
-
-	inline node_t * head() const {
-		node_t * rhead = reinterpret_cast<node_t *>(
-			reinterpret_cast<uintptr_t>( &before ) - fields_offset
-		);
-		assert(rhead);
-		return rhead;
-	}
-
-	inline node_t * tail() const {
-		node_t * rtail = reinterpret_cast<node_t *>(
-			reinterpret_cast<uintptr_t>( &after ) - fields_offset
-		);
-		assert(rtail);
-		return rtail;
-	}
-
-	inline bool push(node_t * node) {
-		assert(lock);
-		assert(node->_links.ts != 0);
-		node_t * tail = this->tail();
-
-		node_t * prev = tail->_links.prev;
-		// assertf(node->_links.ts >= prev->_links.ts,
-		// 	"New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
-		node->_links.next = tail;
-		node->_links.prev = prev;
-		prev->_links.next = node;
-		tail->_links.prev = node;
-
-		if(before._links.ts == 0l) {
-			before._links.ts = node->_links.ts;
-			assert(node->_links.prev == this->head());
-			return true;
-		}
-		return false;
-	}
-
-	inline std::pair<node_t *, bool> pop() {
-		assert(lock);
-		node_t * head = this->head();
-		node_t * tail = this->tail();
-
-		node_t * node = head->_links.next;
-		node_t * next = node->_links.next;
-		if(node == tail) return {nullptr, false};
-
-		head->_links.next = next;
-		next->_links.prev = head;
-
-		if(next == tail) {
-			before._links.ts = 0l;
-			return {node, true};
-		}
-		else {
-			assert(next->_links.ts != 0);
-			before._links.ts = next->_links.ts;
-			assert(before._links.ts != 0);
-			return {node, false};
-		}
-	}
-
-	long long ts() const {
-		return before._links.ts;
-	}
-};
Index: c/theses/thierry_delisle_PhD/code/prefetch.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/prefetch.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,106 +1,0 @@
-#include <algorithm>
-#include <array>
-#include <chrono>
-#include <iostream>
-#include <locale>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include <cassert>
-
-struct __attribute__((aligned(64))) element {
-	size_t value;
-};
-
-using block = std::array<element, 100>;
-
-block * create() {
-	block * b = new block();
-	for(auto & e : *b) {
-		e.value = rand();
-	}
-	b->back().value = b->size();
-
-	return b;
-}
-
-static inline size_t find(const block & b) {
-	size_t r = 0;
-	for(; r < b.size(); r++) {
-		if(__builtin_expect(b[r].value == b.size(), false)) break;
-	}
-
-	return r;
-}
-
-void usage(char * argv[]) {
-	std::cerr << argv[0] << ": [DURATION (FLOAT:SEC)] [NBLOCKS]" << std::endl;;
-	std::exit(1);
-}
-
-int main(int argc, char * argv[]) {
-	size_t nblocks = 1000;
-	double duration = 5;
-
-	std::cout.imbue(std::locale(""));
-
-	switch (argc)
-	{
-	case 3:
-		nblocks = std::stoul(argv[2]);
-		[[fallthrough]];
-	case 2:
-		duration = std::stod(argv[1]);
-		if( duration <= 0.0 ) {
-			std::cerr << "Duration must be positive, was " << argv[1] << "(" << duration << ")" << std::endl;
-			usage(argv);
-		}
-		[[fallthrough]];
-	case 1:
-		break;
-	default:
-		usage(argv);
-		break;
-	}
-
-	std::vector<std::unique_ptr<block>> blocks;
-	for(size_t i = 0; i < nblocks; i++) {
-		blocks.emplace_back( create() );
-	}
-	std::random_shuffle(blocks.begin(), blocks.end());
-
-	size_t CRC = 0;
-	size_t count = 0;
-
-	using clock = std::chrono::high_resolution_clock;
-	auto before = clock::now();
-
-	while(true) {
-		for(const auto & b : blocks) {
-			CRC += find(*b);
-			count++;
-		}
-		auto now = clock::now();
-		std::chrono::duration<double> durr = now - before;
-		if( durr.count() > duration ) {
-			break;
-		}
-	}
-
-	auto after = clock::now();
-	std::chrono::duration<double> durr = after - before;
-	duration = durr.count();
-
-	using std::chrono::duration_cast;
-	using std::chrono::nanoseconds;
-
-	size_t ops_sec = size_t(double(count) / duration);
-	auto dur_nano = duration_cast<nanoseconds>(std::chrono::duration<double>(1.0)).count();
-
-	std::cout << "CRC           : " << CRC << "\n";
-	std::cout << "Duration      : " << duration << "s\n";
-	std::cout << "Total ops     : " << count << "\n";
-	std::cout << "Ops/sec       : " << ops_sec << "\n";
-	std::cout << "ns/Op         : " << ( dur_nano / ops_sec )<< "\n";
-}
Index: c/theses/thierry_delisle_PhD/code/process.sh
===================================================================
--- doc/theses/thierry_delisle_PhD/code/process.sh	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,42 +1,0 @@
-#!/bin/bash
-
-NAME=$1
-
-if [ ! -f "raw/${NAME}.out" ]; then
-    echo "Not output for ${NAME}"
-    exit 1
-fi
-
-if [ ! -f "raw/${NAME}.data" ]; then
-    echo "Not perf record for ${NAME}"
-    exit 1
-fi
-
-echo "Processing perf data for ${NAME}"
-
-OPS=$(grep -e 'Total ops' raw/${NAME}.out)
-CPOP=$( echo "Hello $OPS" | \grep -oP ", \K[0-9,]+(?=o)" --color | tr -d ',')
-CPUSH=$(echo "Hello $OPS" | \grep -oP "\(\K[0-9,]+(?=i)" --color | tr -d ',')
-
-REPORT=''
-perf report -n --percent-limit 5 --stdio --no-children -i raw/${NAME}.data > raw/.temp
-EVENT=$(cat raw/.temp | grep -e '^# Samples'| cut -d ' ' -f 6)
-SPOP=$( cat raw/.temp | grep -e '] relaxed_list<Node>::pop'  | tr -s ' ' | cut -d ' ' -f 3)
-SPUSH=$(cat raw/.temp | grep -e '] relaxed_list<Node>::push' | tr -s ' ' | cut -d ' ' -f 3)
-SARR=$( cat raw/.temp | grep -e '] snz[i|m]_t::node::arrive_h'   | tr -s ' ' | cut -d ' ' -f 3)
-
-echo "$OPS"
-echo "Push count: $CPUSH"
-echo "Pop  count: $CPOP"
-
-echo "Pop    samples: $SPOP"
-echo "Push   samples: $SPUSH"
-echo "Arrive samples: $SARR"
-
-SpPUSH=$(bc -l <<< "scale=9; $SPUSH / $CPUSH")
-SpPOP=$( bc -l <<< "scale=9; $SPOP  / $CPOP" )
-SpARR=$( bc -l <<< "scale=9; $SARR  / $CPUSH")
-
-printf "%s per push()  : %.9f\n" $EVENT $SpPUSH | sed ':a;s/\B[0-9]\{3\}\>/,&/;ta'
-printf "%s per pop()   : %.9f\n" $EVENT $SpPOP  | sed ':a;s/\B[0-9]\{3\}\>/,&/;ta'
-printf "%s per arrive(): %.9f\n" $EVENT $SpARR  | sed ':a;s/\B[0-9]\{3\}\>/,&/;ta'
Index: c/theses/thierry_delisle_PhD/code/processor.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/processor.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,53 +1,0 @@
-#include <atomic>
-
-struct thread {};
-
-struct cluster {
-	void add();
-	void remove();
-	thread * next();
-};
-
-struct processor {
-
-	cluster cluster;
-	std::atomic<bool> stop;
-	volatile bool idle;
-};
-
-
-void run(thread * ) {
-	// verify preemption
-
-	// run Thread
-
-	// verify preemption
-
-	// finish Running
-}
-
-void main(processor & self) {
-
-	self.cluster.add();
-
-	while(!self.stop) {
-		if(thread * t = self.cluster.next()) {
-			run(t);
-			continue;
-		}
-
-		self.set_idle();
-		std::atomic_thread_fence();
-
-		if(thread * t = self.cluster.next()) {
-			self.idle = false;
-			run(t);
-			continue;
-		}
-
-		halt();
-	}
-
-	self.cluster.remove();
-
-}
Index: c/theses/thierry_delisle_PhD/code/processor_list.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/processor_list.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,215 +1,0 @@
-#include <cassert>
-
-#include <atomic>
-#include <new>
-#include <type_traits>
-
-struct processor;
-
-struct __attribute__((aligned(64))) processor_id {
-	std::atomic<processor *> handle;
-	std::atomic<bool> lock;
-
-	processor_id() = default;
-	processor_id(processor * proc) : handle(proc), lock() {
-		/*paranoid*/ assert(std::atomic_is_lock_free(&lock));
-	}
-};
-
-extern unsigned num();
-
-#define ERROR throw 1
-
-class processor_list {
-private:
-
-	static const constexpr std::size_t cache_line_size = 64;
-
-	static_assert(sizeof (processor_id) <= cache_line_size, "ERROR: Instances must fit in one cache line" );
-	static_assert(alignof(processor_id) == cache_line_size, "ERROR: Instances must aligned to one cache line" );
-
-	const unsigned max;     // total cachelines allocated
-	std::atomic_uint alloc; // cachelines currently in use
-	std::atomic_uint ready; // cachelines ready to iterate over (!= to alloc when thread is in second half of doregister)
-	std::atomic<bool> lock; // writerlock
-	processor_id * data;    // data pointer
-
-private:
-	inline void acquire(std::atomic<bool> & ll) {
-		while( __builtin_expect(ll.exchange(true),false) ) {
-			while(ll.load(std::memory_order_relaxed))
-				asm volatile("pause");
-		}
-		/* paranoid */ assert(ll);
-	}
-
-public:
-	processor_list()
-		: max(num())
-		, alloc(0)
-		, ready(0)
-		, lock{false}
-		, data( new processor_id[max] )
-	{
-		/*paranoid*/ assert(num() == max);
-		/*paranoid*/ assert(std::atomic_is_lock_free(&alloc));
-		/*paranoid*/ assert(std::atomic_is_lock_free(&ready));
-	}
-
-	~processor_list() {
-		delete[] data;
-	}
-
-	//=======================================================================
-	// Lock-Free registering/unregistering of threads
-	unsigned doregister(processor * proc) {
-		// Step - 1 : check if there is already space in the data
-		uint_fast32_t s = ready;
-
-		// Check among all the ready
-		for(uint_fast32_t i = 0; i < s; i++) {
-			processor * null = nullptr; // Re-write every loop since compare thrashes it
-			if( data[i].handle.load(std::memory_order_relaxed) == null
-			 && data[i].handle.compare_exchange_strong(null, proc)) {
-				/*paranoid*/ assert(i < ready);
-				/*paranoid*/ assert(alignof(decltype(data[i])) == cache_line_size);
-				/*paranoid*/ assert((uintptr_t(&data[i]) % cache_line_size) == 0);
-				return i;
-			}
-		}
-
-		if(max <= alloc) ERROR;
-
-		// Step - 2 : F&A to get a new spot in the array.
-		uint_fast32_t n = alloc++;
-		if(max <= n) ERROR;
-
-		// Step - 3 : Mark space as used and then publish it.
-		void * storage = &data[n];
-		new (storage) processor_id( proc );
-		while(true) {
-			unsigned copy = n;
-			if( ready.load(std::memory_order_relaxed) == n
-			 && ready.compare_exchange_weak(copy, n + 1) )
-			 	break;
-			asm volatile("pause");
-		}
-
-		// Return new spot.
-		/*paranoid*/ assert(n < ready);
-		/*paranoid*/ assert(alignof(decltype(data[n])) == cache_line_size);
-		/*paranoid*/ assert((uintptr_t(&data[n]) % cache_line_size) == 0);
-		return n;
-	}
-
-	processor * unregister(unsigned iproc) {
-		/*paranoid*/ assert(iproc < ready);
-		auto ret = data[iproc].handle.load(std::memory_order_relaxed);
-		data[iproc].handle = nullptr;
-		return ret;
-	}
-
-	// Reset all registration
-	// Unsafe in most cases, use for testing only.
-	void reset() {
-		alloc = 0;
-		ready = 0;
-	}
-
-	processor * get(unsigned iproc) {
-		return data[iproc].handle.load(std::memory_order_relaxed);
-	}
-
-	//=======================================================================
-	// Reader-writer lock implementation
-	// Concurrent with doregister/unregister,
-	//    i.e., threads can be added at any point during or between the entry/exit
-
-	//-----------------------------------------------------------------------
-	// Reader side
-	void read_lock(unsigned iproc) {
-		/*paranoid*/ assert(iproc < ready);
-
-		// Step 1 : make sure no writer are in the middle of the critical section
-		while(lock.load(std::memory_order_relaxed))
-			asm volatile("pause");
-
-		// Fence needed because we don't want to start trying to acquire the lock
-		// before we read a false.
-		// Not needed on x86
-		// std::atomic_thread_fence(std::memory_order_seq_cst);
-
-		// Step 2 : acquire our local lock
-		acquire( data[iproc].lock );
-		/*paranoid*/ assert(data[iproc].lock);
-	}
-
-	void read_unlock(unsigned iproc) {
-		/*paranoid*/ assert(iproc < ready);
-		/*paranoid*/ assert(data[iproc].lock);
-		data[iproc].lock.store(false, std::memory_order_release);
-	}
-
-	//-----------------------------------------------------------------------
-	// Writer side
-	uint_fast32_t write_lock() {
-		// Step 1 : lock global lock
-		// It is needed to avoid processors that register mid Critical-Section
-		//   to simply lock their own lock and enter.
-		acquire(lock);
-
-		// Step 2 : lock per-proc lock
-		// Processors that are currently being registered aren't counted
-		//   but can't be in read_lock or in the critical section.
-		// All other processors are counted
-		uint_fast32_t s = ready;
-		for(uint_fast32_t i = 0; i < s; i++) {
-			acquire( data[i].lock );
-		}
-
-		return s;
-	}
-
-	void write_unlock(uint_fast32_t last_s) {
-		// Step 1 : release local locks
-		// This must be done while the global lock is held to avoid
-		//   threads that where created mid critical section
-		//   to race to lock their local locks and have the writer
-		//   immidiately unlock them
-		// Alternative solution : return s in write_lock and pass it to write_unlock
-		for(uint_fast32_t i = 0; i < last_s; i++) {
-			assert(data[i].lock);
-			data[i].lock.store(false, std::memory_order_release);
-		}
-
-		// Step 2 : release global lock
-		/*paranoid*/ assert(true == lock);
-		lock.store(false, std::memory_order_release);
-	}
-
-	//-----------------------------------------------------------------------
-	// Checking support
-	uint_fast32_t epoch_check() {
-		// Step 1 : lock global lock
-		// It is needed to avoid processors that register mid Critical-Section
-		//   to simply lock their own lock and enter.
-		while(lock.load(std::memory_order_relaxed))
-			asm volatile("pause");
-
-		// Step 2 : lock per-proc lock
-		// Processors that are currently being registered aren't counted
-		//   but can't be in read_lock or in the critical section.
-		// All other processors are counted
-		uint_fast32_t s = ready;
-		for(uint_fast32_t i = 0; i < s; i++) {
-			while(data[i].lock.load(std::memory_order_relaxed))
-				asm volatile("pause");
-		}
-
-		return s;
-	}
-
-public:
-};
-
-#undef ERROR
Index: c/theses/thierry_delisle_PhD/code/processor_list_fast.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/processor_list_fast.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,173 +1,0 @@
-#include "processor_list.hpp"
-
-#include <array>
-#include <iomanip>
-#include <iostream>
-#include <locale>
-#include <string>
-#include <thread>
-
-#include "utils.hpp"
-
-unsigned num() {
-	return 0x1000000;
-}
-
-//-------------------
-
-struct processor {
-	unsigned id;
-};
-void run(unsigned nthread, double duration, unsigned writes, unsigned epochs) {
-	assert(writes < 100);
-
-	// List being tested
-	processor_list list = {};
-
-	// Barrier for synchronization
-	barrier_t barrier(nthread + 1);
-
-	// Data to check everything is OK
-	size_t write_committed = 0ul;
-	struct {
-		std::atomic_size_t write = { 0ul };
-		std::atomic_size_t read  = { 0ul };
-		std::atomic_size_t epoch = { 0ul };
-	} lock_cnt;
-
-	// Flag to signal termination
-	std::atomic_bool done = { false };
-
-	std::thread * threads[nthread];
-	unsigned i = 1;
-	for(auto & t : threads) {
-		t = new std::thread([&done, &list, &barrier, &write_committed, &lock_cnt, writes, epochs](unsigned tid) {
-			Random rand(tid + rdtscl());
-			processor proc;
-			proc.id = list.doregister(&proc);
-			size_t writes_cnt = 0;
-			size_t reads_cnt = 0;
-			size_t epoch_cnt = 0;
-
-			affinity(tid);
-
-			barrier.wait(tid);
-
-			while(__builtin_expect(!done, true)) {
-				auto r = rand.next() % 100;
-				if (r < writes) {
-					auto n = list.write_lock();
-					write_committed++;
-					writes_cnt++;
-					assert(writes_cnt < -2ul);
-					list.write_unlock(n);
-				}
-				else if(r < epochs) {
-					list.epoch_check();
-					epoch_cnt++;
-				}
-				else {
-					list.read_lock(proc.id);
-					reads_cnt++;
-					assert(reads_cnt < -2ul);
-					list.read_unlock(proc.id);
-				}
-			}
-
-			barrier.wait(tid);
-
-			auto p = list.unregister(proc.id);
-			assert(&proc == p);
-			lock_cnt.write += writes_cnt;
-			lock_cnt.read  += reads_cnt;
-			lock_cnt.epoch += epoch_cnt;
-		}, i++);
-	}
-
-	auto before = Clock::now();
-	barrier.wait(0);
-
-	while(true) {
-		usleep(1000);
-		auto now = Clock::now();
-		duration_t durr = now - before;
-		if( durr.count() > duration ) {
-			done = true;
-			break;
-		}
-	}
-
-	barrier.wait(0);
-	auto after = Clock::now();
-	duration_t durr = after - before;
-	duration = durr.count();
-
-	for(auto t : threads) {
-		t->join();
-		delete t;
-	}
-
-	assert(write_committed == lock_cnt.write);
-
-	size_t totalop = lock_cnt.read + lock_cnt.write + lock_cnt.epoch;
-	size_t ops_sec = size_t(double(totalop) / duration);
-	size_t ops_thread = ops_sec / nthread;
-	double dur_nano = duration_cast<std::nano>(1.0);
-
-	std::cout << "Duration      : " << duration << "s\n";
-	std::cout << "Total ops     : " << totalop << "(" << lock_cnt.read << "r, " << lock_cnt.write << "w, " << lock_cnt.epoch << "e)\n";
-	std::cout << "Ops/sec       : " << ops_sec << "\n";
-	std::cout << "Ops/sec/thread: " << ops_thread << "\n";
-	std::cout << "ns/Op         : " << ( dur_nano / ops_thread )<< "\n";
-}
-
-void usage(char * argv[]) {
-	std::cerr << argv[0] << ": [DURATION (FLOAT:SEC)] [NTHREADS] [%WRITES]" << std::endl;;
-	std::exit(1);
-}
-
-int main(int argc, char * argv[]) {
-
-	double duration   = 5.0;
-	unsigned nthreads = 2;
-	unsigned writes   = 0;
-	unsigned epochs   = 0;
-
-	std::cout.imbue(std::locale(""));
-
-	switch (argc)
-	{
-	case 5:
-		epochs = std::stoul(argv[4]);
-		[[fallthrough]];
-	case 4:
-		writes = std::stoul(argv[3]);
-		if( (writes + epochs) > 100 ) {
-			std::cerr << "Writes + Epochs must be valid percentage, was " << argv[3] << " + " << argv[4] << "(" << writes << " + " << epochs << ")" << std::endl;
-			usage(argv);
-		}
-		[[fallthrough]];
-	case 3:
-		nthreads = std::stoul(argv[2]);
-		[[fallthrough]];
-	case 2:
-		duration = std::stod(argv[1]);
-		if( duration <= 0.0 ) {
-			std::cerr << "Duration must be positive, was " << argv[1] << "(" << duration << ")" << std::endl;
-			usage(argv);
-		}
-		[[fallthrough]];
-	case 1:
-		break;
-	default:
-		usage(argv);
-		break;
-	}
-
-	check_cache_line_size();
-
-	std::cout << "Running " << nthreads << " threads for " << duration << " seconds with " << writes << "% writes and " << epochs << "% epochs" << std::endl;
-	run(nthreads, duration, writes, epochs + writes);
-
-	return 0;
-}
Index: c/theses/thierry_delisle_PhD/code/processor_list_good.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/processor_list_good.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,269 +1,0 @@
-#include "processor_list.hpp"
-
-#include <iostream>
-#include <string>
-#include <thread>
-
-unsigned num() {
-	return 0x1000000;
-}
-
-// Barrier from
-class barrier_t {
-public:
-	barrier_t(size_t total)
-		: waiting(0)
-		, total(total)
-	{}
-
-	void wait(unsigned) {
-		size_t target = waiting++;
-		target = (target - (target % total)) + total;
-		while(waiting < target)
-			asm volatile("pause");
-
-		assert(waiting < (1ul << 60));
-    	}
-
-private:
-	std::atomic<size_t> waiting;
-	size_t total;
-};
-
-class Random {
-private:
-	unsigned int seed;
-public:
-	Random(int seed) {
-		this->seed = seed;
-	}
-
-	/** returns pseudorandom x satisfying 0 <= x < n. **/
-	unsigned int next() {
-		seed ^= seed << 6;
-		seed ^= seed >> 21;
-		seed ^= seed << 7;
-		return seed;
-    	}
-};
-
-//-------------------
-
-struct processor {
-	unsigned id;
-};
-
-// Stage 1
-// Make sure that the early registration works correctly
-// Registration uses a different process if the act of
-// registering the processor makes it the highest processor count
-// seen yet.
-void stage1(unsigned nthread, unsigned repeats) {
-	const int n = repeats;
-	const int nproc = 10;
-
-	// List being tested
-	processor_list list;
-
-	// Barrier for synchronization
-	barrier_t barrier(nthread + 1);
-
-	// Seen values to detect duplicattion
-	std::atomic<processor *> ids[nthread * nproc];
-	for(auto & i : ids) {
-		i = nullptr;
-	}
-
-	// Can't pass VLA to lambda
-	std::atomic<processor *> * idsp = ids;
-
-	// Threads which will run the code
-	std::thread * threads[nthread];
-	unsigned i = 1;
-	for(auto & t : threads) {
-		// Each thread will try to register a processor then add it to the
-		// list of registerd processor
-		t = new std::thread([&list, &barrier, idsp, n](unsigned tid){
-			processor proc[nproc];
-			for(int i = 0; i < n; i++) {
-				for(auto & p : proc) {
-					// Register the thread
-					p.id = list.doregister(&p);
-				}
-
-				for(auto & p : proc) {
-					// Make sure no one got this id before
-					processor * prev = idsp[p.id].exchange(&p);
-					assert(nullptr == prev);
-
-					// Make sure id is still consistend
-					assert(&p == list.get(p.id));
-				}
-
-				// wait for round to finish
-				barrier.wait(tid);
-
-				// wait for reset
-				barrier.wait(tid);
-			}
-		}, i++);
-	}
-
-	for(int i = 0; i < n; i++) {
-		//Wait for round to finish
-		barrier.wait(0);
-
-		// Reset list
-		list.reset();
-
-		std::cout << i << "\r";
-
-		// Reset seen values
-		for(auto & i : ids) {
-			i = nullptr;
-		}
-
-		// Start next round
-		barrier.wait(0);
-	}
-
-	for(auto t : threads) {
-		t->join();
-		delete t;
-	}
-}
-
-// Stage 2
-// Check that once churning starts, registration is still consistent.
-void stage2(unsigned nthread, unsigned repeats) {
-	// List being tested
-	processor_list list;
-
-	// Threads which will run the code
-	std::thread * threads[nthread];
-	unsigned i = 1;
-	for(auto & t : threads) {
-		// Each thread will try to register a few processors and
-		// unregister them, making sure that the registration is
-		// consistent
-		t = new std::thread([&list, repeats](unsigned tid){
-			processor procs[10];
-			for(unsigned i = 0; i < repeats; i++) {
-				// register the procs and note the id
-				for(auto & p : procs) {
-					p.id = list.doregister(&p);
-				}
-
-				if(1 == tid) std::cout << i << "\r";
-
-				// check the id is still consistent
-				for(const auto & p : procs) {
-					assert(&p == list.get(p.id));
-				}
-
-				// unregister and check the id is consistent
-				for(const auto & p : procs) {
-					assert(&p == list.unregister(p.id));
-				}
-			}
-		}, i++);
-	}
-
-	for(auto t : threads) {
-		t->join();
-		delete t;
-	}
-}
-
-bool is_writer();
-
-// Stage 3
-// Check that the reader writer lock works.
-void stage3(unsigned nthread, unsigned repeats) {
-	// List being tested
-	processor_list list;
-
-	size_t before = 0;
-
-	std::unique_ptr<size_t> after( new size_t(0) );
-
-	std::atomic<bool> done ( false );
-
-	// Threads which will run the code
-	std::thread * threads[nthread];
-	unsigned i = 1;
-	for(auto & t : threads) {
-		// Each thread will try to register a few processors and
-		// unregister them, making sure that the registration is
-		// consistent
-		t = new std::thread([&list, repeats, &before, &after, &done](unsigned tid){
-			Random rng(tid);
-			processor proc;
-			proc.id = list.doregister(&proc);
-			while(!done) {
-
-				if( (rng.next() % 100) == 0 ) {
-					auto r = list.write_lock();
-
-					auto b = before++;
-
-					std::cout << b << "\r";
-
-					(*after)++;
-
-					if(b >= repeats) done = true;
-
-					list.write_unlock(r);
-				}
-				else {
-					list.read_lock(proc.id);
-					assert(before == *after);
-					list.read_unlock(proc.id);
-				}
-
-			}
-
-			list.unregister(proc.id);
-		}, i++);
-	}
-
-	for(auto t : threads) {
-		t->join();
-		delete t;
-	}
-}
-
-int main(int argc, char * argv[]) {
-
-	unsigned nthreads = 1;
-	if( argc >= 3 ) {
-		size_t idx;
-		nthreads = std::stoul(argv[2], &idx);
-		assert('\0' == argv[2][idx]);
-	}
-
-	unsigned repeats = 100;
-	if( argc >= 2 ) {
-		size_t idx;
-		repeats = std::stoul(argv[1], &idx);
-		assert('\0' == argv[1][idx]);
-	}
-
-	processor_list::check_cache_line_size();
-
-	std::cout << "Running " << repeats << " repetitions on " << nthreads << " threads" << std::endl;
-	std::cout << "Checking registration - early" << std::endl;
-	stage1(nthreads, repeats);
-	std::cout << "Done                         " << std::endl;
-
-	std::cout << "Checking registration - churn" << std::endl;
-	stage2(nthreads, repeats);
-	std::cout << "Done                         " << std::endl;
-
-	std::cout << "Checking RW lock             " << std::endl;
-	stage3(nthreads, repeats);
-	std::cout << "Done                         " << std::endl;
-
-
-	return 0;
-}
Index: c/theses/thierry_delisle_PhD/code/randbit.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/randbit.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,236 +1,0 @@
-#include <cstddef>
-#include <cstdint>
-#include <x86intrin.h>
-
-__attribute__((noinline)) unsigned nthSetBit(size_t mask, unsigned bit) {
-	uint64_t v = mask;   // Input value to find position with rank r.
-	unsigned int r = bit;// Input: bit's desired rank [1-64].
-	unsigned int s;      // Output: Resulting position of bit with rank r [1-64]
-	uint64_t a, b, c, d; // Intermediate temporaries for bit count.
-	unsigned int t;      // Bit count temporary.
-
-	// Do a normal parallel bit count for a 64-bit integer,
-	// but store all intermediate steps.
-	// a = (v & 0x5555...) + ((v >> 1) & 0x5555...);
-	a =  v - ((v >> 1) & ~0UL/3);
-	// b = (a & 0x3333...) + ((a >> 2) & 0x3333...);
-	b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
-	// c = (b & 0x0f0f...) + ((b >> 4) & 0x0f0f...);
-	c = (b + (b >> 4)) & ~0UL/0x11;
-	// d = (c & 0x00ff...) + ((c >> 8) & 0x00ff...);
-	d = (c + (c >> 8)) & ~0UL/0x101;
-
-
-	t = (d >> 32) + (d >> 48);
-	// Now do branchless select!
-	s  = 64;
-	// if (r > t) {s -= 32; r -= t;}
-	s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
-	t  = (d >> (s - 16)) & 0xff;
-	// if (r > t) {s -= 16; r -= t;}
-	s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
-	t  = (c >> (s - 8)) & 0xf;
-	// if (r > t) {s -= 8; r -= t;}
-	s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
-	t  = (b >> (s - 4)) & 0x7;
-	// if (r > t) {s -= 4; r -= t;}
-	s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
-	t  = (a >> (s - 2)) & 0x3;
-	// if (r > t) {s -= 2; r -= t;}
-	s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
-	t  = (v >> (s - 1)) & 0x1;
-	// if (r > t) s--;
-	s -= ((t - r) & 256) >> 8;
-	// s = 65 - s;
-	return s;
-}
-
-unsigned rand_bit(unsigned rnum, uint64_t mask) {
-	unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0;
-#if defined(BRANCHLESS)
-	uint64_t v = mask;   // Input value to find position with rank r.
-	unsigned int r = bit + 1;// Input: bit's desired rank [1-64].
-	unsigned int s;      // Output: Resulting position of bit with rank r [1-64]
-	uint64_t a, b, c, d; // Intermediate temporaries for bit count.
-	unsigned int t;      // Bit count temporary.
-
-	// Do a normal parallel bit count for a 64-bit integer,
-	// but store all intermediate steps.
-	// a = (v & 0x5555...) + ((v >> 1) & 0x5555...);
-	a =  v - ((v >> 1) & ~0UL/3);
-	// b = (a & 0x3333...) + ((a >> 2) & 0x3333...);
-	b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
-	// c = (b & 0x0f0f...) + ((b >> 4) & 0x0f0f...);
-	c = (b + (b >> 4)) & ~0UL/0x11;
-	// d = (c & 0x00ff...) + ((c >> 8) & 0x00ff...);
-	d = (c + (c >> 8)) & ~0UL/0x101;
-
-
-	t = (d >> 32) + (d >> 48);
-	// Now do branchless select!
-	s  = 64;
-	// if (r > t) {s -= 32; r -= t;}
-	s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
-	t  = (d >> (s - 16)) & 0xff;
-	// if (r > t) {s -= 16; r -= t;}
-	s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
-	t  = (c >> (s - 8)) & 0xf;
-	// if (r > t) {s -= 8; r -= t;}
-	s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
-	t  = (b >> (s - 4)) & 0x7;
-	// if (r > t) {s -= 4; r -= t;}
-	s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
-	t  = (a >> (s - 2)) & 0x3;
-	// if (r > t) {s -= 2; r -= t;}
-	s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
-	t  = (v >> (s - 1)) & 0x1;
-	// if (r > t) s--;
-	s -= ((t - r) & 256) >> 8;
-	// s = 65 - s;
-	return s - 1;
-#elif defined(LOOP)
-	for(unsigned i = 0; i < bit; i++) {
-		mask ^= (1ul << (__builtin_ffsl(mask) - 1ul));
-	}
-	return __builtin_ffsl(mask) - 1ul;
-#elif defined(PDEP)
-	uint64_t picked = _pdep_u64(1ul << bit, mask);
-	return __builtin_ffsl(picked) - 1ul;
-#else
-#error must define LOOP, PDEP or BRANCHLESS
-#endif
-}
-
-#include <cassert>
-#include <atomic>
-#include <chrono>
-#include <iomanip>
-#include <iostream>
-#include <locale>
-#include <thread>
-
-#include <unistd.h>
-
-class barrier_t {
-public:
-	barrier_t(size_t total)
-		: waiting(0)
-		, total(total)
-	{}
-
-	void wait(unsigned) {
-		size_t target = waiting++;
-		target = (target - (target % total)) + total;
-		while(waiting < target)
-			asm volatile("pause");
-
-		assert(waiting < (1ul << 60));
-    	}
-
-private:
-	std::atomic<size_t> waiting;
-	size_t total;
-};
-
-class Random {
-private:
-	unsigned int seed;
-public:
-	Random(int seed) {
-		this->seed = seed;
-	}
-
-	/** returns pseudorandom x satisfying 0 <= x < n. **/
-	unsigned int next() {
-		seed ^= seed << 6;
-		seed ^= seed >> 21;
-		seed ^= seed << 7;
-		return seed;
-    	}
-};
-
-using Clock = std::chrono::high_resolution_clock;
-using duration_t = std::chrono::duration<double>;
-using std::chrono::nanoseconds;
-
-template<typename Ratio, typename T>
-T duration_cast(T seconds) {
-	return std::chrono::duration_cast<std::chrono::duration<T, Ratio>>(std::chrono::duration<T>(seconds)).count();
-}
-
-void waitfor(double & duration, barrier_t & barrier, std::atomic_bool & done) {
-
-
-	std::cout << "Starting" << std::endl;
-	auto before = Clock::now();
-	barrier.wait(0);
-
-	while(true) {
-		usleep(100000);
-		auto now = Clock::now();
-		duration_t durr = now - before;
-		if( durr.count() > duration ) {
-			done = true;
-			break;
-		}
-		std::cout << "\r" << std::setprecision(4) << durr.count();
-		std::cout.flush();
-	}
-
-	barrier.wait(0);
-	auto after = Clock::now();
-	duration_t durr = after - before;
-	duration = durr.count();
-	std::cout << "\rClosing down" << std::endl;
-}
-
-__attribute__((noinline)) void body(Random & rand) {
-	uint64_t mask = (uint64_t(rand.next()) << 32ul) | uint64_t(rand.next());
-	unsigned idx = rand.next();
-
-	unsigned bit = rand_bit(idx, mask);
-
-	if(__builtin_expect(((1ul << bit) & mask) == 0, false)) {
-		std::cerr << std::hex <<  "Rand " << idx << " from " << mask;
-		std::cerr << " gave " << (1ul << bit) << "(" << std::dec << bit << ")" << std::endl;
-		std::abort();
-	}
-}
-
-void runRandBit(double duration) {
-
-	std::atomic_bool done  = { false };
-	barrier_t barrier(2);
-
-	size_t count = 0;
-	std::thread thread([&done, &barrier, &count]() {
-
-		Random rand(22);
-
-		barrier.wait(1);
-
-		for(;!done; count++) {
-			body(rand);
-		}
-
-		barrier.wait(1);
-	});
-
-	waitfor(duration, barrier, done);
-	thread.join();
-
-	size_t ops = count;
-	size_t ops_sec = size_t(double(ops) / duration);
-	auto dur_nano = duration_cast<std::nano>(1.0);
-
-	std::cout << "Duration      : " << duration << "s\n";
-	std::cout << "ns/Op         : " << ( dur_nano / ops )<< "\n";
-	std::cout << "Ops/sec       : " << ops_sec << "\n";
-	std::cout << "Total ops     : " << ops << std::endl;
-
-}
-
-int main() {
-	std::cout.imbue(std::locale(""));
-	runRandBit(5);
-}
Index: doc/theses/thierry_delisle_PhD/code/readQ_example/Makefile
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readQ_example/Makefile	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readQ_example/Makefile	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,6 @@
+all: gui-proto
+
+CXXFLAGS = -fpic -g -O0 -I.
+
+gui-proto: proto-gui/main.o thrdlib/thread.o
+	$(CXX) -pthread -ldl -o ${@} ${^} -ftls-model=initial-exec
Index: doc/theses/thierry_delisle_PhD/code/readQ_example/proto-gui/main.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readQ_example/proto-gui/main.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readQ_example/proto-gui/main.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,310 @@
+#include "thrdlib/thread.hpp"
+
+#include <cassert>
+
+#include <algorithm>
+#include <atomic>
+#include <iostream>
+#include <memory>
+#include <vector>
+
+#include <getopt.h>
+using thrdlib::thread_t;
+
+
+extern __attribute__((aligned(128))) thread_local struct {
+	void * volatile this_thread;
+	void * volatile this_processor;
+	void * volatile this_stats;
+
+	struct {
+		volatile unsigned short disable_count;
+		volatile bool enabled;
+		volatile bool in_progress;
+	} preemption_state;
+
+	#if defined(__SIZEOF_INT128__)
+		__uint128_t rand_seed;
+	#else
+		uint64_t rand_seed;
+	#endif
+	struct {
+		uint64_t fwd_seed;
+		uint64_t bck_seed;
+	} ready_rng;
+} kernelTLS __attribute__ ((tls_model ( "initial-exec" )));
+
+//--------------------
+// Constants
+unsigned nframes;
+unsigned fsize;
+unsigned nproduce;
+
+//--------------------
+// Frame management
+
+class Frame {
+	static const thread_t reset;
+	static const thread_t set;
+	std::atomic<thread_t> rdy_state = { reset };
+	std::atomic<thread_t> rnd_state = { set };
+public:
+	unsigned number;
+	std::unique_ptr<unsigned char[]> data;
+
+private:
+	inline bool wait( thread_t self, std::atomic<thread_t> & state, std::atomic<thread_t> & other ) {
+		bool ret;
+		while(true) {
+			thread_t expected = state;
+			if( expected == set ) { ret = false; goto END; }
+			assert( expected == reset );
+			if( std::atomic_compare_exchange_strong( &state, &expected, self) ) {
+				thrdlib::park( self );
+				ret = true;
+				goto END;
+			}
+		}
+		END:
+		assert( state == set );
+		assert( other != set );
+		state = reset;
+		return ret;
+	}
+
+	inline bool publish(  std::atomic<thread_t> & state ) {
+		thread_t got = std::atomic_exchange( &state, set );
+		assert( got != set );
+
+		if( got == reset ) return false;
+
+		thrdlib::unpark( got );
+		return true;
+	}
+
+public:
+	inline bool wait_rendered( thread_t self ) {
+		return wait( self, rnd_state, rdy_state );
+	}
+
+	inline bool wait_ready   ( thread_t self ) {
+		return wait( self, rdy_state, rnd_state );
+	}
+
+	inline bool publish() {
+		return publish( rdy_state );
+	}
+
+	inline bool release() {
+		return publish( rnd_state );
+	}
+};
+
+const thread_t Frame::reset = nullptr;
+const thread_t Frame::set   = reinterpret_cast<thread_t>(1);
+
+std::unique_ptr<Frame[]> frames;
+volatile unsigned last_produced = 0;
+
+//--------------------
+// Threads
+thread_t volatile the_stats_thread = nullptr;
+
+inline void fence(void) {
+	std::atomic_thread_fence(std::memory_order_seq_cst);
+}
+
+struct {
+	struct {
+		volatile unsigned long long   parks = 0;
+		volatile unsigned long long unparks = 0;
+	} sim;
+	struct {
+		volatile unsigned long long   parks = 0;
+		volatile unsigned long long unparks = 0;
+	} rend;
+
+	struct {
+		volatile unsigned long long ran = 0;
+		volatile unsigned long long saw = 0;
+	} stats;
+} thrd_stats;
+
+void Stats( thread_t self ) {
+	the_stats_thread = self;
+	fence();
+	thrdlib::park( self );
+
+	std::vector<bool> seen;
+	seen.resize(nproduce, false);
+
+	while(last_produced < nproduce) {
+		thrdlib::yield();
+		thrd_stats.stats.ran++;
+		if( last_produced > 0 ) seen.at(last_produced - 1) = true;
+	}
+
+	thrd_stats.stats.saw = std::count(seen.begin(), seen.end(), true);
+}
+
+void Simulator( thread_t self ) {
+	for(unsigned i = 0; i < nproduce; i++) {
+		auto & frame = frames[i % nframes];
+		// Wait for the frames to be rendered
+		if( frame.wait_rendered( self ) ) {
+			thrd_stats.sim.parks++;
+		}
+
+		// Write the frame information
+		frame.number = i;
+		for( unsigned x = 0; x < fsize; x++ ) {
+			frame.data[x] = i;
+		}
+		std::cout << "Simulated " << i << std::endl;
+		last_produced = i+1;
+
+		// Publish it
+		if( frame.publish()  ) {
+			thrd_stats.sim.unparks++;
+		}
+	}
+}
+
+void Renderer( thread_t self ) {
+	thrdlib::unpark( the_stats_thread );
+	for(unsigned i = 0; i < nproduce; i++) {
+		auto & frame = frames[i % nframes];
+		// Wait for the frames to be ready
+		if( frame.wait_ready( self ) ) {
+			thrd_stats.rend.parks++;
+		}
+
+		// Render the frame
+		unsigned total = 0;
+		for( unsigned x = 0; x < fsize; x++ ) {
+			total += frame.data[x];
+		}
+
+		std::cout << "Rendered " << i << std::endl;
+		assert(total == i * fsize);
+
+		// Release
+		if( frame.release() ) {
+			thrd_stats.rend.unparks++;
+		}
+	}
+
+}
+
+
+
+int main(int argc, char * argv[]) {
+	nframes  = 3;
+	fsize    = 1000;
+	nproduce = 60;
+
+	const char * framework;
+
+	for(;;) {
+		static struct option options[] = {
+			{"buff",  required_argument, 0, 'b'},
+			{"nprod",  required_argument, 0, 'p'},
+			{"fsize",   required_argument, 0, 'f'},
+			{0, 0, 0, 0}
+		};
+
+		int idx = 0;
+		int opt = getopt_long(argc, argv, "b:p:f:", options, &idx);
+
+		std::string arg = optarg ? optarg : "";
+		size_t len = 0;
+		switch(opt) {
+			// Exit Case
+			case -1:
+				/* paranoid */ assert(optind <= argc);
+				if( optind == argc ) {
+					std::cerr << "Must specify a framework" << std::endl;
+					goto usage;
+
+				}
+				framework = argv[optind];
+				goto run;
+			case 'b':
+				try {
+					nframes = std::stoul(optarg, &len);
+					if(nframes == 0 || len != arg.size()) { throw std::invalid_argument(""); }
+				} catch(std::invalid_argument &) {
+					std::cerr << "Number of buffered frames must be at least 1, was" << arg << std::endl;
+					goto usage;
+				}
+				break;
+			case 'p':
+				try {
+					nproduce = std::stoul(optarg, &len);
+					if(nproduce == 0 || len != arg.size()) { throw std::invalid_argument(""); }
+				} catch(std::invalid_argument &) {
+					std::cerr << "Number of produced frames must be at least 1, was" << arg << std::endl;
+					goto usage;
+				}
+				break;
+			case 'f':
+				try {
+					fsize = std::stoul(optarg, &len);
+					if(fsize == 0 || len != arg.size()) { throw std::invalid_argument(""); }
+				} catch(std::invalid_argument &) {
+					std::cerr << "Size of produced frames must be at least 1, was" << arg << std::endl;
+					goto usage;
+				}
+				break;
+			// Other cases
+			default: /* ? */
+				std::cerr << opt << std::endl;
+			usage:
+				std::cerr << "Usage: " << argv[0] << " [options] framework" << std::endl;
+				std::cerr << std::endl;
+				std::cerr << "  -b, --buff=COUNT    Number of frames to buffer" << std::endl;
+				std::cerr << "  -p, --nprod=COUNT   Number of frames to produce" << std::endl;
+				std::cerr << "  -f, --fsize=SIZE    Size of each frame in bytes" << std::endl;
+				std::exit(1);
+		}
+	}
+	run:
+	assert( framework );
+
+	frames.reset(new Frame[nframes]);
+	for(unsigned i = 0; i < nframes; i++) {
+		frames[i].number = 0;
+		frames[i].data.reset(new unsigned char[fsize]);
+	}
+	std::cout << "Created frames of " << fsize << " bytes" << std::endl;
+	std::cout << "(Buffering " << nframes << ")" << std::endl;
+
+	thrdlib::init( framework, 2 );
+
+	thread_t stats     = thrdlib::create( Stats );
+	std::cout << "Created Stats Thread" << std::endl;
+	while( the_stats_thread == nullptr ) thrdlib::yield();
+
+	std::cout << "Creating Main Threads" << std::endl;
+	thread_t renderer  = thrdlib::create( Renderer  );
+	thread_t simulator = thrdlib::create( Simulator );
+
+	std::cout << "Running" << std::endl;
+
+	thrdlib::join( simulator );
+	thrdlib::join( renderer  );
+	thrdlib::join( stats     );
+
+	thrdlib::clean();
+
+	std::cout << "----------" << std::endl;
+	std::cout << "# Parks" << std::endl;
+	std::cout << "  Renderer   park: " << thrd_stats. sim.  parks << std::endl;
+	std::cout << "  Renderer unpark: " << thrd_stats. sim.unparks << std::endl;
+	std::cout << " Simulator   park: " << thrd_stats.rend.  parks << std::endl;
+	std::cout << " Simulator unpark: " << thrd_stats.rend.unparks << std::endl;
+
+	std::cout << "Stats thread" << std::endl;
+	std::cout << " Ran             : " << thrd_stats.stats.ran << " times" << std::endl;
+	std::cout << " Saw             : " << thrd_stats.stats.saw << " (" << ((100.f * thrd_stats.stats.saw) / nproduce) << "%)" << std::endl;
+}
Index: doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/Makefile
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/Makefile	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/Makefile	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,19 @@
+all: fibre.so pthread.so cforall.so
+
+clean:
+	rm -rf fibre.so pthread.so
+
+CXXFLAGS=-Wall -Wextra -O3 -g -fpic -std=c++17 -pthread -ftls-model=initial-exec
+
+pthread.so: pthread.cpp Makefile
+	$(CXX) $(CXXFLAGS) -shared -o ${@} ${<}
+
+fibre.so: fibre.cpp Makefile
+	$(CXX) $(CXXFLAGS) -shared -o ${@} ${<} -lfibre
+
+CFAINC=${HOME}/local/include/cfa-dev
+CFALIB=${HOME}/local/lib/cfa-dev/x64-debug
+CFAFLAGS=-z execstack -I${CFAINC} -I${CFAINC}/concurrency -L${CFALIB} -Wl,-rpath,${CFALIB}
+
+cforall.so: cforall.cpp Makefile
+	$(CXX) $(CXXFLAGS) $(CFAFLAGS) -shared -o ${@} ${<} -lcfathread -lcfa -ldl -lm
Index: doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/cforall.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/cforall.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/cforall.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,43 @@
+#include <cassert>
+#include <clib/cfathread.h>
+
+typedef cfathread_t thread_t;
+static_assert(sizeof(thread_t) == sizeof(void*), "thread_t musst be of same size as void*");
+
+#if !defined(__cplusplus)
+#error no __cplusplus define!
+#endif
+
+extern "C" {
+	//--------------------
+	// Basic thread support
+	thread_t thrdlib_create( void (*the_main)( thread_t ) ) {
+		return cfathread_create( the_main );
+	}
+
+	void thrdlib_join( thread_t handle ) {
+		cfathread_join( handle );
+	}
+
+	void thrdlib_park( thread_t ) {
+		cfathread_park();
+	}
+
+	void thrdlib_unpark( thread_t handle ) {
+		cfathread_unpark( handle );
+	}
+
+	void thrdlib_yield( void ) {
+		cfathread_yield();
+	}
+
+	//--------------------
+	// Basic kernel features
+	void thrdlib_init( int procs ) {
+		cfathread_setproccnt(procs);
+	}
+
+	void thrdlib_clean( void ) {
+		cfathread_setproccnt(1);
+	}
+}
Index: doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/fibre.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/fibre.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/fibre.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,48 @@
+#include <cassert>
+#include <libfibre/cfibre.h>
+
+typedef cfibre_t thread_t;
+static_assert(sizeof(thread_t) == sizeof(void*), "thread_t musst be of same size as void*");
+
+void * fibre_runner(void * arg) {
+	auto the_main = (void (*)( thread_t ))arg;
+	the_main( cfibre_self() );
+	return nullptr;
+}
+
+extern "C" {
+	//--------------------
+	// Basic thread support
+	thread_t thrdlib_create( void (*the_main)( thread_t ) ) {
+		thread_t fibre;
+		cfibre_create( &fibre, nullptr, fibre_runner, (void*)the_main );
+		return fibre;
+	}
+
+	void thrdlib_join( thread_t handle ) {
+		cfibre_join( handle, nullptr );
+	}
+
+	void thrdlib_park( thread_t handle ) {
+		assert( handle == cfibre_self() );
+		cfibre_park();
+	}
+
+	void thrdlib_unpark( thread_t handle ) {
+		cfibre_unpark( handle );
+	}
+
+	void thrdlib_yield( void ) {
+		cfibre_yield();
+	}
+
+	//--------------------
+	// Basic kernel features
+	void thrdlib_init( int procs ) {
+		cfibre_init_n(1, procs );
+	}
+
+	void thrdlib_clean( void ) {
+
+	}
+}
Index: doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/pthread.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/pthread.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/pthread.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,99 @@
+#include <pthread.h>
+#include <errno.h>
+#include <cstring>
+#include <cstdio>
+#include <iostream>
+
+#define CHECKED(x) { int err = x; if( err != 0 ) { std::cerr << "KERNEL ERROR: Operation \"" #x "\" return error " << err << " - " << strerror(err) << std::endl; std::abort(); } }
+
+struct __bin_sem_t {
+	pthread_mutex_t 	lock;
+	pthread_cond_t  	cond;
+	int     		val;
+
+	__bin_sem_t() {
+		// Create the mutex with error checking
+		pthread_mutexattr_t mattr;
+		pthread_mutexattr_init( &mattr );
+		pthread_mutexattr_settype( &mattr, PTHREAD_MUTEX_ERRORCHECK_NP);
+		pthread_mutex_init(&lock, &mattr);
+
+		pthread_cond_init (&cond, nullptr);
+		val = 0;
+	}
+
+	~__bin_sem_t() {
+		CHECKED( pthread_mutex_destroy(&lock) );
+		CHECKED( pthread_cond_destroy (&cond) );
+	}
+
+	void wait() {
+		CHECKED( pthread_mutex_lock(&lock) );
+			while(val < 1) {
+				pthread_cond_wait(&cond, &lock);
+			}
+			val -= 1;
+		CHECKED( pthread_mutex_unlock(&lock) );
+	}
+
+	bool post() {
+		bool needs_signal = false;
+
+		CHECKED( pthread_mutex_lock(&lock) );
+			if(val < 1) {
+				val += 1;
+				pthread_cond_signal(&cond);
+				needs_signal = true;
+			}
+		CHECKED( pthread_mutex_unlock(&lock) );
+
+		return needs_signal;
+	}
+};
+
+#undef CHECKED
+
+//--------------------
+// Basic types
+struct pthread_runner_t {
+	pthread_t handle;
+	__bin_sem_t sem;
+};
+typedef pthread_runner_t * thread_t;
+
+static_assert(sizeof(thread_t) == sizeof(void*), "thread_t musst be of same size as void*");
+
+extern "C" {
+	//--------------------
+	// Basic thread support
+	thread_t thrdlib_create( void (*main)( thread_t ) ) {
+		thread_t thrd = new pthread_runner_t();
+		int r = pthread_create( &thrd->handle, nullptr, (void *(*)(void *))main, thrd );
+		if( r != 0 ) std::abort();
+		return thrd;
+	}
+
+	void thrdlib_join( thread_t handle ) {
+		void * ret;
+		int r = pthread_join( handle->handle, &ret );
+		if( r != 0 ) std::abort();
+		delete handle;
+	}
+
+	void thrdlib_park( thread_t handle ) {
+		handle->sem.wait();
+	}
+
+	void thrdlib_unpark( thread_t handle ) {
+		handle->sem.post();
+	}
+
+	void thrdlib_yield( void ) {
+		int r = pthread_yield();
+		if( r != 0 ) std::abort();
+	}
+
+	//--------------------
+	// Basic kernel features
+	void thrdlib_init( int ) {}
+}
Index: doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/thread.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/thread.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/thread.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,68 @@
+#include "thread.hpp"
+
+#include <cstdarg>										// va_start, va_end
+#include <cstdio>
+#include <cstring>										// strlen
+extern "C" {
+	#include <unistd.h>										// _exit, getpid
+	#include <signal.h>
+	#include <dlfcn.h>										// dlopen, dlsym
+	#include <execinfo.h>									// backtrace, messages
+}
+
+#include <iostream>
+#include <string>
+
+using thrdlib::thread_t;
+
+thread_t (*thrdlib::create)( void (*main)( thread_t ) ) = nullptr;
+void (*thrdlib::join)( thread_t handle ) = nullptr;
+void (*thrdlib::park)( thread_t handle ) = nullptr;
+void (*thrdlib::unpark)( thread_t handle ) = nullptr;
+void (*thrdlib::yield)( void ) = nullptr;
+void (*lib_clean)(void) = nullptr;
+
+typedef void (*fptr_t)();
+static fptr_t open_symbol( void * library, const char * symbol, bool required ) {
+	void * ptr = dlsym( library, symbol );
+
+	const char * error = dlerror();
+	if ( required && error ) {
+		std::cerr << "Fetching symbol '" << symbol << "' failed with error '" << error << "'\n";
+		std::abort();
+	}
+
+	return (fptr_t)ptr;
+}
+
+//--------------------
+// Basic kernel features
+void thrdlib::init( const char * name, int procs ) {
+	std::string file = __FILE__;
+	std::size_t found = file.find_last_of("/");
+  	std::string libname = file.substr(0,found+1) + name + ".so";
+
+	std::cout << "Use framework " << name << "(" << libname << ")\n";
+
+	void * library = dlopen( libname.c_str(), RTLD_NOW );
+	if ( const char * error = dlerror() ) {
+		std::cerr << "Could not open library '" << libname << "' from name '" << name <<"'\n";
+		std::cerr << "Error was : '" << error << "'\n";
+		std::abort();
+	}
+
+	void (*lib_init)( int ) = (void (*)( int ))open_symbol( library, "thrdlib_init", false );
+	lib_clean = open_symbol( library, "thrdlib_clean" , false );
+
+	thrdlib::create = (typeof(thrdlib::create))open_symbol( library, "thrdlib_create", true  );
+	thrdlib::join   = (typeof(thrdlib::join  ))open_symbol( library, "thrdlib_join"  , true  );
+	thrdlib::park   = (typeof(thrdlib::park  ))open_symbol( library, "thrdlib_park"  , true  );
+	thrdlib::unpark = (typeof(thrdlib::unpark))open_symbol( library, "thrdlib_unpark", true  );
+	thrdlib::yield  = (typeof(thrdlib::yield ))open_symbol( library, "thrdlib_yield" , true  );
+
+	lib_init( procs );
+}
+
+void thrdlib::clean( void ) {
+	if(lib_clean) lib_clean();
+}
Index: doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/thread.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/thread.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readQ_example/thrdlib/thread.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,18 @@
+#pragma once
+
+namespace thrdlib {
+	typedef void * thread_t;
+
+	//--------------------
+	// Basic thread support
+	extern thread_t (*create)( void (*main)( thread_t ) );
+	extern void (*join)( thread_t handle );
+	extern void (*park)( thread_t handle );
+	extern void (*unpark)( thread_t handle );
+	extern void (*yield)( void ) ;
+
+	//--------------------
+	// Basic kernel features
+	extern void init( const char * name, int procs );
+	extern void clean( void );
+};
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/Makefile
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/Makefile	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/Makefile	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,22 @@
+
+
+CXXFLAGS = -O3 -g -Wall -Wextra -std=c++17
+LDFLAGS = -pthread -latomic
+
+push:
+	clang++ relaxed_list.cpp -g -Wall -Wextra -std=c++17 -fsyntax-only &&  rsync -av relaxed_list.cpp relaxed_list.hpp utils.hpp assert.hpp scale.sh plg7b:~/workspace/sched/.
+
+relaxed_list: $(firstword $(MAKEFILE_LIST)) | build
+	clang++ relaxed_list.cpp $(CXXFLAGS) $(LDFLAGS) -lpng -MMD -MF build/$(@).d -o $(@)
+
+-include build/relaxed_list.d
+
+layout.ast: $(firstword $(MAKEFILE_LIST)) | build
+	clang++ relaxed_list_layout.cpp $(CXXFLAGS) -MMD -MF build/$(@).d -MT $(@) -E -o build/$(@).ii
+	clang++ -Xclang -fdump-record-layouts -fsyntax-only $(CXXFLAGS) build/$(@).ii > build/layout.ast.raw
+	cat build/$(@).raw > $(@)
+
+-include build/layout.ast.d
+
+build:
+	mkdir -p build
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/assert.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/assert.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/assert.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,22 @@
+#pragma once
+
+#ifndef NDEBUG
+#include <cassert>
+#include <cstdlib>
+
+#define sstr(s) #s
+#define xstr(s) sstr(s)
+
+extern const char * __my_progname;
+
+#define assertf(cond, ...) ({             \
+	if(!(cond)) {                       \
+		fprintf(stderr, "%s: " __FILE__ ":" xstr(__LINE__) ": %s: Assertion '" xstr(cond) "' failed.\n", __my_progname, __PRETTY_FUNCTION__); \
+		fprintf(stderr, __VA_ARGS__); \
+		fprintf(stderr, "\n"); \
+		std::abort();                 \
+	}                                   \
+})
+#else
+#define assertf(cond, ...)
+#endif
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/bitbench/select.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/bitbench/select.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/bitbench/select.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,186 @@
+
+#include "../utils.hpp"
+
+void consume(int i, int j) __attribute__((noinline));
+void consume(int i, int j) {
+	asm volatile("":: "rm" (i), "rm" (i) );
+}
+
+static inline unsigned rand_bit_sw(unsigned rnum, size_t mask) {
+	unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0;
+	uint64_t v = mask;   // Input value to find position with rank r.
+	unsigned int r = bit + 1;// Input: bit's desired rank [1-64].
+	unsigned int s;      // Output: Resulting position of bit with rank r [1-64]
+	uint64_t a, b, c, d; // Intermediate temporaries for bit count.
+	unsigned int t;      // Bit count temporary.
+
+	// Do a normal parallel bit count for a 64-bit integer,
+	// but store all intermediate steps.
+	a =  v - ((v >> 1) & ~0UL/3);
+	b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
+	c = (b + (b >> 4)) & ~0UL/0x11;
+	d = (c + (c >> 8)) & ~0UL/0x101;
+
+
+	t = (d >> 32) + (d >> 48);
+	// Now do branchless select!
+	s  = 64;
+	s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
+	t  = (d >> (s - 16)) & 0xff;
+	s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
+	t  = (c >> (s - 8)) & 0xf;
+	s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
+	t  = (b >> (s - 4)) & 0x7;
+	s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
+	t  = (a >> (s - 2)) & 0x3;
+	s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
+	t  = (v >> (s - 1)) & 0x1;
+	s -= ((t - r) & 256) >> 8;
+	return s - 1;
+}
+
+static inline unsigned rand_bit_hw(unsigned rnum, size_t mask) {
+	unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0;
+	uint64_t picked = _pdep_u64(1ul << bit, mask);
+	return picked ? __builtin_ctzl(picked) : 0;
+}
+
+struct TLS {
+	Random rng = { 6 };
+} tls;
+
+const unsigned numLists = 64;
+
+static inline void blind() {
+	int i = tls.rng.next() % numLists;
+	int j = tls.rng.next() % numLists;
+
+	consume(i, j);
+}
+
+std::atomic_size_t list_mask[7];
+static inline void bitmask_sw() {
+	unsigned i, j;
+	{
+		// Pick two lists at random
+		unsigned num = ((numLists - 1) >> 6) + 1;
+
+		unsigned ri = tls.rng.next();
+		unsigned rj = tls.rng.next();
+
+		unsigned wdxi = (ri >> 6u) % num;
+		unsigned wdxj = (rj >> 6u) % num;
+
+		size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
+		size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
+
+		unsigned bi = rand_bit_sw(ri, maski);
+		unsigned bj = rand_bit_sw(rj, maskj);
+
+		i = bi | (wdxi << 6);
+		j = bj | (wdxj << 6);
+	}
+
+	consume(i, j);
+}
+
+static inline void bitmask_hw() {
+	#if !defined(__BMI2__)
+		#warning NO bmi2 for pdep rand_bit
+		return;
+	#endif
+	unsigned i, j;
+	{
+		// Pick two lists at random
+		unsigned num = ((numLists - 1) >> 6) + 1;
+
+		unsigned ri = tls.rng.next();
+		unsigned rj = tls.rng.next();
+
+		unsigned wdxi = (ri >> 6u) % num;
+		unsigned wdxj = (rj >> 6u) % num;
+
+		size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
+		size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
+
+		unsigned bi = rand_bit_hw(ri, maski);
+		unsigned bj = rand_bit_hw(rj, maskj);
+
+		i = bi | (wdxi << 6);
+		j = bj | (wdxj << 6);
+	}
+
+	consume(i, j);
+}
+
+struct {
+	const unsigned mask = 7;
+	const unsigned depth = 3;
+	const uint64_t indexes = 0x0706050403020100;
+	uint64_t masks( unsigned node ) {
+		return 0xff00ffff00ff;
+	}
+} snzm;
+static inline void sparsemask() {
+	#if !defined(__BMI2__)
+		#warning NO bmi2 for sparse mask
+		return;
+	#endif
+	unsigned i, j;
+	{
+		// Pick two random number
+		unsigned ri = tls.rng.next();
+		unsigned rj = tls.rng.next();
+
+		// Pick two nodes from it
+		unsigned wdxi = ri & snzm.mask;
+		unsigned wdxj = rj & snzm.mask;
+
+		// Get the masks from the nodes
+		size_t maski = snzm.masks(wdxi);
+		size_t maskj = snzm.masks(wdxj);
+
+		uint64_t idxsi = _pext_u64(snzm.indexes, maski);
+		uint64_t idxsj = _pext_u64(snzm.indexes, maskj);
+
+		auto pi = __builtin_popcountll(maski);
+		auto pj = __builtin_popcountll(maskj);
+
+		ri = pi ? ri & ((pi >> 3) - 1) : 0;
+		rj = pj ? rj & ((pj >> 3) - 1) : 0;
+
+		unsigned bi = (idxsi >> (ri << 3)) & 0xff;
+		unsigned bj = (idxsj >> (rj << 3)) & 0xff;
+
+		i = (bi << snzm.depth) | wdxi;
+		j = (bj << snzm.depth) | wdxj;
+	}
+
+	consume(i, j);
+}
+
+template<typename T>
+void benchmark( T func, const std::string & name ) {
+	std::cout << "Starting " << name << std::endl;
+	auto before = Clock::now();
+	const int N = 250'000'000;
+	for(int i = 0; i < N; i++) {
+		func();
+	}
+	auto after = Clock::now();
+	duration_t durr = after - before;
+	double duration = durr.count();
+	std::cout << "Duration(s) : " << duration << std::endl;
+	std::cout << "Ops/sec     : " << uint64_t(N / duration) << std::endl;
+	std::cout << "ns/Op       : " << double(duration * 1'000'000'000.0 / N) << std::endl;
+	std::cout << std::endl;
+}
+
+int main() {
+	std::cout.imbue(std::locale(""));
+
+	benchmark(blind, "Blind guess");
+	benchmark(bitmask_sw, "Dense bitmask");
+	benchmark(bitmask_hw, "Dense bitmask with Parallel Deposit");
+	benchmark(sparsemask, "Parallel Extract bitmask");
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/bts.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/bts.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/bts.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,279 @@
+#include <array>
+#include <iomanip>
+#include <iostream>
+#include <locale>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/sysinfo.h>
+
+#include "utils.hpp"
+
+// ================================================================================================
+//                        UTILS
+// ================================================================================================
+
+struct local_stat_t {
+	size_t cnt = 0;
+};
+
+struct global_stat_t {
+	std::atomic_size_t cnt = { 0 };
+};
+
+void atomic_max(std::atomic_size_t & target, size_t value) {
+	for(;;) {
+		size_t expect = target.load(std::memory_order_relaxed);
+		if(value <= expect) return;
+		bool success = target.compare_exchange_strong(expect, value);
+		if(success) return;
+	}
+}
+
+void atomic_min(std::atomic_size_t & target, size_t value) {
+	for(;;) {
+		size_t expect = target.load(std::memory_order_relaxed);
+		if(value >= expect) return;
+		bool success = target.compare_exchange_strong(expect, value);
+		if(success) return;
+	}
+}
+
+void tally_stats(global_stat_t & global, local_stat_t & local) {
+	global.cnt   += local.cnt;
+}
+
+void waitfor(double & duration, barrier_t & barrier, std::atomic_bool & done) {
+	std::cout << "Starting" << std::endl;
+	auto before = Clock::now();
+	barrier.wait(0);
+
+	while(true) {
+		usleep(100000);
+		auto now = Clock::now();
+		duration_t durr = now - before;
+		if( durr.count() > duration ) {
+			done = true;
+			break;
+		}
+		std::cout << "\r" << std::setprecision(4) << durr.count();
+		std::cout.flush();
+	}
+
+	barrier.wait(0);
+	auto after = Clock::now();
+	duration_t durr = after - before;
+	duration = durr.count();
+	std::cout << "\rClosing down" << std::endl;
+}
+
+void waitfor(double & duration, barrier_t & barrier, const std::atomic_size_t & count) {
+	std::cout << "Starting" << std::endl;
+	auto before = Clock::now();
+	barrier.wait(0);
+
+	while(true) {
+		usleep(100000);
+		size_t c = count.load();
+		if( c == 0 ) {
+			break;
+		}
+		std::cout << "\r" << c;
+		std::cout.flush();
+	}
+
+	barrier.wait(0);
+	auto after = Clock::now();
+	duration_t durr = after - before;
+	duration = durr.count();
+	std::cout << "\rClosing down" << std::endl;
+}
+
+void print_stats(double duration, unsigned nthread, global_stat_t & global) {
+	std::cout << "Done" << std::endl;
+
+	size_t ops = global.cnt;
+	size_t ops_sec = size_t(double(ops) / duration);
+	size_t ops_thread = ops_sec / nthread;
+	auto dur_nano = duration_cast<std::nano>(1.0);
+
+	std::cout << "Duration      : " << duration << "s\n";
+	std::cout << "ns/Op         : " << ( dur_nano / ops_thread )<< "\n";
+	std::cout << "Ops/sec/thread: " << ops_thread << "\n";
+	std::cout << "Ops/sec       : " << ops_sec << "\n";
+	std::cout << "Total ops     : " << ops << "\n";
+}
+
+static inline bool bts(std::atomic_size_t & target, size_t bit ) {
+	/*
+	int result = 0;
+	asm volatile(
+		"LOCK btsq %[bit], %[target]\n\t"
+		:"=@ccc" (result)
+		: [target] "m" (target), [bit] "r" (bit)
+	);
+ 	return result != 0;
+	/*/
+	size_t mask = 1ul << bit;
+	size_t ret = target.fetch_or(mask, std::memory_order_relaxed);
+	return (ret & mask) != 0;
+	//*/
+}
+
+static inline bool btr(std::atomic_size_t & target, size_t bit ) {
+	/*
+	int result = 0;
+	asm volatile(
+		"LOCK btrq %[bit], %[target]\n\t"
+		:"=@ccc" (result)
+		: [target] "m" (target), [bit] "r" (bit)
+	);
+ 	return result != 0;
+	/*/
+	size_t mask = 1ul << bit;
+	size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);
+	return (ret & mask) != 0;
+	//*/
+}
+
+// ================================================================================================
+//                        EXPERIMENTS
+// ================================================================================================
+
+// ================================================================================================
+__attribute__((noinline)) void runPingPong_body(
+	std::atomic<bool>& done,
+	local_stat_t & local,
+	std::atomic_size_t & target,
+	size_t id
+) {
+	while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) {
+
+		bool ret;
+		ret = bts(target, id);
+		assert(!ret);
+
+		// -----
+
+		ret = btr(target, id);
+		assert(ret);
+		local.cnt++;
+	}
+}
+
+void run(unsigned nthread, double duration) {
+	// Barrier for synchronization
+	barrier_t barrier(nthread + 1);
+
+	// Data to check everything is OK
+	global_stat_t global;
+
+	// Flag to signal termination
+	std::atomic_bool done  = { false };
+
+	std::cout << "Initializing ";
+	// List being tested
+	std::atomic_size_t word = { 0 };
+	{
+		std::thread * threads[nthread];
+		unsigned i = 1;
+		for(auto & t : threads) {
+			t = new std::thread([&done, &word, &barrier, &global](unsigned tid) {
+				local_stat_t local;
+
+				// affinity(tid);
+
+				barrier.wait(tid);
+
+				// EXPERIMENT START
+
+				runPingPong_body(done, local, word, tid - 1);
+
+				// EXPERIMENT END
+
+				barrier.wait(tid);
+
+				tally_stats(global, local);
+			}, i++);
+		}
+
+		waitfor(duration, barrier, done);
+
+		for(auto t : threads) {
+			t->join();
+			delete t;
+		}
+	}
+
+	print_stats(duration, nthread, global);
+}
+
+// ================================================================================================
+
+int main(int argc, char * argv[]) {
+
+	double duration   = 5.0;
+	unsigned nthreads = 2;
+
+	std::cout.imbue(std::locale(""));
+
+	for(;;) {
+		static struct option options[] = {
+			{"duration",  required_argument, 0, 'd'},
+			{"nthreads",  required_argument, 0, 't'},
+			{0, 0, 0, 0}
+		};
+
+		int idx = 0;
+		int opt = getopt_long(argc, argv, "d:t:", options, &idx);
+
+		std::string arg = optarg ? optarg : "";
+		size_t len = 0;
+		switch(opt) {
+			case -1:
+				if(optind != argc) {
+					std::cerr << "Too many arguments " << argc << " " << idx << std::endl;
+					goto usage;
+				}
+				goto run;
+			// Numeric Arguments
+			case 'd':
+				try {
+					duration = std::stod(optarg, &len);
+					if(len != arg.size()) { throw std::invalid_argument(""); }
+				} catch(std::invalid_argument &) {
+					std::cerr << "Duration must be a valid double, was " << arg << std::endl;
+					goto usage;
+				}
+				break;
+			case 't':
+				try {
+					nthreads = std::stoul(optarg, &len);
+					if(len != arg.size() || nthreads > (8 * sizeof(size_t))) { throw std::invalid_argument(""); }
+				} catch(std::invalid_argument &) {
+					std::cerr << "Number of threads must be a positive integer less than or equal to " << sizeof(size_t) * 8 << ", was " << arg << std::endl;
+					goto usage;
+				}
+				break;
+			// Other cases
+			default: /* ? */
+				std::cerr << opt << std::endl;
+			usage:
+				std::cerr << "Usage: " << argv[0] << ": [options]" << std::endl;
+				std::cerr << std::endl;
+				std::cerr << "  -d, --duration=DURATION  Duration of the experiment, in seconds" << std::endl;
+				std::cerr << "  -t, --nthreads=NTHREADS  Number of kernel threads" << std::endl;
+				std::exit(1);
+		}
+	}
+	run:
+
+	check_cache_line_size();
+
+	std::cout << "Running " << nthreads << " threads for " << duration << " seconds" << std::endl;
+	run(nthreads, duration);
+	return 0;
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/bts_test.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/bts_test.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/bts_test.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,32 @@
+#include <cassert>
+#include <iostream>
+
+bool bts(volatile size_t & target, size_t bit ) {
+	bool result = false;
+	asm volatile(
+		"LOCK btsq %[bit], %[target]\n\t"
+		:"=c" (result)
+		: [target] "m" (target), [bit] "r" (bit)
+	);
+ 	return result;
+}
+
+bool btr(volatile size_t & target, size_t bit ) {
+	bool result = false;
+	asm volatile(
+		"LOCK btrq %[bit], %[target]\n\t"
+		:"=c" (result)
+		: [target] "m" (target), [bit] "r" (bit)
+	);
+ 	return result;
+}
+
+int main() {
+	volatile size_t i = 0;
+	std::cout << std::hex << i << std::endl;
+	assert(bts(i, 31));
+	std::cout << std::hex << i << std::endl;
+	assert(btr(i, 31));
+	std::cout << std::hex << i << std::endl;
+	return 0;
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/links.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/links.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/links.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,122 @@
+#pragma once
+
+#include "assert.hpp"
+#include "utils.hpp"
+
+template<typename node_t>
+struct _LinksFields_t {
+	node_t * prev = nullptr;
+	node_t * next = nullptr;
+	volatile unsigned long long ts = 0;
+	unsigned hint = (unsigned)-1;
+};
+
+template<typename node_t>
+class __attribute__((aligned(128))) intrusive_queue_t {
+public:
+	typedef spinlock_t lock_t;
+
+	struct stat {
+		ssize_t diff = 0;
+		size_t  push = 0;
+		size_t  pop  = 0;
+	};
+
+private:
+	struct sentinel_t {
+		_LinksFields_t<node_t> _links;
+	};
+
+public:
+	lock_t lock;
+
+private:
+	sentinel_t before;
+	sentinel_t after;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winvalid-offsetof"
+	static constexpr auto fields_offset = offsetof( node_t, _links );
+#pragma GCC diagnostic pop
+public:
+	intrusive_queue_t()
+		: before{{ nullptr, tail() }}
+		, after {{ head(), nullptr }}
+	{
+		/* paranoid */ assert((reinterpret_cast<uintptr_t>( head() ) + fields_offset) == reinterpret_cast<uintptr_t>(&before));
+		/* paranoid */ assert((reinterpret_cast<uintptr_t>( tail() ) + fields_offset) == reinterpret_cast<uintptr_t>(&after ));
+		/* paranoid */ assert(head()->_links.prev == nullptr);
+		/* paranoid */ assert(head()->_links.next == tail() );
+		/* paranoid */ assert(tail()->_links.next == nullptr);
+		/* paranoid */ assert(tail()->_links.prev == head() );
+		/* paranoid */ assert(sizeof(*this) == 128);
+		/* paranoid */ assert((intptr_t(this) % 128) == 0);
+	}
+
+	~intrusive_queue_t() = default;
+
+	inline node_t * head() const {
+		node_t * rhead = reinterpret_cast<node_t *>(
+			reinterpret_cast<uintptr_t>( &before ) - fields_offset
+		);
+		assert(rhead);
+		return rhead;
+	}
+
+	inline node_t * tail() const {
+		node_t * rtail = reinterpret_cast<node_t *>(
+			reinterpret_cast<uintptr_t>( &after ) - fields_offset
+		);
+		assert(rtail);
+		return rtail;
+	}
+
+	inline bool push(node_t * node) {
+		assert(lock);
+		assert(node->_links.ts != 0);
+		node_t * tail = this->tail();
+
+		node_t * prev = tail->_links.prev;
+		// assertf(node->_links.ts >= prev->_links.ts,
+		// 	"New node has smaller timestamp: %llu < %llu", node->_links.ts, prev->_links.ts);
+		node->_links.next = tail;
+		node->_links.prev = prev;
+		prev->_links.next = node;
+		tail->_links.prev = node;
+
+		if(before._links.ts == 0l) {
+			before._links.ts = node->_links.ts;
+			assert(node->_links.prev == this->head());
+			return true;
+		}
+		return false;
+	}
+
+	inline std::pair<node_t *, bool> pop() {
+		assert(lock);
+		node_t * head = this->head();
+		node_t * tail = this->tail();
+
+		node_t * node = head->_links.next;
+		node_t * next = node->_links.next;
+		if(node == tail) return {nullptr, false};
+
+		head->_links.next = next;
+		next->_links.prev = head;
+
+		if(next == tail) {
+			before._links.ts = 0l;
+			return {node, true};
+		}
+		else {
+			assert(next->_links.ts != 0);
+			before._links.ts = next->_links.ts;
+			assert(before._links.ts != 0);
+			return {node, false};
+		}
+	}
+
+	long long ts() const {
+		return before._links.ts;
+	}
+};
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/prefetch.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/prefetch.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/prefetch.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,106 @@
+#include <algorithm>
+#include <array>
+#include <chrono>
+#include <iostream>
+#include <locale>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <cassert>
+
+struct __attribute__((aligned(64))) element {
+	size_t value;
+};
+
+using block = std::array<element, 100>;
+
+block * create() {
+	block * b = new block();
+	for(auto & e : *b) {
+		e.value = rand();
+	}
+	b->back().value = b->size();
+
+	return b;
+}
+
+static inline size_t find(const block & b) {
+	size_t r = 0;
+	for(; r < b.size(); r++) {
+		if(__builtin_expect(b[r].value == b.size(), false)) break;
+	}
+
+	return r;
+}
+
+void usage(char * argv[]) {
+	std::cerr << argv[0] << ": [DURATION (FLOAT:SEC)] [NBLOCKS]" << std::endl;;
+	std::exit(1);
+}
+
+int main(int argc, char * argv[]) {
+	size_t nblocks = 1000;
+	double duration = 5;
+
+	std::cout.imbue(std::locale(""));
+
+	switch (argc)
+	{
+	case 3:
+		nblocks = std::stoul(argv[2]);
+		[[fallthrough]];
+	case 2:
+		duration = std::stod(argv[1]);
+		if( duration <= 0.0 ) {
+			std::cerr << "Duration must be positive, was " << argv[1] << "(" << duration << ")" << std::endl;
+			usage(argv);
+		}
+		[[fallthrough]];
+	case 1:
+		break;
+	default:
+		usage(argv);
+		break;
+	}
+
+	std::vector<std::unique_ptr<block>> blocks;
+	for(size_t i = 0; i < nblocks; i++) {
+		blocks.emplace_back( create() );
+	}
+	std::random_shuffle(blocks.begin(), blocks.end());
+
+	size_t CRC = 0;
+	size_t count = 0;
+
+	using clock = std::chrono::high_resolution_clock;
+	auto before = clock::now();
+
+	while(true) {
+		for(const auto & b : blocks) {
+			CRC += find(*b);
+			count++;
+		}
+		auto now = clock::now();
+		std::chrono::duration<double> durr = now - before;
+		if( durr.count() > duration ) {
+			break;
+		}
+	}
+
+	auto after = clock::now();
+	std::chrono::duration<double> durr = after - before;
+	duration = durr.count();
+
+	using std::chrono::duration_cast;
+	using std::chrono::nanoseconds;
+
+	size_t ops_sec = size_t(double(count) / duration);
+	auto dur_nano = duration_cast<nanoseconds>(std::chrono::duration<double>(1.0)).count();
+
+	std::cout << "CRC           : " << CRC << "\n";
+	std::cout << "Duration      : " << duration << "s\n";
+	std::cout << "Total ops     : " << count << "\n";
+	std::cout << "Ops/sec       : " << ops_sec << "\n";
+	std::cout << "ns/Op         : " << ( dur_nano / ops_sec )<< "\n";
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/process.sh
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/process.sh	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/process.sh	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+NAME=$1
+
+if [ ! -f "raw/${NAME}.out" ]; then
+    echo "Not output for ${NAME}"
+    exit 1
+fi
+
+if [ ! -f "raw/${NAME}.data" ]; then
+    echo "Not perf record for ${NAME}"
+    exit 1
+fi
+
+echo "Processing perf data for ${NAME}"
+
+OPS=$(grep -e 'Total ops' raw/${NAME}.out)
+CPOP=$( echo "Hello $OPS" | \grep -oP ", \K[0-9,]+(?=o)" --color | tr -d ',')
+CPUSH=$(echo "Hello $OPS" | \grep -oP "\(\K[0-9,]+(?=i)" --color | tr -d ',')
+
+REPORT=''
+perf report -n --percent-limit 5 --stdio --no-children -i raw/${NAME}.data > raw/.temp
+EVENT=$(cat raw/.temp | grep -e '^# Samples'| cut -d ' ' -f 6)
+SPOP=$( cat raw/.temp | grep -e '] relaxed_list<Node>::pop'  | tr -s ' ' | cut -d ' ' -f 3)
+SPUSH=$(cat raw/.temp | grep -e '] relaxed_list<Node>::push' | tr -s ' ' | cut -d ' ' -f 3)
+SARR=$( cat raw/.temp | grep -e '] snz[i|m]_t::node::arrive_h'   | tr -s ' ' | cut -d ' ' -f 3)
+
+echo "$OPS"
+echo "Push count: $CPUSH"
+echo "Pop  count: $CPOP"
+
+echo "Pop    samples: $SPOP"
+echo "Push   samples: $SPUSH"
+echo "Arrive samples: $SARR"
+
+SpPUSH=$(bc -l <<< "scale=9; $SPUSH / $CPUSH")
+SpPOP=$( bc -l <<< "scale=9; $SPOP  / $CPOP" )
+SpARR=$( bc -l <<< "scale=9; $SARR  / $CPUSH")
+
+printf "%s per push()  : %.9f\n" $EVENT $SpPUSH | sed ':a;s/\B[0-9]\{3\}\>/,&/;ta'
+printf "%s per pop()   : %.9f\n" $EVENT $SpPOP  | sed ':a;s/\B[0-9]\{3\}\>/,&/;ta'
+printf "%s per arrive(): %.9f\n" $EVENT $SpARR  | sed ':a;s/\B[0-9]\{3\}\>/,&/;ta'
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,53 @@
+#include <atomic>
+
+struct thread {};
+
+struct cluster {
+	void add();
+	void remove();
+	thread * next();
+};
+
+struct processor {
+
+	cluster cluster;
+	std::atomic<bool> stop;
+	volatile bool idle;
+};
+
+
+void run(thread * ) {
+	// verify preemption
+
+	// run Thread
+
+	// verify preemption
+
+	// finish Running
+}
+
+void main(processor & self) {
+
+	self.cluster.add();
+
+	while(!self.stop) {
+		if(thread * t = self.cluster.next()) {
+			run(t);
+			continue;
+		}
+
+		self.set_idle();
+		std::atomic_thread_fence();
+
+		if(thread * t = self.cluster.next()) {
+			self.idle = false;
+			run(t);
+			continue;
+		}
+
+		halt();
+	}
+
+	self.cluster.remove();
+
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor_list.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor_list.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor_list.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,215 @@
+#include <cassert>
+
+#include <atomic>
+#include <new>
+#include <type_traits>
+
+struct processor;
+
+struct __attribute__((aligned(64))) processor_id {
+	std::atomic<processor *> handle;
+	std::atomic<bool> lock;
+
+	processor_id() = default;
+	processor_id(processor * proc) : handle(proc), lock() {
+		/*paranoid*/ assert(std::atomic_is_lock_free(&lock));
+	}
+};
+
+extern unsigned num();
+
+#define ERROR throw 1
+
+class processor_list {
+private:
+
+	static const constexpr std::size_t cache_line_size = 64;
+
+	static_assert(sizeof (processor_id) <= cache_line_size, "ERROR: Instances must fit in one cache line" );
+	static_assert(alignof(processor_id) == cache_line_size, "ERROR: Instances must aligned to one cache line" );
+
+	const unsigned max;     // total cachelines allocated
+	std::atomic_uint alloc; // cachelines currently in use
+	std::atomic_uint ready; // cachelines ready to iterate over (!= to alloc when thread is in second half of doregister)
+	std::atomic<bool> lock; // writerlock
+	processor_id * data;    // data pointer
+
+private:
+	inline void acquire(std::atomic<bool> & ll) {
+		while( __builtin_expect(ll.exchange(true),false) ) {
+			while(ll.load(std::memory_order_relaxed))
+				asm volatile("pause");
+		}
+		/* paranoid */ assert(ll);
+	}
+
+public:
+	processor_list()
+		: max(num())
+		, alloc(0)
+		, ready(0)
+		, lock{false}
+		, data( new processor_id[max] )
+	{
+		/*paranoid*/ assert(num() == max);
+		/*paranoid*/ assert(std::atomic_is_lock_free(&alloc));
+		/*paranoid*/ assert(std::atomic_is_lock_free(&ready));
+	}
+
+	~processor_list() {
+		delete[] data;
+	}
+
+	//=======================================================================
+	// Lock-Free registering/unregistering of threads
+	unsigned doregister(processor * proc) {
+		// Step - 1 : check if there is already space in the data
+		uint_fast32_t s = ready;
+
+		// Check among all the ready
+		for(uint_fast32_t i = 0; i < s; i++) {
+			processor * null = nullptr; // Re-write every loop since compare thrashes it
+			if( data[i].handle.load(std::memory_order_relaxed) == null
+			 && data[i].handle.compare_exchange_strong(null, proc)) {
+				/*paranoid*/ assert(i < ready);
+				/*paranoid*/ assert(alignof(decltype(data[i])) == cache_line_size);
+				/*paranoid*/ assert((uintptr_t(&data[i]) % cache_line_size) == 0);
+				return i;
+			}
+		}
+
+		if(max <= alloc) ERROR;
+
+		// Step - 2 : F&A to get a new spot in the array.
+		uint_fast32_t n = alloc++;
+		if(max <= n) ERROR;
+
+		// Step - 3 : Mark space as used and then publish it.
+		void * storage = &data[n];
+		new (storage) processor_id( proc );
+		while(true) {
+			unsigned copy = n;
+			if( ready.load(std::memory_order_relaxed) == n
+			 && ready.compare_exchange_weak(copy, n + 1) )
+			 	break;
+			asm volatile("pause");
+		}
+
+		// Return new spot.
+		/*paranoid*/ assert(n < ready);
+		/*paranoid*/ assert(alignof(decltype(data[n])) == cache_line_size);
+		/*paranoid*/ assert((uintptr_t(&data[n]) % cache_line_size) == 0);
+		return n;
+	}
+
+	processor * unregister(unsigned iproc) {
+		/*paranoid*/ assert(iproc < ready);
+		auto ret = data[iproc].handle.load(std::memory_order_relaxed);
+		data[iproc].handle = nullptr;
+		return ret;
+	}
+
+	// Reset all registration
+	// Unsafe in most cases, use for testing only.
+	void reset() {
+		alloc = 0;
+		ready = 0;
+	}
+
+	processor * get(unsigned iproc) {
+		return data[iproc].handle.load(std::memory_order_relaxed);
+	}
+
+	//=======================================================================
+	// Reader-writer lock implementation
+	// Concurrent with doregister/unregister,
+	//    i.e., threads can be added at any point during or between the entry/exit
+
+	//-----------------------------------------------------------------------
+	// Reader side
+	void read_lock(unsigned iproc) {
+		/*paranoid*/ assert(iproc < ready);
+
+		// Step 1 : make sure no writer are in the middle of the critical section
+		while(lock.load(std::memory_order_relaxed))
+			asm volatile("pause");
+
+		// Fence needed because we don't want to start trying to acquire the lock
+		// before we read a false.
+		// Not needed on x86
+		// std::atomic_thread_fence(std::memory_order_seq_cst);
+
+		// Step 2 : acquire our local lock
+		acquire( data[iproc].lock );
+		/*paranoid*/ assert(data[iproc].lock);
+	}
+
+	void read_unlock(unsigned iproc) {
+		/*paranoid*/ assert(iproc < ready);
+		/*paranoid*/ assert(data[iproc].lock);
+		data[iproc].lock.store(false, std::memory_order_release);
+	}
+
+	//-----------------------------------------------------------------------
+	// Writer side
+	uint_fast32_t write_lock() {
+		// Step 1 : lock global lock
+		// It is needed to avoid processors that register mid Critical-Section
+		//   to simply lock their own lock and enter.
+		acquire(lock);
+
+		// Step 2 : lock per-proc lock
+		// Processors that are currently being registered aren't counted
+		//   but can't be in read_lock or in the critical section.
+		// All other processors are counted
+		uint_fast32_t s = ready;
+		for(uint_fast32_t i = 0; i < s; i++) {
+			acquire( data[i].lock );
+		}
+
+		return s;
+	}
+
+	void write_unlock(uint_fast32_t last_s) {
+		// Step 1 : release local locks
+		// This must be done while the global lock is held to avoid
+		//   threads that where created mid critical section
+		//   to race to lock their local locks and have the writer
+		//   immidiately unlock them
+		// Alternative solution : return s in write_lock and pass it to write_unlock
+		for(uint_fast32_t i = 0; i < last_s; i++) {
+			assert(data[i].lock);
+			data[i].lock.store(false, std::memory_order_release);
+		}
+
+		// Step 2 : release global lock
+		/*paranoid*/ assert(true == lock);
+		lock.store(false, std::memory_order_release);
+	}
+
+	//-----------------------------------------------------------------------
+	// Checking support
+	uint_fast32_t epoch_check() {
+		// Step 1 : lock global lock
+		// It is needed to avoid processors that register mid Critical-Section
+		//   to simply lock their own lock and enter.
+		while(lock.load(std::memory_order_relaxed))
+			asm volatile("pause");
+
+		// Step 2 : lock per-proc lock
+		// Processors that are currently being registered aren't counted
+		//   but can't be in read_lock or in the critical section.
+		// All other processors are counted
+		uint_fast32_t s = ready;
+		for(uint_fast32_t i = 0; i < s; i++) {
+			while(data[i].lock.load(std::memory_order_relaxed))
+				asm volatile("pause");
+		}
+
+		return s;
+	}
+
+public:
+};
+
+#undef ERROR
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor_list_fast.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor_list_fast.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor_list_fast.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,173 @@
+#include "processor_list.hpp"
+
+#include <array>
+#include <iomanip>
+#include <iostream>
+#include <locale>
+#include <string>
+#include <thread>
+
+#include "utils.hpp"
+
+unsigned num() {
+	return 0x1000000;
+}
+
+//-------------------
+
+struct processor {
+	unsigned id;
+};
+void run(unsigned nthread, double duration, unsigned writes, unsigned epochs) {
+	assert(writes < 100);
+
+	// List being tested
+	processor_list list = {};
+
+	// Barrier for synchronization
+	barrier_t barrier(nthread + 1);
+
+	// Data to check everything is OK
+	size_t write_committed = 0ul;
+	struct {
+		std::atomic_size_t write = { 0ul };
+		std::atomic_size_t read  = { 0ul };
+		std::atomic_size_t epoch = { 0ul };
+	} lock_cnt;
+
+	// Flag to signal termination
+	std::atomic_bool done = { false };
+
+	std::thread * threads[nthread];
+	unsigned i = 1;
+	for(auto & t : threads) {
+		t = new std::thread([&done, &list, &barrier, &write_committed, &lock_cnt, writes, epochs](unsigned tid) {
+			Random rand(tid + rdtscl());
+			processor proc;
+			proc.id = list.doregister(&proc);
+			size_t writes_cnt = 0;
+			size_t reads_cnt = 0;
+			size_t epoch_cnt = 0;
+
+			affinity(tid);
+
+			barrier.wait(tid);
+
+			while(__builtin_expect(!done, true)) {
+				auto r = rand.next() % 100;
+				if (r < writes) {
+					auto n = list.write_lock();
+					write_committed++;
+					writes_cnt++;
+					assert(writes_cnt < -2ul);
+					list.write_unlock(n);
+				}
+				else if(r < epochs) {
+					list.epoch_check();
+					epoch_cnt++;
+				}
+				else {
+					list.read_lock(proc.id);
+					reads_cnt++;
+					assert(reads_cnt < -2ul);
+					list.read_unlock(proc.id);
+				}
+			}
+
+			barrier.wait(tid);
+
+			auto p = list.unregister(proc.id);
+			assert(&proc == p);
+			lock_cnt.write += writes_cnt;
+			lock_cnt.read  += reads_cnt;
+			lock_cnt.epoch += epoch_cnt;
+		}, i++);
+	}
+
+	auto before = Clock::now();
+	barrier.wait(0);
+
+	while(true) {
+		usleep(1000);
+		auto now = Clock::now();
+		duration_t durr = now - before;
+		if( durr.count() > duration ) {
+			done = true;
+			break;
+		}
+	}
+
+	barrier.wait(0);
+	auto after = Clock::now();
+	duration_t durr = after - before;
+	duration = durr.count();
+
+	for(auto t : threads) {
+		t->join();
+		delete t;
+	}
+
+	assert(write_committed == lock_cnt.write);
+
+	size_t totalop = lock_cnt.read + lock_cnt.write + lock_cnt.epoch;
+	size_t ops_sec = size_t(double(totalop) / duration);
+	size_t ops_thread = ops_sec / nthread;
+	double dur_nano = duration_cast<std::nano>(1.0);
+
+	std::cout << "Duration      : " << duration << "s\n";
+	std::cout << "Total ops     : " << totalop << "(" << lock_cnt.read << "r, " << lock_cnt.write << "w, " << lock_cnt.epoch << "e)\n";
+	std::cout << "Ops/sec       : " << ops_sec << "\n";
+	std::cout << "Ops/sec/thread: " << ops_thread << "\n";
+	std::cout << "ns/Op         : " << ( dur_nano / ops_thread )<< "\n";
+}
+
+void usage(char * argv[]) {
+	std::cerr << argv[0] << ": [DURATION (FLOAT:SEC)] [NTHREADS] [%WRITES]" << std::endl;;
+	std::exit(1);
+}
+
+int main(int argc, char * argv[]) {
+
+	double duration   = 5.0;
+	unsigned nthreads = 2;
+	unsigned writes   = 0;
+	unsigned epochs   = 0;
+
+	std::cout.imbue(std::locale(""));
+
+	switch (argc)
+	{
+	case 5:
+		epochs = std::stoul(argv[4]);
+		[[fallthrough]];
+	case 4:
+		writes = std::stoul(argv[3]);
+		if( (writes + epochs) > 100 ) {
+			std::cerr << "Writes + Epochs must be valid percentage, was " << argv[3] << " + " << argv[4] << "(" << writes << " + " << epochs << ")" << std::endl;
+			usage(argv);
+		}
+		[[fallthrough]];
+	case 3:
+		nthreads = std::stoul(argv[2]);
+		[[fallthrough]];
+	case 2:
+		duration = std::stod(argv[1]);
+		if( duration <= 0.0 ) {
+			std::cerr << "Duration must be positive, was " << argv[1] << "(" << duration << ")" << std::endl;
+			usage(argv);
+		}
+		[[fallthrough]];
+	case 1:
+		break;
+	default:
+		usage(argv);
+		break;
+	}
+
+	check_cache_line_size();
+
+	std::cout << "Running " << nthreads << " threads for " << duration << " seconds with " << writes << "% writes and " << epochs << "% epochs" << std::endl;
+	run(nthreads, duration, writes, epochs + writes);
+
+	return 0;
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor_list_good.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor_list_good.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/processor_list_good.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,269 @@
+#include "processor_list.hpp"
+
+#include <iostream>
+#include <string>
+#include <thread>
+
+unsigned num() {
+	return 0x1000000;
+}
+
+// Barrier from
+class barrier_t {
+public:
+	barrier_t(size_t total)
+		: waiting(0)
+		, total(total)
+	{}
+
+	void wait(unsigned) {
+		size_t target = waiting++;
+		target = (target - (target % total)) + total;
+		while(waiting < target)
+			asm volatile("pause");
+
+		assert(waiting < (1ul << 60));
+    	}
+
+private:
+	std::atomic<size_t> waiting;
+	size_t total;
+};
+
+class Random {
+private:
+	unsigned int seed;
+public:
+	Random(int seed) {
+		this->seed = seed;
+	}
+
+	/** returns pseudorandom x satisfying 0 <= x < n. **/
+	unsigned int next() {
+		seed ^= seed << 6;
+		seed ^= seed >> 21;
+		seed ^= seed << 7;
+		return seed;
+    	}
+};
+
+//-------------------
+
+struct processor {
+	unsigned id;
+};
+
+// Stage 1
+// Make sure that the early registration works correctly
+// Registration uses a different process if the act of
+// registering the processor makes it the highest processor count
+// seen yet.
+void stage1(unsigned nthread, unsigned repeats) {
+	const int n = repeats;
+	const int nproc = 10;
+
+	// List being tested
+	processor_list list;
+
+	// Barrier for synchronization
+	barrier_t barrier(nthread + 1);
+
+	// Seen values to detect duplicattion
+	std::atomic<processor *> ids[nthread * nproc];
+	for(auto & i : ids) {
+		i = nullptr;
+	}
+
+	// Can't pass VLA to lambda
+	std::atomic<processor *> * idsp = ids;
+
+	// Threads which will run the code
+	std::thread * threads[nthread];
+	unsigned i = 1;
+	for(auto & t : threads) {
+		// Each thread will try to register a processor then add it to the
+		// list of registerd processor
+		t = new std::thread([&list, &barrier, idsp, n](unsigned tid){
+			processor proc[nproc];
+			for(int i = 0; i < n; i++) {
+				for(auto & p : proc) {
+					// Register the thread
+					p.id = list.doregister(&p);
+				}
+
+				for(auto & p : proc) {
+					// Make sure no one got this id before
+					processor * prev = idsp[p.id].exchange(&p);
+					assert(nullptr == prev);
+
+					// Make sure id is still consistend
+					assert(&p == list.get(p.id));
+				}
+
+				// wait for round to finish
+				barrier.wait(tid);
+
+				// wait for reset
+				barrier.wait(tid);
+			}
+		}, i++);
+	}
+
+	for(int i = 0; i < n; i++) {
+		//Wait for round to finish
+		barrier.wait(0);
+
+		// Reset list
+		list.reset();
+
+		std::cout << i << "\r";
+
+		// Reset seen values
+		for(auto & i : ids) {
+			i = nullptr;
+		}
+
+		// Start next round
+		barrier.wait(0);
+	}
+
+	for(auto t : threads) {
+		t->join();
+		delete t;
+	}
+}
+
+// Stage 2
+// Check that once churning starts, registration is still consistent.
+void stage2(unsigned nthread, unsigned repeats) {
+	// List being tested
+	processor_list list;
+
+	// Threads which will run the code
+	std::thread * threads[nthread];
+	unsigned i = 1;
+	for(auto & t : threads) {
+		// Each thread will try to register a few processors and
+		// unregister them, making sure that the registration is
+		// consistent
+		t = new std::thread([&list, repeats](unsigned tid){
+			processor procs[10];
+			for(unsigned i = 0; i < repeats; i++) {
+				// register the procs and note the id
+				for(auto & p : procs) {
+					p.id = list.doregister(&p);
+				}
+
+				if(1 == tid) std::cout << i << "\r";
+
+				// check the id is still consistent
+				for(const auto & p : procs) {
+					assert(&p == list.get(p.id));
+				}
+
+				// unregister and check the id is consistent
+				for(const auto & p : procs) {
+					assert(&p == list.unregister(p.id));
+				}
+			}
+		}, i++);
+	}
+
+	for(auto t : threads) {
+		t->join();
+		delete t;
+	}
+}
+
+bool is_writer();
+
+// Stage 3
+// Check that the reader writer lock works.
+void stage3(unsigned nthread, unsigned repeats) {
+	// List being tested
+	processor_list list;
+
+	size_t before = 0;
+
+	std::unique_ptr<size_t> after( new size_t(0) );
+
+	std::atomic<bool> done ( false );
+
+	// Threads which will run the code
+	std::thread * threads[nthread];
+	unsigned i = 1;
+	for(auto & t : threads) {
+		// Each thread will try to register a few processors and
+		// unregister them, making sure that the registration is
+		// consistent
+		t = new std::thread([&list, repeats, &before, &after, &done](unsigned tid){
+			Random rng(tid);
+			processor proc;
+			proc.id = list.doregister(&proc);
+			while(!done) {
+
+				if( (rng.next() % 100) == 0 ) {
+					auto r = list.write_lock();
+
+					auto b = before++;
+
+					std::cout << b << "\r";
+
+					(*after)++;
+
+					if(b >= repeats) done = true;
+
+					list.write_unlock(r);
+				}
+				else {
+					list.read_lock(proc.id);
+					assert(before == *after);
+					list.read_unlock(proc.id);
+				}
+
+			}
+
+			list.unregister(proc.id);
+		}, i++);
+	}
+
+	for(auto t : threads) {
+		t->join();
+		delete t;
+	}
+}
+
+int main(int argc, char * argv[]) {
+
+	unsigned nthreads = 1;
+	if( argc >= 3 ) {
+		size_t idx;
+		nthreads = std::stoul(argv[2], &idx);
+		assert('\0' == argv[2][idx]);
+	}
+
+	unsigned repeats = 100;
+	if( argc >= 2 ) {
+		size_t idx;
+		repeats = std::stoul(argv[1], &idx);
+		assert('\0' == argv[1][idx]);
+	}
+
+	processor_list::check_cache_line_size();
+
+	std::cout << "Running " << repeats << " repetitions on " << nthreads << " threads" << std::endl;
+	std::cout << "Checking registration - early" << std::endl;
+	stage1(nthreads, repeats);
+	std::cout << "Done                         " << std::endl;
+
+	std::cout << "Checking registration - churn" << std::endl;
+	stage2(nthreads, repeats);
+	std::cout << "Done                         " << std::endl;
+
+	std::cout << "Checking RW lock             " << std::endl;
+	stage3(nthreads, repeats);
+	std::cout << "Done                         " << std::endl;
+
+
+	return 0;
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/randbit.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/randbit.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/randbit.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,236 @@
+#include <cstddef>
+#include <cstdint>
+#include <x86intrin.h>
+
+__attribute__((noinline)) unsigned nthSetBit(size_t mask, unsigned bit) {
+	uint64_t v = mask;   // Input value to find position with rank r.
+	unsigned int r = bit;// Input: bit's desired rank [1-64].
+	unsigned int s;      // Output: Resulting position of bit with rank r [1-64]
+	uint64_t a, b, c, d; // Intermediate temporaries for bit count.
+	unsigned int t;      // Bit count temporary.
+
+	// Do a normal parallel bit count for a 64-bit integer,
+	// but store all intermediate steps.
+	// a = (v & 0x5555...) + ((v >> 1) & 0x5555...);
+	a =  v - ((v >> 1) & ~0UL/3);
+	// b = (a & 0x3333...) + ((a >> 2) & 0x3333...);
+	b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
+	// c = (b & 0x0f0f...) + ((b >> 4) & 0x0f0f...);
+	c = (b + (b >> 4)) & ~0UL/0x11;
+	// d = (c & 0x00ff...) + ((c >> 8) & 0x00ff...);
+	d = (c + (c >> 8)) & ~0UL/0x101;
+
+
+	t = (d >> 32) + (d >> 48);
+	// Now do branchless select!
+	s  = 64;
+	// if (r > t) {s -= 32; r -= t;}
+	s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
+	t  = (d >> (s - 16)) & 0xff;
+	// if (r > t) {s -= 16; r -= t;}
+	s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
+	t  = (c >> (s - 8)) & 0xf;
+	// if (r > t) {s -= 8; r -= t;}
+	s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
+	t  = (b >> (s - 4)) & 0x7;
+	// if (r > t) {s -= 4; r -= t;}
+	s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
+	t  = (a >> (s - 2)) & 0x3;
+	// if (r > t) {s -= 2; r -= t;}
+	s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
+	t  = (v >> (s - 1)) & 0x1;
+	// if (r > t) s--;
+	s -= ((t - r) & 256) >> 8;
+	// s = 65 - s;
+	return s;
+}
+
+unsigned rand_bit(unsigned rnum, uint64_t mask) {
+	unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0;
+#if defined(BRANCHLESS)
+	uint64_t v = mask;   // Input value to find position with rank r.
+	unsigned int r = bit + 1;// Input: bit's desired rank [1-64].
+	unsigned int s;      // Output: Resulting position of bit with rank r [1-64]
+	uint64_t a, b, c, d; // Intermediate temporaries for bit count.
+	unsigned int t;      // Bit count temporary.
+
+	// Do a normal parallel bit count for a 64-bit integer,
+	// but store all intermediate steps.
+	// a = (v & 0x5555...) + ((v >> 1) & 0x5555...);
+	a =  v - ((v >> 1) & ~0UL/3);
+	// b = (a & 0x3333...) + ((a >> 2) & 0x3333...);
+	b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
+	// c = (b & 0x0f0f...) + ((b >> 4) & 0x0f0f...);
+	c = (b + (b >> 4)) & ~0UL/0x11;
+	// d = (c & 0x00ff...) + ((c >> 8) & 0x00ff...);
+	d = (c + (c >> 8)) & ~0UL/0x101;
+
+
+	t = (d >> 32) + (d >> 48);
+	// Now do branchless select!
+	s  = 64;
+	// if (r > t) {s -= 32; r -= t;}
+	s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
+	t  = (d >> (s - 16)) & 0xff;
+	// if (r > t) {s -= 16; r -= t;}
+	s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
+	t  = (c >> (s - 8)) & 0xf;
+	// if (r > t) {s -= 8; r -= t;}
+	s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
+	t  = (b >> (s - 4)) & 0x7;
+	// if (r > t) {s -= 4; r -= t;}
+	s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
+	t  = (a >> (s - 2)) & 0x3;
+	// if (r > t) {s -= 2; r -= t;}
+	s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
+	t  = (v >> (s - 1)) & 0x1;
+	// if (r > t) s--;
+	s -= ((t - r) & 256) >> 8;
+	// s = 65 - s;
+	return s - 1;
+#elif defined(LOOP)
+	for(unsigned i = 0; i < bit; i++) {
+		mask ^= (1ul << (__builtin_ffsl(mask) - 1ul));
+	}
+	return __builtin_ffsl(mask) - 1ul;
+#elif defined(PDEP)
+	uint64_t picked = _pdep_u64(1ul << bit, mask);
+	return __builtin_ffsl(picked) - 1ul;
+#else
+#error must define LOOP, PDEP or BRANCHLESS
+#endif
+}
+
+#include <cassert>
+#include <atomic>
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <locale>
+#include <thread>
+
+#include <unistd.h>
+
+class barrier_t {
+public:
+	barrier_t(size_t total)
+		: waiting(0)
+		, total(total)
+	{}
+
+	void wait(unsigned) {
+		size_t target = waiting++;
+		target = (target - (target % total)) + total;
+		while(waiting < target)
+			asm volatile("pause");
+
+		assert(waiting < (1ul << 60));
+    	}
+
+private:
+	std::atomic<size_t> waiting;
+	size_t total;
+};
+
+class Random {
+private:
+	unsigned int seed;
+public:
+	Random(int seed) {
+		this->seed = seed;
+	}
+
+	/** returns pseudorandom x satisfying 0 <= x < n. **/
+	unsigned int next() {
+		seed ^= seed << 6;
+		seed ^= seed >> 21;
+		seed ^= seed << 7;
+		return seed;
+    	}
+};
+
+using Clock = std::chrono::high_resolution_clock;
+using duration_t = std::chrono::duration<double>;
+using std::chrono::nanoseconds;
+
+template<typename Ratio, typename T>
+T duration_cast(T seconds) {
+	return std::chrono::duration_cast<std::chrono::duration<T, Ratio>>(std::chrono::duration<T>(seconds)).count();
+}
+
+void waitfor(double & duration, barrier_t & barrier, std::atomic_bool & done) {
+
+
+	std::cout << "Starting" << std::endl;
+	auto before = Clock::now();
+	barrier.wait(0);
+
+	while(true) {
+		usleep(100000);
+		auto now = Clock::now();
+		duration_t durr = now - before;
+		if( durr.count() > duration ) {
+			done = true;
+			break;
+		}
+		std::cout << "\r" << std::setprecision(4) << durr.count();
+		std::cout.flush();
+	}
+
+	barrier.wait(0);
+	auto after = Clock::now();
+	duration_t durr = after - before;
+	duration = durr.count();
+	std::cout << "\rClosing down" << std::endl;
+}
+
+__attribute__((noinline)) void body(Random & rand) {
+	uint64_t mask = (uint64_t(rand.next()) << 32ul) | uint64_t(rand.next());
+	unsigned idx = rand.next();
+
+	unsigned bit = rand_bit(idx, mask);
+
+	if(__builtin_expect(((1ul << bit) & mask) == 0, false)) {
+		std::cerr << std::hex <<  "Rand " << idx << " from " << mask;
+		std::cerr << " gave " << (1ul << bit) << "(" << std::dec << bit << ")" << std::endl;
+		std::abort();
+	}
+}
+
+void runRandBit(double duration) {
+
+	std::atomic_bool done  = { false };
+	barrier_t barrier(2);
+
+	size_t count = 0;
+	std::thread thread([&done, &barrier, &count]() {
+
+		Random rand(22);
+
+		barrier.wait(1);
+
+		for(;!done; count++) {
+			body(rand);
+		}
+
+		barrier.wait(1);
+	});
+
+	waitfor(duration, barrier, done);
+	thread.join();
+
+	size_t ops = count;
+	size_t ops_sec = size_t(double(ops) / duration);
+	auto dur_nano = duration_cast<std::nano>(1.0);
+
+	std::cout << "Duration      : " << duration << "s\n";
+	std::cout << "ns/Op         : " << ( dur_nano / ops )<< "\n";
+	std::cout << "Ops/sec       : " << ops_sec << "\n";
+	std::cout << "Total ops     : " << ops << std::endl;
+
+}
+
+int main() {
+	std::cout.imbue(std::locale(""));
+	runRandBit(5);
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1141 @@
+#if !defined(LIST_VARIANT_HPP)
+#define LIST_VARIANT_HPP "relaxed_list.hpp"
+#endif
+
+#include LIST_VARIANT_HPP
+#if !defined(LIST_VARIANT)
+#error not variant selected
+#endif
+
+#include <array>
+#include <iomanip>
+#include <iostream>
+#include <locale>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/sysinfo.h>
+
+#include "utils.hpp"
+
+struct __attribute__((aligned(64))) Node {
+	static std::atomic_size_t creates;
+	static std::atomic_size_t destroys;
+
+	_LinksFields_t<Node> _links;
+
+	int value;
+	int id;
+
+	Node() { creates++; }
+	Node(int value): value(value) { creates++; }
+	~Node() { destroys++; }
+};
+
+std::atomic_size_t Node::creates  = { 0 };
+std::atomic_size_t Node::destroys = { 0 };
+
+bool enable_stats = false;
+
+template<>
+thread_local LIST_VARIANT<Node>::TLS LIST_VARIANT<Node>::tls = {};
+
+template<>
+std::atomic_uint32_t LIST_VARIANT<Node>::ticket = { 0 };
+
+#ifndef NO_STATS
+template<>
+LIST_VARIANT<Node>::GlobalStats LIST_VARIANT<Node>::global_stats = {};
+#endif
+
+// ================================================================================================
+//                        UTILS
+// ================================================================================================
+
+struct local_stat_t {
+	size_t in  = 0;
+	size_t out = 0;
+	size_t empty = 0;
+	size_t crc_in  = 0;
+	size_t crc_out = 0;
+	size_t valmax = 0;
+	size_t valmin = 100000000ul;
+	struct {
+		size_t val = 0;
+		size_t cnt = 0;
+	} comp;
+	struct {
+		size_t val = 0;
+		size_t cnt = 0;
+	} subm;
+};
+
+struct global_stat_t {
+	std::atomic_size_t in  = { 0 };
+	std::atomic_size_t out = { 0 };
+	std::atomic_size_t empty = { 0 };
+	std::atomic_size_t crc_in  = { 0 };
+	std::atomic_size_t crc_out = { 0 };
+	std::atomic_size_t valmax = { 0 };
+	std::atomic_size_t valmin = { 100000000ul };
+	struct {
+		std::atomic_size_t val = { 0 };
+		std::atomic_size_t cnt = { 0 };
+	} comp;
+	struct {
+		std::atomic_size_t val = { 0 };
+		std::atomic_size_t cnt = { 0 };
+	} subm;
+};
+
+void atomic_max(std::atomic_size_t & target, size_t value) {
+	for(;;) {
+		size_t expect = target.load(std::memory_order_relaxed);
+		if(value <= expect) return;
+		bool success = target.compare_exchange_strong(expect, value);
+		if(success) return;
+	}
+}
+
+void atomic_min(std::atomic_size_t & target, size_t value) {
+	for(;;) {
+		size_t expect = target.load(std::memory_order_relaxed);
+		if(value >= expect) return;
+		bool success = target.compare_exchange_strong(expect, value);
+		if(success) return;
+	}
+}
+
+void tally_stats(global_stat_t & global, local_stat_t & local) {
+
+	global.in    += local.in;
+	global.out   += local.out;
+	global.empty += local.empty;
+
+	global.crc_in  += local.crc_in;
+	global.crc_out += local.crc_out;
+
+	global.comp.val += local.comp.val;
+	global.comp.cnt += local.comp.cnt;
+	global.subm.val += local.subm.val;
+	global.subm.cnt += local.subm.cnt;
+
+	atomic_max(global.valmax, local.valmax);
+	atomic_min(global.valmin, local.valmin);
+
+	LIST_VARIANT<Node>::stats_tls_tally();
+}
+
+void waitfor(double & duration, barrier_t & barrier, std::atomic_bool & done) {
+	std::cout << "Starting" << std::endl;
+	auto before = Clock::now();
+	barrier.wait(0);
+	bool is_tty = isatty(STDOUT_FILENO);
+
+	while(true) {
+		usleep(100000);
+		auto now = Clock::now();
+		duration_t durr = now - before;
+		if( durr.count() > duration ) {
+			done = true;
+			break;
+		}
+		if(is_tty) {
+			std::cout << "\r" << std::setprecision(4) << durr.count();
+			std::cout.flush();
+		}
+	}
+
+	barrier.wait(0);
+	auto after = Clock::now();
+	duration_t durr = after - before;
+	duration = durr.count();
+	std::cout << "\rClosing down" << std::endl;
+}
+
+void waitfor(double & duration, barrier_t & barrier, const std::atomic_size_t & count) {
+	std::cout << "Starting" << std::endl;
+	auto before = Clock::now();
+	barrier.wait(0);
+
+	while(true) {
+		usleep(100000);
+		size_t c = count.load();
+		if( c == 0 ) {
+			break;
+		}
+		std::cout << "\r" << c;
+		std::cout.flush();
+	}
+
+	barrier.wait(0);
+	auto after = Clock::now();
+	duration_t durr = after - before;
+	duration = durr.count();
+	std::cout << "\rClosing down" << std::endl;
+}
+
+void print_stats(double duration, unsigned nthread, global_stat_t & global) {
+	assert(Node::creates == Node::destroys);
+	assert(global.crc_in == global.crc_out);
+
+	std::cout << "Done" << std::endl;
+
+	size_t ops = global.in + global.out;
+	size_t ops_sec = size_t(double(ops) / duration);
+	size_t ops_thread = ops_sec / nthread;
+	auto dur_nano = duration_cast<std::nano>(1.0);
+
+	if(global.valmax != 0) {
+		std::cout << "Max runs      : " << global.valmax << "\n";
+		std::cout << "Min runs      : " << global.valmin << "\n";
+	}
+	if(global.comp.cnt != 0) {
+		std::cout << "Submit count  : " << global.subm.cnt << "\n";
+		std::cout << "Submit average: " << ((double(global.subm.val)) / global.subm.cnt) << "\n";
+		std::cout << "Complete count: " << global.comp.cnt << "\n";
+		std::cout << "Complete avg  : " << ((double(global.comp.val)) / global.comp.cnt) << "\n";
+	}
+	std::cout << "Duration      : " << duration << "s\n";
+	std::cout << "ns/Op         : " << ( dur_nano / ops_thread )<< "\n";
+	std::cout << "Ops/sec/thread: " << ops_thread << "\n";
+	std::cout << "Ops/sec       : " << ops_sec << "\n";
+	std::cout << "Total ops     : " << ops << "(" << global.in << "i, " << global.out << "o, " << global.empty << "e)\n";
+	#ifndef NO_STATS
+		LIST_VARIANT<Node>::stats_print(std::cout);
+	#endif
+}
+
+void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output);
+
+// ================================================================================================
+//                        EXPERIMENTS
+// ================================================================================================
+
+// ================================================================================================
+__attribute__((noinline)) void runChurn_body(
+	std::atomic<bool>& done,
+	Random & rand,
+	Node * my_nodes[],
+	unsigned nslots,
+	local_stat_t & local,
+	LIST_VARIANT<Node> & list
+) {
+	while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) {
+		int idx = rand.next() % nslots;
+		if (auto node = my_nodes[idx]) {
+			local.crc_in += node->value;
+			list.push(node);
+			my_nodes[idx] = nullptr;
+			local.in++;
+		}
+		else if(auto node = list.pop()) {
+			local.crc_out += node->value;
+			my_nodes[idx] = node;
+			local.out++;
+		}
+		else {
+			local.empty++;
+		}
+	}
+}
+
+void runChurn(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes, const unsigned nslots) {
+	std::cout << "Churn Benchmark" << std::endl;
+	assert(nnodes <= nslots);
+	// List being tested
+
+	// Barrier for synchronization
+	barrier_t barrier(nthread + 1);
+
+	// Data to check everything is OK
+	global_stat_t global;
+
+	// Flag to signal termination
+	std::atomic_bool done  = { false };
+
+	// Prep nodes
+	std::cout << "Initializing ";
+	size_t npushed = 0;
+	LIST_VARIANT<Node> list = { nthread, nqueues };
+	{
+		Node** all_nodes[nthread];
+		for(auto & nodes : all_nodes) {
+			nodes = new __attribute__((aligned(64))) Node*[nslots + 8];
+			Random rand(rdtscl());
+			for(unsigned i = 0; i < nnodes; i++) {
+				nodes[i] = new Node(rand.next() % 100);
+			}
+
+			for(unsigned i = nnodes; i < nslots; i++) {
+				nodes[i] = nullptr;
+			}
+
+			for(int i = 0; i < 10 && i < (int)nslots; i++) {
+				int idx = rand.next() % nslots;
+				if (auto node = nodes[idx]) {
+					global.crc_in += node->value;
+					list.push(node);
+					npushed++;
+					nodes[idx] = nullptr;
+				}
+			}
+		}
+
+		std::cout << nnodes << " nodes (" << nslots << " slots)" << std::endl;
+
+		enable_stats = true;
+
+		std::thread * threads[nthread];
+		unsigned i = 1;
+		for(auto & t : threads) {
+			auto & my_nodes = all_nodes[i - 1];
+			t = new std::thread([&done, &list, &barrier, &global, &my_nodes, nslots](unsigned tid) {
+				Random rand(tid + rdtscl());
+
+				local_stat_t local;
+
+				// affinity(tid);
+
+				barrier.wait(tid);
+
+				// EXPERIMENT START
+
+				runChurn_body(done, rand, my_nodes, nslots, local, list);
+
+				// EXPERIMENT END
+
+				barrier.wait(tid);
+
+				tally_stats(global, local);
+
+				for(unsigned i = 0; i < nslots; i++) {
+					delete my_nodes[i];
+				}
+			}, i++);
+		}
+
+		waitfor(duration, barrier, done);
+
+		for(auto t : threads) {
+			t->join();
+			delete t;
+		}
+
+		enable_stats = false;
+
+		while(auto node = list.pop()) {
+			global.crc_out += node->value;
+			delete node;
+		}
+
+		for(auto nodes : all_nodes) {
+			delete[] nodes;
+		}
+	}
+
+	print_stats(duration, nthread, global);
+}
+
+// ================================================================================================
+__attribute__((noinline)) void runPingPong_body(
+	std::atomic<bool>& done,
+	Node initial_nodes[],
+	unsigned nnodes,
+	local_stat_t & local,
+	LIST_VARIANT<Node> & list
+) {
+	Node * nodes[nnodes];
+	{
+		unsigned i = 0;
+		for(auto & n : nodes) {
+			n = &initial_nodes[i++];
+		}
+	}
+
+	while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) {
+
+		for(Node * & node : nodes) {
+			local.crc_in += node->value;
+			list.push(node);
+			local.in++;
+		}
+
+		// -----
+
+		for(Node * & node : nodes) {
+			node = list.pop();
+			assert(node);
+			local.crc_out += node->value;
+			local.out++;
+		}
+	}
+}
+
+void runPingPong(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes) {
+	std::cout << "PingPong Benchmark" << std::endl;
+
+
+	// Barrier for synchronization
+	barrier_t barrier(nthread + 1);
+
+	// Data to check everything is OK
+	global_stat_t global;
+
+	// Flag to signal termination
+	std::atomic_bool done  = { false };
+
+	std::cout << "Initializing ";
+	// List being tested
+	LIST_VARIANT<Node> list = { nthread, nqueues };
+	{
+		enable_stats = true;
+
+		std::thread * threads[nthread];
+		unsigned i = 1;
+		for(auto & t : threads) {
+			t = new std::thread([&done, &list, &barrier, &global, nnodes](unsigned tid) {
+				Random rand(tid + rdtscl());
+
+				Node nodes[nnodes];
+				for(auto & n : nodes) {
+					n.value = (int)rand.next() % 100;
+				}
+
+				local_stat_t local;
+
+				// affinity(tid);
+
+				barrier.wait(tid);
+
+				// EXPERIMENT START
+
+				runPingPong_body(done, nodes, nnodes, local, list);
+
+				// EXPERIMENT END
+
+				barrier.wait(tid);
+
+				tally_stats(global, local);
+			}, i++);
+		}
+
+		waitfor(duration, barrier, done);
+
+		for(auto t : threads) {
+			t->join();
+			delete t;
+		}
+
+		enable_stats = false;
+	}
+
+	print_stats(duration, nthread, global);
+}
+
+// ================================================================================================
+struct __attribute__((aligned(64))) Slot {
+	Node * volatile node;
+};
+
+__attribute__((noinline)) void runProducer_body(
+	std::atomic<bool>& done,
+	Random & rand,
+	Slot * slots,
+	int nslots,
+	local_stat_t & local,
+	LIST_VARIANT<Node> & list
+) {
+	while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) {
+
+		Node * node = list.pop();
+		if(!node) {
+			local.empty ++;
+			continue;
+		}
+
+		local.crc_out += node->value;
+		local.out++;
+
+		if(node->id == 0) {
+			unsigned cnt = 0;
+			for(int i = 0; i < nslots; i++) {
+				Node * found = __atomic_exchange_n( &slots[i].node, nullptr, __ATOMIC_SEQ_CST );
+				if( found ) {
+					local.crc_in += found->value;
+					local.in++;
+					cnt++;
+					list.push( found );
+				}
+			}
+
+			local.crc_in += node->value;
+			local.in++;
+			list.push( node );
+
+			local.comp.cnt++;
+			local.comp.val += cnt;
+		}
+		else {
+			unsigned len = 0;
+			while(true) {
+				auto off = rand.next();
+				for(int i = 0; i < nslots; i++) {
+					Node * expected = nullptr;
+					int idx = (i + off) % nslots;
+					Slot & slot = slots[ idx ];
+					if(
+						slot.node == nullptr &&
+						__atomic_compare_exchange_n( &slot.node, &expected, node, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST )
+					) {
+						local.subm.cnt++;
+						local.subm.val += len;
+						goto LOOP;
+					}
+					assert( expected != node );
+					len++;
+				}
+			}
+		}
+
+		LOOP:;
+	}
+}
+
+void runProducer(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes) {
+	std::cout << "Producer Benchmark" << std::endl;
+
+	// Barrier for synchronization
+	barrier_t barrier(nthread + 1);
+
+	// Data to check everything is OK
+	global_stat_t global;
+
+	// Flag to signal termination
+	std::atomic_bool done  = { false };
+
+	std::cout << "Initializing ";
+
+	int nslots = nnodes * 4;
+	Slot * slots = new Slot[nslots];
+	std::cout << nnodes << " nodes (" << nslots << " slots)" << std::endl;
+
+	// List being tested
+	LIST_VARIANT<Node> list = { nthread, nqueues };
+	{
+		Random rand(rdtscl());
+		for(unsigned i = 0; i < nnodes; i++) {
+			Node * node = new Node(rand.next() % 100);
+			node->id = i;
+			global.crc_in += node->value;
+			list.push(node);
+		}
+
+		for(int i = 0; i < nslots; i++) {
+			slots[i].node = nullptr;
+		}
+	}
+
+	{
+		enable_stats = true;
+
+		std::thread * threads[nthread];
+		unsigned i = 1;
+		for(auto & t : threads) {
+			t = new std::thread([&done, &list, &barrier, &global, slots, nslots](unsigned tid) {
+				Random rand(tid + rdtscl());
+
+				local_stat_t local;
+				barrier.wait(tid);
+
+				// EXPERIMENT START
+
+				runProducer_body(done, rand, slots, nslots, local, list);
+
+				// EXPERIMENT END
+
+				barrier.wait(tid);
+
+				tally_stats(global, local);
+			}, i++);
+		}
+
+		waitfor(duration, barrier, done);
+
+		for(auto t : threads) {
+			t->join();
+			delete t;
+		}
+
+		enable_stats = false;
+	}
+
+	{
+		while(Node * node = list.pop()) {
+			global.crc_out += node->value;
+			delete node;
+		}
+
+		for(int i = 0; i < nslots; i++) {
+			delete slots[i].node;
+		}
+
+		delete [] slots;
+	}
+
+	print_stats(duration, nthread, global);
+}
+
+// ================================================================================================
+__attribute__((noinline)) void runFairness_body(
+	unsigned tid,
+	size_t width,
+	size_t length,
+	int output[],
+	std::atomic_size_t & count,
+	Node initial_nodes[],
+	unsigned nnodes,
+	local_stat_t & local,
+	LIST_VARIANT<Node> & list
+) {
+	Node * nodes[nnodes];
+	{
+		unsigned i = 0;
+		for(auto & n : nodes) {
+			n = &initial_nodes[i++];
+		}
+	}
+
+	while(__builtin_expect(0 != count.load(std::memory_order_relaxed), true)) {
+
+		for(Node * & node : nodes) {
+			local.crc_in += node->id;
+			list.push(node);
+			local.in++;
+		}
+
+		// -----
+
+		for(Node * & node : nodes) {
+			node = list.pop();
+			assert(node);
+
+			if (unsigned(node->value) < length) {
+				size_t idx = (node->value * width) + node->id;
+				assert(idx < (width * length));
+				output[idx] = tid;
+			}
+
+			node->value++;
+			if(unsigned(node->value) == length) count--;
+
+			local.crc_out += node->id;
+			local.out++;
+		}
+	}
+}
+
+void runFairness(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes, const std::string & output) {
+	std::cout << "Fairness Benchmark, outputing to : " << output << std::endl;
+
+	// Barrier for synchronization
+	barrier_t barrier(nthread + 1);
+
+	// Data to check everything is OK
+	global_stat_t global;
+
+	std::cout << "Initializing ";
+
+	// Check fairness by creating a png of where the threads ran
+	size_t width = nthread * nnodes;
+	size_t length = 100000;
+
+	std::unique_ptr<int[]> data_out { new int[width * length] };
+
+	// Flag to signal termination
+	std::atomic_size_t count = width;
+
+	// List being tested
+	LIST_VARIANT<Node> list = { nthread, nqueues };
+	{
+		enable_stats = true;
+
+		std::thread * threads[nthread];
+		unsigned i = 1;
+		for(auto & t : threads) {
+			t = new std::thread([&count, &list, &barrier, &global, nnodes, width, length, data_out = data_out.get()](unsigned tid) {
+				unsigned int start = (tid - 1) * nnodes;
+				Node nodes[nnodes];
+				for(auto & n : nodes) {
+					n.id = start;
+					n.value = 0;
+					start++;
+				}
+
+				local_stat_t local;
+
+				// affinity(tid);
+
+				barrier.wait(tid);
+
+				// EXPERIMENT START
+
+				runFairness_body(tid, width, length, data_out, count, nodes, nnodes, local, list);
+
+				// EXPERIMENT END
+
+				barrier.wait(tid);
+
+				for(const auto & n : nodes) {
+					local.valmax = max(local.valmax, size_t(n.value));
+					local.valmin = min(local.valmin, size_t(n.value));
+				}
+
+				tally_stats(global, local);
+			}, i++);
+		}
+
+		waitfor(duration, barrier, count);
+
+		for(auto t : threads) {
+			t->join();
+			delete t;
+		}
+
+		enable_stats = false;
+	}
+
+	print_stats(duration, nthread, global);
+
+	// save_fairness(data_out.get(), 100, nthread, width, length, output);
+}
+
+// ================================================================================================
+
+bool iequals(const std::string& a, const std::string& b)
+{
+    return std::equal(a.begin(), a.end(),
+                      b.begin(), b.end(),
+                      [](char a, char b) {
+                          return std::tolower(a) == std::tolower(b);
+                      });
+}
+
+int main(int argc, char * argv[]) {
+
+	double duration   = 5.0;
+	unsigned nthreads = 2;
+	unsigned nqueues  = 4;
+	unsigned nnodes   = 100;
+	unsigned nslots   = 100;
+	std::string out   = "fairness.png";
+
+	enum {
+		Churn,
+		PingPong,
+		Producer,
+		Fairness,
+		NONE
+	} benchmark = NONE;
+
+	std::cout.imbue(std::locale(""));
+
+	for(;;) {
+		static struct option options[] = {
+			{"duration",  required_argument, 0, 'd'},
+			{"nthreads",  required_argument, 0, 't'},
+			{"nqueues",   required_argument, 0, 'q'},
+			{"benchmark", required_argument, 0, 'b'},
+			{0, 0, 0, 0}
+		};
+
+		int idx = 0;
+		int opt = getopt_long(argc, argv, "d:t:q:b:", options, &idx);
+
+		std::string arg = optarg ? optarg : "";
+		size_t len = 0;
+		switch(opt) {
+			// Exit Case
+			case -1:
+				/* paranoid */ assert(optind <= argc);
+				switch(benchmark) {
+				case NONE:
+					std::cerr << "Must specify a benchmark" << std::endl;
+					goto usage;
+				case PingPong:
+					nnodes = 1;
+					switch(argc - optind) {
+					case 0: break;
+					case 1:
+						try {
+							arg = optarg = argv[optind];
+							nnodes = stoul(optarg, &len);
+							if(len != arg.size()) { throw std::invalid_argument(""); }
+						} catch(std::invalid_argument &) {
+							std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl;
+							goto usage;
+						}
+						break;
+					default:
+						std::cerr << "'PingPong' benchmark doesn't accept more than 1 extra arguments" << std::endl;
+						goto usage;
+					}
+					break;
+				case Producer:
+					nnodes = 32;
+					switch(argc - optind) {
+					case 0: break;
+					case 1:
+						try {
+							arg = optarg = argv[optind];
+							nnodes = stoul(optarg, &len);
+							if(len != arg.size()) { throw std::invalid_argument(""); }
+						} catch(std::invalid_argument &) {
+							std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl;
+							goto usage;
+						}
+						break;
+					default:
+						std::cerr << "'Producer' benchmark doesn't accept more than 1 extra arguments" << std::endl;
+						goto usage;
+					}
+					break;
+				case Churn:
+					nnodes = 100;
+					nslots = 100;
+					switch(argc - optind) {
+					case 0: break;
+					case 1:
+						try {
+							arg = optarg = argv[optind];
+							nnodes = stoul(optarg, &len);
+							if(len != arg.size()) { throw std::invalid_argument(""); }
+							nslots = nnodes;
+						} catch(std::invalid_argument &) {
+							std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl;
+							goto usage;
+						}
+						break;
+					case 2:
+						try {
+							arg = optarg = argv[optind];
+							nnodes = stoul(optarg, &len);
+							if(len != arg.size()) { throw std::invalid_argument(""); }
+						} catch(std::invalid_argument &) {
+							std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl;
+							goto usage;
+						}
+						try {
+							arg = optarg = argv[optind + 1];
+							nslots = stoul(optarg, &len);
+							if(len != arg.size()) { throw std::invalid_argument(""); }
+						} catch(std::invalid_argument &) {
+							std::cerr << "Number of slots must be a positive integer, was " << arg << std::endl;
+							goto usage;
+						}
+						break;
+					default:
+						std::cerr << "'Churn' benchmark doesn't accept more than 2 extra arguments" << std::endl;
+						goto usage;
+					}
+					break;
+				case Fairness:
+					nnodes = 1;
+					switch(argc - optind) {
+					case 0: break;
+					case 1:
+						arg = optarg = argv[optind];
+						out = arg;
+						break;
+					default:
+						std::cerr << "'Churn' benchmark doesn't accept more than 2 extra arguments" << std::endl;
+						goto usage;
+					}
+				}
+				goto run;
+			// Benchmarks
+			case 'b':
+				if(benchmark != NONE) {
+					std::cerr << "Only when benchmark can be run" << std::endl;
+					goto usage;
+				}
+				if(iequals(arg, "churn")) {
+					benchmark = Churn;
+					break;
+				}
+				if(iequals(arg, "pingpong")) {
+					benchmark = PingPong;
+					break;
+				}
+				if(iequals(arg, "producer")) {
+					benchmark = Producer;
+					break;
+				}
+				if(iequals(arg, "fairness")) {
+					benchmark = Fairness;
+					break;
+				}
+				std::cerr << "Unkown benchmark " << arg << std::endl;
+				goto usage;
+			// Numeric Arguments
+			case 'd':
+				try {
+					duration = stod(optarg, &len);
+					if(len != arg.size()) { throw std::invalid_argument(""); }
+				} catch(std::invalid_argument &) {
+					std::cerr << "Duration must be a valid double, was " << arg << std::endl;
+					goto usage;
+				}
+				break;
+			case 't':
+				try {
+					nthreads = stoul(optarg, &len);
+					if(len != arg.size()) { throw std::invalid_argument(""); }
+				} catch(std::invalid_argument &) {
+					std::cerr << "Number of threads must be a positive integer, was " << arg << std::endl;
+					goto usage;
+				}
+				break;
+			case 'q':
+				try {
+					nqueues = stoul(optarg, &len);
+					if(len != arg.size()) { throw std::invalid_argument(""); }
+				} catch(std::invalid_argument &) {
+					std::cerr << "Number of queues must be a positive integer, was " << arg << std::endl;
+					goto usage;
+				}
+				break;
+			// Other cases
+			default: /* ? */
+				std::cerr << opt << std::endl;
+			usage:
+				std::cerr << "Usage: " << argv[0] << ": [options] -b churn [NNODES] [NSLOTS = NNODES]" << std::endl;
+				std::cerr << "  or:  " << argv[0] << ": [options] -b pingpong [NNODES]" << std::endl;
+				std::cerr << "  or:  " << argv[0] << ": [options] -b producer [NNODES]" << std::endl;
+				std::cerr << std::endl;
+				std::cerr << "  -d, --duration=DURATION  Duration of the experiment, in seconds" << std::endl;
+				std::cerr << "  -t, --nthreads=NTHREADS  Number of kernel threads" << std::endl;
+				std::cerr << "  -q, --nqueues=NQUEUES    Number of queues per threads" << std::endl;
+				std::exit(1);
+		}
+	}
+	run:
+
+	check_cache_line_size();
+
+	std::cout << "Running " << nthreads << " threads (" << (nthreads * nqueues) << " queues) for " << duration << " seconds" << std::endl;
+	std::cout << "Relaxed list variant: " << LIST_VARIANT<Node>::name() << std::endl;
+	switch(benchmark) {
+		case Churn:
+			runChurn(nthreads, nqueues, duration, nnodes, nslots);
+			break;
+		case PingPong:
+			runPingPong(nthreads, nqueues, duration, nnodes);
+			break;
+		case Producer:
+			runProducer(nthreads, nqueues, duration, nnodes);
+			break;
+		case Fairness:
+			runFairness(nthreads, nqueues, duration, nnodes, out);
+			break;
+		default:
+			abort();
+	}
+	return 0;
+}
+
+const char * __my_progname = "Relaxed List";
+
+struct rgb_t {
+    double r;       // a fraction between 0 and 1
+    double g;       // a fraction between 0 and 1
+    double b;       // a fraction between 0 and 1
+};
+
+struct hsv_t {
+    double h;       // angle in degrees
+    double s;       // a fraction between 0 and 1
+    double v;       // a fraction between 0 and 1
+};
+
+rgb_t hsv2rgb(hsv_t in) {
+	double hh, p, q, t, ff;
+	long   i;
+	rgb_t  out;
+
+	if(in.s <= 0.0) {       // < is bogus, just shuts up warnings
+		out.r = in.v;
+		out.g = in.v;
+		out.b = in.v;
+		return out;
+	}
+	hh = in.h;
+	if(hh >= 360.0) hh = 0.0;
+	hh /= 60.0;
+	i = (long)hh;
+	ff = hh - i;
+	p = in.v * (1.0 - in.s);
+	q = in.v * (1.0 - (in.s * ff));
+	t = in.v * (1.0 - (in.s * (1.0 - ff)));
+
+	switch(i) {
+	case 0:
+		out.r = in.v;
+		out.g = t;
+		out.b = p;
+		break;
+	case 1:
+		out.r = q;
+		out.g = in.v;
+		out.b = p;
+		break;
+	case 2:
+		out.r = p;
+		out.g = in.v;
+		out.b = t;
+		break;
+
+	case 3:
+		out.r = p;
+		out.g = q;
+		out.b = in.v;
+		break;
+	case 4:
+		out.r = t;
+		out.g = p;
+		out.b = in.v;
+		break;
+	case 5:
+	default:
+		out.r = in.v;
+		out.g = p;
+		out.b = q;
+		break;
+	}
+	return out;
+}
+
+// void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) {
+// 	std::ofstream os(output);
+// 	os << "<html>\n";
+// 	os << "<head>\n";
+// 	os << "<style>\n";
+// 	os << "</style>\n";
+// 	os << "</head>\n";
+// 	os << "<body>\n";
+// 	os << "<table style=\"width=100%\">\n";
+
+// 	size_t idx = 0;
+// 	for(size_t r = 0ul; r < rows; r++) {
+// 		os << "<tr>\n";
+// 		for(size_t c = 0ul; c < columns; c++) {
+// 			os << "<td class=\"custom custom" << data[idx] << "\"></td>\n";
+// 			idx++;
+// 		}
+// 		os << "</tr>\n";
+// 	}
+
+// 	os << "</table>\n";
+// 	os << "</body>\n";
+// 	os << "</html>\n";
+// 	os << std::endl;
+// }
+
+// #include <png.h>
+// #include <setjmp.h>
+
+/*
+void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) {
+	int width  = columns * factor;
+	int height = rows / factor;
+
+	int code = 0;
+	int idx = 0;
+	FILE *fp = NULL;
+	png_structp png_ptr = NULL;
+	png_infop info_ptr = NULL;
+	png_bytep row = NULL;
+
+	// Open file for writing (binary mode)
+	fp = fopen(output.c_str(), "wb");
+	if (fp == NULL) {
+		fprintf(stderr, "Could not open file %s for writing\n", output.c_str());
+		code = 1;
+		goto finalise;
+	}
+
+	   // Initialize write structure
+	png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+	if (png_ptr == NULL) {
+		fprintf(stderr, "Could not allocate write struct\n");
+		code = 1;
+		goto finalise;
+	}
+
+	// Initialize info structure
+	info_ptr = png_create_info_struct(png_ptr);
+	if (info_ptr == NULL) {
+		fprintf(stderr, "Could not allocate info struct\n");
+		code = 1;
+		goto finalise;
+	}
+
+	// Setup Exception handling
+	if (setjmp(png_jmpbuf(png_ptr))) {
+		fprintf(stderr, "Error during png creation\n");
+		code = 1;
+		goto finalise;
+	}
+
+	png_init_io(png_ptr, fp);
+
+	// Write header (8 bit colour depth)
+	png_set_IHDR(png_ptr, info_ptr, width, height,
+		8, PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE,
+		PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
+
+	png_write_info(png_ptr, info_ptr);
+
+	// Allocate memory for one row (3 bytes per pixel - RGB)
+	row = (png_bytep) malloc(3 * width * sizeof(png_byte));
+
+	// Write image data
+	int x, y;
+	for (y=0 ; y<height ; y++) {
+		for (x=0 ; x<width ; x++) {
+			auto & r = row[(x * 3) + 0];
+			auto & g = row[(x * 3) + 1];
+			auto & b = row[(x * 3) + 2];
+			assert(idx < (rows * columns));
+			int color = data[idx] - 1;
+			assert(color < nthreads);
+			assert(color >= 0);
+			idx++;
+
+			double angle = double(color) / double(nthreads);
+
+			auto c = hsv2rgb({ 360.0 * angle, 0.8, 0.8 });
+
+			r = char(c.r * 255.0);
+			g = char(c.g * 255.0);
+			b = char(c.b * 255.0);
+
+		}
+		png_write_row(png_ptr, row);
+	}
+
+	assert(idx == (rows * columns));
+
+	// End write
+	png_write_end(png_ptr, NULL);
+
+	finalise:
+	if (fp != NULL) fclose(fp);
+	if (info_ptr != NULL) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
+	if (png_ptr != NULL) png_destroy_write_struct(&png_ptr, (png_infopp)NULL);
+	if (row != NULL) free(row);
+}
+*/
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,555 @@
+#pragma once
+#define LIST_VARIANT relaxed_list
+
+#define VANILLA 0
+#define SNZI 1
+#define BITMASK 2
+#define DISCOVER 3
+#define SNZM 4
+#define BIAS 5
+#define BACK 6
+#define BACKBIAS 7
+
+#ifndef VARIANT
+#define VARIANT VANILLA
+#endif
+
+#ifndef NO_STATS
+#include <iostream>
+#endif
+
+#include <cmath>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <thread>
+#include <type_traits>
+
+#include "assert.hpp"
+#include "utils.hpp"
+#include "links.hpp"
+#include "snzi.hpp"
+#include "snzi-packed.hpp"
+#include "snzm.hpp"
+
+using namespace std;
+
+struct pick_stat {
+	struct {
+		size_t attempt = 0;
+		size_t success = 0;
+		size_t local = 0;
+	} push;
+	struct {
+		size_t attempt = 0;
+		size_t success = 0;
+		size_t mask_attempt = 0;
+		size_t mask_reset = 0;
+		size_t local = 0;
+	} pop;
+};
+
+struct empty_stat {
+	struct {
+		size_t value = 0;
+		size_t count = 0;
+	} push;
+	struct {
+		size_t value = 0;
+		size_t count = 0;
+	} pop;
+};
+
+template<typename node_t>
+class __attribute__((aligned(128))) relaxed_list {
+	static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
+
+public:
+	static const char * name() {
+		const char * names[] = {
+			"RELAXED: VANILLA",
+			"RELAXED: SNZI",
+			"RELAXED: BITMASK",
+			"RELAXED: SNZI + DISCOVERED MASK",
+			"RELAXED: SNZI + MASK",
+			"RELAXED: SNZI + LOCAL BIAS",
+			"RELAXED: SNZI + REVERSE RNG",
+			"RELAXED: SNZI + LOCAL BIAS + REVERSE RNG"
+		};
+		return names[VARIANT];
+	}
+
+	relaxed_list(unsigned numThreads, unsigned numQueues)
+		: numLists(numThreads * numQueues)
+	  	, lists(new intrusive_queue_t<node_t>[numLists])
+		#if VARIANT == SNZI || VARIANT == BACK
+			, snzi( std::log2( numLists / (2 * numQueues) ), 2 )
+		#elif VARIANT == BIAS || VARIANT == BACKBIAS
+			#ifdef SNZI_PACKED
+				, snzi( std::ceil( std::log2(numLists) ) )
+			#else
+				, snzi( std::log2( numLists / (2 * numQueues) ), 2 )
+			#endif
+		#elif VARIANT == SNZM || VARIANT == DISCOVER
+			, snzm( numLists )
+		#endif
+	{
+		assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
+		std::cout << "Constructing Relaxed List with " << numLists << std::endl;
+	}
+
+	~relaxed_list() {
+		std::cout << "Destroying Relaxed List" << std::endl;
+		lists.reset();
+	}
+
+    	__attribute__((noinline, hot)) void push(node_t * node) {
+		node->_links.ts = rdtscl();
+
+		while(true) {
+			// Pick a random list
+			unsigned i = idx_from_r(tls.rng1.next(), VARIANT == BIAS || VARIANT == BACKBIAS);
+
+			#ifndef NO_STATS
+				tls.pick.push.attempt++;
+			#endif
+
+			// If we can't lock it retry
+			if( !lists[i].lock.try_lock() ) continue;
+
+			#if VARIANT == VANILLA || VARIANT == BITMASK
+				__attribute__((unused)) int num = numNonEmpty;
+			#endif
+
+			// Actually push it
+			if(lists[i].push(node)) {
+				#if VARIANT == DISCOVER
+					size_t qword = i >> 6ull;
+					size_t bit   = i & 63ull;
+					assert(qword == 0);
+					bts(tls.mask, bit);
+					snzm.arrive(i);
+				#elif VARIANT == SNZI || VARIANT == BIAS
+					snzi.arrive(i);
+				#elif VARIANT == BACK || VARIANT == BACKBIAS
+					snzi.arrive(i);
+					tls.rng2.set_raw_state( tls.rng1.get_raw_state());
+				#elif VARIANT == SNZM
+					snzm.arrive(i);
+				#elif VARIANT == BITMASK
+					numNonEmpty++;
+					size_t qword = i >> 6ull;
+					size_t bit   = i & 63ull;
+					assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
+					__attribute__((unused)) bool ret = bts(list_mask[qword], bit);
+					assert(!ret);
+					assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
+				#else
+					numNonEmpty++;
+				#endif
+			}
+			#if VARIANT == VANILLA || VARIANT == BITMASK
+				assert(numNonEmpty <= (int)numLists);
+			#endif
+
+			// Unlock and return
+			lists[i].lock.unlock();
+
+			#ifndef NO_STATS
+				tls.pick.push.success++;
+				#if VARIANT == VANILLA || VARIANT == BITMASK
+					tls.empty.push.value += num;
+					tls.empty.push.count += 1;
+				#endif
+			#endif
+			return;
+		}
+    	}
+
+	__attribute__((noinline, hot)) node_t * pop() {
+		#if VARIANT == DISCOVER
+			assert(numLists <= 64);
+			while(snzm.query()) {
+				tls.pick.pop.mask_attempt++;
+				unsigned i, j;
+				{
+					// Pick first list totally randomly
+					i = tls.rng1.next() % numLists;
+
+					// Pick the other according to the bitmask
+					unsigned r = tls.rng1.next();
+
+					size_t mask = tls.mask.load(std::memory_order_relaxed);
+					if(mask == 0) {
+						tls.pick.pop.mask_reset++;
+						mask = (1U << numLists) - 1;
+						tls.mask.store(mask, std::memory_order_relaxed);
+					}
+
+					unsigned b = rand_bit(r, mask);
+
+					assertf(b < 64, "%zu %u", mask, b);
+
+					j = b;
+
+					assert(j < numLists);
+				}
+
+				if(auto node = try_pop(i, j)) return node;
+			}
+		#elif VARIANT == SNZI
+			while(snzi.query()) {
+				// Pick two lists at random
+				int i = tls.rng1.next() % numLists;
+				int j = tls.rng1.next() % numLists;
+
+				if(auto node = try_pop(i, j)) return node;
+			}
+
+		#elif VARIANT == BACK
+			while(snzi.query()) {
+				// Pick two lists at random
+				int i = tls.rng2.prev() % numLists;
+				int j = tls.rng2.prev() % numLists;
+
+				if(auto node = try_pop(i, j)) return node;
+			}
+
+		#elif VARIANT == BACKBIAS
+			while(snzi.query()) {
+				// Pick two lists at random
+				int i = idx_from_r(tls.rng2.prev(), true);
+				int j = idx_from_r(tls.rng2.prev(), true);
+
+				if(auto node = try_pop(i, j)) return node;
+			}
+
+		#elif VARIANT == BIAS
+			while(snzi.query()) {
+				// Pick two lists at random
+				unsigned ri = tls.rng1.next();
+				unsigned i;
+				unsigned j = tls.rng1.next();
+				if(0 == (ri & 0xF)) {
+					i = (ri >> 4) % numLists;
+				} else {
+					i = tls.my_queue + ((ri >> 4) % 4);
+					j = tls.my_queue + ((j >> 4) % 4);
+					tls.pick.pop.local++;
+				}
+				i %= numLists;
+				j %= numLists;
+
+				if(auto node = try_pop(i, j)) return node;
+			}
+		#elif VARIANT == SNZM
+			//*
+			while(snzm.query()) {
+				tls.pick.pop.mask_attempt++;
+				unsigned i, j;
+				{
+					// Pick two random number
+					unsigned ri = tls.rng1.next();
+					unsigned rj = tls.rng1.next();
+
+					// Pick two nodes from it
+					unsigned wdxi = ri & snzm.mask;
+					// unsigned wdxj = rj & snzm.mask;
+
+					// Get the masks from the nodes
+					// size_t maski = snzm.masks(wdxi);
+					size_t maskj = snzm.masks(wdxj);
+
+					if(maski == 0 && maskj == 0) continue;
+
+					#if defined(__BMI2__)
+						uint64_t idxsi = _pext_u64(snzm.indexes, maski);
+						// uint64_t idxsj = _pext_u64(snzm.indexes, maskj);
+
+						auto pi = __builtin_popcountll(maski);
+						// auto pj = __builtin_popcountll(maskj);
+
+						ri = pi ? ri & ((pi >> 3) - 1) : 0;
+						rj = pj ? rj & ((pj >> 3) - 1) : 0;
+
+						unsigned bi = (idxsi >> (ri << 3)) & 0xff;
+						unsigned bj = (idxsj >> (rj << 3)) & 0xff;
+					#else
+						unsigned bi = rand_bit(ri >> snzm.depth, maski);
+						unsigned bj = rand_bit(rj >> snzm.depth, maskj);
+					#endif
+
+					i = (bi << snzm.depth) | wdxi;
+					j = (bj << snzm.depth) | wdxj;
+
+					/* paranoid */ assertf(i < numLists, "%u %u", bj, wdxi);
+					/* paranoid */ assertf(j < numLists, "%u %u", bj, wdxj);
+				}
+
+				if(auto node = try_pop(i, j)) return node;
+			}
+			/*/
+			while(snzm.query()) {
+				// Pick two lists at random
+				int i = tls.rng1.next() % numLists;
+				int j = tls.rng1.next() % numLists;
+
+				if(auto node = try_pop(i, j)) return node;
+			}
+			//*/
+		#elif VARIANT == BITMASK
+			int nnempty;
+			while(0 != (nnempty = numNonEmpty)) {
+				tls.pick.pop.mask_attempt++;
+				unsigned i, j;
+				{
+					// Pick two lists at random
+					unsigned num = ((numLists - 1) >> 6) + 1;
+
+					unsigned ri = tls.rng1.next();
+					unsigned rj = tls.rng1.next();
+
+					unsigned wdxi = (ri >> 6u) % num;
+					unsigned wdxj = (rj >> 6u) % num;
+
+					size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
+					size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
+
+					if(maski == 0 && maskj == 0) continue;
+
+					unsigned bi = rand_bit(ri, maski);
+					unsigned bj = rand_bit(rj, maskj);
+
+					assertf(bi < 64, "%zu %u", maski, bi);
+					assertf(bj < 64, "%zu %u", maskj, bj);
+
+					i = bi | (wdxi << 6);
+					j = bj | (wdxj << 6);
+
+					assertf(i < numLists, "%u", wdxi << 6);
+					assertf(j < numLists, "%u", wdxj << 6);
+				}
+
+				if(auto node = try_pop(i, j)) return node;
+			}
+		#else
+			while(numNonEmpty != 0) {
+				// Pick two lists at random
+				int i = tls.rng1.next() % numLists;
+				int j = tls.rng1.next() % numLists;
+
+				if(auto node = try_pop(i, j)) return node;
+			}
+		#endif
+
+		return nullptr;
+    	}
+
+private:
+	node_t * try_pop(unsigned i, unsigned j) {
+		#ifndef NO_STATS
+			tls.pick.pop.attempt++;
+		#endif
+
+		#if VARIANT == DISCOVER
+			if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
+			if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
+		#endif
+
+		// Pick the bet list
+		int w = i;
+		if( __builtin_expect(lists[j].ts() != 0, true) ) {
+			w = (lists[i].ts() < lists[j].ts()) ? i : j;
+		}
+
+		auto & list = lists[w];
+		// If list looks empty retry
+		if( list.ts() == 0 ) return nullptr;
+
+		// If we can't get the lock retry
+		if( !list.lock.try_lock() ) return nullptr;
+
+		#if VARIANT == VANILLA || VARIANT == BITMASK
+			__attribute__((unused)) int num = numNonEmpty;
+		#endif
+
+		// If list is empty, unlock and retry
+		if( list.ts() == 0 ) {
+			list.lock.unlock();
+			return nullptr;
+		}
+
+		// Actually pop the list
+		node_t * node;
+		bool emptied;
+		std::tie(node, emptied) = list.pop();
+		assert(node);
+
+		if(emptied) {
+			#if VARIANT == DISCOVER
+				size_t qword = w >> 6ull;
+				size_t bit   = w & 63ull;
+				assert(qword == 0);
+				__attribute__((unused)) bool ret = btr(tls.mask, bit);
+				snzm.depart(w);
+			#elif VARIANT == SNZI || VARIANT == BIAS || VARIANT == BACK || VARIANT == BACKBIAS
+				snzi.depart(w);
+			#elif VARIANT == SNZM
+				snzm.depart(w);
+			#elif VARIANT == BITMASK
+				numNonEmpty--;
+				size_t qword = w >> 6ull;
+				size_t bit   = w & 63ull;
+				assert((list_mask[qword] & (1ul << bit)) != 0);
+				__attribute__((unused)) bool ret = btr(list_mask[qword], bit);
+				assert(ret);
+				assert((list_mask[qword] & (1ul << bit)) == 0);
+			#else
+				numNonEmpty--;
+			#endif
+		}
+
+		// Unlock and return
+		list.lock.unlock();
+		#if VARIANT == VANILLA || VARIANT == BITMASK
+			assert(numNonEmpty >= 0);
+		#endif
+		#ifndef NO_STATS
+			tls.pick.pop.success++;
+			#if VARIANT == VANILLA || VARIANT == BITMASK
+				tls.empty.pop.value += num;
+				tls.empty.pop.count += 1;
+			#endif
+		#endif
+		return node;
+	}
+
+	inline unsigned idx_from_r(unsigned r, bool bias) {
+		unsigned i;
+		if(bias) {
+			if(0 == (r & 0x3F)) {
+				i = r >> 6;
+			} else {
+				i = tls.my_queue + ((r >> 6) % 4);
+				tls.pick.push.local++;
+			}
+		} else {
+			i = r;
+		}
+		return i % numLists;
+	}
+
+public:
+
+	static __attribute__((aligned(128))) thread_local struct TLS {
+		Random     rng1 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
+		Random     rng2 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
+		unsigned   my_queue = (ticket++) * 4;
+		pick_stat  pick;
+		empty_stat empty;
+		__attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
+	} tls;
+
+private:
+	const unsigned numLists;
+    	__attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t<node_t> []> lists;
+private:
+	#if VARIANT == SNZI || VARIANT == BACK
+		snzi_t snzi;
+	#elif VARIANT == BIAS || VARIANT == BACKBIAS
+		#ifdef SNZI_PACKED
+			snzip_t snzi;
+		#else
+			snzi_t snzi;
+		#endif
+	#elif VARIANT == SNZM || VARIANT == DISCOVER
+		snzm_t snzm;
+	#else
+		std::atomic_int numNonEmpty  = { 0 };  // number of non-empty lists
+	#endif
+	#if VARIANT == BITMASK
+		std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
+	#endif
+
+public:
+	static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t<node_t>);
+	static std::atomic_uint32_t ticket;
+
+#ifndef NO_STATS
+	static void stats_tls_tally() {
+		global_stats.pick.push.attempt += tls.pick.push.attempt;
+		global_stats.pick.push.success += tls.pick.push.success;
+		global_stats.pick.push.local += tls.pick.push.local;
+		global_stats.pick.pop .attempt += tls.pick.pop.attempt;
+		global_stats.pick.pop .success += tls.pick.pop.success;
+		global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
+		global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
+		global_stats.pick.pop .local += tls.pick.pop.local;
+
+		global_stats.qstat.push.value += tls.empty.push.value;
+		global_stats.qstat.push.count += tls.empty.push.count;
+		global_stats.qstat.pop .value += tls.empty.pop .value;
+		global_stats.qstat.pop .count += tls.empty.pop .count;
+	}
+
+private:
+	static struct GlobalStats {
+		struct {
+			struct {
+				std::atomic_size_t attempt = { 0 };
+				std::atomic_size_t success = { 0 };
+				std::atomic_size_t local = { 0 };
+			} push;
+			struct {
+				std::atomic_size_t attempt = { 0 };
+				std::atomic_size_t success = { 0 };
+				std::atomic_size_t mask_attempt = { 0 };
+				std::atomic_size_t mask_reset = { 0 };
+				std::atomic_size_t local = { 0 };
+			} pop;
+		} pick;
+		struct {
+			struct {
+				std::atomic_size_t value = { 0 };
+				std::atomic_size_t count = { 0 };
+			} push;
+			struct {
+				std::atomic_size_t value = { 0 };
+				std::atomic_size_t count = { 0 };
+			} pop;
+		} qstat;
+	} global_stats;
+
+public:
+	static void stats_print(std::ostream & os ) {
+		std::cout << "----- Relaxed List Stats -----" << std::endl;
+
+		const auto & global = global_stats;
+
+		double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
+		double pop_sur  = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
+		double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
+		double rpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_reset);
+
+		double push_len = double(global.pick.push.attempt     ) / global.pick.push.success;
+		double pop_len  = double(global.pick.pop .attempt     ) / global.pick.pop .success;
+		double mpop_len = double(global.pick.pop .mask_attempt) / global.pick.pop .success;
+		double rpop_len = double(global.pick.pop .mask_reset  ) / global.pick.pop .success;
+
+		os << "Push   Pick   : " << push_sur << " %, len " << push_len << " (" << global.pick.push.attempt      << " / " << global.pick.push.success << ")\n";
+		os << "Pop    Pick   : " << pop_sur  << " %, len " << pop_len  << " (" << global.pick.pop .attempt      << " / " << global.pick.pop .success << ")\n";
+		os << "TryPop Pick   : " << mpop_sur << " %, len " << mpop_len << " (" << global.pick.pop .mask_attempt << " / " << global.pick.pop .success << ")\n";
+		os << "Pop M Reset   : " << rpop_sur << " %, len " << rpop_len << " (" << global.pick.pop .mask_reset   << " / " << global.pick.pop .success << ")\n";
+
+		double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
+		double avgQ_pop  = double(global.qstat.pop .value) / global.qstat.pop .count;
+		double avgQ      = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
+		os << "Push   Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
+		os << "Pop    Avg Qs : " << avgQ_pop  << " (" << global.qstat.pop .count << "ops)\n";
+		os << "Global Avg Qs : " << avgQ      << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
+
+		os << "Local Push    : " << global.pick.push.local << "\n";
+		os << "Local Pop     : " << global.pick.pop .local << "\n";
+	}
+#endif
+};
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list_layout.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list_layout.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/relaxed_list_layout.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,23 @@
+#define NO_IO
+#define NDEBUG
+#include "relaxed_list.hpp"
+
+struct __attribute__((aligned(64))) Node {
+	static std::atomic_size_t creates;
+	static std::atomic_size_t destroys;
+
+	_LinksFields_t<Node> _links;
+
+	int value;
+	Node(int value): value(value) {
+		creates++;
+	}
+
+	~Node() {
+		destroys++;
+	}
+};
+
+int main() {
+	return sizeof(relaxed_list<Node>) + relaxed_list<Node>::sizeof_queue;
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/runperf.sh
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/runperf.sh	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/runperf.sh	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -e
+
+name=$1
+event=$2
+
+shift 2
+
+echo "perf record -F 99 -a -g -o raw/$name.data -e $event -- $@ > raw/$name.out"
+perf record -F 99 -a -g -o raw/$name.data -e $event -- $@ > raw/$name.out
+echo "=============================="
+cat raw/$name.out
+echo "=============================="
+./process.sh $name
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/scale.sh
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/scale.sh	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/scale.sh	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,7 @@
+#!/bin/bash
+taskset -c 24-31 ./a.out -t  1 -b churn | grep --color -E "(ns|Ops|Running)"
+taskset -c 24-31 ./a.out -t  2 -b churn | grep --color -E "(ns|Ops|Running)"
+taskset -c 24-31 ./a.out -t  4 -b churn | grep --color -E "(ns|Ops|Running)"
+taskset -c 24-31 ./a.out -t  8 -b churn | grep --color -E "(ns|Ops|Running)"
+taskset -c 16-31 ./a.out -t 16 -b churn | grep --color -E "(ns|Ops|Running)"
+taskset -c  0-31 ./a.out -t 32 -b churn | grep --color -E "(ns|Ops|Running)"
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/snzi-packed.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/snzi-packed.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/snzi-packed.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,179 @@
+#pragma once
+
+#define SNZI_PACKED
+
+#include "utils.hpp"
+
+
+class snzip_t {
+	class node;
+	class node_aligned;
+public:
+	const unsigned mask;
+	const int root;
+	std::unique_ptr<snzip_t::node[]> leafs;
+	std::unique_ptr<snzip_t::node_aligned[]> nodes;
+
+	snzip_t(unsigned depth);
+
+	void arrive(int idx) {
+		// idx >>= 1;
+		idx %= mask;
+		leafs[idx].arrive();
+	}
+
+	void depart(int idx) {
+		// idx >>= 1;
+		idx %= mask;
+		leafs[idx].depart();
+	}
+
+	bool query() const {
+		return nodes[root].query();
+	}
+
+
+private:
+	class __attribute__((aligned(32))) node {
+		friend class snzip_t;
+	private:
+
+		union val_t {
+			static constexpr char Half = -1;
+
+			uint64_t _all;
+			struct __attribute__((packed)) {
+				char cnt;
+				uint64_t ver:56;
+			};
+
+			bool cas(val_t & exp, char _cnt, uint64_t _ver) volatile {
+				val_t t;
+				t.ver = _ver;
+				t.cnt = _cnt;
+				/* paranoid */ assert(t._all == ((_ver << 8) | ((unsigned char)_cnt)));
+				return __atomic_compare_exchange_n(&this->_all, &exp._all, t._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+			}
+
+			bool cas(val_t & exp, const val_t & tar) volatile {
+				return __atomic_compare_exchange_n(&this->_all, &exp._all, tar._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+			}
+
+			val_t() : _all(0) {}
+			val_t(const volatile val_t & o) : _all(o._all) {}
+		};
+
+		//--------------------------------------------------
+		// Hierarchical node
+		void arrive_h() {
+			int undoArr = 0;
+			bool success = false;
+			while(!success) {
+				auto x{ value };
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt >= 1 ) {
+					if( value.cas(x, x.cnt + 1, x.ver ) ) {
+						success = true;
+					}
+				}
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt == 0 ) {
+					if( value.cas(x, val_t::Half, x.ver + 1) ) {
+						success = true;
+						x.cnt = val_t::Half;
+						x.ver = x.ver + 1;
+					}
+				}
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt == val_t::Half ) {
+					/* paranoid */ assert(parent);
+					if(undoArr == 2) {
+						undoArr--;
+					} else {
+						parent->arrive();
+					}
+					if( !value.cas(x, 1, x.ver) ) {
+						undoArr = undoArr + 1;
+					}
+				}
+			}
+
+			for(int i = 0; i < undoArr; i++) {
+				/* paranoid */ assert(parent);
+				parent->depart();
+			}
+		}
+
+		void depart_h() {
+			while(true) {
+				auto x = (const val_t)value;
+				/* paranoid */ assertf(x.cnt >= 1, "%d", x.cnt);
+				if( value.cas( x, x.cnt - 1, x.ver ) ) {
+					if( x.cnt == 1 ) {
+						/* paranoid */ assert(parent);
+						parent->depart();
+					}
+					return;
+				}
+			}
+		}
+
+		//--------------------------------------------------
+		// Root node
+		void arrive_r() {
+			__atomic_fetch_add(&value._all, 1, __ATOMIC_SEQ_CST);
+		}
+
+		void depart_r() {
+			__atomic_fetch_sub(&value._all, 1, __ATOMIC_SEQ_CST);
+		}
+
+	private:
+		volatile val_t value;
+		class node * parent = nullptr;
+
+		bool is_root() {
+			return parent == nullptr;
+		}
+
+	public:
+		void arrive() {
+			if(is_root()) arrive_r();
+			else arrive_h();
+		}
+
+		void depart() {
+			if(is_root()) depart_r();
+			else depart_h();
+		}
+
+		bool query() {
+			/* paranoid */ assert(is_root());
+			return value._all > 0;
+		}
+	};
+
+	class __attribute__((aligned(128))) node_aligned : public node {};
+};
+
+snzip_t::snzip_t(unsigned depth)
+	: mask( std::pow(2, depth) )
+	, root( ((std::pow(2, depth + 1) - 1) / (2 -1)) - 1 - mask )
+	, leafs(new node[ mask ]())
+	, nodes(new node_aligned[ root + 1 ]())
+{
+	int width = std::pow(2, depth);
+	int hwdith = width / 2;
+	std::cout << "SNZI: " << depth << "x" << width << "(" << mask - 1 << ") " << (sizeof(snzip_t::node) * (root + 1)) << " bytes" << std::endl;
+	for(int i = 0; i < width; i++) {
+		int idx = i % hwdith;
+		std::cout << i << " -> " << idx + width << std::endl;
+		leafs[i].parent = &nodes[ idx ];
+	}
+
+	for(int i = 0; i < root; i++) {
+		int idx = (i / 2) + hwdith;
+		std::cout << i + width << " -> " << idx + width << std::endl;
+		nodes[i].parent = &nodes[ idx ];
+	}
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/snzi.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/snzi.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/snzi.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,164 @@
+#pragma once
+
+#include "utils.hpp"
+
+
+class snzi_t {
+	class node;
+public:
+	const unsigned mask;
+	const int root;
+	std::unique_ptr<snzi_t::node[]> nodes;
+
+	snzi_t(unsigned depth, unsigned base = 2);
+
+	void arrive(int idx) {
+		idx >>= 2;
+		idx %= mask;
+		nodes[idx].arrive();
+	}
+
+	void depart(int idx) {
+		idx >>= 2;
+		idx %= mask;
+		nodes[idx].depart();
+	}
+
+	bool query() const {
+		return nodes[root].query();
+	}
+
+
+private:
+	class __attribute__((aligned(128))) node {
+		friend class snzi_t;
+	private:
+
+		union val_t {
+			static constexpr char Half = -1;
+
+			uint64_t _all;
+			struct __attribute__((packed)) {
+				char cnt;
+				uint64_t ver:56;
+			};
+
+			bool cas(val_t & exp, char _cnt, uint64_t _ver) volatile {
+				val_t t;
+				t.ver = _ver;
+				t.cnt = _cnt;
+				/* paranoid */ assert(t._all == ((_ver << 8) | ((unsigned char)_cnt)));
+				return __atomic_compare_exchange_n(&this->_all, &exp._all, t._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+			}
+
+			bool cas(val_t & exp, const val_t & tar) volatile {
+				return __atomic_compare_exchange_n(&this->_all, &exp._all, tar._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+			}
+
+			val_t() : _all(0) {}
+			val_t(const volatile val_t & o) : _all(o._all) {}
+		};
+
+		//--------------------------------------------------
+		// Hierarchical node
+		void arrive_h() {
+			int undoArr = 0;
+			bool success = false;
+			while(!success) {
+				auto x{ value };
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt >= 1 ) {
+					if( value.cas(x, x.cnt + 1, x.ver ) ) {
+						success = true;
+					}
+				}
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt == 0 ) {
+					if( value.cas(x, val_t::Half, x.ver + 1) ) {
+						success = true;
+						x.cnt = val_t::Half;
+						x.ver = x.ver + 1;
+					}
+				}
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt == val_t::Half ) {
+					/* paranoid */ assert(parent);
+					if(undoArr == 2) {
+						undoArr--;
+					} else {
+						parent->arrive();
+					}
+					if( !value.cas(x, 1, x.ver) ) {
+						undoArr = undoArr + 1;
+					}
+				}
+			}
+
+			for(int i = 0; i < undoArr; i++) {
+				/* paranoid */ assert(parent);
+				parent->depart();
+			}
+		}
+
+		void depart_h() {
+			while(true) {
+				auto x = (const val_t)value;
+				/* paranoid */ assertf(x.cnt >= 1, "%d", x.cnt);
+				if( value.cas( x, x.cnt - 1, x.ver ) ) {
+					if( x.cnt == 1 ) {
+						/* paranoid */ assert(parent);
+						parent->depart();
+					}
+					return;
+				}
+			}
+		}
+
+		//--------------------------------------------------
+		// Root node
+		void arrive_r() {
+			__atomic_fetch_add(&value._all, 1, __ATOMIC_SEQ_CST);
+		}
+
+		void depart_r() {
+			__atomic_fetch_sub(&value._all, 1, __ATOMIC_SEQ_CST);
+		}
+
+	private:
+		volatile val_t value;
+		class node * parent = nullptr;
+
+		bool is_root() {
+			return parent == nullptr;
+		}
+
+	public:
+		void arrive() {
+			if(is_root()) arrive_r();
+			else arrive_h();
+		}
+
+		void depart() {
+			if(is_root()) depart_r();
+			else depart_h();
+		}
+
+		bool query() {
+			/* paranoid */ assert(is_root());
+			return value._all > 0;
+		}
+	};
+};
+
+snzi_t::snzi_t(unsigned depth, unsigned base)
+	: mask( std::pow(base, depth) )
+	, root( ((std::pow(base, depth + 1) - 1) / (base -1)) - 1 )
+	, nodes(new node[ root + 1 ]())
+{
+	int width = std::pow(base, depth);
+	std::cout << "SNZI: " << depth << "x" << width << "(" << mask - 1 << ") " << (sizeof(snzi_t::node) * (root + 1)) << " bytes" << std::endl;
+	for(int i = 0; i < root; i++) {
+		std::cout << i << " -> " << (i / base) + width << std::endl;
+		nodes[i].parent = &nodes[(i / base) + width];
+	}
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/snzm.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/snzm.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/snzm.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,213 @@
+#pragma once
+
+#include "utils.hpp"
+
+
+class snzm_t {
+	class node;
+public:
+	const unsigned depth;
+	const unsigned mask;
+	const int root;
+	std::unique_ptr<snzm_t::node[]> nodes;
+
+	#if defined(__BMI2__)
+		const uint64_t indexes = 0x0706050403020100;
+	#endif
+
+	snzm_t(unsigned numLists);
+
+	void arrive(int idx) {
+		int i = idx & mask;
+		nodes[i].arrive( idx >> depth);
+	}
+
+	void depart(int idx) {
+		int i = idx & mask;
+		nodes[i].depart( idx >> depth );
+	}
+
+	bool query() const {
+		return nodes[root].query();
+	}
+
+	uint64_t masks( unsigned node ) {
+		/* paranoid */ assert( (node & mask) == node );
+		#if defined(__BMI2__)
+			return nodes[node].mask_all;
+		#else
+			return nodes[node].mask;
+		#endif
+	}
+
+private:
+	class __attribute__((aligned(128))) node {
+		friend class snzm_t;
+	private:
+
+		union val_t {
+			static constexpr char Half = -1;
+
+			uint64_t _all;
+			struct __attribute__((packed)) {
+				char cnt;
+				uint64_t ver:56;
+			};
+
+			bool cas(val_t & exp, char _cnt, uint64_t _ver) volatile {
+				val_t t;
+				t.ver = _ver;
+				t.cnt = _cnt;
+				/* paranoid */ assert(t._all == ((_ver << 8) | ((unsigned char)_cnt)));
+				return __atomic_compare_exchange_n(&this->_all, &exp._all, t._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+			}
+
+			bool cas(val_t & exp, const val_t & tar) volatile {
+				return __atomic_compare_exchange_n(&this->_all, &exp._all, tar._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+			}
+
+			val_t() : _all(0) {}
+			val_t(const volatile val_t & o) : _all(o._all) {}
+		};
+
+		//--------------------------------------------------
+		// Hierarchical node
+		void arrive_h() {
+			int undoArr = 0;
+			bool success = false;
+			while(!success) {
+				auto x{ value };
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt >= 1 ) {
+					if( value.cas(x, x.cnt + 1, x.ver ) ) {
+						success = true;
+					}
+				}
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt == 0 ) {
+					if( value.cas(x, val_t::Half, x.ver + 1) ) {
+						success = true;
+						x.cnt = val_t::Half;
+						x.ver = x.ver + 1;
+					}
+				}
+				/* paranoid */ assert(x.cnt <= 120);
+				if( x.cnt == val_t::Half ) {
+					/* paranoid */ assert(parent);
+					parent->arrive();
+					if( !value.cas(x, 1, x.ver) ) {
+						undoArr = undoArr + 1;
+					}
+				}
+			}
+
+			for(int i = 0; i < undoArr; i++) {
+				/* paranoid */ assert(parent);
+				parent->depart();
+			}
+		}
+
+		void depart_h() {
+			while(true) {
+				auto x = (const val_t)value;
+				/* paranoid */ assertf(x.cnt >= 1, "%d", x.cnt);
+				if( value.cas( x, x.cnt - 1, x.ver ) ) {
+					if( x.cnt == 1 ) {
+						/* paranoid */ assert(parent);
+						parent->depart();
+					}
+					return;
+				}
+			}
+		}
+
+		//--------------------------------------------------
+		// Root node
+		void arrive_r() {
+			__atomic_fetch_add(&value._all, 1, __ATOMIC_SEQ_CST);
+		}
+
+		void depart_r() {
+			__atomic_fetch_sub(&value._all, 1, __ATOMIC_SEQ_CST);
+		}
+
+		//--------------------------------------------------
+		// Interface node
+		void arrive() {
+			/* paranoid */ assert(!is_leaf);
+			if(is_root()) arrive_r();
+			else arrive_h();
+		}
+
+		void depart() {
+			/* paranoid */ assert(!is_leaf);
+			if(is_root()) depart_r();
+			else depart_h();
+		}
+
+	private:
+		volatile val_t value;
+		#if defined(__BMI2__)
+			union __attribute__((packed)) {
+				volatile uint8_t mask[8];
+				volatile uint64_t mask_all;
+			};
+		#else
+			volatile size_t mask = 0;
+		#endif
+
+		class node * parent = nullptr;
+		bool is_leaf = false;
+
+		bool is_root() {
+			return parent == nullptr;
+		}
+
+	public:
+		void arrive( int bit ) {
+			/* paranoid */ assert( is_leaf );
+
+			arrive_h();
+			#if defined(__BMI2__)
+				/* paranoid */ assert( bit < 8 );
+				mask[bit] = 0xff;
+			#else
+				/* paranoid */ assert( (mask & ( 1 << bit )) == 0 );
+				__atomic_fetch_add( &mask, 1 << bit, __ATOMIC_RELAXED );
+			#endif
+
+		}
+
+		void depart( int bit ) {
+			/* paranoid */ assert( is_leaf );
+
+			#if defined(__BMI2__)
+				/* paranoid */ assert( bit < 8 );
+				mask[bit] = 0x00;
+			#else
+				/* paranoid */ assert( (mask & ( 1 << bit )) != 0 );
+				__atomic_fetch_sub( &mask, 1 << bit, __ATOMIC_RELAXED );
+			#endif
+			depart_h();
+		}
+
+		bool query() {
+			/* paranoid */ assert(is_root());
+			return value._all > 0;
+		}
+	};
+};
+
+snzm_t::snzm_t(unsigned numLists)
+	: depth( std::log2( numLists / 8 ) )
+	, mask( (1 << depth) - 1 )
+	, root( (1 << (depth + 1)) - 2 )
+	, nodes(new node[ root + 1 ]())
+{
+	int width = 1 << depth;
+	std::cout << "SNZI with Mask: " << depth << "x" << width << "(" << mask << ")" << std::endl;
+	for(int i = 0; i < root; i++) {
+		nodes[i].is_leaf = i < width;
+		nodes[i].parent = &nodes[(i / 2) + width ];
+	}
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/utils.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/utils.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/utils.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,250 @@
+#pragma once
+
+#include <cassert>
+#include <cstddef>
+#include <atomic>
+#include <chrono>
+#include <fstream>
+#include <iostream>
+
+#include <unistd.h>
+#include <sys/sysinfo.h>
+
+#include <x86intrin.h>
+
+// Barrier from
+class barrier_t {
+public:
+	barrier_t(size_t total)
+		: waiting(0)
+		, total(total)
+	{}
+
+	void wait(unsigned) {
+		size_t target = waiting++;
+		target = (target - (target % total)) + total;
+		while(waiting < target)
+			asm volatile("pause");
+
+		assert(waiting < (1ul << 60));
+    	}
+
+private:
+	std::atomic<size_t> waiting;
+	size_t total;
+};
+
+// class Random {
+// private:
+// 	unsigned int seed;
+// public:
+// 	Random(int seed) {
+// 		this->seed = seed;
+// 	}
+
+// 	/** returns pseudorandom x satisfying 0 <= x < n. **/
+// 	unsigned int next() {
+// 		seed ^= seed << 6;
+// 		seed ^= seed >> 21;
+// 		seed ^= seed << 7;
+// 		return seed;
+//     	}
+// };
+
+constexpr uint64_t extendedEuclidY(uint64_t a, uint64_t b);
+constexpr uint64_t extendedEuclidX(uint64_t a, uint64_t b){
+    return (b==0) ? 1 : extendedEuclidY(b, a - b * (a / b));
+}
+constexpr uint64_t extendedEuclidY(uint64_t a, uint64_t b){
+    return (b==0) ? 0 : extendedEuclidX(b, a - b * (a / b)) - (a / b) * extendedEuclidY(b, a - b * (a / b));
+}
+
+class Random {
+private:
+	uint64_t x;
+
+	static constexpr const uint64_t M  = 1ul << 48ul;
+	static constexpr const uint64_t A  = 25214903917;
+	static constexpr const uint64_t C  = 11;
+	static constexpr const uint64_t D  = 16;
+
+public:
+	static constexpr const uint64_t m  = M;
+	static constexpr const uint64_t a  = A;
+	static constexpr const uint64_t c  = C;
+	static constexpr const uint64_t d  = D;
+	static constexpr const uint64_t ai = extendedEuclidX(A, M);
+public:
+	Random(unsigned int seed) {
+		this->x = seed * a;
+	}
+
+	/** returns pseudorandom x satisfying 0 <= x < n. **/
+	unsigned int next() {
+		//nextx = (a * x + c) % m;
+		x = (A * x + C) & (M - 1);
+		return x >> D;
+	}
+	unsigned int prev() {
+		//prevx = (ainverse * (x - c)) mod m
+		unsigned int r = x >> D;
+		x = ai * (x - C) & (M - 1);
+		return r;
+	}
+
+	void set_raw_state(uint64_t _x) {
+		this->x = _x;
+	}
+
+	uint64_t get_raw_state() {
+		return this->x;
+	}
+};
+
+static inline long long rdtscl(void) {
+    unsigned int lo, hi;
+    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+    return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
+}
+
+static inline void affinity(int tid) {
+	static int cpus = get_nprocs();
+
+	cpu_set_t  mask;
+	CPU_ZERO(&mask);
+	int cpu = cpus - tid;  // Set CPU affinity to tid, starting from the end
+	CPU_SET(cpu, &mask);
+	auto result = sched_setaffinity(0, sizeof(mask), &mask);
+	if(result != 0) {
+		std::cerr << "Affinity set failed with " << result<< ", wanted " << cpu << std::endl;
+	}
+}
+
+static const constexpr std::size_t cache_line_size = 64;
+static inline void check_cache_line_size() {
+	std::cout << "Checking cache line size" << std::endl;
+	const std::string cache_file = "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size";
+
+	std::ifstream ifs (cache_file, std::ifstream::in);
+
+	if(!ifs.good()) {
+		std::cerr << "Could not open file to check cache line size" << std::endl;
+		std::cerr << "Looking for: " << cache_file << std::endl;
+		std::exit(2);
+	}
+
+	size_t got;
+	ifs >> got;
+
+	ifs.close();
+
+	if(cache_line_size != got) {
+		std::cerr << "Cache line has incorrect size : " << got << std::endl;
+		std::exit(1);
+	}
+
+	std::cout << "Done" << std::endl;
+}
+
+using Clock = std::chrono::high_resolution_clock;
+using duration_t = std::chrono::duration<double>;
+using std::chrono::nanoseconds;
+
+template<typename Ratio, typename T>
+T duration_cast(T seconds) {
+	return std::chrono::duration_cast<std::chrono::duration<T, Ratio>>(std::chrono::duration<T>(seconds)).count();
+}
+
+static inline unsigned rand_bit(unsigned rnum, size_t mask) __attribute__((artificial));
+static inline unsigned rand_bit(unsigned rnum, size_t mask) {
+	unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0;
+#if !defined(__BMI2__)
+	uint64_t v = mask;   // Input value to find position with rank r.
+	unsigned int r = bit + 1;// Input: bit's desired rank [1-64].
+	unsigned int s;      // Output: Resulting position of bit with rank r [1-64]
+	uint64_t a, b, c, d; // Intermediate temporaries for bit count.
+	unsigned int t;      // Bit count temporary.
+
+	// Do a normal parallel bit count for a 64-bit integer,
+	// but store all intermediate steps.
+	a =  v - ((v >> 1) & ~0UL/3);
+	b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
+	c = (b + (b >> 4)) & ~0UL/0x11;
+	d = (c + (c >> 8)) & ~0UL/0x101;
+
+
+	t = (d >> 32) + (d >> 48);
+	// Now do branchless select!
+	s  = 64;
+	s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
+	t  = (d >> (s - 16)) & 0xff;
+	s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
+	t  = (c >> (s - 8)) & 0xf;
+	s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
+	t  = (b >> (s - 4)) & 0x7;
+	s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
+	t  = (a >> (s - 2)) & 0x3;
+	s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
+	t  = (v >> (s - 1)) & 0x1;
+	s -= ((t - r) & 256) >> 8;
+	return s - 1;
+#else
+	uint64_t picked = _pdep_u64(1ul << bit, mask);
+	return picked ? __builtin_ctzl(picked) : 0;
+#endif
+}
+
+struct spinlock_t {
+	std::atomic_bool ll = { false };
+
+	inline void lock() {
+		while( __builtin_expect(ll.exchange(true),false) ) {
+			while(ll.load(std::memory_order_relaxed))
+				asm volatile("pause");
+		}
+	}
+
+	inline bool try_lock() {
+		return false == ll.exchange(true);
+	}
+
+	inline void unlock() {
+		ll.store(false, std::memory_order_release);
+	}
+
+	inline explicit operator bool() {
+		return ll.load(std::memory_order_relaxed);
+	}
+};
+
+static inline bool bts(std::atomic_size_t & target, size_t bit ) {
+	//*
+	int result = 0;
+	asm volatile(
+		"LOCK btsq %[bit], %[target]\n\t"
+		:"=@ccc" (result)
+		: [target] "m" (target), [bit] "r" (bit)
+	);
+	return result != 0;
+	/*/
+	size_t mask = 1ul << bit;
+	size_t ret = target.fetch_or(mask, std::memory_order_relaxed);
+	return (ret & mask) != 0;
+	//*/
+}
+
+static inline bool btr(std::atomic_size_t & target, size_t bit ) {
+	//*
+	int result = 0;
+	asm volatile(
+		"LOCK btrq %[bit], %[target]\n\t"
+		:"=@ccc" (result)
+		: [target] "m" (target), [bit] "r" (bit)
+	);
+	return result != 0;
+	/*/
+	size_t mask = 1ul << bit;
+	size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);
+	return (ret & mask) != 0;
+	//*/
+}
Index: doc/theses/thierry_delisle_PhD/code/readyQ_proto/work_stealing.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/readyQ_proto/work_stealing.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ doc/theses/thierry_delisle_PhD/code/readyQ_proto/work_stealing.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,222 @@
+#pragma once
+#define LIST_VARIANT work_stealing
+
+#include <cmath>
+#include <iomanip>
+#include <memory>
+#include <mutex>
+#include <type_traits>
+
+#include "assert.hpp"
+#include "utils.hpp"
+#include "links.hpp"
+#include "snzi.hpp"
+
+using namespace std;
+
+template<typename node_t>
+class __attribute__((aligned(128))) work_stealing {
+	static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
+
+public:
+	static const char * name() {
+		return "Work Stealing";
+	}
+
+	work_stealing(unsigned _numThreads, unsigned)
+		: numThreads(_numThreads)
+		, lists(new intrusive_queue_t<node_t>[numThreads])
+		, snzi( std::log2( numThreads / 2 ), 2 )
+
+	{
+		std::cout << "Constructing Work Stealer with " << numThreads << std::endl;
+	}
+
+	~work_stealing() {
+		std::cout << "Destroying Work Stealer" << std::endl;
+		lists.reset();
+	}
+
+	__attribute__((noinline, hot)) void push(node_t * node) {
+		node->_links.ts = rdtscl();
+		if( node->_links.hint > numThreads ) {
+			node->_links.hint = tls.rng.next() % numThreads;
+			tls.stat.push.nhint++;
+		}
+
+		unsigned i = node->_links.hint;
+		auto & list = lists[i];
+		list.lock.lock();
+
+		if(list.push( node )) {
+			snzi.arrive(i);
+		}
+
+		list.lock.unlock();
+	}
+
+	__attribute__((noinline, hot)) node_t * pop() {
+		node_t * node;
+		while(true) {
+			if(!snzi.query()) {
+				return nullptr;
+			}
+
+			{
+				unsigned i = tls.my_queue;
+				auto & list = lists[i];
+				if( list.ts() != 0 ) {
+					list.lock.lock();
+					if((node = try_pop(i))) {
+						tls.stat.pop.local.success++;
+						break;
+					}
+					else {
+						tls.stat.pop.local.elock++;
+					}
+				}
+				else {
+					tls.stat.pop.local.espec++;
+				}
+			}
+
+			tls.stat.pop.steal.tried++;
+
+			int i = tls.rng.next() % numThreads;
+			auto & list = lists[i];
+			if( list.ts() == 0 ) {
+				tls.stat.pop.steal.empty++;
+				continue;
+			}
+
+			if( !list.lock.try_lock() ) {
+				tls.stat.pop.steal.locked++;
+				continue;
+			}
+
+			if((node = try_pop(i))) {
+				tls.stat.pop.steal.success++;
+				break;
+			}
+		}
+
+		#if defined(READ)
+			const unsigned f = READ;
+			if(0 == (tls.it % f)) {
+				unsigned i = tls.it / f;
+				lists[i % numThreads].ts();
+			}
+			// lists[tls.it].ts();
+			tls.it++;
+		#endif
+
+
+		return node;
+	}
+
+private:
+	node_t * try_pop(unsigned i) {
+		auto & list = lists[i];
+
+		// If list is empty, unlock and retry
+		if( list.ts() == 0 ) {
+			list.lock.unlock();
+			return nullptr;
+		}
+
+			// Actually pop the list
+		node_t * node;
+		bool emptied;
+		std::tie(node, emptied) = list.pop();
+		assert(node);
+
+		if(emptied) {
+			snzi.depart(i);
+		}
+
+		// Unlock and return
+		list.lock.unlock();
+		return node;
+	}
+
+
+public:
+
+	static std::atomic_uint32_t ticket;
+	static __attribute__((aligned(128))) thread_local struct TLS {
+		Random     rng = { int(rdtscl()) };
+		unsigned   my_queue = ticket++;
+		#if defined(READ)
+			unsigned it = 0;
+		#endif
+		struct {
+			struct {
+				std::size_t nhint = { 0 };
+			} push;
+			struct {
+				struct {
+					std::size_t success = { 0 };
+					std::size_t espec = { 0 };
+					std::size_t elock = { 0 };
+				} local;
+				struct {
+					std::size_t tried   = { 0 };
+					std::size_t locked  = { 0 };
+					std::size_t empty   = { 0 };
+					std::size_t success = { 0 };
+				} steal;
+			} pop;
+		} stat;
+	} tls;
+
+private:
+	const unsigned numThreads;
+    	std::unique_ptr<intrusive_queue_t<node_t> []> lists;
+	__attribute__((aligned(64))) snzi_t snzi;
+
+#ifndef NO_STATS
+private:
+	static struct GlobalStats {
+		struct {
+			std::atomic_size_t nhint = { 0 };
+		} push;
+		struct {
+			struct {
+				std::atomic_size_t success = { 0 };
+				std::atomic_size_t espec = { 0 };
+				std::atomic_size_t elock = { 0 };
+			} local;
+			struct {
+				std::atomic_size_t tried   = { 0 };
+				std::atomic_size_t locked  = { 0 };
+				std::atomic_size_t empty   = { 0 };
+				std::atomic_size_t success = { 0 };
+			} steal;
+		} pop;
+	} global_stats;
+
+public:
+	static void stats_tls_tally() {
+		global_stats.push.nhint += tls.stat.push.nhint;
+		global_stats.pop.local.success += tls.stat.pop.local.success;
+		global_stats.pop.local.espec   += tls.stat.pop.local.espec  ;
+		global_stats.pop.local.elock   += tls.stat.pop.local.elock  ;
+		global_stats.pop.steal.tried   += tls.stat.pop.steal.tried  ;
+		global_stats.pop.steal.locked  += tls.stat.pop.steal.locked ;
+		global_stats.pop.steal.empty   += tls.stat.pop.steal.empty  ;
+		global_stats.pop.steal.success += tls.stat.pop.steal.success;
+	}
+
+	static void stats_print(std::ostream & os ) {
+		std::cout << "----- Work Stealing Stats -----" << std::endl;
+
+		double stealSucc = double(global_stats.pop.steal.success) / global_stats.pop.steal.tried;
+		os << "Push to new Q : " << std::setw(15) << global_stats.push.nhint << "\n";
+		os << "Local Pop     : " << std::setw(15) << global_stats.pop.local.success << "\n";
+		os << "Steal Pop     : " << std::setw(15) << global_stats.pop.steal.success << "(" << global_stats.pop.local.espec << "s, " << global_stats.pop.local.elock << "l)\n";
+		os << "Steal Success : " << std::setw(15) << stealSucc << "(" << global_stats.pop.steal.tried << " tries)\n";
+		os << "Steal Fails   : " << std::setw(15) << global_stats.pop.steal.empty << "e, " << global_stats.pop.steal.locked << "l\n";
+	}
+private:
+#endif
+};
Index: c/theses/thierry_delisle_PhD/code/relaxed_list.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/relaxed_list.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,1141 +1,0 @@
-#if !defined(LIST_VARIANT_HPP)
-#define LIST_VARIANT_HPP "relaxed_list.hpp"
-#endif
-
-#include LIST_VARIANT_HPP
-#if !defined(LIST_VARIANT)
-#error not variant selected
-#endif
-
-#include <array>
-#include <iomanip>
-#include <iostream>
-#include <locale>
-#include <string>
-#include <thread>
-#include <vector>
-
-#include <getopt.h>
-#include <unistd.h>
-#include <sys/sysinfo.h>
-
-#include "utils.hpp"
-
-struct __attribute__((aligned(64))) Node {
-	static std::atomic_size_t creates;
-	static std::atomic_size_t destroys;
-
-	_LinksFields_t<Node> _links;
-
-	int value;
-	int id;
-
-	Node() { creates++; }
-	Node(int value): value(value) { creates++; }
-	~Node() { destroys++; }
-};
-
-std::atomic_size_t Node::creates  = { 0 };
-std::atomic_size_t Node::destroys = { 0 };
-
-bool enable_stats = false;
-
-template<>
-thread_local LIST_VARIANT<Node>::TLS LIST_VARIANT<Node>::tls = {};
-
-template<>
-std::atomic_uint32_t LIST_VARIANT<Node>::ticket = { 0 };
-
-#ifndef NO_STATS
-template<>
-LIST_VARIANT<Node>::GlobalStats LIST_VARIANT<Node>::global_stats = {};
-#endif
-
-// ================================================================================================
-//                        UTILS
-// ================================================================================================
-
-struct local_stat_t {
-	size_t in  = 0;
-	size_t out = 0;
-	size_t empty = 0;
-	size_t crc_in  = 0;
-	size_t crc_out = 0;
-	size_t valmax = 0;
-	size_t valmin = 100000000ul;
-	struct {
-		size_t val = 0;
-		size_t cnt = 0;
-	} comp;
-	struct {
-		size_t val = 0;
-		size_t cnt = 0;
-	} subm;
-};
-
-struct global_stat_t {
-	std::atomic_size_t in  = { 0 };
-	std::atomic_size_t out = { 0 };
-	std::atomic_size_t empty = { 0 };
-	std::atomic_size_t crc_in  = { 0 };
-	std::atomic_size_t crc_out = { 0 };
-	std::atomic_size_t valmax = { 0 };
-	std::atomic_size_t valmin = { 100000000ul };
-	struct {
-		std::atomic_size_t val = { 0 };
-		std::atomic_size_t cnt = { 0 };
-	} comp;
-	struct {
-		std::atomic_size_t val = { 0 };
-		std::atomic_size_t cnt = { 0 };
-	} subm;
-};
-
-void atomic_max(std::atomic_size_t & target, size_t value) {
-	for(;;) {
-		size_t expect = target.load(std::memory_order_relaxed);
-		if(value <= expect) return;
-		bool success = target.compare_exchange_strong(expect, value);
-		if(success) return;
-	}
-}
-
-void atomic_min(std::atomic_size_t & target, size_t value) {
-	for(;;) {
-		size_t expect = target.load(std::memory_order_relaxed);
-		if(value >= expect) return;
-		bool success = target.compare_exchange_strong(expect, value);
-		if(success) return;
-	}
-}
-
-void tally_stats(global_stat_t & global, local_stat_t & local) {
-
-	global.in    += local.in;
-	global.out   += local.out;
-	global.empty += local.empty;
-
-	global.crc_in  += local.crc_in;
-	global.crc_out += local.crc_out;
-
-	global.comp.val += local.comp.val;
-	global.comp.cnt += local.comp.cnt;
-	global.subm.val += local.subm.val;
-	global.subm.cnt += local.subm.cnt;
-
-	atomic_max(global.valmax, local.valmax);
-	atomic_min(global.valmin, local.valmin);
-
-	LIST_VARIANT<Node>::stats_tls_tally();
-}
-
-void waitfor(double & duration, barrier_t & barrier, std::atomic_bool & done) {
-	std::cout << "Starting" << std::endl;
-	auto before = Clock::now();
-	barrier.wait(0);
-	bool is_tty = isatty(STDOUT_FILENO);
-
-	while(true) {
-		usleep(100000);
-		auto now = Clock::now();
-		duration_t durr = now - before;
-		if( durr.count() > duration ) {
-			done = true;
-			break;
-		}
-		if(is_tty) {
-			std::cout << "\r" << std::setprecision(4) << durr.count();
-			std::cout.flush();
-		}
-	}
-
-	barrier.wait(0);
-	auto after = Clock::now();
-	duration_t durr = after - before;
-	duration = durr.count();
-	std::cout << "\rClosing down" << std::endl;
-}
-
-void waitfor(double & duration, barrier_t & barrier, const std::atomic_size_t & count) {
-	std::cout << "Starting" << std::endl;
-	auto before = Clock::now();
-	barrier.wait(0);
-
-	while(true) {
-		usleep(100000);
-		size_t c = count.load();
-		if( c == 0 ) {
-			break;
-		}
-		std::cout << "\r" << c;
-		std::cout.flush();
-	}
-
-	barrier.wait(0);
-	auto after = Clock::now();
-	duration_t durr = after - before;
-	duration = durr.count();
-	std::cout << "\rClosing down" << std::endl;
-}
-
-void print_stats(double duration, unsigned nthread, global_stat_t & global) {
-	assert(Node::creates == Node::destroys);
-	assert(global.crc_in == global.crc_out);
-
-	std::cout << "Done" << std::endl;
-
-	size_t ops = global.in + global.out;
-	size_t ops_sec = size_t(double(ops) / duration);
-	size_t ops_thread = ops_sec / nthread;
-	auto dur_nano = duration_cast<std::nano>(1.0);
-
-	if(global.valmax != 0) {
-		std::cout << "Max runs      : " << global.valmax << "\n";
-		std::cout << "Min runs      : " << global.valmin << "\n";
-	}
-	if(global.comp.cnt != 0) {
-		std::cout << "Submit count  : " << global.subm.cnt << "\n";
-		std::cout << "Submit average: " << ((double(global.subm.val)) / global.subm.cnt) << "\n";
-		std::cout << "Complete count: " << global.comp.cnt << "\n";
-		std::cout << "Complete avg  : " << ((double(global.comp.val)) / global.comp.cnt) << "\n";
-	}
-	std::cout << "Duration      : " << duration << "s\n";
-	std::cout << "ns/Op         : " << ( dur_nano / ops_thread )<< "\n";
-	std::cout << "Ops/sec/thread: " << ops_thread << "\n";
-	std::cout << "Ops/sec       : " << ops_sec << "\n";
-	std::cout << "Total ops     : " << ops << "(" << global.in << "i, " << global.out << "o, " << global.empty << "e)\n";
-	#ifndef NO_STATS
-		LIST_VARIANT<Node>::stats_print(std::cout);
-	#endif
-}
-
-void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output);
-
-// ================================================================================================
-//                        EXPERIMENTS
-// ================================================================================================
-
-// ================================================================================================
-__attribute__((noinline)) void runChurn_body(
-	std::atomic<bool>& done,
-	Random & rand,
-	Node * my_nodes[],
-	unsigned nslots,
-	local_stat_t & local,
-	LIST_VARIANT<Node> & list
-) {
-	while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) {
-		int idx = rand.next() % nslots;
-		if (auto node = my_nodes[idx]) {
-			local.crc_in += node->value;
-			list.push(node);
-			my_nodes[idx] = nullptr;
-			local.in++;
-		}
-		else if(auto node = list.pop()) {
-			local.crc_out += node->value;
-			my_nodes[idx] = node;
-			local.out++;
-		}
-		else {
-			local.empty++;
-		}
-	}
-}
-
-void runChurn(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes, const unsigned nslots) {
-	std::cout << "Churn Benchmark" << std::endl;
-	assert(nnodes <= nslots);
-	// List being tested
-
-	// Barrier for synchronization
-	barrier_t barrier(nthread + 1);
-
-	// Data to check everything is OK
-	global_stat_t global;
-
-	// Flag to signal termination
-	std::atomic_bool done  = { false };
-
-	// Prep nodes
-	std::cout << "Initializing ";
-	size_t npushed = 0;
-	LIST_VARIANT<Node> list = { nthread, nqueues };
-	{
-		Node** all_nodes[nthread];
-		for(auto & nodes : all_nodes) {
-			nodes = new __attribute__((aligned(64))) Node*[nslots + 8];
-			Random rand(rdtscl());
-			for(unsigned i = 0; i < nnodes; i++) {
-				nodes[i] = new Node(rand.next() % 100);
-			}
-
-			for(unsigned i = nnodes; i < nslots; i++) {
-				nodes[i] = nullptr;
-			}
-
-			for(int i = 0; i < 10 && i < (int)nslots; i++) {
-				int idx = rand.next() % nslots;
-				if (auto node = nodes[idx]) {
-					global.crc_in += node->value;
-					list.push(node);
-					npushed++;
-					nodes[idx] = nullptr;
-				}
-			}
-		}
-
-		std::cout << nnodes << " nodes (" << nslots << " slots)" << std::endl;
-
-		enable_stats = true;
-
-		std::thread * threads[nthread];
-		unsigned i = 1;
-		for(auto & t : threads) {
-			auto & my_nodes = all_nodes[i - 1];
-			t = new std::thread([&done, &list, &barrier, &global, &my_nodes, nslots](unsigned tid) {
-				Random rand(tid + rdtscl());
-
-				local_stat_t local;
-
-				// affinity(tid);
-
-				barrier.wait(tid);
-
-				// EXPERIMENT START
-
-				runChurn_body(done, rand, my_nodes, nslots, local, list);
-
-				// EXPERIMENT END
-
-				barrier.wait(tid);
-
-				tally_stats(global, local);
-
-				for(unsigned i = 0; i < nslots; i++) {
-					delete my_nodes[i];
-				}
-			}, i++);
-		}
-
-		waitfor(duration, barrier, done);
-
-		for(auto t : threads) {
-			t->join();
-			delete t;
-		}
-
-		enable_stats = false;
-
-		while(auto node = list.pop()) {
-			global.crc_out += node->value;
-			delete node;
-		}
-
-		for(auto nodes : all_nodes) {
-			delete[] nodes;
-		}
-	}
-
-	print_stats(duration, nthread, global);
-}
-
-// ================================================================================================
-__attribute__((noinline)) void runPingPong_body(
-	std::atomic<bool>& done,
-	Node initial_nodes[],
-	unsigned nnodes,
-	local_stat_t & local,
-	LIST_VARIANT<Node> & list
-) {
-	Node * nodes[nnodes];
-	{
-		unsigned i = 0;
-		for(auto & n : nodes) {
-			n = &initial_nodes[i++];
-		}
-	}
-
-	while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) {
-
-		for(Node * & node : nodes) {
-			local.crc_in += node->value;
-			list.push(node);
-			local.in++;
-		}
-
-		// -----
-
-		for(Node * & node : nodes) {
-			node = list.pop();
-			assert(node);
-			local.crc_out += node->value;
-			local.out++;
-		}
-	}
-}
-
-void runPingPong(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes) {
-	std::cout << "PingPong Benchmark" << std::endl;
-
-
-	// Barrier for synchronization
-	barrier_t barrier(nthread + 1);
-
-	// Data to check everything is OK
-	global_stat_t global;
-
-	// Flag to signal termination
-	std::atomic_bool done  = { false };
-
-	std::cout << "Initializing ";
-	// List being tested
-	LIST_VARIANT<Node> list = { nthread, nqueues };
-	{
-		enable_stats = true;
-
-		std::thread * threads[nthread];
-		unsigned i = 1;
-		for(auto & t : threads) {
-			t = new std::thread([&done, &list, &barrier, &global, nnodes](unsigned tid) {
-				Random rand(tid + rdtscl());
-
-				Node nodes[nnodes];
-				for(auto & n : nodes) {
-					n.value = (int)rand.next() % 100;
-				}
-
-				local_stat_t local;
-
-				// affinity(tid);
-
-				barrier.wait(tid);
-
-				// EXPERIMENT START
-
-				runPingPong_body(done, nodes, nnodes, local, list);
-
-				// EXPERIMENT END
-
-				barrier.wait(tid);
-
-				tally_stats(global, local);
-			}, i++);
-		}
-
-		waitfor(duration, barrier, done);
-
-		for(auto t : threads) {
-			t->join();
-			delete t;
-		}
-
-		enable_stats = false;
-	}
-
-	print_stats(duration, nthread, global);
-}
-
-// ================================================================================================
-struct __attribute__((aligned(64))) Slot {
-	Node * volatile node;
-};
-
-__attribute__((noinline)) void runProducer_body(
-	std::atomic<bool>& done,
-	Random & rand,
-	Slot * slots,
-	int nslots,
-	local_stat_t & local,
-	LIST_VARIANT<Node> & list
-) {
-	while(__builtin_expect(!done.load(std::memory_order_relaxed), true)) {
-
-		Node * node = list.pop();
-		if(!node) {
-			local.empty ++;
-			continue;
-		}
-
-		local.crc_out += node->value;
-		local.out++;
-
-		if(node->id == 0) {
-			unsigned cnt = 0;
-			for(int i = 0; i < nslots; i++) {
-				Node * found = __atomic_exchange_n( &slots[i].node, nullptr, __ATOMIC_SEQ_CST );
-				if( found ) {
-					local.crc_in += found->value;
-					local.in++;
-					cnt++;
-					list.push( found );
-				}
-			}
-
-			local.crc_in += node->value;
-			local.in++;
-			list.push( node );
-
-			local.comp.cnt++;
-			local.comp.val += cnt;
-		}
-		else {
-			unsigned len = 0;
-			while(true) {
-				auto off = rand.next();
-				for(int i = 0; i < nslots; i++) {
-					Node * expected = nullptr;
-					int idx = (i + off) % nslots;
-					Slot & slot = slots[ idx ];
-					if(
-						slot.node == nullptr &&
-						__atomic_compare_exchange_n( &slot.node, &expected, node, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST )
-					) {
-						local.subm.cnt++;
-						local.subm.val += len;
-						goto LOOP;
-					}
-					assert( expected != node );
-					len++;
-				}
-			}
-		}
-
-		LOOP:;
-	}
-}
-
-void runProducer(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes) {
-	std::cout << "Producer Benchmark" << std::endl;
-
-	// Barrier for synchronization
-	barrier_t barrier(nthread + 1);
-
-	// Data to check everything is OK
-	global_stat_t global;
-
-	// Flag to signal termination
-	std::atomic_bool done  = { false };
-
-	std::cout << "Initializing ";
-
-	int nslots = nnodes * 4;
-	Slot * slots = new Slot[nslots];
-	std::cout << nnodes << " nodes (" << nslots << " slots)" << std::endl;
-
-	// List being tested
-	LIST_VARIANT<Node> list = { nthread, nqueues };
-	{
-		Random rand(rdtscl());
-		for(unsigned i = 0; i < nnodes; i++) {
-			Node * node = new Node(rand.next() % 100);
-			node->id = i;
-			global.crc_in += node->value;
-			list.push(node);
-		}
-
-		for(int i = 0; i < nslots; i++) {
-			slots[i].node = nullptr;
-		}
-	}
-
-	{
-		enable_stats = true;
-
-		std::thread * threads[nthread];
-		unsigned i = 1;
-		for(auto & t : threads) {
-			t = new std::thread([&done, &list, &barrier, &global, slots, nslots](unsigned tid) {
-				Random rand(tid + rdtscl());
-
-				local_stat_t local;
-				barrier.wait(tid);
-
-				// EXPERIMENT START
-
-				runProducer_body(done, rand, slots, nslots, local, list);
-
-				// EXPERIMENT END
-
-				barrier.wait(tid);
-
-				tally_stats(global, local);
-			}, i++);
-		}
-
-		waitfor(duration, barrier, done);
-
-		for(auto t : threads) {
-			t->join();
-			delete t;
-		}
-
-		enable_stats = false;
-	}
-
-	{
-		while(Node * node = list.pop()) {
-			global.crc_out += node->value;
-			delete node;
-		}
-
-		for(int i = 0; i < nslots; i++) {
-			delete slots[i].node;
-		}
-
-		delete [] slots;
-	}
-
-	print_stats(duration, nthread, global);
-}
-
-// ================================================================================================
-__attribute__((noinline)) void runFairness_body(
-	unsigned tid,
-	size_t width,
-	size_t length,
-	int output[],
-	std::atomic_size_t & count,
-	Node initial_nodes[],
-	unsigned nnodes,
-	local_stat_t & local,
-	LIST_VARIANT<Node> & list
-) {
-	Node * nodes[nnodes];
-	{
-		unsigned i = 0;
-		for(auto & n : nodes) {
-			n = &initial_nodes[i++];
-		}
-	}
-
-	while(__builtin_expect(0 != count.load(std::memory_order_relaxed), true)) {
-
-		for(Node * & node : nodes) {
-			local.crc_in += node->id;
-			list.push(node);
-			local.in++;
-		}
-
-		// -----
-
-		for(Node * & node : nodes) {
-			node = list.pop();
-			assert(node);
-
-			if (unsigned(node->value) < length) {
-				size_t idx = (node->value * width) + node->id;
-				assert(idx < (width * length));
-				output[idx] = tid;
-			}
-
-			node->value++;
-			if(unsigned(node->value) == length) count--;
-
-			local.crc_out += node->id;
-			local.out++;
-		}
-	}
-}
-
-void runFairness(unsigned nthread, unsigned nqueues, double duration, unsigned nnodes, const std::string & output) {
-	std::cout << "Fairness Benchmark, outputing to : " << output << std::endl;
-
-	// Barrier for synchronization
-	barrier_t barrier(nthread + 1);
-
-	// Data to check everything is OK
-	global_stat_t global;
-
-	std::cout << "Initializing ";
-
-	// Check fairness by creating a png of where the threads ran
-	size_t width = nthread * nnodes;
-	size_t length = 100000;
-
-	std::unique_ptr<int[]> data_out { new int[width * length] };
-
-	// Flag to signal termination
-	std::atomic_size_t count = width;
-
-	// List being tested
-	LIST_VARIANT<Node> list = { nthread, nqueues };
-	{
-		enable_stats = true;
-
-		std::thread * threads[nthread];
-		unsigned i = 1;
-		for(auto & t : threads) {
-			t = new std::thread([&count, &list, &barrier, &global, nnodes, width, length, data_out = data_out.get()](unsigned tid) {
-				unsigned int start = (tid - 1) * nnodes;
-				Node nodes[nnodes];
-				for(auto & n : nodes) {
-					n.id = start;
-					n.value = 0;
-					start++;
-				}
-
-				local_stat_t local;
-
-				// affinity(tid);
-
-				barrier.wait(tid);
-
-				// EXPERIMENT START
-
-				runFairness_body(tid, width, length, data_out, count, nodes, nnodes, local, list);
-
-				// EXPERIMENT END
-
-				barrier.wait(tid);
-
-				for(const auto & n : nodes) {
-					local.valmax = max(local.valmax, size_t(n.value));
-					local.valmin = min(local.valmin, size_t(n.value));
-				}
-
-				tally_stats(global, local);
-			}, i++);
-		}
-
-		waitfor(duration, barrier, count);
-
-		for(auto t : threads) {
-			t->join();
-			delete t;
-		}
-
-		enable_stats = false;
-	}
-
-	print_stats(duration, nthread, global);
-
-	// save_fairness(data_out.get(), 100, nthread, width, length, output);
-}
-
-// ================================================================================================
-
-bool iequals(const std::string& a, const std::string& b)
-{
-    return std::equal(a.begin(), a.end(),
-                      b.begin(), b.end(),
-                      [](char a, char b) {
-                          return std::tolower(a) == std::tolower(b);
-                      });
-}
-
-int main(int argc, char * argv[]) {
-
-	double duration   = 5.0;
-	unsigned nthreads = 2;
-	unsigned nqueues  = 4;
-	unsigned nnodes   = 100;
-	unsigned nslots   = 100;
-	std::string out   = "fairness.png";
-
-	enum {
-		Churn,
-		PingPong,
-		Producer,
-		Fairness,
-		NONE
-	} benchmark = NONE;
-
-	std::cout.imbue(std::locale(""));
-
-	for(;;) {
-		static struct option options[] = {
-			{"duration",  required_argument, 0, 'd'},
-			{"nthreads",  required_argument, 0, 't'},
-			{"nqueues",   required_argument, 0, 'q'},
-			{"benchmark", required_argument, 0, 'b'},
-			{0, 0, 0, 0}
-		};
-
-		int idx = 0;
-		int opt = getopt_long(argc, argv, "d:t:q:b:", options, &idx);
-
-		std::string arg = optarg ? optarg : "";
-		size_t len = 0;
-		switch(opt) {
-			// Exit Case
-			case -1:
-				/* paranoid */ assert(optind <= argc);
-				switch(benchmark) {
-				case NONE:
-					std::cerr << "Must specify a benchmark" << std::endl;
-					goto usage;
-				case PingPong:
-					nnodes = 1;
-					switch(argc - optind) {
-					case 0: break;
-					case 1:
-						try {
-							arg = optarg = argv[optind];
-							nnodes = stoul(optarg, &len);
-							if(len != arg.size()) { throw std::invalid_argument(""); }
-						} catch(std::invalid_argument &) {
-							std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl;
-							goto usage;
-						}
-						break;
-					default:
-						std::cerr << "'PingPong' benchmark doesn't accept more than 1 extra arguments" << std::endl;
-						goto usage;
-					}
-					break;
-				case Producer:
-					nnodes = 32;
-					switch(argc - optind) {
-					case 0: break;
-					case 1:
-						try {
-							arg = optarg = argv[optind];
-							nnodes = stoul(optarg, &len);
-							if(len != arg.size()) { throw std::invalid_argument(""); }
-						} catch(std::invalid_argument &) {
-							std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl;
-							goto usage;
-						}
-						break;
-					default:
-						std::cerr << "'Producer' benchmark doesn't accept more than 1 extra arguments" << std::endl;
-						goto usage;
-					}
-					break;
-				case Churn:
-					nnodes = 100;
-					nslots = 100;
-					switch(argc - optind) {
-					case 0: break;
-					case 1:
-						try {
-							arg = optarg = argv[optind];
-							nnodes = stoul(optarg, &len);
-							if(len != arg.size()) { throw std::invalid_argument(""); }
-							nslots = nnodes;
-						} catch(std::invalid_argument &) {
-							std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl;
-							goto usage;
-						}
-						break;
-					case 2:
-						try {
-							arg = optarg = argv[optind];
-							nnodes = stoul(optarg, &len);
-							if(len != arg.size()) { throw std::invalid_argument(""); }
-						} catch(std::invalid_argument &) {
-							std::cerr << "Number of nodes must be a positive integer, was " << arg << std::endl;
-							goto usage;
-						}
-						try {
-							arg = optarg = argv[optind + 1];
-							nslots = stoul(optarg, &len);
-							if(len != arg.size()) { throw std::invalid_argument(""); }
-						} catch(std::invalid_argument &) {
-							std::cerr << "Number of slots must be a positive integer, was " << arg << std::endl;
-							goto usage;
-						}
-						break;
-					default:
-						std::cerr << "'Churn' benchmark doesn't accept more than 2 extra arguments" << std::endl;
-						goto usage;
-					}
-					break;
-				case Fairness:
-					nnodes = 1;
-					switch(argc - optind) {
-					case 0: break;
-					case 1:
-						arg = optarg = argv[optind];
-						out = arg;
-						break;
-					default:
-						std::cerr << "'Churn' benchmark doesn't accept more than 2 extra arguments" << std::endl;
-						goto usage;
-					}
-				}
-				goto run;
-			// Benchmarks
-			case 'b':
-				if(benchmark != NONE) {
-					std::cerr << "Only when benchmark can be run" << std::endl;
-					goto usage;
-				}
-				if(iequals(arg, "churn")) {
-					benchmark = Churn;
-					break;
-				}
-				if(iequals(arg, "pingpong")) {
-					benchmark = PingPong;
-					break;
-				}
-				if(iequals(arg, "producer")) {
-					benchmark = Producer;
-					break;
-				}
-				if(iequals(arg, "fairness")) {
-					benchmark = Fairness;
-					break;
-				}
-				std::cerr << "Unkown benchmark " << arg << std::endl;
-				goto usage;
-			// Numeric Arguments
-			case 'd':
-				try {
-					duration = stod(optarg, &len);
-					if(len != arg.size()) { throw std::invalid_argument(""); }
-				} catch(std::invalid_argument &) {
-					std::cerr << "Duration must be a valid double, was " << arg << std::endl;
-					goto usage;
-				}
-				break;
-			case 't':
-				try {
-					nthreads = stoul(optarg, &len);
-					if(len != arg.size()) { throw std::invalid_argument(""); }
-				} catch(std::invalid_argument &) {
-					std::cerr << "Number of threads must be a positive integer, was " << arg << std::endl;
-					goto usage;
-				}
-				break;
-			case 'q':
-				try {
-					nqueues = stoul(optarg, &len);
-					if(len != arg.size()) { throw std::invalid_argument(""); }
-				} catch(std::invalid_argument &) {
-					std::cerr << "Number of queues must be a positive integer, was " << arg << std::endl;
-					goto usage;
-				}
-				break;
-			// Other cases
-			default: /* ? */
-				std::cerr << opt << std::endl;
-			usage:
-				std::cerr << "Usage: " << argv[0] << ": [options] -b churn [NNODES] [NSLOTS = NNODES]" << std::endl;
-				std::cerr << "  or:  " << argv[0] << ": [options] -b pingpong [NNODES]" << std::endl;
-				std::cerr << "  or:  " << argv[0] << ": [options] -b producer [NNODES]" << std::endl;
-				std::cerr << std::endl;
-				std::cerr << "  -d, --duration=DURATION  Duration of the experiment, in seconds" << std::endl;
-				std::cerr << "  -t, --nthreads=NTHREADS  Number of kernel threads" << std::endl;
-				std::cerr << "  -q, --nqueues=NQUEUES    Number of queues per threads" << std::endl;
-				std::exit(1);
-		}
-	}
-	run:
-
-	check_cache_line_size();
-
-	std::cout << "Running " << nthreads << " threads (" << (nthreads * nqueues) << " queues) for " << duration << " seconds" << std::endl;
-	std::cout << "Relaxed list variant: " << LIST_VARIANT<Node>::name() << std::endl;
-	switch(benchmark) {
-		case Churn:
-			runChurn(nthreads, nqueues, duration, nnodes, nslots);
-			break;
-		case PingPong:
-			runPingPong(nthreads, nqueues, duration, nnodes);
-			break;
-		case Producer:
-			runProducer(nthreads, nqueues, duration, nnodes);
-			break;
-		case Fairness:
-			runFairness(nthreads, nqueues, duration, nnodes, out);
-			break;
-		default:
-			abort();
-	}
-	return 0;
-}
-
-const char * __my_progname = "Relaxed List";
-
-struct rgb_t {
-    double r;       // a fraction between 0 and 1
-    double g;       // a fraction between 0 and 1
-    double b;       // a fraction between 0 and 1
-};
-
-struct hsv_t {
-    double h;       // angle in degrees
-    double s;       // a fraction between 0 and 1
-    double v;       // a fraction between 0 and 1
-};
-
-rgb_t hsv2rgb(hsv_t in) {
-	double hh, p, q, t, ff;
-	long   i;
-	rgb_t  out;
-
-	if(in.s <= 0.0) {       // < is bogus, just shuts up warnings
-		out.r = in.v;
-		out.g = in.v;
-		out.b = in.v;
-		return out;
-	}
-	hh = in.h;
-	if(hh >= 360.0) hh = 0.0;
-	hh /= 60.0;
-	i = (long)hh;
-	ff = hh - i;
-	p = in.v * (1.0 - in.s);
-	q = in.v * (1.0 - (in.s * ff));
-	t = in.v * (1.0 - (in.s * (1.0 - ff)));
-
-	switch(i) {
-	case 0:
-		out.r = in.v;
-		out.g = t;
-		out.b = p;
-		break;
-	case 1:
-		out.r = q;
-		out.g = in.v;
-		out.b = p;
-		break;
-	case 2:
-		out.r = p;
-		out.g = in.v;
-		out.b = t;
-		break;
-
-	case 3:
-		out.r = p;
-		out.g = q;
-		out.b = in.v;
-		break;
-	case 4:
-		out.r = t;
-		out.g = p;
-		out.b = in.v;
-		break;
-	case 5:
-	default:
-		out.r = in.v;
-		out.g = p;
-		out.b = q;
-		break;
-	}
-	return out;
-}
-
-// void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) {
-// 	std::ofstream os(output);
-// 	os << "<html>\n";
-// 	os << "<head>\n";
-// 	os << "<style>\n";
-// 	os << "</style>\n";
-// 	os << "</head>\n";
-// 	os << "<body>\n";
-// 	os << "<table style=\"width=100%\">\n";
-
-// 	size_t idx = 0;
-// 	for(size_t r = 0ul; r < rows; r++) {
-// 		os << "<tr>\n";
-// 		for(size_t c = 0ul; c < columns; c++) {
-// 			os << "<td class=\"custom custom" << data[idx] << "\"></td>\n";
-// 			idx++;
-// 		}
-// 		os << "</tr>\n";
-// 	}
-
-// 	os << "</table>\n";
-// 	os << "</body>\n";
-// 	os << "</html>\n";
-// 	os << std::endl;
-// }
-
-// #include <png.h>
-// #include <setjmp.h>
-
-/*
-void save_fairness(const int data[], int factor, unsigned nthreads, size_t columns, size_t rows, const std::string & output) {
-	int width  = columns * factor;
-	int height = rows / factor;
-
-	int code = 0;
-	int idx = 0;
-	FILE *fp = NULL;
-	png_structp png_ptr = NULL;
-	png_infop info_ptr = NULL;
-	png_bytep row = NULL;
-
-	// Open file for writing (binary mode)
-	fp = fopen(output.c_str(), "wb");
-	if (fp == NULL) {
-		fprintf(stderr, "Could not open file %s for writing\n", output.c_str());
-		code = 1;
-		goto finalise;
-	}
-
-	   // Initialize write structure
-	png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
-	if (png_ptr == NULL) {
-		fprintf(stderr, "Could not allocate write struct\n");
-		code = 1;
-		goto finalise;
-	}
-
-	// Initialize info structure
-	info_ptr = png_create_info_struct(png_ptr);
-	if (info_ptr == NULL) {
-		fprintf(stderr, "Could not allocate info struct\n");
-		code = 1;
-		goto finalise;
-	}
-
-	// Setup Exception handling
-	if (setjmp(png_jmpbuf(png_ptr))) {
-		fprintf(stderr, "Error during png creation\n");
-		code = 1;
-		goto finalise;
-	}
-
-	png_init_io(png_ptr, fp);
-
-	// Write header (8 bit colour depth)
-	png_set_IHDR(png_ptr, info_ptr, width, height,
-		8, PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE,
-		PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
-
-	png_write_info(png_ptr, info_ptr);
-
-	// Allocate memory for one row (3 bytes per pixel - RGB)
-	row = (png_bytep) malloc(3 * width * sizeof(png_byte));
-
-	// Write image data
-	int x, y;
-	for (y=0 ; y<height ; y++) {
-		for (x=0 ; x<width ; x++) {
-			auto & r = row[(x * 3) + 0];
-			auto & g = row[(x * 3) + 1];
-			auto & b = row[(x * 3) + 2];
-			assert(idx < (rows * columns));
-			int color = data[idx] - 1;
-			assert(color < nthreads);
-			assert(color >= 0);
-			idx++;
-
-			double angle = double(color) / double(nthreads);
-
-			auto c = hsv2rgb({ 360.0 * angle, 0.8, 0.8 });
-
-			r = char(c.r * 255.0);
-			g = char(c.g * 255.0);
-			b = char(c.b * 255.0);
-
-		}
-		png_write_row(png_ptr, row);
-	}
-
-	assert(idx == (rows * columns));
-
-	// End write
-	png_write_end(png_ptr, NULL);
-
-	finalise:
-	if (fp != NULL) fclose(fp);
-	if (info_ptr != NULL) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
-	if (png_ptr != NULL) png_destroy_write_struct(&png_ptr, (png_infopp)NULL);
-	if (row != NULL) free(row);
-}
-*/
Index: c/theses/thierry_delisle_PhD/code/relaxed_list.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/relaxed_list.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,555 +1,0 @@
-#pragma once
-#define LIST_VARIANT relaxed_list
-
-#define VANILLA 0
-#define SNZI 1
-#define BITMASK 2
-#define DISCOVER 3
-#define SNZM 4
-#define BIAS 5
-#define BACK 6
-#define BACKBIAS 7
-
-#ifndef VARIANT
-#define VARIANT VANILLA
-#endif
-
-#ifndef NO_STATS
-#include <iostream>
-#endif
-
-#include <cmath>
-#include <functional>
-#include <memory>
-#include <mutex>
-#include <thread>
-#include <type_traits>
-
-#include "assert.hpp"
-#include "utils.hpp"
-#include "links.hpp"
-#include "snzi.hpp"
-#include "snzi-packed.hpp"
-#include "snzm.hpp"
-
-using namespace std;
-
-struct pick_stat {
-	struct {
-		size_t attempt = 0;
-		size_t success = 0;
-		size_t local = 0;
-	} push;
-	struct {
-		size_t attempt = 0;
-		size_t success = 0;
-		size_t mask_attempt = 0;
-		size_t mask_reset = 0;
-		size_t local = 0;
-	} pop;
-};
-
-struct empty_stat {
-	struct {
-		size_t value = 0;
-		size_t count = 0;
-	} push;
-	struct {
-		size_t value = 0;
-		size_t count = 0;
-	} pop;
-};
-
-template<typename node_t>
-class __attribute__((aligned(128))) relaxed_list {
-	static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
-
-public:
-	static const char * name() {
-		const char * names[] = {
-			"RELAXED: VANILLA",
-			"RELAXED: SNZI",
-			"RELAXED: BITMASK",
-			"RELAXED: SNZI + DISCOVERED MASK",
-			"RELAXED: SNZI + MASK",
-			"RELAXED: SNZI + LOCAL BIAS",
-			"RELAXED: SNZI + REVERSE RNG",
-			"RELAXED: SNZI + LOCAL BIAS + REVERSE RNG"
-		};
-		return names[VARIANT];
-	}
-
-	relaxed_list(unsigned numThreads, unsigned numQueues)
-		: numLists(numThreads * numQueues)
-	  	, lists(new intrusive_queue_t<node_t>[numLists])
-		#if VARIANT == SNZI || VARIANT == BACK
-			, snzi( std::log2( numLists / (2 * numQueues) ), 2 )
-		#elif VARIANT == BIAS || VARIANT == BACKBIAS
-			#ifdef SNZI_PACKED
-				, snzi( std::ceil( std::log2(numLists) ) )
-			#else
-				, snzi( std::log2( numLists / (2 * numQueues) ), 2 )
-			#endif
-		#elif VARIANT == SNZM || VARIANT == DISCOVER
-			, snzm( numLists )
-		#endif
-	{
-		assertf(7 * 8 * 8 >= numLists, "List currently only supports 448 sublists");
-		std::cout << "Constructing Relaxed List with " << numLists << std::endl;
-	}
-
-	~relaxed_list() {
-		std::cout << "Destroying Relaxed List" << std::endl;
-		lists.reset();
-	}
-
-    	__attribute__((noinline, hot)) void push(node_t * node) {
-		node->_links.ts = rdtscl();
-
-		while(true) {
-			// Pick a random list
-			unsigned i = idx_from_r(tls.rng1.next(), VARIANT == BIAS || VARIANT == BACKBIAS);
-
-			#ifndef NO_STATS
-				tls.pick.push.attempt++;
-			#endif
-
-			// If we can't lock it retry
-			if( !lists[i].lock.try_lock() ) continue;
-
-			#if VARIANT == VANILLA || VARIANT == BITMASK
-				__attribute__((unused)) int num = numNonEmpty;
-			#endif
-
-			// Actually push it
-			if(lists[i].push(node)) {
-				#if VARIANT == DISCOVER
-					size_t qword = i >> 6ull;
-					size_t bit   = i & 63ull;
-					assert(qword == 0);
-					bts(tls.mask, bit);
-					snzm.arrive(i);
-				#elif VARIANT == SNZI || VARIANT == BIAS
-					snzi.arrive(i);
-				#elif VARIANT == BACK || VARIANT == BACKBIAS
-					snzi.arrive(i);
-					tls.rng2.set_raw_state( tls.rng1.get_raw_state());
-				#elif VARIANT == SNZM
-					snzm.arrive(i);
-				#elif VARIANT == BITMASK
-					numNonEmpty++;
-					size_t qword = i >> 6ull;
-					size_t bit   = i & 63ull;
-					assertf((list_mask[qword] & (1ul << bit)) == 0, "Before set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
-					__attribute__((unused)) bool ret = bts(list_mask[qword], bit);
-					assert(!ret);
-					assertf((list_mask[qword] & (1ul << bit)) != 0, "After set %zu:%zu (%u), %zx & %zx", qword, bit, i, list_mask[qword].load(), (1ul << bit));
-				#else
-					numNonEmpty++;
-				#endif
-			}
-			#if VARIANT == VANILLA || VARIANT == BITMASK
-				assert(numNonEmpty <= (int)numLists);
-			#endif
-
-			// Unlock and return
-			lists[i].lock.unlock();
-
-			#ifndef NO_STATS
-				tls.pick.push.success++;
-				#if VARIANT == VANILLA || VARIANT == BITMASK
-					tls.empty.push.value += num;
-					tls.empty.push.count += 1;
-				#endif
-			#endif
-			return;
-		}
-    	}
-
-	__attribute__((noinline, hot)) node_t * pop() {
-		#if VARIANT == DISCOVER
-			assert(numLists <= 64);
-			while(snzm.query()) {
-				tls.pick.pop.mask_attempt++;
-				unsigned i, j;
-				{
-					// Pick first list totally randomly
-					i = tls.rng1.next() % numLists;
-
-					// Pick the other according to the bitmask
-					unsigned r = tls.rng1.next();
-
-					size_t mask = tls.mask.load(std::memory_order_relaxed);
-					if(mask == 0) {
-						tls.pick.pop.mask_reset++;
-						mask = (1U << numLists) - 1;
-						tls.mask.store(mask, std::memory_order_relaxed);
-					}
-
-					unsigned b = rand_bit(r, mask);
-
-					assertf(b < 64, "%zu %u", mask, b);
-
-					j = b;
-
-					assert(j < numLists);
-				}
-
-				if(auto node = try_pop(i, j)) return node;
-			}
-		#elif VARIANT == SNZI
-			while(snzi.query()) {
-				// Pick two lists at random
-				int i = tls.rng1.next() % numLists;
-				int j = tls.rng1.next() % numLists;
-
-				if(auto node = try_pop(i, j)) return node;
-			}
-
-		#elif VARIANT == BACK
-			while(snzi.query()) {
-				// Pick two lists at random
-				int i = tls.rng2.prev() % numLists;
-				int j = tls.rng2.prev() % numLists;
-
-				if(auto node = try_pop(i, j)) return node;
-			}
-
-		#elif VARIANT == BACKBIAS
-			while(snzi.query()) {
-				// Pick two lists at random
-				int i = idx_from_r(tls.rng2.prev(), true);
-				int j = idx_from_r(tls.rng2.prev(), true);
-
-				if(auto node = try_pop(i, j)) return node;
-			}
-
-		#elif VARIANT == BIAS
-			while(snzi.query()) {
-				// Pick two lists at random
-				unsigned ri = tls.rng1.next();
-				unsigned i;
-				unsigned j = tls.rng1.next();
-				if(0 == (ri & 0xF)) {
-					i = (ri >> 4) % numLists;
-				} else {
-					i = tls.my_queue + ((ri >> 4) % 4);
-					j = tls.my_queue + ((j >> 4) % 4);
-					tls.pick.pop.local++;
-				}
-				i %= numLists;
-				j %= numLists;
-
-				if(auto node = try_pop(i, j)) return node;
-			}
-		#elif VARIANT == SNZM
-			//*
-			while(snzm.query()) {
-				tls.pick.pop.mask_attempt++;
-				unsigned i, j;
-				{
-					// Pick two random number
-					unsigned ri = tls.rng1.next();
-					unsigned rj = tls.rng1.next();
-
-					// Pick two nodes from it
-					unsigned wdxi = ri & snzm.mask;
-					// unsigned wdxj = rj & snzm.mask;
-
-					// Get the masks from the nodes
-					// size_t maski = snzm.masks(wdxi);
-					size_t maskj = snzm.masks(wdxj);
-
-					if(maski == 0 && maskj == 0) continue;
-
-					#if defined(__BMI2__)
-						uint64_t idxsi = _pext_u64(snzm.indexes, maski);
-						// uint64_t idxsj = _pext_u64(snzm.indexes, maskj);
-
-						auto pi = __builtin_popcountll(maski);
-						// auto pj = __builtin_popcountll(maskj);
-
-						ri = pi ? ri & ((pi >> 3) - 1) : 0;
-						rj = pj ? rj & ((pj >> 3) - 1) : 0;
-
-						unsigned bi = (idxsi >> (ri << 3)) & 0xff;
-						unsigned bj = (idxsj >> (rj << 3)) & 0xff;
-					#else
-						unsigned bi = rand_bit(ri >> snzm.depth, maski);
-						unsigned bj = rand_bit(rj >> snzm.depth, maskj);
-					#endif
-
-					i = (bi << snzm.depth) | wdxi;
-					j = (bj << snzm.depth) | wdxj;
-
-					/* paranoid */ assertf(i < numLists, "%u %u", bj, wdxi);
-					/* paranoid */ assertf(j < numLists, "%u %u", bj, wdxj);
-				}
-
-				if(auto node = try_pop(i, j)) return node;
-			}
-			/*/
-			while(snzm.query()) {
-				// Pick two lists at random
-				int i = tls.rng1.next() % numLists;
-				int j = tls.rng1.next() % numLists;
-
-				if(auto node = try_pop(i, j)) return node;
-			}
-			//*/
-		#elif VARIANT == BITMASK
-			int nnempty;
-			while(0 != (nnempty = numNonEmpty)) {
-				tls.pick.pop.mask_attempt++;
-				unsigned i, j;
-				{
-					// Pick two lists at random
-					unsigned num = ((numLists - 1) >> 6) + 1;
-
-					unsigned ri = tls.rng1.next();
-					unsigned rj = tls.rng1.next();
-
-					unsigned wdxi = (ri >> 6u) % num;
-					unsigned wdxj = (rj >> 6u) % num;
-
-					size_t maski = list_mask[wdxi].load(std::memory_order_relaxed);
-					size_t maskj = list_mask[wdxj].load(std::memory_order_relaxed);
-
-					if(maski == 0 && maskj == 0) continue;
-
-					unsigned bi = rand_bit(ri, maski);
-					unsigned bj = rand_bit(rj, maskj);
-
-					assertf(bi < 64, "%zu %u", maski, bi);
-					assertf(bj < 64, "%zu %u", maskj, bj);
-
-					i = bi | (wdxi << 6);
-					j = bj | (wdxj << 6);
-
-					assertf(i < numLists, "%u", wdxi << 6);
-					assertf(j < numLists, "%u", wdxj << 6);
-				}
-
-				if(auto node = try_pop(i, j)) return node;
-			}
-		#else
-			while(numNonEmpty != 0) {
-				// Pick two lists at random
-				int i = tls.rng1.next() % numLists;
-				int j = tls.rng1.next() % numLists;
-
-				if(auto node = try_pop(i, j)) return node;
-			}
-		#endif
-
-		return nullptr;
-    	}
-
-private:
-	node_t * try_pop(unsigned i, unsigned j) {
-		#ifndef NO_STATS
-			tls.pick.pop.attempt++;
-		#endif
-
-		#if VARIANT == DISCOVER
-			if(lists[i].ts() > 0) bts(tls.mask, i); else btr(tls.mask, i);
-			if(lists[j].ts() > 0) bts(tls.mask, j); else btr(tls.mask, j);
-		#endif
-
-		// Pick the bet list
-		int w = i;
-		if( __builtin_expect(lists[j].ts() != 0, true) ) {
-			w = (lists[i].ts() < lists[j].ts()) ? i : j;
-		}
-
-		auto & list = lists[w];
-		// If list looks empty retry
-		if( list.ts() == 0 ) return nullptr;
-
-		// If we can't get the lock retry
-		if( !list.lock.try_lock() ) return nullptr;
-
-		#if VARIANT == VANILLA || VARIANT == BITMASK
-			__attribute__((unused)) int num = numNonEmpty;
-		#endif
-
-		// If list is empty, unlock and retry
-		if( list.ts() == 0 ) {
-			list.lock.unlock();
-			return nullptr;
-		}
-
-		// Actually pop the list
-		node_t * node;
-		bool emptied;
-		std::tie(node, emptied) = list.pop();
-		assert(node);
-
-		if(emptied) {
-			#if VARIANT == DISCOVER
-				size_t qword = w >> 6ull;
-				size_t bit   = w & 63ull;
-				assert(qword == 0);
-				__attribute__((unused)) bool ret = btr(tls.mask, bit);
-				snzm.depart(w);
-			#elif VARIANT == SNZI || VARIANT == BIAS || VARIANT == BACK || VARIANT == BACKBIAS
-				snzi.depart(w);
-			#elif VARIANT == SNZM
-				snzm.depart(w);
-			#elif VARIANT == BITMASK
-				numNonEmpty--;
-				size_t qword = w >> 6ull;
-				size_t bit   = w & 63ull;
-				assert((list_mask[qword] & (1ul << bit)) != 0);
-				__attribute__((unused)) bool ret = btr(list_mask[qword], bit);
-				assert(ret);
-				assert((list_mask[qword] & (1ul << bit)) == 0);
-			#else
-				numNonEmpty--;
-			#endif
-		}
-
-		// Unlock and return
-		list.lock.unlock();
-		#if VARIANT == VANILLA || VARIANT == BITMASK
-			assert(numNonEmpty >= 0);
-		#endif
-		#ifndef NO_STATS
-			tls.pick.pop.success++;
-			#if VARIANT == VANILLA || VARIANT == BITMASK
-				tls.empty.pop.value += num;
-				tls.empty.pop.count += 1;
-			#endif
-		#endif
-		return node;
-	}
-
-	inline unsigned idx_from_r(unsigned r, bool bias) {
-		unsigned i;
-		if(bias) {
-			if(0 == (r & 0x3F)) {
-				i = r >> 6;
-			} else {
-				i = tls.my_queue + ((r >> 6) % 4);
-				tls.pick.push.local++;
-			}
-		} else {
-			i = r;
-		}
-		return i % numLists;
-	}
-
-public:
-
-	static __attribute__((aligned(128))) thread_local struct TLS {
-		Random     rng1 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
-		Random     rng2 = { unsigned(std::hash<std::thread::id>{}(std::this_thread::get_id()) ^ rdtscl()) };
-		unsigned   my_queue = (ticket++) * 4;
-		pick_stat  pick;
-		empty_stat empty;
-		__attribute__((aligned(64))) std::atomic_size_t mask = { 0 };
-	} tls;
-
-private:
-	const unsigned numLists;
-    	__attribute__((aligned(64))) std::unique_ptr<intrusive_queue_t<node_t> []> lists;
-private:
-	#if VARIANT == SNZI || VARIANT == BACK
-		snzi_t snzi;
-	#elif VARIANT == BIAS || VARIANT == BACKBIAS
-		#ifdef SNZI_PACKED
-			snzip_t snzi;
-		#else
-			snzi_t snzi;
-		#endif
-	#elif VARIANT == SNZM || VARIANT == DISCOVER
-		snzm_t snzm;
-	#else
-		std::atomic_int numNonEmpty  = { 0 };  // number of non-empty lists
-	#endif
-	#if VARIANT == BITMASK
-		std::atomic_size_t list_mask[7] = { {0}, {0}, {0}, {0}, {0}, {0}, {0} }; // which queues are empty
-	#endif
-
-public:
-	static const constexpr size_t sizeof_queue = sizeof(intrusive_queue_t<node_t>);
-	static std::atomic_uint32_t ticket;
-
-#ifndef NO_STATS
-	static void stats_tls_tally() {
-		global_stats.pick.push.attempt += tls.pick.push.attempt;
-		global_stats.pick.push.success += tls.pick.push.success;
-		global_stats.pick.push.local += tls.pick.push.local;
-		global_stats.pick.pop .attempt += tls.pick.pop.attempt;
-		global_stats.pick.pop .success += tls.pick.pop.success;
-		global_stats.pick.pop .mask_attempt += tls.pick.pop.mask_attempt;
-		global_stats.pick.pop .mask_reset += tls.pick.pop.mask_reset;
-		global_stats.pick.pop .local += tls.pick.pop.local;
-
-		global_stats.qstat.push.value += tls.empty.push.value;
-		global_stats.qstat.push.count += tls.empty.push.count;
-		global_stats.qstat.pop .value += tls.empty.pop .value;
-		global_stats.qstat.pop .count += tls.empty.pop .count;
-	}
-
-private:
-	static struct GlobalStats {
-		struct {
-			struct {
-				std::atomic_size_t attempt = { 0 };
-				std::atomic_size_t success = { 0 };
-				std::atomic_size_t local = { 0 };
-			} push;
-			struct {
-				std::atomic_size_t attempt = { 0 };
-				std::atomic_size_t success = { 0 };
-				std::atomic_size_t mask_attempt = { 0 };
-				std::atomic_size_t mask_reset = { 0 };
-				std::atomic_size_t local = { 0 };
-			} pop;
-		} pick;
-		struct {
-			struct {
-				std::atomic_size_t value = { 0 };
-				std::atomic_size_t count = { 0 };
-			} push;
-			struct {
-				std::atomic_size_t value = { 0 };
-				std::atomic_size_t count = { 0 };
-			} pop;
-		} qstat;
-	} global_stats;
-
-public:
-	static void stats_print(std::ostream & os ) {
-		std::cout << "----- Relaxed List Stats -----" << std::endl;
-
-		const auto & global = global_stats;
-
-		double push_sur = (100.0 * double(global.pick.push.success) / global.pick.push.attempt);
-		double pop_sur  = (100.0 * double(global.pick.pop .success) / global.pick.pop .attempt);
-		double mpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_attempt);
-		double rpop_sur = (100.0 * double(global.pick.pop .success) / global.pick.pop .mask_reset);
-
-		double push_len = double(global.pick.push.attempt     ) / global.pick.push.success;
-		double pop_len  = double(global.pick.pop .attempt     ) / global.pick.pop .success;
-		double mpop_len = double(global.pick.pop .mask_attempt) / global.pick.pop .success;
-		double rpop_len = double(global.pick.pop .mask_reset  ) / global.pick.pop .success;
-
-		os << "Push   Pick   : " << push_sur << " %, len " << push_len << " (" << global.pick.push.attempt      << " / " << global.pick.push.success << ")\n";
-		os << "Pop    Pick   : " << pop_sur  << " %, len " << pop_len  << " (" << global.pick.pop .attempt      << " / " << global.pick.pop .success << ")\n";
-		os << "TryPop Pick   : " << mpop_sur << " %, len " << mpop_len << " (" << global.pick.pop .mask_attempt << " / " << global.pick.pop .success << ")\n";
-		os << "Pop M Reset   : " << rpop_sur << " %, len " << rpop_len << " (" << global.pick.pop .mask_reset   << " / " << global.pick.pop .success << ")\n";
-
-		double avgQ_push = double(global.qstat.push.value) / global.qstat.push.count;
-		double avgQ_pop  = double(global.qstat.pop .value) / global.qstat.pop .count;
-		double avgQ      = double(global.qstat.push.value + global.qstat.pop .value) / (global.qstat.push.count + global.qstat.pop .count);
-		os << "Push   Avg Qs : " << avgQ_push << " (" << global.qstat.push.count << "ops)\n";
-		os << "Pop    Avg Qs : " << avgQ_pop  << " (" << global.qstat.pop .count << "ops)\n";
-		os << "Global Avg Qs : " << avgQ      << " (" << (global.qstat.push.count + global.qstat.pop .count) << "ops)\n";
-
-		os << "Local Push    : " << global.pick.push.local << "\n";
-		os << "Local Pop     : " << global.pick.pop .local << "\n";
-	}
-#endif
-};
Index: c/theses/thierry_delisle_PhD/code/relaxed_list_layout.cpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/relaxed_list_layout.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,23 +1,0 @@
-#define NO_IO
-#define NDEBUG
-#include "relaxed_list.hpp"
-
-struct __attribute__((aligned(64))) Node {
-	static std::atomic_size_t creates;
-	static std::atomic_size_t destroys;
-
-	_LinksFields_t<Node> _links;
-
-	int value;
-	Node(int value): value(value) {
-		creates++;
-	}
-
-	~Node() {
-		destroys++;
-	}
-};
-
-int main() {
-	return sizeof(relaxed_list<Node>) + relaxed_list<Node>::sizeof_queue;
-}
Index: c/theses/thierry_delisle_PhD/code/runperf.sh
===================================================================
--- doc/theses/thierry_delisle_PhD/code/runperf.sh	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,14 +1,0 @@
-#!/bin/bash
-set -e
-
-name=$1
-event=$2
-
-shift 2
-
-echo "perf record -F 99 -a -g -o raw/$name.data -e $event -- $@ > raw/$name.out"
-perf record -F 99 -a -g -o raw/$name.data -e $event -- $@ > raw/$name.out
-echo "=============================="
-cat raw/$name.out
-echo "=============================="
-./process.sh $name
Index: c/theses/thierry_delisle_PhD/code/scale.sh
===================================================================
--- doc/theses/thierry_delisle_PhD/code/scale.sh	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,7 +1,0 @@
-#!/bin/bash
-taskset -c 24-31 ./a.out -t  1 -b churn | grep --color -E "(ns|Ops|Running)"
-taskset -c 24-31 ./a.out -t  2 -b churn | grep --color -E "(ns|Ops|Running)"
-taskset -c 24-31 ./a.out -t  4 -b churn | grep --color -E "(ns|Ops|Running)"
-taskset -c 24-31 ./a.out -t  8 -b churn | grep --color -E "(ns|Ops|Running)"
-taskset -c 16-31 ./a.out -t 16 -b churn | grep --color -E "(ns|Ops|Running)"
-taskset -c  0-31 ./a.out -t 32 -b churn | grep --color -E "(ns|Ops|Running)"
Index: c/theses/thierry_delisle_PhD/code/snzi-packed.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/snzi-packed.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,179 +1,0 @@
-#pragma once
-
-#define SNZI_PACKED
-
-#include "utils.hpp"
-
-
-class snzip_t {
-	class node;
-	class node_aligned;
-public:
-	const unsigned mask;
-	const int root;
-	std::unique_ptr<snzip_t::node[]> leafs;
-	std::unique_ptr<snzip_t::node_aligned[]> nodes;
-
-	snzip_t(unsigned depth);
-
-	void arrive(int idx) {
-		// idx >>= 1;
-		idx %= mask;
-		leafs[idx].arrive();
-	}
-
-	void depart(int idx) {
-		// idx >>= 1;
-		idx %= mask;
-		leafs[idx].depart();
-	}
-
-	bool query() const {
-		return nodes[root].query();
-	}
-
-
-private:
-	class __attribute__((aligned(32))) node {
-		friend class snzip_t;
-	private:
-
-		union val_t {
-			static constexpr char Half = -1;
-
-			uint64_t _all;
-			struct __attribute__((packed)) {
-				char cnt;
-				uint64_t ver:56;
-			};
-
-			bool cas(val_t & exp, char _cnt, uint64_t _ver) volatile {
-				val_t t;
-				t.ver = _ver;
-				t.cnt = _cnt;
-				/* paranoid */ assert(t._all == ((_ver << 8) | ((unsigned char)_cnt)));
-				return __atomic_compare_exchange_n(&this->_all, &exp._all, t._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
-			}
-
-			bool cas(val_t & exp, const val_t & tar) volatile {
-				return __atomic_compare_exchange_n(&this->_all, &exp._all, tar._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
-			}
-
-			val_t() : _all(0) {}
-			val_t(const volatile val_t & o) : _all(o._all) {}
-		};
-
-		//--------------------------------------------------
-		// Hierarchical node
-		void arrive_h() {
-			int undoArr = 0;
-			bool success = false;
-			while(!success) {
-				auto x{ value };
-				/* paranoid */ assert(x.cnt <= 120);
-				if( x.cnt >= 1 ) {
-					if( value.cas(x, x.cnt + 1, x.ver ) ) {
-						success = true;
-					}
-				}
-				/* paranoid */ assert(x.cnt <= 120);
-				if( x.cnt == 0 ) {
-					if( value.cas(x, val_t::Half, x.ver + 1) ) {
-						success = true;
-						x.cnt = val_t::Half;
-						x.ver = x.ver + 1;
-					}
-				}
-				/* paranoid */ assert(x.cnt <= 120);
-				if( x.cnt == val_t::Half ) {
-					/* paranoid */ assert(parent);
-					if(undoArr == 2) {
-						undoArr--;
-					} else {
-						parent->arrive();
-					}
-					if( !value.cas(x, 1, x.ver) ) {
-						undoArr = undoArr + 1;
-					}
-				}
-			}
-
-			for(int i = 0; i < undoArr; i++) {
-				/* paranoid */ assert(parent);
-				parent->depart();
-			}
-		}
-
-		void depart_h() {
-			while(true) {
-				auto x = (const val_t)value;
-				/* paranoid */ assertf(x.cnt >= 1, "%d", x.cnt);
-				if( value.cas( x, x.cnt - 1, x.ver ) ) {
-					if( x.cnt == 1 ) {
-						/* paranoid */ assert(parent);
-						parent->depart();
-					}
-					return;
-				}
-			}
-		}
-
-		//--------------------------------------------------
-		// Root node
-		void arrive_r() {
-			__atomic_fetch_add(&value._all, 1, __ATOMIC_SEQ_CST);
-		}
-
-		void depart_r() {
-			__atomic_fetch_sub(&value._all, 1, __ATOMIC_SEQ_CST);
-		}
-
-	private:
-		volatile val_t value;
-		class node * parent = nullptr;
-
-		bool is_root() {
-			return parent == nullptr;
-		}
-
-	public:
-		void arrive() {
-			if(is_root()) arrive_r();
-			else arrive_h();
-		}
-
-		void depart() {
-			if(is_root()) depart_r();
-			else depart_h();
-		}
-
-		bool query() {
-			/* paranoid */ assert(is_root());
-			return value._all > 0;
-		}
-	};
-
-	class __attribute__((aligned(128))) node_aligned : public node {};
-};
-
-snzip_t::snzip_t(unsigned depth)
-	: mask( std::pow(2, depth) )
-	, root( ((std::pow(2, depth + 1) - 1) / (2 -1)) - 1 - mask )
-	, leafs(new node[ mask ]())
-	, nodes(new node_aligned[ root + 1 ]())
-{
-	int width = std::pow(2, depth);
-	int hwdith = width / 2;
-	std::cout << "SNZI: " << depth << "x" << width << "(" << mask - 1 << ") " << (sizeof(snzip_t::node) * (root + 1)) << " bytes" << std::endl;
-	for(int i = 0; i < width; i++) {
-		int idx = i % hwdith;
-		std::cout << i << " -> " << idx + width << std::endl;
-		leafs[i].parent = &nodes[ idx ];
-	}
-
-	for(int i = 0; i < root; i++) {
-		int idx = (i / 2) + hwdith;
-		std::cout << i + width << " -> " << idx + width << std::endl;
-		nodes[i].parent = &nodes[ idx ];
-	}
-}
Index: c/theses/thierry_delisle_PhD/code/snzi.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/snzi.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,164 +1,0 @@
-#pragma once
-
-#include "utils.hpp"
-
-
-class snzi_t {
-	class node;
-public:
-	const unsigned mask;
-	const int root;
-	std::unique_ptr<snzi_t::node[]> nodes;
-
-	snzi_t(unsigned depth, unsigned base = 2);
-
-	void arrive(int idx) {
-		idx >>= 2;
-		idx %= mask;
-		nodes[idx].arrive();
-	}
-
-	void depart(int idx) {
-		idx >>= 2;
-		idx %= mask;
-		nodes[idx].depart();
-	}
-
-	bool query() const {
-		return nodes[root].query();
-	}
-
-
-private:
-	class __attribute__((aligned(128))) node {
-		friend class snzi_t;
-	private:
-
-		union val_t {
-			static constexpr char Half = -1;
-
-			uint64_t _all;
-			struct __attribute__((packed)) {
-				char cnt;
-				uint64_t ver:56;
-			};
-
-			bool cas(val_t & exp, char _cnt, uint64_t _ver) volatile {
-				val_t t;
-				t.ver = _ver;
-				t.cnt = _cnt;
-				/* paranoid */ assert(t._all == ((_ver << 8) | ((unsigned char)_cnt)));
-				return __atomic_compare_exchange_n(&this->_all, &exp._all, t._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
-			}
-
-			bool cas(val_t & exp, const val_t & tar) volatile {
-				return __atomic_compare_exchange_n(&this->_all, &exp._all, tar._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
-			}
-
-			val_t() : _all(0) {}
-			val_t(const volatile val_t & o) : _all(o._all) {}
-		};
-
-		//--------------------------------------------------
-		// Hierarchical node
-		void arrive_h() {
-			int undoArr = 0;
-			bool success = false;
-			while(!success) {
-				auto x{ value };
-				/* paranoid */ assert(x.cnt <= 120);
-				if( x.cnt >= 1 ) {
-					if( value.cas(x, x.cnt + 1, x.ver ) ) {
-						success = true;
-					}
-				}
-				/* paranoid */ assert(x.cnt <= 120);
-				if( x.cnt == 0 ) {
-					if( value.cas(x, val_t::Half, x.ver + 1) ) {
-						success = true;
-						x.cnt = val_t::Half;
-						x.ver = x.ver + 1;
-					}
-				}
-				/* paranoid */ assert(x.cnt <= 120);
-				if( x.cnt == val_t::Half ) {
-					/* paranoid */ assert(parent);
-					if(undoArr == 2) {
-						undoArr--;
-					} else {
-						parent->arrive();
-					}
-					if( !value.cas(x, 1, x.ver) ) {
-						undoArr = undoArr + 1;
-					}
-				}
-			}
-
-			for(int i = 0; i < undoArr; i++) {
-				/* paranoid */ assert(parent);
-				parent->depart();
-			}
-		}
-
-		void depart_h() {
-			while(true) {
-				auto x = (const val_t)value;
-				/* paranoid */ assertf(x.cnt >= 1, "%d", x.cnt);
-				if( value.cas( x, x.cnt - 1, x.ver ) ) {
-					if( x.cnt == 1 ) {
-						/* paranoid */ assert(parent);
-						parent->depart();
-					}
-					return;
-				}
-			}
-		}
-
-		//--------------------------------------------------
-		// Root node
-		void arrive_r() {
-			__atomic_fetch_add(&value._all, 1, __ATOMIC_SEQ_CST);
-		}
-
-		void depart_r() {
-			__atomic_fetch_sub(&value._all, 1, __ATOMIC_SEQ_CST);
-		}
-
-	private:
-		volatile val_t value;
-		class node * parent = nullptr;
-
-		bool is_root() {
-			return parent == nullptr;
-		}
-
-	public:
-		void arrive() {
-			if(is_root()) arrive_r();
-			else arrive_h();
-		}
-
-		void depart() {
-			if(is_root()) depart_r();
-			else depart_h();
-		}
-
-		bool query() {
-			/* paranoid */ assert(is_root());
-			return value._all > 0;
-		}
-	};
-};
-
-snzi_t::snzi_t(unsigned depth, unsigned base)
-	: mask( std::pow(base, depth) )
-	, root( ((std::pow(base, depth + 1) - 1) / (base -1)) - 1 )
-	, nodes(new node[ root + 1 ]())
-{
-	int width = std::pow(base, depth);
-	std::cout << "SNZI: " << depth << "x" << width << "(" << mask - 1 << ") " << (sizeof(snzi_t::node) * (root + 1)) << " bytes" << std::endl;
-	for(int i = 0; i < root; i++) {
-		std::cout << i << " -> " << (i / base) + width << std::endl;
-		nodes[i].parent = &nodes[(i / base) + width];
-	}
-}
Index: c/theses/thierry_delisle_PhD/code/snzm.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/snzm.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,213 +1,0 @@
-#pragma once
-
-#include "utils.hpp"
-
-
-class snzm_t {
-	class node;
-public:
-	const unsigned depth;
-	const unsigned mask;
-	const int root;
-	std::unique_ptr<snzm_t::node[]> nodes;
-
-	#if defined(__BMI2__)
-		const uint64_t indexes = 0x0706050403020100;
-	#endif
-
-	snzm_t(unsigned numLists);
-
-	void arrive(int idx) {
-		int i = idx & mask;
-		nodes[i].arrive( idx >> depth);
-	}
-
-	void depart(int idx) {
-		int i = idx & mask;
-		nodes[i].depart( idx >> depth );
-	}
-
-	bool query() const {
-		return nodes[root].query();
-	}
-
-	uint64_t masks( unsigned node ) {
-		/* paranoid */ assert( (node & mask) == node );
-		#if defined(__BMI2__)
-			return nodes[node].mask_all;
-		#else
-			return nodes[node].mask;
-		#endif
-	}
-
-private:
-	class __attribute__((aligned(128))) node {
-		friend class snzm_t;
-	private:
-
-		union val_t {
-			static constexpr char Half = -1;
-
-			uint64_t _all;
-			struct __attribute__((packed)) {
-				char cnt;
-				uint64_t ver:56;
-			};
-
-			bool cas(val_t & exp, char _cnt, uint64_t _ver) volatile {
-				val_t t;
-				t.ver = _ver;
-				t.cnt = _cnt;
-				/* paranoid */ assert(t._all == ((_ver << 8) | ((unsigned char)_cnt)));
-				return __atomic_compare_exchange_n(&this->_all, &exp._all, t._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
-			}
-
-			bool cas(val_t & exp, const val_t & tar) volatile {
-				return __atomic_compare_exchange_n(&this->_all, &exp._all, tar._all, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
-			}
-
-			val_t() : _all(0) {}
-			val_t(const volatile val_t & o) : _all(o._all) {}
-		};
-
-		//--------------------------------------------------
-		// Hierarchical node
-		void arrive_h() {
-			int undoArr = 0;
-			bool success = false;
-			while(!success) {
-				auto x{ value };
-				/* paranoid */ assert(x.cnt <= 120);
-				if( x.cnt >= 1 ) {
-					if( value.cas(x, x.cnt + 1, x.ver ) ) {
-						success = true;
-					}
-				}
-				/* paranoid */ assert(x.cnt <= 120);
-				if( x.cnt == 0 ) {
-					if( value.cas(x, val_t::Half, x.ver + 1) ) {
-						success = true;
-						x.cnt = val_t::Half;
-						x.ver = x.ver + 1;
-					}
-				}
-				/* paranoid */ assert(x.cnt <= 120);
-				if( x.cnt == val_t::Half ) {
-					/* paranoid */ assert(parent);
-					parent->arrive();
-					if( !value.cas(x, 1, x.ver) ) {
-						undoArr = undoArr + 1;
-					}
-				}
-			}
-
-			for(int i = 0; i < undoArr; i++) {
-				/* paranoid */ assert(parent);
-				parent->depart();
-			}
-		}
-
-		void depart_h() {
-			while(true) {
-				auto x = (const val_t)value;
-				/* paranoid */ assertf(x.cnt >= 1, "%d", x.cnt);
-				if( value.cas( x, x.cnt - 1, x.ver ) ) {
-					if( x.cnt == 1 ) {
-						/* paranoid */ assert(parent);
-						parent->depart();
-					}
-					return;
-				}
-			}
-		}
-
-		//--------------------------------------------------
-		// Root node
-		void arrive_r() {
-			__atomic_fetch_add(&value._all, 1, __ATOMIC_SEQ_CST);
-		}
-
-		void depart_r() {
-			__atomic_fetch_sub(&value._all, 1, __ATOMIC_SEQ_CST);
-		}
-
-		//--------------------------------------------------
-		// Interface node
-		void arrive() {
-			/* paranoid */ assert(!is_leaf);
-			if(is_root()) arrive_r();
-			else arrive_h();
-		}
-
-		void depart() {
-			/* paranoid */ assert(!is_leaf);
-			if(is_root()) depart_r();
-			else depart_h();
-		}
-
-	private:
-		volatile val_t value;
-		#if defined(__BMI2__)
-			union __attribute__((packed)) {
-				volatile uint8_t mask[8];
-				volatile uint64_t mask_all;
-			};
-		#else
-			volatile size_t mask = 0;
-		#endif
-
-		class node * parent = nullptr;
-		bool is_leaf = false;
-
-		bool is_root() {
-			return parent == nullptr;
-		}
-
-	public:
-		void arrive( int bit ) {
-			/* paranoid */ assert( is_leaf );
-
-			arrive_h();
-			#if defined(__BMI2__)
-				/* paranoid */ assert( bit < 8 );
-				mask[bit] = 0xff;
-			#else
-				/* paranoid */ assert( (mask & ( 1 << bit )) == 0 );
-				__atomic_fetch_add( &mask, 1 << bit, __ATOMIC_RELAXED );
-			#endif
-
-		}
-
-		void depart( int bit ) {
-			/* paranoid */ assert( is_leaf );
-
-			#if defined(__BMI2__)
-				/* paranoid */ assert( bit < 8 );
-				mask[bit] = 0x00;
-			#else
-				/* paranoid */ assert( (mask & ( 1 << bit )) != 0 );
-				__atomic_fetch_sub( &mask, 1 << bit, __ATOMIC_RELAXED );
-			#endif
-			depart_h();
-		}
-
-		bool query() {
-			/* paranoid */ assert(is_root());
-			return value._all > 0;
-		}
-	};
-};
-
-snzm_t::snzm_t(unsigned numLists)
-	: depth( std::log2( numLists / 8 ) )
-	, mask( (1 << depth) - 1 )
-	, root( (1 << (depth + 1)) - 2 )
-	, nodes(new node[ root + 1 ]())
-{
-	int width = 1 << depth;
-	std::cout << "SNZI with Mask: " << depth << "x" << width << "(" << mask << ")" << std::endl;
-	for(int i = 0; i < root; i++) {
-		nodes[i].is_leaf = i < width;
-		nodes[i].parent = &nodes[(i / 2) + width ];
-	}
-}
Index: c/theses/thierry_delisle_PhD/code/utils.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/utils.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,250 +1,0 @@
-#pragma once
-
-#include <cassert>
-#include <cstddef>
-#include <atomic>
-#include <chrono>
-#include <fstream>
-#include <iostream>
-
-#include <unistd.h>
-#include <sys/sysinfo.h>
-
-#include <x86intrin.h>
-
-// Barrier from
-class barrier_t {
-public:
-	barrier_t(size_t total)
-		: waiting(0)
-		, total(total)
-	{}
-
-	void wait(unsigned) {
-		size_t target = waiting++;
-		target = (target - (target % total)) + total;
-		while(waiting < target)
-			asm volatile("pause");
-
-		assert(waiting < (1ul << 60));
-    	}
-
-private:
-	std::atomic<size_t> waiting;
-	size_t total;
-};
-
-// class Random {
-// private:
-// 	unsigned int seed;
-// public:
-// 	Random(int seed) {
-// 		this->seed = seed;
-// 	}
-
-// 	/** returns pseudorandom x satisfying 0 <= x < n. **/
-// 	unsigned int next() {
-// 		seed ^= seed << 6;
-// 		seed ^= seed >> 21;
-// 		seed ^= seed << 7;
-// 		return seed;
-//     	}
-// };
-
-constexpr uint64_t extendedEuclidY(uint64_t a, uint64_t b);
-constexpr uint64_t extendedEuclidX(uint64_t a, uint64_t b){
-    return (b==0) ? 1 : extendedEuclidY(b, a - b * (a / b));
-}
-constexpr uint64_t extendedEuclidY(uint64_t a, uint64_t b){
-    return (b==0) ? 0 : extendedEuclidX(b, a - b * (a / b)) - (a / b) * extendedEuclidY(b, a - b * (a / b));
-}
-
-class Random {
-private:
-	uint64_t x;
-
-	static constexpr const uint64_t M  = 1ul << 48ul;
-	static constexpr const uint64_t A  = 25214903917;
-	static constexpr const uint64_t C  = 11;
-	static constexpr const uint64_t D  = 16;
-
-public:
-	static constexpr const uint64_t m  = M;
-	static constexpr const uint64_t a  = A;
-	static constexpr const uint64_t c  = C;
-	static constexpr const uint64_t d  = D;
-	static constexpr const uint64_t ai = extendedEuclidX(A, M);
-public:
-	Random(unsigned int seed) {
-		this->x = seed * a;
-	}
-
-	/** returns pseudorandom x satisfying 0 <= x < n. **/
-	unsigned int next() {
-		//nextx = (a * x + c) % m;
-		x = (A * x + C) & (M - 1);
-		return x >> D;
-	}
-	unsigned int prev() {
-		//prevx = (ainverse * (x - c)) mod m
-		unsigned int r = x >> D;
-		x = ai * (x - C) & (M - 1);
-		return r;
-	}
-
-	void set_raw_state(uint64_t _x) {
-		this->x = _x;
-	}
-
-	uint64_t get_raw_state() {
-		return this->x;
-	}
-};
-
-static inline long long rdtscl(void) {
-    unsigned int lo, hi;
-    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
-    return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
-}
-
-static inline void affinity(int tid) {
-	static int cpus = get_nprocs();
-
-	cpu_set_t  mask;
-	CPU_ZERO(&mask);
-	int cpu = cpus - tid;  // Set CPU affinity to tid, starting from the end
-	CPU_SET(cpu, &mask);
-	auto result = sched_setaffinity(0, sizeof(mask), &mask);
-	if(result != 0) {
-		std::cerr << "Affinity set failed with " << result<< ", wanted " << cpu << std::endl;
-	}
-}
-
-static const constexpr std::size_t cache_line_size = 64;
-static inline void check_cache_line_size() {
-	std::cout << "Checking cache line size" << std::endl;
-	const std::string cache_file = "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size";
-
-	std::ifstream ifs (cache_file, std::ifstream::in);
-
-	if(!ifs.good()) {
-		std::cerr << "Could not open file to check cache line size" << std::endl;
-		std::cerr << "Looking for: " << cache_file << std::endl;
-		std::exit(2);
-	}
-
-	size_t got;
-	ifs >> got;
-
-	ifs.close();
-
-	if(cache_line_size != got) {
-		std::cerr << "Cache line has incorrect size : " << got << std::endl;
-		std::exit(1);
-	}
-
-	std::cout << "Done" << std::endl;
-}
-
-using Clock = std::chrono::high_resolution_clock;
-using duration_t = std::chrono::duration<double>;
-using std::chrono::nanoseconds;
-
-template<typename Ratio, typename T>
-T duration_cast(T seconds) {
-	return std::chrono::duration_cast<std::chrono::duration<T, Ratio>>(std::chrono::duration<T>(seconds)).count();
-}
-
-static inline unsigned rand_bit(unsigned rnum, size_t mask) __attribute__((artificial));
-static inline unsigned rand_bit(unsigned rnum, size_t mask) {
-	unsigned bit = mask ? rnum % __builtin_popcountl(mask) : 0;
-#if !defined(__BMI2__)
-	uint64_t v = mask;   // Input value to find position with rank r.
-	unsigned int r = bit + 1;// Input: bit's desired rank [1-64].
-	unsigned int s;      // Output: Resulting position of bit with rank r [1-64]
-	uint64_t a, b, c, d; // Intermediate temporaries for bit count.
-	unsigned int t;      // Bit count temporary.
-
-	// Do a normal parallel bit count for a 64-bit integer,
-	// but store all intermediate steps.
-	a =  v - ((v >> 1) & ~0UL/3);
-	b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
-	c = (b + (b >> 4)) & ~0UL/0x11;
-	d = (c + (c >> 8)) & ~0UL/0x101;
-
-
-	t = (d >> 32) + (d >> 48);
-	// Now do branchless select!
-	s  = 64;
-	s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
-	t  = (d >> (s - 16)) & 0xff;
-	s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
-	t  = (c >> (s - 8)) & 0xf;
-	s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
-	t  = (b >> (s - 4)) & 0x7;
-	s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
-	t  = (a >> (s - 2)) & 0x3;
-	s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
-	t  = (v >> (s - 1)) & 0x1;
-	s -= ((t - r) & 256) >> 8;
-	return s - 1;
-#else
-	uint64_t picked = _pdep_u64(1ul << bit, mask);
-	return picked ? __builtin_ctzl(picked) : 0;
-#endif
-}
-
-struct spinlock_t {
-	std::atomic_bool ll = { false };
-
-	inline void lock() {
-		while( __builtin_expect(ll.exchange(true),false) ) {
-			while(ll.load(std::memory_order_relaxed))
-				asm volatile("pause");
-		}
-	}
-
-	inline bool try_lock() {
-		return false == ll.exchange(true);
-	}
-
-	inline void unlock() {
-		ll.store(false, std::memory_order_release);
-	}
-
-	inline explicit operator bool() {
-		return ll.load(std::memory_order_relaxed);
-	}
-};
-
-static inline bool bts(std::atomic_size_t & target, size_t bit ) {
-	//*
-	int result = 0;
-	asm volatile(
-		"LOCK btsq %[bit], %[target]\n\t"
-		:"=@ccc" (result)
-		: [target] "m" (target), [bit] "r" (bit)
-	);
-	return result != 0;
-	/*/
-	size_t mask = 1ul << bit;
-	size_t ret = target.fetch_or(mask, std::memory_order_relaxed);
-	return (ret & mask) != 0;
-	//*/
-}
-
-static inline bool btr(std::atomic_size_t & target, size_t bit ) {
-	//*
-	int result = 0;
-	asm volatile(
-		"LOCK btrq %[bit], %[target]\n\t"
-		:"=@ccc" (result)
-		: [target] "m" (target), [bit] "r" (bit)
-	);
-	return result != 0;
-	/*/
-	size_t mask = 1ul << bit;
-	size_t ret = target.fetch_and(~mask, std::memory_order_relaxed);
-	return (ret & mask) != 0;
-	//*/
-}
Index: c/theses/thierry_delisle_PhD/code/work_stealing.hpp
===================================================================
--- doc/theses/thierry_delisle_PhD/code/work_stealing.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,222 +1,0 @@
-#pragma once
-#define LIST_VARIANT work_stealing
-
-#include <cmath>
-#include <iomanip>
-#include <memory>
-#include <mutex>
-#include <type_traits>
-
-#include "assert.hpp"
-#include "utils.hpp"
-#include "links.hpp"
-#include "snzi.hpp"
-
-using namespace std;
-
-template<typename node_t>
-class __attribute__((aligned(128))) work_stealing {
-	static_assert(std::is_same<decltype(node_t::_links), _LinksFields_t<node_t>>::value, "Node must have a links field");
-
-public:
-	static const char * name() {
-		return "Work Stealing";
-	}
-
-	work_stealing(unsigned _numThreads, unsigned)
-		: numThreads(_numThreads)
-		, lists(new intrusive_queue_t<node_t>[numThreads])
-		, snzi( std::log2( numThreads / 2 ), 2 )
-
-	{
-		std::cout << "Constructing Work Stealer with " << numThreads << std::endl;
-	}
-
-	~work_stealing() {
-		std::cout << "Destroying Work Stealer" << std::endl;
-		lists.reset();
-	}
-
-	__attribute__((noinline, hot)) void push(node_t * node) {
-		node->_links.ts = rdtscl();
-		if( node->_links.hint > numThreads ) {
-			node->_links.hint = tls.rng.next() % numThreads;
-			tls.stat.push.nhint++;
-		}
-
-		unsigned i = node->_links.hint;
-		auto & list = lists[i];
-		list.lock.lock();
-
-		if(list.push( node )) {
-			snzi.arrive(i);
-		}
-
-		list.lock.unlock();
-	}
-
-	__attribute__((noinline, hot)) node_t * pop() {
-		node_t * node;
-		while(true) {
-			if(!snzi.query()) {
-				return nullptr;
-			}
-
-			{
-				unsigned i = tls.my_queue;
-				auto & list = lists[i];
-				if( list.ts() != 0 ) {
-					list.lock.lock();
-					if((node = try_pop(i))) {
-						tls.stat.pop.local.success++;
-						break;
-					}
-					else {
-						tls.stat.pop.local.elock++;
-					}
-				}
-				else {
-					tls.stat.pop.local.espec++;
-				}
-			}
-
-			tls.stat.pop.steal.tried++;
-
-			int i = tls.rng.next() % numThreads;
-			auto & list = lists[i];
-			if( list.ts() == 0 ) {
-				tls.stat.pop.steal.empty++;
-				continue;
-			}
-
-			if( !list.lock.try_lock() ) {
-				tls.stat.pop.steal.locked++;
-				continue;
-			}
-
-			if((node = try_pop(i))) {
-				tls.stat.pop.steal.success++;
-				break;
-			}
-		}
-
-		#if defined(READ)
-			const unsigned f = READ;
-			if(0 == (tls.it % f)) {
-				unsigned i = tls.it / f;
-				lists[i % numThreads].ts();
-			}
-			// lists[tls.it].ts();
-			tls.it++;
-		#endif
-
-
-		return node;
-	}
-
-private:
-	node_t * try_pop(unsigned i) {
-		auto & list = lists[i];
-
-		// If list is empty, unlock and retry
-		if( list.ts() == 0 ) {
-			list.lock.unlock();
-			return nullptr;
-		}
-
-			// Actually pop the list
-		node_t * node;
-		bool emptied;
-		std::tie(node, emptied) = list.pop();
-		assert(node);
-
-		if(emptied) {
-			snzi.depart(i);
-		}
-
-		// Unlock and return
-		list.lock.unlock();
-		return node;
-	}
-
-
-public:
-
-	static std::atomic_uint32_t ticket;
-	static __attribute__((aligned(128))) thread_local struct TLS {
-		Random     rng = { int(rdtscl()) };
-		unsigned   my_queue = ticket++;
-		#if defined(READ)
-			unsigned it = 0;
-		#endif
-		struct {
-			struct {
-				std::size_t nhint = { 0 };
-			} push;
-			struct {
-				struct {
-					std::size_t success = { 0 };
-					std::size_t espec = { 0 };
-					std::size_t elock = { 0 };
-				} local;
-				struct {
-					std::size_t tried   = { 0 };
-					std::size_t locked  = { 0 };
-					std::size_t empty   = { 0 };
-					std::size_t success = { 0 };
-				} steal;
-			} pop;
-		} stat;
-	} tls;
-
-private:
-	const unsigned numThreads;
-    	std::unique_ptr<intrusive_queue_t<node_t> []> lists;
-	__attribute__((aligned(64))) snzi_t snzi;
-
-#ifndef NO_STATS
-private:
-	static struct GlobalStats {
-		struct {
-			std::atomic_size_t nhint = { 0 };
-		} push;
-		struct {
-			struct {
-				std::atomic_size_t success = { 0 };
-				std::atomic_size_t espec = { 0 };
-				std::atomic_size_t elock = { 0 };
-			} local;
-			struct {
-				std::atomic_size_t tried   = { 0 };
-				std::atomic_size_t locked  = { 0 };
-				std::atomic_size_t empty   = { 0 };
-				std::atomic_size_t success = { 0 };
-			} steal;
-		} pop;
-	} global_stats;
-
-public:
-	static void stats_tls_tally() {
-		global_stats.push.nhint += tls.stat.push.nhint;
-		global_stats.pop.local.success += tls.stat.pop.local.success;
-		global_stats.pop.local.espec   += tls.stat.pop.local.espec  ;
-		global_stats.pop.local.elock   += tls.stat.pop.local.elock  ;
-		global_stats.pop.steal.tried   += tls.stat.pop.steal.tried  ;
-		global_stats.pop.steal.locked  += tls.stat.pop.steal.locked ;
-		global_stats.pop.steal.empty   += tls.stat.pop.steal.empty  ;
-		global_stats.pop.steal.success += tls.stat.pop.steal.success;
-	}
-
-	static void stats_print(std::ostream & os ) {
-		std::cout << "----- Work Stealing Stats -----" << std::endl;
-
-		double stealSucc = double(global_stats.pop.steal.success) / global_stats.pop.steal.tried;
-		os << "Push to new Q : " << std::setw(15) << global_stats.push.nhint << "\n";
-		os << "Local Pop     : " << std::setw(15) << global_stats.pop.local.success << "\n";
-		os << "Steal Pop     : " << std::setw(15) << global_stats.pop.steal.success << "(" << global_stats.pop.local.espec << "s, " << global_stats.pop.local.elock << "l)\n";
-		os << "Steal Success : " << std::setw(15) << stealSucc << "(" << global_stats.pop.steal.tried << " tries)\n";
-		os << "Steal Fails   : " << std::setw(15) << global_stats.pop.steal.empty << "e, " << global_stats.pop.steal.locked << "l\n";
-	}
-private:
-#endif
-};
Index: doc/user/Makefile
===================================================================
--- doc/user/Makefile	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/user/Makefile	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -55,5 +55,5 @@
 
 ${DOCUMENT} : ${BASE}.ps
-	ps2pdf $<
+	ps2pdf -dPDFSETTINGS=/prepress $<
 
 ${BASE}.ps : ${BASE}.dvi
Index: doc/user/user.tex
===================================================================
--- doc/user/user.tex	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ doc/user/user.tex	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -11,6 +11,6 @@
 %% Created On       : Wed Apr  6 14:53:29 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Fri Mar  6 13:34:52 2020
-%% Update Count     : 3924
+%% Last Modified On : Mon Oct  5 08:57:29 2020
+%% Update Count     : 3998
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
@@ -30,35 +30,12 @@
 \usepackage{upquote}									% switch curled `'" to straight
 \usepackage{calc}
-\usepackage{xspace}
 \usepackage{varioref}									% extended references
-\usepackage{listings}									% format program code
+\usepackage[labelformat=simple,aboveskip=0pt,farskip=0pt]{subfig}
+\renewcommand{\thesubfigure}{\alph{subfigure})}
 \usepackage[flushmargin]{footmisc}						% support label/reference in footnote
 \usepackage{latexsym}                                   % \Box glyph
 \usepackage{mathptmx}                                   % better math font with "times"
 \usepackage[usenames]{color}
-\input{common}                                          % common CFA document macros
-\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
-\usepackage{breakurl}
-
-\usepackage[pagewise]{lineno}
-\renewcommand{\linenumberfont}{\scriptsize\sffamily}
-\usepackage[firstpage]{draftwatermark}
-\SetWatermarkLightness{0.9}
-
-% Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore
-% removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR
-% AFTER HYPERREF.
-\renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}
-
-\setlength{\topmargin}{-0.45in}							% move running title into header
-\setlength{\headsep}{0.25in}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\CFAStyle												% use default CFA format-style
-\lstnewenvironment{C++}[1][]                            % use C++ style
-{\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®},#1}}
-{}
-
+\newcommand{\CFALatin}{}
 % inline code ©...© (copyright symbol) emacs: C-q M-)
 % red highlighting ®...® (registered trademark symbol) emacs: C-q M-.
@@ -68,4 +45,32 @@
 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
 % math escape $...$ (dollar symbol)
+\input{common}                                          % common CFA document macros
+\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
+\usepackage{breakurl}
+
+\renewcommand\footnoterule{\kern -3pt\rule{0.3\linewidth}{0.15pt}\kern 2pt}
+
+\usepackage[pagewise]{lineno}
+\renewcommand{\linenumberfont}{\scriptsize\sffamily}
+\usepackage[firstpage]{draftwatermark}
+\SetWatermarkLightness{0.9}
+
+% Default underscore is too low and wide. Cannot use lstlisting "literate" as replacing underscore
+% removes it as a variable-name character so keywords in variables are highlighted. MUST APPEAR
+% AFTER HYPERREF.
+\renewcommand{\textunderscore}{\leavevmode\makebox[1.2ex][c]{\rule{1ex}{0.075ex}}}
+
+\setlength{\topmargin}{-0.45in}							% move running title into header
+\setlength{\headsep}{0.25in}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\CFAStyle												% use default CFA format-style
+\lstnewenvironment{C++}[1][]                            % use C++ style
+{\lstset{language=C++,moredelim=**[is][\protect\color{red}]{®}{®},#1}}
+{}
+
+\newsavebox{\myboxA}
+\newsavebox{\myboxB}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -79,6 +84,4 @@
 \newcommand{\G}[1]{{\Textbf[OliveGreen]{#1}}}
 \newcommand{\KWC}{K-W C\xspace}
-
-\newsavebox{\LstBox}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -253,8 +256,8 @@
 
 The signature feature of \CFA is \emph{\Index{overload}able} \Index{parametric-polymorphic} functions~\cite{forceone:impl,Cormack90,Duggan96} with functions generalized using a ©forall© clause (giving the language its name):
-\begin{lstlisting}
+\begin{cfa}
 ®forall( otype T )® T identity( T val ) { return val; }
 int forty_two = identity( 42 ); §\C{// T is bound to int, forty\_two == 42}§
-\end{lstlisting}
+\end{cfa}
 % extending the C type system with parametric polymorphism and overloading, as opposed to the \Index*[C++]{\CC{}} approach of object-oriented extensions.
 \CFA{}\hspace{1pt}'s polymorphism was originally formalized by \Index*{Glen Ditchfield}\index{Ditchfield, Glen}~\cite{Ditchfield92}, and first implemented by \Index*{Richard Bilson}\index{Bilson, Richard}~\cite{Bilson03}.
@@ -275,5 +278,5 @@
 \begin{comment}
 A simple example is leveraging the existing type-unsafe (©void *©) C ©bsearch© to binary search a sorted floating array:
-\begin{lstlisting}
+\begin{cfa}
 void * bsearch( const void * key, const void * base, size_t dim, size_t size,
 				int (* compar)( const void *, const void * ));
@@ -284,7 +287,7 @@
 double key = 5.0, vals[10] = { /* 10 sorted floating values */ };
 double * val = (double *)bsearch( &key, vals, 10, sizeof(vals[0]), comp ); §\C{// search sorted array}§
-\end{lstlisting}
+\end{cfa}
 which can be augmented simply with a polymorphic, type-safe, \CFA-overloaded wrappers:
-\begin{lstlisting}
+\begin{cfa}
 forall( otype T | { int ?<?( T, T ); } ) T * bsearch( T key, const T * arr, size_t size ) {
 	int comp( const void * t1, const void * t2 ) { /* as above with double changed to T */ }
@@ -297,5 +300,5 @@
 double * val = bsearch( 5.0, vals, 10 ); §\C{// selection based on return type}§
 int posn = bsearch( 5.0, vals, 10 );
-\end{lstlisting}
+\end{cfa}
 The nested function ©comp© provides the hidden interface from typed \CFA to untyped (©void *©) C, plus the cast of the result.
 Providing a hidden ©comp© function in \CC is awkward as lambdas do not use C calling-conventions and template declarations cannot appear at block scope.
@@ -305,10 +308,10 @@
 \CFA has replacement libraries condensing hundreds of existing C functions into tens of \CFA overloaded functions, all without rewriting the actual computations.
 For example, it is possible to write a type-safe \CFA wrapper ©malloc© based on the C ©malloc©:
-\begin{lstlisting}
+\begin{cfa}
 forall( dtype T | sized(T) ) T * malloc( void ) { return (T *)malloc( sizeof(T) ); }
 int * ip = malloc(); §\C{// select type and size from left-hand side}§
 double * dp = malloc();
 struct S {...} * sp = malloc();
-\end{lstlisting}
+\end{cfa}
 where the return type supplies the type/size of the allocation, which is impossible in most type systems.
 \end{comment}
@@ -943,39 +946,4 @@
 the same level as a ©case© clause; the target label may be case ©default©, but only associated
 with the current ©switch©/©choose© statement.
-
-
-\subsection{Loop Control}
-
-The ©for©/©while©/©do-while© loop-control allows empty or simplified ranges (see Figure~\ref{f:LoopControlExamples}).
-\begin{itemize}
-\item
-The loop index is polymorphic in the type of the comparison value N (when the start value is implicit) or the start value M.
-\item
-An empty conditional implies comparison value of ©1© (true).
-\item
-A comparison N is implicit up-to exclusive range [0,N©®)®©.
-\item
-A comparison ©=© N is implicit up-to inclusive range [0,N©®]®©.
-\item
-The up-to range M ©~©\index{~@©~©} N means exclusive range [M,N©®)®©.
-\item
-The up-to range M ©~=©\index{~=@©~=©} N means inclusive range [M,N©®]®©.
-\item
-The down-to range M ©-~©\index{-~@©-~©} N means exclusive range [N,M©®)®©.
-\item
-The down-to range M ©-~=©\index{-~=@©-~=©} N means inclusive range [N,M©®]®©.
-\item
-©0© is the implicit start value;
-\item
-©1© is the implicit increment value.
-\item
-The up-to range uses operator ©+=© for increment;
-\item
-The down-to range uses operator ©-=© for decrement.
-\item
-©@© means put nothing in this field.
-\item
-©:© means start another index.
-\end{itemize}
 
 \begin{figure}
@@ -1086,4 +1054,39 @@
 
 
+\subsection{Loop Control}
+
+The ©for©/©while©/©do-while© loop-control allows empty or simplified ranges (see Figure~\ref{f:LoopControlExamples}).
+\begin{itemize}
+\item
+The loop index is polymorphic in the type of the comparison value N (when the start value is implicit) or the start value M.
+\item
+An empty conditional implies comparison value of ©1© (true).
+\item
+A comparison N is implicit up-to exclusive range [0,N©®)®©.
+\item
+A comparison ©=© N is implicit up-to inclusive range [0,N©®]®©.
+\item
+The up-to range M ©~©\index{~@©~©} N means exclusive range [M,N©®)®©.
+\item
+The up-to range M ©~=©\index{~=@©~=©} N means inclusive range [M,N©®]®©.
+\item
+The down-to range M ©-~©\index{-~@©-~©} N means exclusive range [N,M©®)®©.
+\item
+The down-to range M ©-~=©\index{-~=@©-~=©} N means inclusive range [N,M©®]®©.
+\item
+©0© is the implicit start value;
+\item
+©1© is the implicit increment value.
+\item
+The up-to range uses operator ©+=© for increment;
+\item
+The down-to range uses operator ©-=© for decrement.
+\item
+©@© means put nothing in this field.
+\item
+©:© means start another index.
+\end{itemize}
+
+
 %\subsection{\texorpdfstring{Labelled \protect\lstinline@continue@ / \protect\lstinline@break@}{Labelled continue / break}}
 \subsection{\texorpdfstring{Labelled \LstKeywordStyle{continue} / \LstKeywordStyle{break} Statement}{Labelled continue / break Statement}}
@@ -1095,81 +1098,73 @@
 for ©break©, the target label can also be associated with a ©switch©, ©if© or compound (©{}©) statement.
 \VRef[Figure]{f:MultiLevelExit} shows ©continue© and ©break© indicating the specific control structure, and the corresponding C program using only ©goto© and labels.
-The innermost loop has 7 exit points, which cause continuation or termination of one or more of the 7 \Index{nested control-structure}s.
+The innermost loop has 8 exit points, which cause continuation or termination of one or more of the 7 \Index{nested control-structure}s.
 
 \begin{figure}
-\begin{tabular}{@{\hspace{\parindentlnth}}l@{\hspace{\parindentlnth}}l@{\hspace{\parindentlnth}}l@{}}
-\multicolumn{1}{@{\hspace{\parindentlnth}}c@{\hspace{\parindentlnth}}}{\textbf{\CFA}}	& \multicolumn{1}{@{\hspace{\parindentlnth}}c}{\textbf{C}}	\\
-\begin{cfa}
-®LC:® {
-	... §declarations§ ...
-	®LS:® switch ( ... ) {
-	  case 3:
-		®LIF:® if ( ... ) {
-			®LF:® for ( ... ) {
-				®LW:® while ( ... ) {
-					... break ®LC®; ...
-					... break ®LS®; ...
-					... break ®LIF®; ...
-					... continue ®LF;® ...
-					... break ®LF®; ...
-					... continue ®LW®; ...
-					... break ®LW®; ...
-				} // while
-			} // for
-		} else {
-			... break ®LIF®; ...
-		} // if
-	} // switch
+\centering
+\begin{lrbox}{\myboxA}
+\begin{cfa}[tabsize=3]
+®Compound:® {
+	®Try:® try {
+		®For:® for ( ... ) {
+			®While:® while ( ... ) {
+				®Do:® do {
+					®If:® if ( ... ) {
+						®Switch:® switch ( ... ) {
+							case 3:
+								®break Compound®;
+								®break Try®;
+								®break For®;      /* or */  ®continue For®;
+								®break While®;  /* or */  ®continue While®;
+								®break Do®;      /* or */  ®continue Do®;
+								®break If®;
+								®break Switch®;
+							} // switch
+						} else {
+							... ®break If®; ...	// terminate if
+						} // if
+				} while ( ... ); // do
+			} // while
+		} // for
+	} ®finally® { // always executed
+	} // try
 } // compound
 \end{cfa}
-&
-\begin{cfa}
+\end{lrbox}
+
+\begin{lrbox}{\myboxB}
+\begin{cfa}[tabsize=3]
 {
-	... §declarations§ ...
-	switch ( ... ) {
-	  case 3:
-		if ( ... ) {
-			for ( ... ) {
-				while ( ... ) {
-					... goto ®LC®; ...
-					... goto ®LS®; ...
-					... goto ®LIF®; ...
-					... goto ®LFC®; ...
-					... goto ®LFB®; ...
-					... goto ®LWC®; ...
-					... goto ®LWB®; ...
-				  ®LWC®: ; } ®LWB:® ;
-			  ®LFC:® ; } ®LFB:® ;
-		} else {
-			... goto ®LIF®; ...
-		} ®L3:® ;
-	} ®LS:® ;
-} ®LC:® ;
-\end{cfa}
-&
-\begin{cfa}
-
-
-
-
-
-
-
-// terminate compound
-// terminate switch
-// terminate if
-// continue loop
-// terminate loop
-// continue loop
-// terminate loop
-
-
-
-// terminate if
-
-
-
-\end{cfa}
-\end{tabular}
+
+		®ForC:® for ( ... ) {
+			®WhileC:® while ( ... ) {
+				®DoC:® do {
+					if ( ... ) {
+						switch ( ... ) {
+							case 3:
+								®goto Compound®;
+								®goto Try®;
+								®goto ForB®;      /* or */  ®goto ForC®;
+								®goto WhileB®;  /* or */  ®goto WhileC®;
+								®goto DoB®;      /* or */  ®goto DoC®;
+								®goto If®;
+								®goto Switch®;
+							} ®Switch:® ;
+						} else {
+							... ®goto If®; ...	// terminate if
+						} ®If:®;
+				} while ( ... ); ®DoB:® ;
+			} ®WhileB:® ;
+		} ®ForB:® ;
+
+
+} ®Compound:® ;
+\end{cfa}
+\end{lrbox}
+
+\subfloat[\CFA]{\label{f:CFibonacci}\usebox\myboxA}
+\hspace{2pt}
+\vrule
+\hspace{2pt}
+\subfloat[C]{\label{f:CFAFibonacciGen}\usebox\myboxB}
 \caption{Multi-level Exit}
 \label{f:MultiLevelExit}
@@ -1426,7 +1421,7 @@
 try {
 	f(...);
-} catch( E e ; §boolean-predicate§ ) {		§\C[8cm]{// termination handler}§
+} catch( E e ; §boolean-predicate§ ) {		§\C{// termination handler}§
 	// recover and continue
-} catchResume( E e ; §boolean-predicate§ ) { §\C{// resumption handler}\CRT§
+} catchResume( E e ; §boolean-predicate§ ) { §\C{// resumption handler}§
 	// repair and return
 } finally {
@@ -3491,5 +3486,5 @@
 For implicit formatted input, the common case is reading a sequence of values separated by whitespace, where the type of an input constant must match with the type of the input variable.
 \begin{cquote}
-\begin{lrbox}{\LstBox}
+\begin{lrbox}{\myboxA}
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
 int x;   double y   char z;
@@ -3497,5 +3492,5 @@
 \end{lrbox}
 \begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{3em}}l@{}}
-\multicolumn{1}{@{}l@{}}{\usebox\LstBox} \\
+\multicolumn{1}{@{}l@{}}{\usebox\myboxA} \\
 \multicolumn{1}{c@{\hspace{2em}}}{\textbf{\CFA}}	& \multicolumn{1}{c@{\hspace{2em}}}{\textbf{\CC}}	& \multicolumn{1}{c}{\textbf{Python}}	\\
 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
@@ -6672,4 +6667,7 @@
 For example, an initial alignment and fill capability are preserved during a resize copy so the copy has the same alignment and extended storage is filled.
 Without sticky properties it is dangerous to use ©realloc©, resulting in an idiom of manually performing the reallocation to maintain correctness.
+\begin{cfa}
+
+\end{cfa}
 
 \CFA memory management extends allocation to support constructors for initialization of allocated storage, \eg in
@@ -6721,24 +6719,26 @@
 
 	// §\CFA§ safe general allocation, fill, resize, alignment, array
-	T * alloc( void );§\indexc{alloc}§
-	T * alloc( size_t dim );
-	T * alloc( T ptr[], size_t dim );
-	T * alloc_set( char fill );§\indexc{alloc_set}§
-	T * alloc_set( T fill );
-	T * alloc_set( size_t dim, char fill );
-	T * alloc_set( size_t dim, T fill );
-	T * alloc_set( size_t dim, const T fill[] );
-	T * alloc_set( T ptr[], size_t dim, char fill );
-
-	T * alloc_align( size_t align );
-	T * alloc_align( size_t align, size_t dim );
-	T * alloc_align( T ptr[], size_t align ); // aligned realloc array
-	T * alloc_align( T ptr[], size_t align, size_t dim ); // aligned realloc array
-	T * alloc_align_set( size_t align, char fill );
-	T * alloc_align_set( size_t align, T fill );
-	T * alloc_align_set( size_t align, size_t dim, char fill );
-	T * alloc_align_set( size_t align, size_t dim, T fill );
-	T * alloc_align_set( size_t align, size_t dim, const T fill[] );
-	T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill );
+	T * alloc( void );§\indexc{alloc}§					§\C[3.5in]{// variable, T size}§
+	T * alloc( size_t dim );							§\C{// array[dim], T size elements}§
+	T * alloc( T ptr[], size_t dim );					§\C{// realloc array[dim], T size elements}§
+
+	T * alloc_set( char fill );§\indexc{alloc_set}§		§\C{// variable, T size, fill bytes with value}§
+	T * alloc_set( T fill );							§\C{// variable, T size, fill with value}§
+	T * alloc_set( size_t dim, char fill );				§\C{// array[dim], T size elements, fill bytes with value}§
+	T * alloc_set( size_t dim, T fill );				§\C{// array[dim], T size elements, fill elements with value}§
+	T * alloc_set( size_t dim, const T fill[] );		§\C{// array[dim], T size elements, fill elements with array}§
+	T * alloc_set( T ptr[], size_t dim, char fill );	§\C{// realloc array[dim], T size elements, fill bytes with value}§
+
+	T * alloc_align( size_t align );					§\C{// aligned variable, T size}§
+	T * alloc_align( size_t align, size_t dim );		§\C{// aligned array[dim], T size elements}§
+	T * alloc_align( T ptr[], size_t align );			§\C{// realloc new aligned array}§
+	T * alloc_align( T ptr[], size_t align, size_t dim ); §\C{// realloc new aligned array[dim]}§
+
+	T * alloc_align_set( size_t align, char fill );		§\C{// aligned variable, T size, fill bytes with value}§
+	T * alloc_align_set( size_t align, T fill );		§\C{// aligned variable, T size, fill with value}§
+	T * alloc_align_set( size_t align, size_t dim, char fill ); §\C{// aligned array[dim], T size elements, fill bytes with value}§
+	T * alloc_align_set( size_t align, size_t dim, T fill ); §\C{// aligned array[dim], T size elements, fill elements with value}§
+	T * alloc_align_set( size_t align, size_t dim, const T fill[] ); §\C{// aligned array[dim], T size elements, fill elements with array}§
+	T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill ); §\C{// realloc new aligned array[dim], fill new bytes with value}§
 
 	// §\CFA§ safe initialization/copy, i.e., implicit size specification
Index: amples/ArrayN.c
===================================================================
--- examples/ArrayN.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,23 +1,0 @@
-#include <fstream.hfa>
-
-// [unsigned, unsigned] offset_to_index(unsigned offset, unsigned sx, unsigned sy)
-// {
-//     return [offset / sx, offset % sy];
-// }
-
-forall(otype index_t)
-index_t offset_to_index(unsigned offset, index_t size) {
-    return [offset / size.0, offset % size.1];
-}
-
-int main(int argc, char* argv[]) {
-    unsigned x = 0, y = 0, i = 0;
-    unsigned sx = 4, sy = 4;
-
-    i = 6;
-    [x, y] = offset_to_index(6, [sx, sy]);
-
-    sout | x | ' ' | y;
-
-    return 0;
-}
Index: amples/Initialization.c
===================================================================
--- examples/Initialization.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,41 +1,0 @@
-// Cforall extensions
-
-int * x11 = 0, x12 = 0;
-int * x21 = 0, x22 = 0;
-
-[20] int y1, y2 = { 1, 2, 3 };
-
-// designators
-
-struct {
-	[int] w;
-} a = { .w : [2] };
-
-struct { int a[3], b; } w [] = { [0].a : {1}, [0].b : 3, [1].a[0] : 2 };
-
-struct {
-	int f1, f2, f3;
-	struct { int g1, g2, g3; } f4[4];
-} v7 = {
-  .f1 : 4,
-  f2 : 3,
-  .f4[2] : {
-	  .g1 : 3,
-	  g3 : 0,
-	},
-  .f4[3].g3 : 7,
-};
-
-struct point { int x; int z; struct {int y1, y2, y3;} y; int w;};
-struct quintet { int v, w, x, y, z;};
-
-int main() {
-	struct point p1 = { x : 3 };
-	struct point p2 = { 3, 4 };
-	struct point p3 = { .[x,z] : 5, y : { .[y3,y1] : 6, 17 } };
-	struct point p4 = { w : 5, 4 };
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// End: //
Index: amples/Initialization2.c
===================================================================
--- examples/Initialization2.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,15 +1,0 @@
-int a = 3;
-struct { int x; int y; } z = { 3, 7 };      /* OK */
-struct { int x; int y; } z1 = { .[x,y]:3 }; /* OK */
-struct { int x; int y; } z2 = { y:3, x:4 }; /* OK */
-struct { int x; struct { int y1; int y2; } y; } z3 = { x:3, y:{y1:4, y2:5} };  /* OK */
-struct { int x; struct { int y1; int y2; } y; } z3 = { y:{y2:9, y1:8}, x:7 };  /* OK */
-struct { int x; struct { int y1; int y2; } y; } z3 = { x:7, {y2:9, y1:8} };  /* OK */
-struct { int x; struct { int y1; int y2; } y; } z3 = { 3, {4, 5} };   /* OK */
-//struct { int x; struct { int y1; int y2; } } z3 = {4, {5,6}};
-//struct { int x; struct { int y1; int y2; } y; } z4 = { y:{4,5}, a:3 };
-//struct { int x; struct { int y1; int y2; } y; } z5 = { a:3, {4,5}};
-//int x[20] = { [10]: 4 };
-struct t { int a, b; };
-struct t x = { b:4, a:3 };
-struct { int x; int y; } z6= {5,6,4};  /* (should be an) error */
Index: amples/Makefile.example
===================================================================
--- examples/Makefile.example	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,42 +1,0 @@
-CFA ?= ../driver/cfa-cpp
-CFAOPT ?= -a
-OUTPUT ?= Output
-EXPECT ?= Expect
-OUTPUTDIR ?= ${OUTPUT}${CFAOPT}
-EXPECTDIR ?= ${EXPECT}${CFAOPT}
-EXAMPLES = ${wildcard *.c}
-OUTPUTS = ${addprefix ${OUTPUTDIR}/,${EXAMPLES:.c=.txt}}
-
-#.SILENT :
-
-all :
-	+for opt in -a -e -f -r -s -v ; do \
-	    make test CFAOPT=$${opt} ; \
-	done ; \
-	rm -f core
-
-test : ${OUTPUTS} ${OUTPUTDIR}/report
-
-${OUTPUTDIR}/%.txt : %.c ${CFA} Makefile
-	-${CFA} -n ${CFAOPT} $< > $@ 2>&1
-
-${OUTPUTDIR}/report : ${OUTPUTS} ${EXPECTDIR}
-	rm -f $@
-	echo "===== regression test using cfa-cpp flag ${CFAOPT} ====="
-	@for i in ${OUTPUTS} ; do \
-	     echo "---"`basename $$i`"---" | tee -a $@; \
-	     diff -B -w ${EXPECTDIR}/`basename $$i` $$i | tee -a $@; \
-	done
-
-${OUTPUTS} : | ${OUTPUTDIR}		# order only prerequisite
-
-${OUTPUTDIR} :
-	mkdir -p $@
-
-# remove the expected results directories to generate new ones from the current output
-
-${EXPECTDIR} : | ${OUTPUTS}		# new Expected results ?
-	cp -pr ${OUTPUTDIR} $@
-
-clean :
-	rm -rf ${OUTPUT}-* core
Index: amples/Members.c
===================================================================
--- examples/Members.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,72 +1,0 @@
-char ?=?( char*, char );
-int ?=?( int*, int );
-float ?=?( float*, float );
-forall( dtype DT ) DT * ?=?( DT**, DT* );
-forall(otype T) lvalue T *?( T* );
-char *__builtin_memcpy();
-
-void a( char );
-void b( int );
-void c( int* );
-void d( float* );
-
-struct a_struct {
-	int a;
-	char a;
-	float a;
-};
-
-union b_struct {
-	int *a;
-	char *a;
-	float *a;
-};
-
-void f() {
-	struct a_struct the_struct;
-	union b_struct the_struct;
-  
-	a( the_struct.a );
-	b( the_struct.a );
-	c( the_struct.a );
-	d( the_struct.a );
-}
-
-struct c_struct {
-	int;
-	char;
-	float;
-};
-
-union d_struct {
-	int*;
-	char*;
-	float*;
-};
-
-void g() {
-	unsigned short x;
-	struct c_struct x;
-	union d_struct x;
-  
-	a( x );	// the 'a' and 'b' calls resolve to the ushort
-	b( x );	// it's debatable whether this is good
-	c( x );
-	d( x );
-}
-
-// make sure that forward declarations work
-
-struct forward;
-
-struct forward *q;
-
-struct forward { int y; };
-
-void h() {
-	q->y;
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// End: //
Index: amples/Misc.c
===================================================================
--- examples/Misc.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,19 +1,0 @@
-// interesting corner cases
-
-int a;
-int b;
-float b;
-
-void g( int );
-void g( unsigned );
-
-void f( void ) {
-	g( (a, b) );
-	g( (a, a, b) );
-	g( sizeof a );
-	g( sizeof( int ) );
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// End: //
Index: amples/MiscError.c
===================================================================
--- examples/MiscError.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,16 +1,0 @@
-int a;
-int b;
-float b;
-
-void g( int );
-
-void f( void ) {
-	g( (b, a) );
-	g( (b, a, b) );
-	g( (a, b, b) );
-	sizeof b;
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// End: //
Index: amples/Rank2.c
===================================================================
--- examples/Rank2.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,20 +1,0 @@
-int ?=?( int *, int );
-forall(dtype DT) DT * ?=?( DT **, DT * );
-
-void a() {
-	forall( otype T ) void f( T );
-	void g( forall( otype U ) void p( U ) );
-	g( f );
-}
-
-void g() {
-	void h( int *null );
-	forall( otype T ) T id( T );
-	forall( dtype T ) T *0;
-	int 0;
-	h( id( id( id( 0 ) ) ) );
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// End: //
Index: amples/Tuple.c
===================================================================
--- examples/Tuple.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,73 +1,0 @@
-int f( int, int );
-int g( int, int, int );
-static
-[ int, int *, * int, int ] h( int a, int b, * int c, [] char d );
-
-struct inner {
-	int f2, f3;
-};
-
-struct outer {
-	int f1;
-	struct inner i;
-	double f4;
-} s, *sp;
-
-const volatile [ int, int ] t1;
-static const [ int, int ] t2;
-const static [ int, const int ] t3;
-
-[ int rc ] printf( * char fmt, ... );
-int printf( char *fmt, ... );
-
-[ short x, unsigned y ] f1( int w ) {
-// 	return [ y, x ] = [ x, y ] = [ w, 23 ];
-}
-
-[ [ int, char, long, int ] r ] g1() {
-	short int x, p;
-	unsigned int y;
-	[ int, int ] z;
-
-	[ x, y, z ] = [ p, f( 17, 18 ), 4, 3 ];
-//	[ x, y, z ] = ([short, unsigned int, [int, int]])([ p, f( 17, 18 ), 4, 3 ]);
-	r = [ x, y, z ];
-}
-
-[ int rc ] main( int argc, ** char argv ) {
-	int a, b, c, d;
-//	struct outer t = { .[ f1, f4 ] : [ 1, 7.0 ] };
-	f( [ 3,5 ] );
-	g( [ 3,5 ], 3 );
-	f( t1 );
-	g( t1, 3 );
-
-//	[ , , , ];						/* empty tuple */
-	[ 3, 5 ];
-	[ a, b ] = 3;
-//	[ a, b ] = [ 4.6 ];
-	[ a, b ] = 4.6;
-	[ a, b ] = [ c, d ] = [ 3, 5 ];
-//	[ a, b, [ c ] ] = [ 2, [ a, b ] ];
-	[ a, b, c ] = [ 2, [ a, b ] ];
-	[ a, b ] = 3 > 4 ? [ b, 6 ] : [ 7, 8 ];
-
-	t1 = [ a, b ];
-	t1 = t2 = [ a, b ];
-	[ a, b ] = [ c, d ] = d += c += 1;
-	[ a, b ] = [ c, d ] = t1;
-	[ a, b ] = t1 = [ c, d ];
-	[ a, b ] = t1 = t2 = [ c, d ];
-//	t1 = [ 3, 4 ] = [ 3, 4 ] = t1 = [ 3, 4 ];
-
-	s.[ f1, i.[ f2, f3 ], f4 ] = [ 11, 12, 13, 3.14159 ];
-//	s.[ f1, i.[ f2, f3 ], f4 ] = h( 3, 3, (* int)0, "abc" );
-//	[ a, , b, ] = h( 3, 3, 0, "abc" );			/* ignore some results */
-	sp->[ f4, f1 ] = sp->[ f1, f4 ];
-	printf( "expecting 3, 17, 23, 4; got %d, %d, %d, %d\n", s.[ f4, i.[ f3, f2 ], f1 ] );
-	rc = 0;
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// End: //
Index: amples/abstype.c
===================================================================
--- examples/abstype.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,43 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// abstype.c -- 
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Jun 14 14:27:48 2016
-// Update Count     : 9
-//
-
-otype T | { T x( T ); };
-
-T y( T t ) {
-	T t_instance;
-	return x( t );
-}
-
-forall( otype T ) lvalue T *?( T * );
-int ?++( int * );
-int ?=?( int *, int );
-forall( dtype DT ) DT * ?=?( DT **, DT * );
-
-otype U = int *;
-
-U x( U u ) {
-	U u_instance = u;
-	(*u)++;
-	return u;
-}
-
-int *break_abstraction( U u ) {
-	return u;
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa abstype.c" //
-// End: //
Index: amples/constructors.c
===================================================================
--- examples/constructors.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,61 +1,0 @@
-int fred() {
-    // initialize basic structure
-    struct S {
-	int i, j, k;
-    };
-    void ?{}( S *s ) { s->i = 1, s->k = 2; }		// default constructor
-    void ?{}( S *s, int i, int k ) { s->i = i, s->k = k; } // 2 parameter constructor
-    void ?{}( S *s, S c ) { *s = c; }			// copy constructor
-    void ^?{}( S *s ) { s->i = 0, s->k = 0; }		// default destructor
-    void ^?{}( S *s, int i ) { s->i = i, s->k = i; }	// 1 parameter destructor
-    {
-	S s1;			// default constructor
-	S s2 = { 3, 7 };	// 2 parameter constructor
-	S s3 @= { .k:3, .i:7 };	// 2 parameter C initialization
-	?{}( &s3, 2, 5 );	// explicit 2 parameter constructor
-	^?{}( &s1 );		// explicit call to default destructor
-    } // implicit call to default destructor for s2, explicit call s1, no call for s3
-    S s4 @= {};			// no default construction
-    (&s4){ 2, 5 };		// explicit 2 parameter constructor
-    ^s4{ 3 };			// explicit call to 1 parameter destructor
-
-    // initialize pointer to a basic structure
-
-    void ?{}( S **s ) { *s = malloc(); (*s)->i = 1, (*s)->k = 2; } // default constructor
-    void ?{}( S **s, int i, int k ) { *s = malloc(); (*s)->i = i, (*s)->k = k; } // 2 parameter constructor
-    void ^?{}( S **s ) { (*s)->i = 0, (*s)->k = 0; free( *s ); *s = 0; } // default destructor
-    {
-	S *ps1;			// default constructor
-	S *ps2 = { 3, 7 };	// 2 parameter constructor
-	S *ps3 @= 0;		// C initialization
-	S *ps4 @= {};		// no default construction
-    } // implicit call to default destructor for ps2 and ps1, checks ordering of explicit destructor calls
-
-    ?{}( &ps3, 2, 5 );		// explicit 2 parameter constructor
-    (&ps4){ 2, 5 };		// explicit 2 parameter constructor
-    
-    ^?{}( &ps3 );		// explicit call to default destructor
-    ^ps4{};			// explicit call to default destructor
-
-    // initialize complex structure
-
-    struct T {
-	struct S s;
-    };
-
-    void ?{}( T *t ) {}					// default constructor => implicitly call constructor for field s
-    void ?{}( T *t, int i, int k ) { (&t->s){ i, k }; }	// 2 parameter constructor => explicitly call constructor for field s
-    void ?{}( T *t, S c ) { (&t->s){ c }; }		// 1 parameter constructor => explicitly call copy constructor for field s
-    void ^?{}( T *s, int i ) {}				// destructor => implicitly call destructor for field s
-    {
-	S s;			// default constructor
-	T t1;			// default constructor
-	T t2 = { s };		// 1 parameter constructor
-	^?{}( &t1 );		// explicit call to default destructor => implicit call to t1.s's destructor
-    } // implicit call to default destructor for t2 and implicit call for s;
-    T t3;			// default constructor
-    T t4 @= { { 1, 3 } };	// C initialization
-    (&t4){ 2, 5 };		// explicit 2 parameter constructor
-
-    T *pt = malloc(){ 3, 4 };	// common usage
-} // implicit call to default destructor for t3
Index: amples/forward.c
===================================================================
--- examples/forward.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,29 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// forward.c -- 
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Wed May 27 18:11:57 2015
-// Update Count     : 2
-//
-
-forall(type T) lvalue T *?( T* );
-int ?=?( int*, int );
-
-struct q { int y; };
-struct q *x;
-
-void f() {
-	*x;
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa forward.c" //
-// End: //
Index: amples/gc_no_raii/.gitignore
===================================================================
--- examples/gc_no_raii/.gitignore	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,4 +1,0 @@
-.tags
-.tags*
-gc-test
-build/
Index: amples/gc_no_raii/bug-repro/assert.c
===================================================================
--- examples/gc_no_raii/bug-repro/assert.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,16 +1,0 @@
-struct gc_object_header{
- int size;
-};
-
-struct gc_state;
-
-inline _Bool needs_collect(gc_state* state) {
- return state->used_space > 0;
-}
-
-struct gc_object_header* gc_get_object_for_ref();
-
-inline gc_object_header* gc_get_object_ptr(void* ptr)
-{
- return 0;
-}
Index: amples/gc_no_raii/bug-repro/blockers/explicit_cast.c
===================================================================
--- examples/gc_no_raii/bug-repro/blockers/explicit_cast.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,22 +1,0 @@
-
-#include <stdbool.h>
-#include <stdint.h>
-
-struct gcpointer_t
-{
-	intptr_t ptr;
-	struct gcpointer_t* next;
-};
-
-forall(otype T)
-struct gcpointer
-{
-	gcpointer_t internal;
-};
-
-forall(otype T)
-static inline gcpointer(T) gcmalloc()
-{
-    gcpointer(T) test;
-    return test;
-}
Index: amples/gc_no_raii/bug-repro/blockers/file_scope.c
===================================================================
--- examples/gc_no_raii/bug-repro/blockers/file_scope.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,18 +1,0 @@
-
-#include <stdbool.h>
-#include <stdlib.hfa>
-
-#define POOL_SIZE_EXP 24
-#define POOL_SIZE_BYTES 0x1 << POOL_SIZE_EXP
-#define POOL_PTR_MASK ~(POOL_SIZE_BYTES - 1)
-
-#define CARDS_SIZE_EXP 12
-#define CARDS_SIZE_BYTES 0x1 << CARDS_SIZE_EXP
-#define CARDS_OFFSET_MASK (~(CARDS_SIZE_BYTES - 1)) & (POOL_SIZE_BYTES - 1)
-#define CARDS_COUNT POOL_SIZE_BYTES / CARDS_SIZE_BYTES
-
-struct card_table_t
-{
-	size_t count;
-	void* cards_start[CARDS_COUNT];
-};
Index: amples/gc_no_raii/bug-repro/blockers/recursive_realloc.c
===================================================================
--- examples/gc_no_raii/bug-repro/blockers/recursive_realloc.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,21 +1,0 @@
-
-#include <stdbool.h>
-#include <stdlib.hfa>
-
-trait allocator_c(otype T, otype allocator_t)
-{
-	void realloc(allocator_t* const, size_t);
-};
-
-forall(otype T)
-struct heap_allocator
-{
-	T* storage;
-	size_t capacity;
-};
-
-forall(otype T)
-inline void realloc(heap_allocator(T) *const this, size_t size)
-{
-	this->storage = (T*)realloc((void*)this->storage, this->capacity);
-}
Index: amples/gc_no_raii/bug-repro/crash.c
===================================================================
--- examples/gc_no_raii/bug-repro/crash.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,6 +1,0 @@
-
-void f()
-{
- void* obj;
- (void)obj;
-}
Index: amples/gc_no_raii/bug-repro/deref.c
===================================================================
--- examples/gc_no_raii/bug-repro/deref.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,19 +1,0 @@
-    forall(otype T)
-    struct wrap
-    {
-        T val;
-    };
-
-    forall(otype T)
-    T *? (wrap(T) rhs)
-    {
-        return rhs.val;
-    }
-
-    int main(int argc, char const *argv[])
-    {
-        wrap(int) test;
-        test.val = 3;
-        int i = *test;
-        return 0;
-    }
Index: amples/gc_no_raii/bug-repro/field.c
===================================================================
--- examples/gc_no_raii/bug-repro/field.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,130 +1,0 @@
-extern "C" {
-#include <stdbool.h>
-#include <stdint.h>
-}
-
-#include <stdlib.hfa>
-
-//------------------------------------------------------------------------------
-//Declaration
-trait allocator_c(otype T, otype allocator_t)
-{
-	void ctor(allocator_t* const);
-	void dtor(allocator_t* const);
-	void realloc(allocator_t* const, size_t);
-	T* data(allocator_t* const);
-};
-
-forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
-struct vector
-{
-	allocator_t storage;
-	size_t size;
-};
-
-int global = 3;
-
-struct card_table_t
-{
-	size_t count;
-	void* cards_start[100];
-};
-
-static inline void ctor(card_table_t* const this)
-{
-	this->count = 0;
-}
-
-struct gc_memory_pool
-{
-	struct memory_pool* mirror;
-	struct memory_pool* next;
-
-	uint8_t type_code;
-
-	card_table_t* cards;
-
-	uint8_t* end_p;
-	uint8_t* free_p;
-	uint8_t start_p[1];
-};
-
-void ctor(	gc_memory_pool *const this,
-		size_t size,
-		gc_memory_pool* next,
-		gc_memory_pool* mirror,
-		uint8_t type
-	);
-
-void dtor(gc_memory_pool *const this);
-
-struct gc_pool_object_iterator
-{
-	struct gc_object_header* object;
-	#ifndef NDEBUG
-		intptr_t lower_limit;
-		intptr_t upper_limit;
-	#endif
-};
-
-void ctor(
-		gc_pool_object_iterator* const this,
-		void* start_object
-		#ifndef NDEBUG
-			, intptr_t pool_start
-			, intptr_t pool_end
-		#endif
-	);
-
-bool ?!=?(const gc_pool_object_iterator lhs, const gc_pool_object_iterator rhs);
-
-gc_pool_object_iterator begin(gc_memory_pool* const this);
-gc_pool_object_iterator end(gc_memory_pool* const);
-
-gc_pool_object_iterator* ++?(gc_pool_object_iterator* it);
-
-const void* *?(const gc_pool_object_iterator it);
-void* *?(gc_pool_object_iterator it);
-
-static inline bool gc_pool_is_from_space(const gc_memory_pool* pool)
-{
-	return false;
-}
-
-void gc_reset_pool(gc_memory_pool* const pool);
-
-static inline size_t gc_pool_size_used(const gc_memory_pool* pool)
-{
-	return pool->free_p - pool->start_p;
-}
-
-static inline size_t gc_pool_size_total(const gc_memory_pool* pool)
-{
-	return pool->end_p - pool->start_p;
-}
-
-static inline size_t gc_pool_size_left(const gc_memory_pool* pool)
-{
-	return pool->end_p - pool->free_p;
-}
-
-void* gc_pool_allocate(gc_memory_pool* const pool, size_t size, bool zero);
-
-gc_pool_object_iterator gc_pool_iterator_for(gc_memory_pool* const pool, void* member);
-
-void ctor(gc_memory_pool *const this, size_t size, gc_memory_pool* next, gc_memory_pool* mirror, uint8_t type)
-{
-	this->mirror = mirror;
-	this->next = next;
-	this->type_code = type;
-
-	this->cards = malloc();
-	ctor(this->cards);
-
-	this->end_p = ((uint8_t*)this) + size;
-	this->free_p = this->start_p;
-
-	// check(gc_pool_of(this) == this);
-	// check(this->cards);
-	// gc_reset_pool(this);
-}
Index: amples/gc_no_raii/bug-repro/find.c
===================================================================
--- examples/gc_no_raii/bug-repro/find.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,10 +1,0 @@
-
-void main()
-{
-	int a[3] = {1, 2, 3};
-	int* begin = a;
-	int *const end = begin + 3;
-
-	int* f = find(begin, &end, 2);
-
-}
Index: amples/gc_no_raii/bug-repro/inline.c
===================================================================
--- examples/gc_no_raii/bug-repro/inline.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,8 +1,0 @@
-inline _Bool test(int t){
-	return t == 3;
-}
-
-int main()
-{
-	test(6);
-}
Index: amples/gc_no_raii/bug-repro/malloc.c
===================================================================
--- examples/gc_no_raii/bug-repro/malloc.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,33 +1,0 @@
-forall(otype T)
-struct wrapper
-{
-    T val;
-};
-
-forall(otype T)
-void ctor(wrapper(T)* this)
-{
-    this->val = 0;
-}
-
-forall(otype T)
-wrapper(T) gcmalloc()
-{
-    wrapper(T) w;
-    ctor(&w);
-    return w;
-}
-
-forall(otype T)
-wrapper(T)* ?=? (wrapper(T)* lhs, wrapper(T)* rhs)
-{
-    lhs->val = rhs->val;
-    return lhs;
-}
-
-int main(int argc, char *argv[])
-{
-    wrapper(int) test;
-    test = gcmalloc();
-    return 0;
-}
Index: amples/gc_no_raii/bug-repro/not_equal.c
===================================================================
--- examples/gc_no_raii/bug-repro/not_equal.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,10 +1,0 @@
-
-struct pointer_t
-{
-	void* p;
-};
-
-_Bool operator_not_equal_p(pointer_t* lhs, pointer_t* rhs)
-{
-	return lhs->p == rhs->p;
-}
Index: amples/gc_no_raii/bug-repro/oddtype.c
===================================================================
--- examples/gc_no_raii/bug-repro/oddtype.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,13 +1,0 @@
-forall(dtype T)
-struct wrap {
-	int i;
-};
-
-forall(otype T) void ?{}(wrap(T)* this) {}
-forall(otype T) void ?=?(wrap(T)* this) {}
-forall(otype T) void ^?{}(wrap(T)* this) {}
-
-struct List_t {
-	int val;
-	wrap(List_t) next;
-};
Index: amples/gc_no_raii/bug-repro/push_back.c
===================================================================
--- examples/gc_no_raii/bug-repro/push_back.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,16 +1,0 @@
-#include <stddef.h>
-#include <stdint.h>
-
-#include "push_back.h"
-
-typedef vector(intptr_t*, heap_allocator(intptr_t*)) worklist_t;
-
-void test()
-{
-	worklist_t w;
-	if(!empty(&w))
-	{
-		intptr_t zero = 0;
-		push_back(&w, &zero);
-	}
-}
Index: amples/gc_no_raii/bug-repro/push_back.h
===================================================================
--- examples/gc_no_raii/bug-repro/push_back.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,72 +1,0 @@
-//------------------------------------------------------------------------------
-//Declaration
-trait allocator_c(otype T, otype allocator_t) {
-	void ctor(allocator_t* const);
-	void dtor(allocator_t* const);
-	void realloc(allocator_t* const, size_t);
-	T* data(allocator_t* const);
-};
-
-forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
-struct vector
-{
-	allocator_t storage;
-	size_t size;
-};
-
-//------------------------------------------------------------------------------
-//Initialization
-forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
-void vector_ctor(vector(T, allocator_t) *const this);
-
-forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
-void dtor(vector(T, allocator_t) *const this);
-
-//------------------------------------------------------------------------------
-//Allocator
-forall(otype T)
-struct heap_allocator
-{
-	T* storage;
-	size_t capacity;
-};
-
-forall(otype T)
-void ctor(heap_allocator(T) *const this);
-
-forall(otype T)
-void dtor(heap_allocator(T) *const this);
-
-forall(otype T)
-void realloc(heap_allocator(T) *const this, size_t size);
-
-forall(otype T)
-inline T* data(heap_allocator(T) *const this)
-{
-	return this->storage;
-}
-
-//------------------------------------------------------------------------------
-//Capacity
-forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
-inline bool empty(vector(T, allocator_t) *const this)
-{
-	return this->size == 0;
-}
-
-forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
-inline bool size(vector(T, allocator_t) *const this)
-{
-	return this->size;
-}
-
-forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
-inline void reserve(vector(T, allocator_t) *const this, size_t size)
-{
-	realloc(&this->storage, this->size+1);
-}
-
-//------------------------------------------------------------------------------
-//Modifiers
-forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
-void push_back(vector(T, allocator_t) *const this, T value);
Index: amples/gc_no_raii/bug-repro/realloc.c
===================================================================
--- examples/gc_no_raii/bug-repro/realloc.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,13 +1,0 @@
-void* realloc(void*, unsigned long int);
-
-forall(otype T)
-struct wrap
-{
-	T* val;
-};
-
-forall(otype T)
-static inline void realloc(wrap(T) *const this, unsigned long int size)
-{
-	this->val = (T*)realloc((void*)this->val, size);
-}
Index: amples/gc_no_raii/bug-repro/return.c
===================================================================
--- examples/gc_no_raii/bug-repro/return.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,27 +1,0 @@
-forall(otype T)
-struct wrapper
-{
-	T value;
-};
-
-forall(otype T)
-wrapper(T) create()
-{
-	wrapper(T) test;
-	return test;
-}
-
-forall(otype T)
-wrapper(T)* ?=?(wrapper(T)* lhs, wrapper(T)* rhs)
-{
-	lhs->value = rhs->value;
-	return lhs;
-}
-
-
-int main(int argc, char const *argv[])
-{
-	wrapper(int) test;
-	test = create();
-	return 0;
-}
Index: amples/gc_no_raii/bug-repro/return_template.c
===================================================================
--- examples/gc_no_raii/bug-repro/return_template.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,17 +1,0 @@
-forall(otype T)
-struct wrap
-{
-	T value;
-};
-
-forall(otype T) void ?{}(wrap(T)* this);
-forall(otype T) void ?{}(wrap(T)* this, wrap(T)* rhs);
-forall(otype T) void ^?{}(wrap(T)* this);
-forall(otype T) void ?=?(wrap(T)* this, wrap(T)* rhs);
-
-forall(otype T)
-wrap(T) test()
-{
-	wrap(T) tester;
-	return tester;
-}
Index: amples/gc_no_raii/bug-repro/slow_malloc.c
===================================================================
--- examples/gc_no_raii/bug-repro/slow_malloc.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,20 +1,0 @@
-#include <stdlib.hfa>
-
-forall(otype T)
-struct heap_allocator
-{
-	T* storage;
-	size_t capacity;
-};
-
-struct card_table_t
-{
-	unsigned long int count;
-	void* cards_start[1000];
-};
-
-int main(int argc, char const *argv[])
-{
-	card_table_t* t = (card_table_t*)malloc(sizeof(card_table_t));
-	return 0;
-}
Index: amples/gc_no_raii/bug-repro/static_const_local.c
===================================================================
--- examples/gc_no_raii/bug-repro/static_const_local.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,6 +1,0 @@
-typedef unsigned long long size_t;
-
-int main(int argc, char const *argv[]) {
-	static const size_t GROWTH_RATE = 2;
-	return 0;
-}
Index: amples/gc_no_raii/bug-repro/test-assert.cpp
===================================================================
--- examples/gc_no_raii/bug-repro/test-assert.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,9 +1,0 @@
-#include <cassert>
-#include "../src/tools/checks.h"
-
-int main(int argc, char* argv[])
-{
-	//check(false);
-	assert(false);
-	return 0;
-}
Index: amples/gc_no_raii/bug-repro/void_pointer.c
===================================================================
--- examples/gc_no_raii/bug-repro/void_pointer.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,18 +1,0 @@
-#include <stddef.h>
-#include <stdint.h>
-
-inline void* test(intptr_t address)
-{
-	return (void*)address;
-}
-
-//inline void* test2(void* address)
-//{
-//	return address & 0xFF;
-//}
-
-// inline int test()
-// {
-// 	void* d = 0;
-// 	return (int)d;
-// }
Index: amples/gc_no_raii/bug-repro/while.c
===================================================================
--- examples/gc_no_raii/bug-repro/while.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,14 +1,0 @@
-extern void* get_member();
-extern void* get_next();
-
-void main()
-{
-	void* member = get_member();
-	void* start_obj = get_next();
-
-	do
-	{
-		start_obj = (void*) ( ((unsigned long int)start_obj) + sizeof(void*) );
-	}
-	while(start_obj > member || !(start_obj) );
-}
Index: amples/gc_no_raii/bug-repro/zero.c
===================================================================
--- examples/gc_no_raii/bug-repro/zero.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,25 +1,0 @@
-forall(otype T)
-struct wrap
-{
-    T val;
-};
-
-forall(otype T)
-int ?==? (wrap(T) lhs, wrap(T) rhs)
-{
-    return 0;
-}
-
-/*
-struct wrap(int) 0;
-/*/
-forall(otype T)
-struct wrap(T) 0;
-//*/
-
-int main(int argc, char const *argv[])
-{
-    wrap(int) test;
-    if(test == 0) { return 1; }
-    return 0;
-}
Index: amples/gc_no_raii/pool-alloc/allocate-malign.c
===================================================================
--- examples/gc_no_raii/pool-alloc/allocate-malign.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,30 +1,0 @@
-/*
- * Allocation functions (posix_malign)
- *
- * Copyright (c) 2014, 2015 Gregor Richards
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-static void *allocPool(size_t size, int mustSucceed)
-{
-    void *ret;
-    if ((errno = posix_memalign(&ret, size, size))) {
-        if (mustSucceed) {
-            perror("posix_memalign");
-            abort();
-        }
-        return NULL;
-    }
-    return ret;
-}
Index: amples/gc_no_raii/pool-alloc/allocate-malloc.c
===================================================================
--- examples/gc_no_raii/pool-alloc/allocate-malloc.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,53 +1,0 @@
-/*
- * Allocation functions (malloc)
- *
- * Copyright (c) 2014, 2015 Gregor Richards
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-static void *allocPool(int mustSucceed)
-{
-    static ggc_mutex_t poolLock = GGC_MUTEX_INITIALIZER;
-    static unsigned char *space = NULL, *spaceEnd = NULL;
-    void *ret;
-
-    /* do we already have some available space? */
-    ggc_mutex_lock_raw(&poolLock);
-    if (!space || space + GGGGC_POOL_BYTES > spaceEnd) {
-        ggc_size_t i;
-
-        /* since we can't pre-align, align by getting as much as we can manage */
-        for (i = 16; i >= 2; i /= 2) {
-            space = malloc(GGGGC_POOL_BYTES * i);
-            if (space) break;
-        }
-        if (!space) {
-            if (mustSucceed) {
-                perror("malloc");
-                abort();
-            }
-            return NULL;
-        }
-        spaceEnd = space + GGGGC_POOL_BYTES * i;
-
-        /* align it */
-        space = (unsigned char *) GGGGC_POOL_OF(space + GGGGC_POOL_BYTES - 1);
-    }
-
-    ret = (struct GGGGC_Pool *) space;
-    space += GGGGC_POOL_BYTES;
-    ggc_mutex_unlock(&poolLock);
-
-    return ret;
-}
Index: amples/gc_no_raii/pool-alloc/allocate-mmap.c
===================================================================
--- examples/gc_no_raii/pool-alloc/allocate-mmap.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,44 +1,0 @@
-/*
- * Allocation functions (mmap)
- *
- * Copyright (c) 2014, 2015 Gregor Richards
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-static void *allocPool(int mustSucceed)
-{
-    unsigned char *space, *aspace;
-    struct GGGGC_Pool *ret;
-
-    /* allocate enough space that we can align it later */
-    space = mmap(NULL, GGGGC_POOL_BYTES*2, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
-    if (space == NULL) {
-        if (mustSucceed) {
-            perror("mmap");
-            abort();
-        }
-        return NULL;
-    }
-
-    /* align it */
-    ret = GGGGC_POOL_OF(space + GGGGC_POOL_BYTES - 1);
-    aspace = (unsigned char *) ret;
-
-    /* free unused space */
-    if (aspace > space)
-        munmap(space, aspace - space);
-    munmap(aspace + GGGGC_POOL_BYTES, space + GGGGC_POOL_BYTES - aspace);
-
-    return ret;
-}
Index: amples/gc_no_raii/pool-alloc/allocate-win-valloc.c
===================================================================
--- examples/gc_no_raii/pool-alloc/allocate-win-valloc.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,45 +1,0 @@
-/*
- * Allocation functions (mmap)
- *
- * Copyright (c) 2014, 2015 Gregor Richards
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-static void *allocPool(int mustSucceed)
-{
-    unsigned char *space, *aspace;
-    struct GGGGC_Pool *ret;
-
-    /* allocate enough space that we can align it later */
-    space = (unsigned char *)
-        VirtualAlloc(NULL, GGGGC_POOL_BYTES*2, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
-    if (space == NULL) {
-        if (mustSucceed) {
-            perror("mmap");
-            abort();
-        }
-        return NULL;
-    }
-
-    /* align it */
-    ret = GGGGC_POOL_OF(space + GGGGC_POOL_BYTES - 1);
-    aspace = (unsigned char *) ret;
-
-    /* free unused space */
-    if (aspace > space)
-        VirtualFree(space, aspace - space, MEM_RELEASE);
-    VirtualFree(aspace + GGGGC_POOL_BYTES, space + GGGGC_POOL_BYTES - aspace, MEM_RELEASE);
-
-    return ret;
-}
Index: amples/gc_no_raii/premake4.lua
===================================================================
--- examples/gc_no_raii/premake4.lua	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,82 +1,0 @@
-#!lua
-
--- Additional Linux libs: "X11", "Xxf86vm", "Xi", "Xrandr", "stdc++"
-
-includeDirList = {
-	"src/",
-	"../"
-}
-
-libDirectories = {
-
-}
-
-
-if os.get() == "linux" then
-    linkLibs = {
-
-    }
-end
-
--- Build Options:
-buildOptions = {
-      "-g",
-	"-DTEST_FILE=${test}",
-      "\n  test = gctest",
-	"\n  CC = cfa\n  CXX = cfa", }
-
-solution "GC-no-RAII"
-	configurations  { "debug", "release",
-				"cproc-debug", "cproc-release",
-				"cfa-debug", "cfa-release" }
-
-	project "gc-test"
-		kind "ConsoleApp"
-		language "C"
-		location "build"
-		objdir "build"
-		targetdir "."
-		buildoptions (buildOptions)
-		defines {	"bool=_Bool",
-				"\"true=((_Bool)(const signed int)1)\"",
-				"\"false=((_Bool)(const signed int)0)\"",
-				"_GNU_SOURCE",
-				"__cforall"
-			}
-		libdirs (libDirectories)
-		links (linkLibs)
-		linkoptions (linkOptionList)
-		includedirs (includeDirList)
-		files { "src/**.c", "containers/**.c" }
-
-	configuration "debug"
-		defines { "DEBUG" }
-		flags { "Symbols" }
-
-	configuration "release"
-		defines { "NDEBUG" }
-		flags { "Optimize" }
-
-	configuration "cproc-debug"
-		buildoptions ({"-E"})
-		linkoptions ({"-E"})
-	      defines { "DEBUG" }
-	      flags { "Symbols" }
-
-	configuration "cproc-release"
-		buildoptions ({"-E"})
-		linkoptions ({"-E"})
-	      defines { "DEBUG" }
-	      flags { "Symbols" }
-
-	configuration "cfa-debug"
-		linkoptions ({"-E"})
-		files { "build/cproc-debug/*.o" }
-	      defines { "DEBUG" }
-	      flags { "Symbols" }
-
-	configuration "cfa-release"
-		linkoptions ({"-E"})
-		files { "build/cproc-debug/*.o" }
-	      defines { "DEBUG" }
-	      flags { "Symbols" }
Index: amples/gc_no_raii/src/allocate-pool.c
===================================================================
--- examples/gc_no_raii/src/allocate-pool.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,64 +1,0 @@
-#define _BSD_SOURCE /* for MAP_ANON */
-#define _DARWIN_C_SOURCE /* for MAP_ANON on OS X */
-
-#ifdef __cforall
-extern "C"{
-#else
-#error missing cfa define
-#endif
-
-/* for standards info */
-#if defined(unix) || defined(__unix) || defined(__unix__) || \
-    (defined(__APPLE__) && defined(__MACH__))
-#include <unistd.h>
-#endif
-
-#if defined(_WIN32)
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN
-#endif
-#include <windows.h>
-#endif
-
-#include <errno.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-
-#if _POSIX_VERSION
-#include <sys/mman.h>
-#endif
-
-/* figure out which allocator to use */
-#if defined(GGGGC_USE_MALLOC)
-#define GGGGC_ALLOCATOR_MALLOC 1
-#include "../pool-alloc/allocate-malloc.c"
-
-#elif _POSIX_ADVISORY_INFO >= 200112L
-#define GGGGC_ALLOCATOR_POSIX_MEMALIGN 1
-#include "../pool-alloc/allocate-malign.c"
-
-#elif defined(MAP_ANON)
-#define GGGGC_ALLOCATOR_MMAP 1
-#include "../pool-alloc/allocate-mmap.c"
-
-#elif defined(_WIN32)
-#define GGGGC_ALLOCATOR_VIRTUALALLOC 1
-#include "../pool-alloc/allocate-win-valloc.c"
-
-#else
-#warning GGGGC: No allocator available other than malloc!
-#define GGGGC_ALLOCATOR_MALLOC 1
-#include "../pool-alloc/allocate-malloc.c"
-
-#endif
-
-void* pal_allocPool(size_t size, int mustSucceed)
-{
-      return allocPool(size, mustSucceed);
-}
-
-#ifdef __cforall
-}
-#endif
Index: amples/gc_no_raii/src/allocate-pool.h
===================================================================
--- examples/gc_no_raii/src/allocate-pool.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,14 +1,0 @@
-#ifndef _GGGGC_ALlOCATE_POOL_H_
-#define _GGGGC_ALlOCATE_POOL_H_
-
-#ifdef __cforall
-extern "C" {
-#endif
-
-void* pal_allocPool(size_t size, int mustSucceed);
-
-#ifdef __cforall
-}
-#endif
-
-#endif
Index: amples/gc_no_raii/src/gc.h
===================================================================
--- examples/gc_no_raii/src/gc.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,21 +1,0 @@
-#pragma once
-
-#include "gcpointers.h"
-#include "internal/collector.h"
-
-// forall(otype T)
-// static inline gcpointer(T) gcmalloc()
-// {
-//     gcpointer(T) ptr = { gc_allocate(sizeof(T)) };
-//     ptr{};
-//     gc_conditional_collect();
-//     return ptr;
-// }
-
-forall(otype T)
-static inline void gcmalloc(gcpointer(T)* ptr)
-{
-	ptr { gc_allocate(sizeof(T)) };
-	get(ptr) {};
-      gc_conditional_collect();
-}
Index: amples/gc_no_raii/src/gcpointers.c
===================================================================
--- examples/gc_no_raii/src/gcpointers.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,147 +1,0 @@
-#include "gcpointers.h"
-
-// #include "gc.h"
-#include "internal/collector.h"
-#include "internal/object_header.h"
-#include "internal/state.h"
-
-void register_ptr(gcpointer_t* this)
-{
-	if(gcpointer_null(this)) return;
-
-	if(gc_is_managed(this))
-	{
-		gc_object_header* obj = gc_get_object_for_ref(gc_get_state(), (void*)this);
-		check(obj);
-		check(is_valid(obj));
-		check(gc_is_managed(this) == gc_is_managed(obj->type_chain) || !obj->type_chain);
-		this->next = obj->type_chain;
-		obj->type_chain = this;
-		check(is_valid(obj));
-	}
-	else
-	{
-		gc_object_header* obj = gc_get_object_ptr((void*)this->ptr);
-		check(obj);
-		check(is_valid(obj));
-		check(!obj->root_chain || this->ptr == obj->root_chain->ptr);
-		check(!obj->root_chain || gc_is_managed(this) == gc_is_managed(obj->root_chain));
-		this->next = obj->root_chain;
-		obj->root_chain = this;
-		check(is_valid(obj));
-	}
-}
-
-void unregister_ptr(gcpointer_t* this)
-{
-	if(gcpointer_null(this)) return;
-
-	gcpointer_t** prev_next_ptr = gc_find_previous_ref(this);
-	check((*prev_next_ptr) == this);
-
-	(*prev_next_ptr) = this->next;
-}
-
-void ?{}(gcpointer_t* this)
-{
-	this->ptr = (intptr_t)NULL;
-	this->next = NULL;
-}
-
-void ?{}(gcpointer_t* this, void* address)
-{
-	this->ptr = (intptr_t)address;
-	this->next = NULL;
-
-	register_ptr(this);
-}
-
-void ?{}(gcpointer_t* this, gcpointer_t other)
-{
-	this->ptr = other.ptr;
-	this->next = NULL;
-
-	register_ptr(this);
-}
-
-void ^?{}(gcpointer_t* this)
-{
-	unregister_ptr(this);
-}
-
-gcpointer_t ?=?(gcpointer_t* this, gcpointer_t rhs)
-{
-	unregister_ptr(this);
-	this->ptr = rhs.ptr;
-	register_ptr(this);
-
-	return *this;
-}
-
-//Logical operators
-bool gcpointer_equal(const gcpointer_t* this, const gcpointer_t* rhs)
-{
-	return this->ptr == rhs->ptr;
-}
-
-bool gcpointer_not_equal(const gcpointer_t* this, const gcpointer_t* rhs)
-{
-	return this->ptr != rhs->ptr;
-}
-
-bool gcpointer_null(const gcpointer_t* this)
-{
-	return this->ptr == (intptr_t)NULL;
-}
-
-#ifndef NDEBUG
-	bool is_valid(const gcpointer_t* this) {
-		if(gcpointer_null(this)) return true;
-
-		gc_object_header* obj = gc_get_object_ptr((void*)this->ptr);
-		check(obj);
-		check(is_valid(obj));
-		check(!obj->root_chain || this->ptr == obj->root_chain->ptr);
-
-		if( !gc_is_managed(this))
-		{
-			check( !(this->next) || this->ptr == this->next->ptr );
-		}
-
-		return true;
-	}
-#endif
-
-forall(otype T) void ?{}(gcpointer(T)* this) {
-	(&this->internal) {};
-}
-
-forall(otype T) void ?{}(gcpointer(T)* this, void* address) {
-	(&this->internal) { address };
-}
-
-forall(otype T) void ?{}(gcpointer(T)* this, gcpointer(T) other) {
-	(&this->internal) { other.internal };
-}
-
-forall(otype T) void ^?{}(gcpointer(T)* this) {
-	^?{}(&this->internal);
-}
-
-forall(otype T) gcpointer(T) ?=?(gcpointer(T)* this, gcpointer(T) rhs) {
-	this->internal = rhs.internal;
-	return *this;
-}
-//
-// forall(otype T) T *?(gcpointer(T) this);
-
-forall(otype T) T* get(gcpointer(T)* this) {
-	return (T*)this->internal.ptr;
-}
-//
-// //Logical operators
-forall(otype T) int ?!=?(gcpointer(T) this, int zero) {
-	return this.internal.ptr != 0;
-}
-// forall(otype T) int ?!=?(gcpointer(T) this, gcpointer(T) rhs);
-// forall(otype T) int ?==?(gcpointer(T) this, gcpointer(T) rhs);
Index: amples/gc_no_raii/src/gcpointers.h
===================================================================
--- examples/gc_no_raii/src/gcpointers.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,51 +1,0 @@
-#pragma once
-
-#include <stdbool.h>
-#include <stdint.h>
-
-forall(dtype T)
-struct gcpointer;
-
-struct gcpointer_t
-{
-	intptr_t ptr;
-	struct gcpointer_t* next;
-};
-
-void ?{}(gcpointer_t* this);
-void ?{}(gcpointer_t* this, void* address);
-void ?{}(gcpointer_t* this, gcpointer_t other);
-void ^?{}(gcpointer_t* this);
-gcpointer_t ?=?(gcpointer_t* this, gcpointer_t rhs);
-
-//Logical operators
-bool gcpointer_equal(gcpointer_t* this, gcpointer_t* rhs);
-bool gcpointer_not_equal(gcpointer_t* this, gcpointer_t* rhs);
-bool gcpointer_null(const gcpointer_t* this);
-
-
-#ifndef NDEBUG
-	bool is_valid(const gcpointer_t* this);
-#endif
-
-forall(dtype T)
-struct gcpointer
-{
-	gcpointer_t internal;
-};
-
-//
-forall(otype T) void ?{}(gcpointer(T)* this);
-forall(otype T) void ?{}(gcpointer(T)* this, void* address);
-forall(otype T) void ?{}(gcpointer(T)* this, gcpointer(T) other);
-forall(otype T) void ^?{}(gcpointer(T)* this);
-forall(otype T) gcpointer(T) ?=?(gcpointer(T)* this, gcpointer(T) rhs);
-
-
-// forall(otype T) T *?(gcpointer(T) this);
-forall(otype T) T* get(gcpointer(T)* this);
-
-//Logical operators
-forall(otype T) int ?!=?(gcpointer(T) this, int zero);
-forall(otype T) int ?!=?(gcpointer(T) this, gcpointer(T) rhs);
-forall(otype T) int ?==?(gcpointer(T) this, gcpointer(T) rhs);
Index: amples/gc_no_raii/src/internal/card_table.h
===================================================================
--- examples/gc_no_raii/src/internal/card_table.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,62 +1,0 @@
-#pragma once
-
-#include "globals.h"
-#include "tools.h"
-
-static inline size_t card_of(void* address)
-{
-	size_t card = ( ((intptr_t)address) & CARDS_OFFSET_MASK ) >> CARDS_SIZE_EXP;
-	checkf(card < CARDS_COUNT, (const char*)"%lu %lu = (%lx & %lx) >> %lu\n", (size_t)CARDS_COUNT, (size_t)card, (size_t)address, (size_t)CARDS_OFFSET_MASK, (size_t)CARDS_SIZE_EXP);
-	check(card < CARDS_COUNT);
-	return card;
-}
-
-struct card_table_t
-{
-	size_t count;
-	void* cards_start[CARDS_COUNT];
-};
-
-static inline void ?{}(card_table_t* this)
-{
-	this->count = 0;
-}
-
-static inline void ^?{}(card_table_t* this)
-{
-
-}
-
-static inline void* object_at(card_table_t* const this, size_t card_number)
-{
-	return card_number < this->count ? this->cards_start[card_number] : NULL;
-}
-
-static inline void register_object(card_table_t* const this, void* object)
-{
-	size_t card = card_of(object);
-	if(card < this->count)
-	{
-		intptr_t card_obj_add = (intptr_t)object_at(this, card);
-		intptr_t obj_add = (intptr_t)object;
-		if(card_obj_add > obj_add)
-		{
-			this->cards_start[card] = object;
-		}
-	}
-	else
-	{
-		check(card == this->count);
-		this->count++;
-		this->cards_start[card] = object;
-	}
-}
-
-static inline void reset(card_table_t* const this)
-{
-	for(size_t i = 0; i < this->count; i++)
-	{
-		this->cards_start[i] = NULL;
-	}
-	this->count = 0;
-}
Index: amples/gc_no_raii/src/internal/collector.c
===================================================================
--- examples/gc_no_raii/src/internal/collector.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,152 +1,0 @@
-#include "collector.h"
-
-#ifdef __cforall
-extern "C" {
-#endif
-#include <string.h>
-#ifdef __cforall
-}
-#endif
-
-#include <fstream.hfa>
-
-#include "state.h"
-#include "gcpointers.h"
-#include "memory_pool.h"
-
-void* gc_finish_alloc_block(void* block, size_t actual_size, size_t target_size);
-void gc_assign_reference(void** ref, gc_object_header* ptr);
-
-gcpointer_t** gc_find_previous_ref(gcpointer_t* target)
-{
-	if(!(target)) return NULL;
-
-	bool managed = gc_is_managed(target);
-	gc_object_header* obj = gc_get_object_ptr((void*)target->ptr);
-
-	check(is_valid(obj));
-
-	gcpointer_t** prev_next_ptr = managed ? &obj->type_chain : &obj->root_chain;
-	while((*prev_next_ptr) && (*prev_next_ptr) != target)
-	{
-		prev_next_ptr = &(*prev_next_ptr)->next;
-	}
-
-	return prev_next_ptr;
-}
-
-void* gc_allocate(size_t target_size)
-{
-	// sout | "Allocating " | target_size | " bytes";
-
-	size_t size = gc_compute_size(target_size + sizeof(gc_object_header));
-
-	// sout | "Object header size: " | sizeof(gc_object_header) | " bytes";
-	// sout | "Actual allocation size: " | size | " bytes";
-
-	check(size < POOL_SIZE_BYTES);
-
-	void* block = NULL;
-	gc_state* gc = gc_get_state();
-
-	if((intptr_t)(block = gc_try_allocate(gc, size))) return gc_finish_alloc_block(block, size, target_size);
-
-	gc_collect(gc);
-
-	if((intptr_t)(block = gc_try_allocate(gc, size))) return gc_finish_alloc_block(block, size, target_size);
-
-	gc_allocate_pool(gc);
-
-	if((intptr_t)(block = gc_try_allocate(gc, size))) return gc_finish_alloc_block(block, size, target_size);
-
-	checkf( (int) 0, "ERROR: allocation in new pool failed");
-
-	return NULL;
-}
-
-void* gc_finish_alloc_block(void* block, size_t actual_size, size_t target_size)
-{
-	intptr_t data = ((intptr_t)block) + sizeof(gc_object_header);
-	void* header = block;
-
-	check( data > ((intptr_t)block));
-	check( data >= ((intptr_t)header));
-	check( gc_is_aligned( (void*)data ) );
-	check( data + target_size <= ((intptr_t)block) + actual_size );
-
-	gc_object_header* obj = placement_ctor(header, actual_size);
-
-	(void)obj; //remove unsused warning since this is for debug
-	check(obj == gc_get_object_ptr( (void*)data ));
-
-	gc_register_allocation(gc_get_state(), actual_size);
-
-	return (void*)data;
-}
-
-void gc_process_reference(void** ref, worklist_t* worklist)
-{
-	check(!gc_is_in_heap(gc_get_state(), ref));
-
-	gc_object_header* ptr = gc_get_object_ptr(*ref);
-	if(ptr)
-	{
-		if(!ptr->is_forwarded)
-		{
-			gc_copy_object(ptr);
-
-			gc_scan_object(ptr->forward, worklist);
-
-			gc_assign_reference(ref, ptr->forward);
-		}
-		else
-		{
-			//duplication to help debug
-			gc_assign_reference(ref, ptr->forward);
-		}
-	}
-}
-
-void gc_assign_reference(void** ref, gc_object_header* ptr)
-{
-	void* address = (void*)(((intptr_t)ptr) + sizeof(gc_object_header));
-
-	gc_write_aligned_ptr(ref, address);
-}
-
-gc_object_header* gc_copy_object(gc_object_header* ptr)
-{
-	check(!ptr->forward);
-	check(!ptr->is_forwarded);
-	check(gc_pool_is_from_space(gc_pool_of(ptr)));
-
-	gc_memory_pool* pool = gc_pool_of(ptr)->mirror;
-
-	void* new_block = gc_pool_allocate(pool, ptr->size, true);
-
-	memcpy(new_block, ptr, ptr->size);
-
-	gc_object_header* fwd_ptr = placement_copy_ctor(new_block, ptr);
-
-	ptr->forward = fwd_ptr;
-	ptr->is_forwarded = true;
-
-	return fwd_ptr;
-}
-
-void gc_scan_object(gc_object_header* object, worklist_t* worklist)
-{
-	gcpointer_t* field = object->type_chain;
-	while(field)
-	{
-		check(((intptr_t)field) > ((intptr_t)object));
-		check(((intptr_t)field) < ((intptr_t)((intptr_t)object) + object->size));
-
-		check(gc_is_in_to_space(gc_get_state(), &field->ptr));
-
-		intptr_t* ref = &field->ptr;
-		push_back(worklist, ref);
-
-		field = field->next;
-	}
-}
Index: amples/gc_no_raii/src/internal/collector.h
===================================================================
--- examples/gc_no_raii/src/internal/collector.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,47 +1,0 @@
-#pragma once
-
-#include <stdlib.h>
-
-#include "tools.h"
-//
-#include "gcpointers.h"
-#include "state.h"
-#include "internal/gc_tools.h"
-#include "internal/globals.h"
-#include "internal/object_header.h"
-#include "internal/state.h"
-#include "tools/worklist.h"
-
-static inline bool gc_is_managed(void* address)
-{
-	return gc_is_in_heap(gc_get_state(), address);
-}
-
-static inline gc_object_header* gc_get_object_ptr(void* ptr)
-{
-	void* clean = gc_get_aligned_ptr(ptr);
-	return ((gc_object_header*)clean) - 1;
-}
-
-static inline struct gc_memory_pool* gc_pool_of(void* address)
-{
-	return (struct gc_memory_pool*)(((intptr_t)address) & POOL_PTR_MASK);
-}
-
-static inline void gc_conditional_collect()
-{
-	if(gc_needs_collect(gc_get_state()))
-	{
-		gc_collect(gc_get_state());
-	}
-}
-
-gcpointer_t** gc_find_previous_ref(gcpointer_t* target);
-
-void* gc_allocate(size_t size);
-
-void gc_process_reference(void** ref, worklist_t* worklist);
-
-struct gc_object_header* gc_copy_object(struct gc_object_header* ptr);
-
-void gc_scan_object(struct gc_object_header* object, worklist_t* worklist);
Index: amples/gc_no_raii/src/internal/gc_tools.h
===================================================================
--- examples/gc_no_raii/src/internal/gc_tools.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,40 +1,0 @@
-#pragma once
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include "tools.h"
-#include "globals.h"
-
-static inline bool gc_is_aligned(void* address)
-{
-	return (((intptr_t)address) & (~OBJECT_PTR_MASK)) == 0;
-}
-
-static inline void* gc_get_aligned_ptr(void* address)
-{
-	return (void*)(((intptr_t)address) & (OBJECT_PTR_MASK));
-}
-
-static inline void* gc_write_aligned_ptr(void** reference, void* address)
-{
-	size_t ref_last_bits = ((intptr_t)*reference) & (~OBJECT_PTR_MASK);
-
-      size_t new_val = ((intptr_t)address) & OBJECT_PTR_MASK;
-
-      (*reference) = (void*)(new_val | ref_last_bits);
-
-	return *reference;
-}
-
-static inline size_t gc_compute_size(size_t size)
-{
-	size_t word_size = ((size - 1) / OBJECT_ALLIGNMENT) + 1;
-	size_t ret = word_size * OBJECT_ALLIGNMENT;
-
-	check(ret >= size);
-	check((ret % OBJECT_ALLIGNMENT) == 0);
-	check( ((size % OBJECT_ALLIGNMENT) != 0) || (ret == size) );
-
-	return ret;
-}
Index: amples/gc_no_raii/src/internal/globals.h
===================================================================
--- examples/gc_no_raii/src/internal/globals.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,30 +1,0 @@
-#pragma once
-
-// #include <stddef.h>
-// #include <stdint.h>
-//
-// static const size_t POOL_SIZE_EXP = 24;
-// static const size_t POOL_SIZE_BYTES = 0x1 << POOL_SIZE_EXP;
-// static const size_t POOL_PTR_MASK = ~(POOL_SIZE_BYTES - 1);
-//
-// static const size_t CARDS_SIZE_EXP = 12;
-// static const size_t CARDS_SIZE_BYTES = 0x1 << CARDS_SIZE_EXP;
-// static const size_t CARDS_OFFSET_MASK = (~(CARDS_SIZE_BYTES - 1)) & (POOL_SIZE_BYTES - 1);
-// static const size_t CARDS_COUNT = POOL_SIZE_BYTES / CARDS_SIZE_BYTES;
-//
-// static const size_t OBJECT_ALLIGNMENT = sizeof(size_t);
-// static const size_t OBJECT_PTR_MASK = ~(OBJECT_ALLIGNMENT - 1);
-
-enum {
-	POOL_SIZE_EXP 	= 24,
-	POOL_SIZE_BYTES 	= 0x1 << POOL_SIZE_EXP,
-	POOL_PTR_MASK 	= ~(POOL_SIZE_BYTES - 1),
-
-	CARDS_SIZE_EXP 	= 12,
-	CARDS_SIZE_BYTES 	= 0x1 << CARDS_SIZE_EXP,
-	CARDS_OFFSET_MASK	= (~(CARDS_SIZE_BYTES - 1)) & (POOL_SIZE_BYTES - 1),
-	CARDS_COUNT 	= POOL_SIZE_BYTES / CARDS_SIZE_BYTES,
-
-	OBJECT_ALLIGNMENT	= sizeof(size_t),
-	OBJECT_PTR_MASK 	= ~(OBJECT_ALLIGNMENT - 1),
-};
Index: amples/gc_no_raii/src/internal/memory_pool.c
===================================================================
--- examples/gc_no_raii/src/internal/memory_pool.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,160 +1,0 @@
-#include "memory_pool.h"
-
-extern "C" {
-	#include <stdlib.h>
-	#include <string.h>
-}
-
-#include "collector.h"
-#include "object_header.h"
-
-const size_t gc_pool_header_size = (size_t)(  &(((gc_memory_pool*)NULL)->start_p) );
-
-void ?{}(gc_memory_pool* this, size_t size, gc_memory_pool* next, gc_memory_pool* mirror, uint8_t type)
-{
-	this->mirror = mirror;
-	this->next = next;
-	this->type_code = type;
-
-	this->cards = ( (card_table_t*)malloc(sizeof(card_table_t)) ){};
-
-	this->end_p = ((uint8_t*)this) + size;
-	this->free_p = this->start_p;
-
-	check( gc_pool_of( (void*)this ) == this);
-	check(this->cards);
-	gc_reset_pool(this);
-}
-
-void ^?{}(gc_memory_pool* this)
-{
-	^(&this->cards){};
-	free(this->cards);
-}
-
-void gc_reset_pool(gc_memory_pool *const this)
-{
-	this->free_p = this->start_p;
-	#ifndef NDEBUG
-		memset(this->start_p, 0xCD, gc_pool_size_total(this));
-	#endif
-
-	check(this->cards);
-	reset(this->cards);
-
-	check(gc_pool_size_left(this) == gc_pool_size_total(this));
-}
-
-void* gc_pool_allocate(gc_memory_pool *const this, size_t size, bool zero)
-{
-	void* ret = this->free_p;
-
-	this->free_p += size;
-
-	if (zero) memset(ret, 0x00, size);
-
-	check(this->cards);
-	register_object(this->cards, ret);
-
-	return ret;
-}
-
-void ?{}(	gc_pool_object_iterator* this,
-		struct gc_object_header* start_object
-		#ifndef NDEBUG
-			, intptr_t pool_start
-			, intptr_t pool_end
-		#endif
-	)
-{
-	this->object = start_object;
-	#ifndef NDEBUG
-		this->lower_limit = pool_start;
-		this->upper_limit = pool_end;
-	#endif
-
-	check( ((intptr_t)start_object) >= this->lower_limit );
-	check( ((intptr_t)start_object) <= this->upper_limit );
-}
-
-void ^?{}( gc_pool_object_iterator* this ) {}
-
-gc_pool_object_iterator gc_pool_iterator_for(gc_memory_pool* const this, void* member)
-{
-	size_t card = card_of(member);
-	intptr_t member_add = (intptr_t)member;
-	intptr_t start_obj;
-
-	do
-	{
-		check(card < CARDS_COUNT);
-		start_obj = (intptr_t)object_at(this->cards, card);
-		check(card != 0 || start_obj);
-		card--;
-	}
-	while(start_obj > member_add || !(start_obj));
-
-	check( start_obj );
-
-	struct gc_object_header* start_obj_typed = (struct gc_object_header*)start_obj;
-
-	return (gc_pool_object_iterator) {
-		start_obj_typed
-		#ifndef NDEBUG
-			, (intptr_t)this->start_p
-			, (intptr_t)this->free_p
-		#endif
-	};
-}
-
-bool ?!=?(const gc_pool_object_iterator lhs, const gc_pool_object_iterator rhs)
-{
-	return lhs.object != rhs.object;
-}
-
-gc_pool_object_iterator begin(gc_memory_pool* const this)
-{
-	struct gc_object_header* start_obj = (struct gc_object_header*)this->start_p;
-	return (gc_pool_object_iterator) {
-		start_obj
-		#ifndef NDEBUG
-			, (intptr_t)this->start_p
-			, (intptr_t)this->free_p
-		#endif
-	};
-}
-
-gc_pool_object_iterator end(gc_memory_pool* const this)
-{
-	return (gc_pool_object_iterator) {
-		(struct gc_object_header*)this->free_p
-		#ifndef NDEBUG
-			, (intptr_t)this->start_p
-			, (intptr_t)this->free_p
-		#endif
-	};
-}
-
-gc_pool_object_iterator* ++?(gc_pool_object_iterator* it)
-{
-	struct gc_object_header* object = it->object;
-	intptr_t next_ptr = ((intptr_t)object) + object->size;
-	check(next_ptr > it->lower_limit);
-	check(next_ptr <= it->upper_limit);
-
-	struct gc_object_header* next_obj = ((struct gc_object_header*)next_ptr);
-	check(next_ptr == it->upper_limit || is_valid(next_obj));
-
-	it->object = next_obj;
-	return it;
-}
-
-const struct gc_object_header* *?(const gc_pool_object_iterator it)
-{
-	return it.object;
-}
-
-struct gc_object_header* *?(gc_pool_object_iterator it)
-{
-	return it.object;
-}
Index: amples/gc_no_raii/src/internal/memory_pool.h
===================================================================
--- examples/gc_no_raii/src/internal/memory_pool.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,92 +1,0 @@
-#pragma once
-
-extern "C" {
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-}
-
-#include "tools.h"
-
-#include "card_table.h"
-#include "globals.h"
-#include "state.h"
-
-struct gc_memory_pool
-{
-	struct memory_pool* mirror;
-	struct memory_pool* next;
-
-	uint8_t type_code;
-
-	card_table_t* cards;
-
-	uint8_t* end_p;
-	uint8_t* free_p;
-	uint8_t start_p[1];
-};
-
-void ?{}(	gc_memory_pool* this,
-		size_t size,
-		gc_memory_pool* next,
-		gc_memory_pool* mirror,
-		uint8_t type
-	);
-
-void ^?{}(gc_memory_pool* this);
-
-struct gc_pool_object_iterator
-{
-	struct gc_object_header* object;
-	#ifndef NDEBUG
-		intptr_t lower_limit;
-		intptr_t upper_limit;
-	#endif
-};
-
-
-void ?{}( 	gc_pool_object_iterator* this,
-		struct gc_object_header* start_object
-		#ifndef NDEBUG
-			, intptr_t pool_start
-			, intptr_t pool_end
-		#endif
-	);
-
-void ^?{}( gc_pool_object_iterator* this );
-
-bool ?!=?(const gc_pool_object_iterator lhs, const gc_pool_object_iterator rhs);
-
-gc_pool_object_iterator begin(gc_memory_pool* const this);
-gc_pool_object_iterator end(gc_memory_pool* const);
-
-gc_pool_object_iterator* ++?(gc_pool_object_iterator* it);
-
-const struct gc_object_header* *?(const gc_pool_object_iterator it);
-struct gc_object_header* *?(gc_pool_object_iterator it);
-
-static inline bool gc_pool_is_from_space(const gc_memory_pool* pool)
-{
-	return gc_from_space_code(gc_get_state()) == pool->type_code;
-}
-
-void gc_reset_pool(gc_memory_pool* const pool);
-
-static inline size_t gc_pool_size_used(const gc_memory_pool* pool)
-{
-	return pool->free_p - pool->start_p;
-}
-
-static inline size_t gc_pool_size_total(const gc_memory_pool* pool)
-{
-	return pool->end_p - pool->start_p;
-}
-
-static inline size_t gc_pool_size_left(const gc_memory_pool* pool)
-{
-	return pool->end_p - pool->free_p;
-}
-
-void* gc_pool_allocate(gc_memory_pool* const pool, size_t size, bool zero);
-
-gc_pool_object_iterator gc_pool_iterator_for(gc_memory_pool* const pool, void* member);
Index: amples/gc_no_raii/src/internal/object_header.c
===================================================================
--- examples/gc_no_raii/src/internal/object_header.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,110 +1,0 @@
-#include "object_header.h"
-
-#include <stdint.h>
-
-#include "collector.h"
-#include "globals.h"
-#include "gcpointers.h"
-
-void ctor(gc_object_header* const this, size_t inSize)
-{
-	#ifndef NDEBUG
-		this->canary_start = CANARY_VALUE;
-	#endif
-
-	this->size = inSize;
-	this->root_chain = NULL;
-	this->type_chain = NULL;
-	this->forward = NULL;
-	this->is_forwarded = false;
-
-	#ifndef NDEBUG
-		this->canary_end = CANARY_VALUE;
-	#endif
-}
-
-void copy_ctor(gc_object_header* const this, const gc_object_header* const other)
-{
-	#ifndef NDEBUG
-		this->canary_start = CANARY_VALUE;
-	#endif
-
-	this->size = other->size;
-	this->root_chain = other->root_chain;
-	this->type_chain = NULL;
-	this->forward = NULL;
-	this->is_forwarded = false;
-
-	#ifndef NDEBUG
-		this->canary_end = CANARY_VALUE;
-	#endif
-
-	gcpointer_t* root = this->root_chain;
-	while(root)
-	{
-		check(gc_get_object_ptr( (void*)root->ptr ) == other);
-		root->ptr = ((intptr_t)this) + sizeof(gc_object_header);
-
-		check(gc_get_object_ptr( (void*)root->ptr ) == this);
-		root = root->next;
-	}
-
-	gcpointer_t* type = other->type_chain;
-
-	while(type)
-	{
-		check((intptr_t)type < (intptr_t)((intptr_t)other + other->size));
-
-		size_t offset = (intptr_t)type - (intptr_t)other;
-		check(offset < this->size);
-
-		gcpointer_t* member_ptr = (gcpointer_t*)( (intptr_t)this + offset );
-
-		if(!this->type_chain) this->type_chain = member_ptr;
-
-		size_t next_offset = type->next ? (intptr_t)type->next - (intptr_t)other : 0;
-		check(next_offset < this->size);
-
-		gcpointer_t* next_ptr = type->next ? (gcpointer_t*)((intptr_t)this + next_offset) : NULL;
-
-		member_ptr->ptr = type->ptr;
-		member_ptr->next = next_ptr;
-
-		type = type->next;
-	}
-
-	check(is_valid(this));
-}
-
-#ifndef NDEBUG
-	bool is_valid(const gc_object_header* const this)
-	{
-		check((intptr_t)this->canary_start == (intptr_t)CANARY_VALUE);
-		check((intptr_t)this->canary_end == (intptr_t)CANARY_VALUE);
-
-		check(this->is_forwarded == ( (intptr_t)this->forward != (intptr_t)NULL));
-
-		check(this->size < POOL_SIZE_BYTES);
-
-		gcpointer_t* root = this->root_chain;
-		while(root)
-		{
-			checkf(gc_get_object_ptr( (void*)root->ptr ) == this, (const char*)"Expected %lX got %lX\n", gc_get_object_ptr( (void*)root->ptr ), this);
-
-			root = root->next;
-		}
-
-		gcpointer_t* type = this->type_chain;
-		while(type)
-		{
-			check((intptr_t)type > (intptr_t)this);
-			check((intptr_t)type < (intptr_t)(((intptr_t)this) + this->size));
-
-			type = type->next;
-		}
-
-		return true;
-	}
-	#else
-	#error blarg
-#endif
Index: amples/gc_no_raii/src/internal/object_header.h
===================================================================
--- examples/gc_no_raii/src/internal/object_header.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,52 +1,0 @@
-#pragma once
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#include "tools.h"
-
-#ifndef NDEBUG
-	static void* const CANARY_VALUE = (void*)0xCAFEBABACAFEBABA;
-#endif
-
-struct gcpointer_t;
-struct gc_object_header;
-
-struct gc_object_header
-{
-	#ifndef NDEBUG
-		void* canary_start;
-	#endif
-
-	size_t		size;
-	gcpointer_t* 	root_chain;
-	gcpointer_t*	type_chain;
-	gc_object_header*	forward;
-	bool			is_forwarded;
-
-	#ifndef NDEBUG
-		void* canary_end;
-	#endif
-};
-
-void ctor(gc_object_header* const this, size_t size);
-void copy_ctor(gc_object_header* const this, const gc_object_header* const other);
-
-static inline gc_object_header* placement_ctor(void* address, size_t size)
-{
-	gc_object_header* const this = (gc_object_header* const) address;
-	ctor(this, size);
-	return this;
-}
-
-static inline gc_object_header* placement_copy_ctor(void* address, const gc_object_header* const other)
-{
-	gc_object_header* const this = (gc_object_header* const) address;
-	copy_ctor(this, other);
-	return this;
-}
-
-#ifndef NDEBUG
-	bool is_valid(const gc_object_header* const this);
-#endif
Index: amples/gc_no_raii/src/internal/state.c
===================================================================
--- examples/gc_no_raii/src/internal/state.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,312 +1,0 @@
-#include "state.h"
-
-#include <stdlib.hfa>
-
-//general purpouse includes
-#include "tools.h"
-
-//platform abstraction includes
-#include "allocate-pool.h"
-
-//gc internal includes
-#include "collector.h"
-#include "globals.h"
-#include "memory_pool.h"
-#include "object_header.h"
-#include "tools/worklist.h"
-
-void gc_state_swap(gc_state *const this);
-void gc_state_sweep_roots(gc_state *const this, worklist_t* worklist);
-void gc_state_clear(gc_state *const this);
-void gc_state_calc_usage(gc_state *const this);
-
-#ifndef NDEBUG
-	bool gc_state_roots_match(gc_state *const this);
-	bool gc_state_no_from_space_ref(gc_state *const this);
-#endif
-
-static gc_state s;
-
-gc_state* gc_get_state()
-{
-	if(!s.is_initialized) ctor(&s);
-	return &s;
-}
-
-void ctor(gc_state *const this)
-{
-	this->from_code = 0;
-	this->to_space = NULL;
-	this->from_space = NULL;
-	this->total_space = 0;
-	this->used_space = 0;
-	ctor(&this->pools_table);
-
-	gc_allocate_pool(this);
-
-	this->is_initialized = true;
-}
-
-void dtor(gc_state *const this)
-{
-	dtor(&this->pools_table);
-	this->is_initialized = false;
-}
-
-bool gc_is_in_heap(const gc_state* const this, const void* const address)
-{
-	gc_memory_pool* target_pool = gc_pool_of(address);
-
-	gc_memory_pool** first = cbegin(&this->pools_table);
-	gc_memory_pool** last = cend(&this->pools_table);
-	gc_memory_pool** result = find(first, &last, target_pool);
-	return result != last && gc_pool_is_from_space(*result);
-}
-
-bool gc_is_in_to_space(const gc_state* const this, const void* const address)
-{
-	gc_memory_pool* target_pool = gc_pool_of(address);
-
-	gc_memory_pool** first = cbegin(&this->pools_table);
-	gc_memory_pool** last = cend(&this->pools_table);
-	gc_memory_pool** result = find(first, &last, target_pool);
-	return result != last && !gc_pool_is_from_space(*result);
-}
-
-gc_object_header* gc_get_object_for_ref(gc_state* state, void* member)
-{
-	volatile int stage = 0;
-	intptr_t target = ((intptr_t)member);
-	if(!gc_is_in_heap(state, member)) return NULL;
-	stage++;
-
-	gc_memory_pool* pool = gc_pool_of(member);
-	stage++;
-	gc_pool_object_iterator it = gc_pool_iterator_for(pool, member);
-	stage++;
-	gc_pool_object_iterator end = end(pool);
-	stage++;
-
-	while(it != end)
-	{
-		gc_object_header* object = *it;
-		check(object);
-		check( is_valid(object) );
-		{
-			intptr_t start = ((intptr_t)object);
-			intptr_t end = ((intptr_t)start + object->size);
-			if(start < target && end > target)
-			{
-				return object;
-			}
-		}
-		stage++;
-		++it;
-	}
-
-	checkf( (int) 0, "is_in_heap() and iterator_for() return inconsistent data");
-	abort();
-	return NULL;
-}
-
-void* gc_try_allocate(gc_state* const this, size_t size)
-{
-	gc_memory_pool* pool = this->from_space;
-	while(pool != (gc_memory_pool*)0)
-	{
-		if(gc_pool_size_left(pool) > size)
-		{
-			return gc_pool_allocate(pool, size, true);
-		}
-		pool = pool->next;
-	}
-
-	return (void*)0;
-}
-
-void gc_allocate_pool(gc_state *const this)
-{
-	gc_memory_pool* old_from_space = this->from_space;
-      gc_memory_pool* old_to_space = this->to_space;
-
-      this->from_space = (gc_memory_pool*)(pal_allocPool(POOL_SIZE_BYTES, 1));
-      this->to_space   = (gc_memory_pool*)(pal_allocPool(POOL_SIZE_BYTES, 1));
-
-      this->from_space{ POOL_SIZE_BYTES, old_from_space, this->to_space,   this->from_code };
-      this->to_space  { POOL_SIZE_BYTES, old_to_space,   this->from_space, (~this->from_code) & 0x01 };
-
-	this->total_space += gc_pool_size_used(this->from_space);
-
-	push_back(&this->pools_table, this->from_space);
-	push_back(&this->pools_table, this->to_space);
-}
-
-void gc_collect(gc_state* const this)
-{
-	// DEBUG("collecting");
-	// DEBUG("previous usage " << this->used_space << " / " << this->total_space);
-
-	worklist_t worklist;
-	ctor(&worklist);
-	gc_state_sweep_roots(this, &worklist);
-
-	while(!empty(&worklist))
-	{
-		intptr_t* ref = back(&worklist);
-		pop_back(&worklist);
-		gc_process_reference((void**)ref, &worklist);
-	}
-
-	check(gc_state_roots_match(this));
-	check(gc_state_no_from_space_ref(this));
-
-	gc_state_swap(this);
-
-	gc_state_calc_usage(this);
-
-	if(gc_needs_collect(this)) gc_allocate_pool(this);
-
-	// DEBUG("done");
-	dtor(&worklist);
-}
-
-void gc_state_swap(gc_state* const this)
-{
-	swap(&this->from_space, &this->to_space);
-
-	gc_memory_pool* pool = this->to_space;
-	while(pool)
-	{
-		gc_reset_pool(pool);
-		pool = pool->next;
-	}
-
-	this->from_code = (~this->from_code) & 0x01;
-
-	#ifndef NDEBUG
-		{
-			gc_memory_pool* pool = this->from_space;
-			while(pool)
-			{
-				check(gc_pool_is_from_space(pool));
-				pool = pool->next;
-			}
-
-			pool = this->to_space;
-			while(pool)
-			{
-				check(!gc_pool_is_from_space(pool));
-				pool = pool->next;
-			}
-		}
-	#endif
-}
-
-void gc_state_sweep_roots(gc_state* const this, worklist_t* worklist)
-{
-	gc_memory_pool* pool = this->from_space;
-	while(pool)
-	{
-		gc_pool_object_iterator it = begin(pool);
-		gc_pool_object_iterator end = end(pool);
-		for(;it != end; ++it)
-		{
-			gc_object_header* object = *it;
-			if(!object->root_chain) continue;
-
-			gc_copy_object(object);
-
-			gc_scan_object(object->forward, worklist);
-		}
-
-		pool = pool->next;
-	}
-}
-
-void gc_state_clear(gc_state* const this)
-{
-	gc_memory_pool* pool = this->from_space;
-	while(pool)
-	{
-		gc_reset_pool(pool);
-		pool = pool->next;
-	}
-
-	pool = this->to_space;
-	while(pool)
-	{
-		gc_reset_pool(pool);
-		pool = pool->next;
-	}
-}
-
-void gc_state_calc_usage(gc_state* const this)
-{
-	this->total_space = 0;
-	this->used_space = 0;
-
-	gc_memory_pool* pool = this->from_space;
-	while(pool)
-	{
-		size_t size = gc_pool_size_total(pool);
-		size_t used = gc_pool_size_used(pool);
-		check(used <= size);
-		this->total_space += size;
-		this->used_space += used;
-
-		pool = pool->next;
-	}
-}
-
-#ifndef NDEBUG
-	bool gc_state_roots_match(gc_state* const this)
-	{
-		gc_memory_pool* pool = this->to_space;
-		while(pool)
-		{
-			size_t size = 0;
-			gc_pool_object_iterator it = begin(pool);
-			gc_pool_object_iterator end = end(pool);
-			for(;it != end; ++it)
-			{
-				gc_object_header* object = *it;
-				size += object->size;
-
-				gcpointer_t* ptr = object->root_chain;
-				while(ptr)
-				{
-					check(gc_get_object_ptr( (void*)ptr->ptr ) == object);
-					ptr = ptr->next;
-				}
-			}
-
-			checkf(size + gc_pool_size_left(pool) == gc_pool_size_total(pool),
-				(const char*)"expected %lu + %lu == %lu\n",
-				(size_t)size,
-				(size_t)gc_pool_size_left(pool),
-				(size_t)gc_pool_size_total(pool));
-
-			pool = pool->next;
-		}
-
-		return true;
-	}
-
-	bool gc_state_no_from_space_ref(gc_state* const this)
-	{
-		gc_memory_pool* pool = this->to_space;
-		while(pool)
-		{
-			void** potential_ref = (void**)pool->start_p;
-			while(potential_ref < (void**)pool->free_p)
-			{
-				check(!gc_is_in_heap(this, *potential_ref));
-				potential_ref++;
-			}
-
-			pool = pool->next;
-		}
-
-		return true;
-	}
-#endif
Index: amples/gc_no_raii/src/internal/state.h
===================================================================
--- examples/gc_no_raii/src/internal/state.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,64 +1,0 @@
-#pragma once
-
-#ifdef __cforall
-extern "C" {
-#endif
-#include <stddef.h>
-#include <stdint.h>
-#ifdef __cforall
-}
-#endif
-#include <fstream.hfa>
-#include <vector>
-
-#include "tools.h"
-
-typedef vector(struct gc_memory_pool*, heap_allocator(struct gc_memory_pool*)) pools_table_t;
-
-struct gc_state
-{
-	bool is_initialized;
-	uint8_t from_code;
-	struct gc_memory_pool* to_space;
-	struct gc_memory_pool* from_space;
-
-	size_t total_space;
-	size_t used_space;
-
-	pools_table_t 	pools_table;
-	size_t 		pools_table_count;
-};
-
-void ctor(gc_state* const state);
-
-void dtor(gc_state* const state);
-
-gc_state* gc_get_state();
-
-static inline bool gc_needs_collect(gc_state* state)
-{
-	// sout | "Used Space: " | state->used_space | " bytes";
-	return state->used_space * 2 > state->total_space;
-}
-
-void gc_collect(gc_state* const this);
-
-void* gc_try_allocate(gc_state* const this, size_t size);
-
-void gc_allocate_pool(gc_state* const state);
-
-bool gc_is_in_heap(const gc_state* const state, const void* const address);
-
-bool gc_is_in_to_space(const gc_state* const state, const void* const address);
-
-static inline uint8_t gc_from_space_code(const gc_state *const this)
-{
-	return this->from_code;
-}
-
-struct gc_object_header* gc_get_object_for_ref(gc_state* state, void*);
-
-static inline void gc_register_allocation(gc_state* state, size_t size)
-{
-	state->used_space += size;
-}
Index: amples/gc_no_raii/src/test_include.c
===================================================================
--- examples/gc_no_raii/src/test_include.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,4 +1,0 @@
-/* definition to expand macro for string conversion*/
-#define xstr(s) sstr(s)
-#define sstr(s) #s
-#include xstr(../test/TEST_FILE.c)
Index: amples/gc_no_raii/src/tools.h
===================================================================
--- examples/gc_no_raii/src/tools.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,37 +1,0 @@
-#pragma once
-
-#include "tools/checks.h"
-#include "tools/print.h"
-
-// forall(otype T)
-// inline void swap(T* const a, T* const b)
-// {
-// 	T* temp = a;
-// 	*a = *b;
-// 	*b = *temp;
-// }
-
-trait has_equal(otype T)
-{
-	signed int ?==?(T a, T b);
-};
-
-trait InputIterator_t(otype T, otype InputIterator)
-{
-	signed int ?==?(InputIterator a, InputIterator b);
-	signed int ?!=?(InputIterator a, InputIterator b);
-	T *?(InputIterator a);
-	InputIterator ++?(InputIterator* a);
-	InputIterator ?++(InputIterator* a);
-};
-
-forall(otype T | has_equal(T), otype InputIterator | InputIterator_t(T, InputIterator))
-static inline InputIterator find( InputIterator first, const InputIterator* const last, T val)
-{
-	while ( first != *last)
-	{
-		if(*first == val) return first;
-		++first;
-	}
-	return *last;
-}
Index: amples/gc_no_raii/src/tools/checks.h
===================================================================
--- examples/gc_no_raii/src/tools/checks.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,29 +1,0 @@
-#pragma once
-
-#ifdef NDEBUG
-
-#define check(x)
-
-#define checkf(x, format, ...)
-
-#warning no debug checks
-
-#else
-
-#include <stdlib.h>
-#include <stdio.h>
-
-#define check(x) do {\
-	if(!(x)) {\
-		printf("CHECK failed : %s at %s:%i\n", #x, __FILE__, __LINE__);\
-		abort();\
-	}}while( (int)0 )\
-
-#define checkf(x, ...) do {\
-	if(!(x)) {\
-		printf("CHECK failed : %s at %s:%i\n", #x, __FILE__, __LINE__);\
-		printf(__VA_ARGS__);\
-		abort();\
-	}}while( (int)0 )\
-
-#endif //NO_CHECKS
Index: amples/gc_no_raii/src/tools/print.c
===================================================================
--- examples/gc_no_raii/src/tools/print.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,5 +1,0 @@
-#include "tools.h"
-
-#ifndef NDEBUG
-	// ofstream *sout = ofstream_stdout();
-#endif
Index: amples/gc_no_raii/src/tools/print.h
===================================================================
--- examples/gc_no_raii/src/tools/print.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,13 +1,0 @@
-#pragma once
-
-// #ifndef NDEBUG
-//
-// #include <fstream.hfa>
-//
-// #define DEBUG_OUT(x) sout | x;
-//
-// #else
-
-#define DEBUG_OUT(x)
-
-// #endif //NO_CHECKS
Index: amples/gc_no_raii/src/tools/worklist.h
===================================================================
--- examples/gc_no_raii/src/tools/worklist.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,14 +1,0 @@
-#pragma once
-
-#ifdef __cforall
-extern "C" {
-#endif
-#include <stddef.h>
-#include <stdint.h>
-#ifdef __cforall
-}
-#endif
-
-#include <vector.hfa>
-
-typedef vector(intptr_t*, heap_allocator(intptr_t*)) worklist_t;
Index: amples/gc_no_raii/test/badlll.c
===================================================================
--- examples/gc_no_raii/test/badlll.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,71 +1,0 @@
-#include "gc.h"
-
-#include <stdio.h>
-
-struct List_t
-{
-	gcpointer(List_t) next;
-	int val;
-};
-
-typedef gcpointer(List_t) LLL;
-
-#define MAX (1024 * 1)
-
-LLL buildLLL(int sz)
-{
-	int i = 0;
-	LLL ll0;
-
-	gcmalloc( &ll0 );
-	List_t* ll0_ptr = get( &ll0 );
-	ll0_ptr->val = i;
-	LLL lll = ll0;
-
-	for (i = 1; i < sz; i++)
-	{
-		LLL llc;
-		gcmalloc( &llc );
-		List_t* llc_ptr = get( &llc );
-		llc_ptr->val = i;
-		List_t* lll_ptr = get( &lll );
-		lll_ptr->next = llc;
-
-		lll = llc;
-	}
-
-	check(is_valid( &ll0.internal ));
-
-	return ll0;
-}
-
-void testLLL(LLL lll)
-{
-	unsigned char *counted;
-
-	counted = (unsigned char *) calloc(MAX, sizeof(unsigned char));
-	while (lll)
-	{
-		List_t* lll_ptr = get( &lll );
-		counted[lll_ptr->val]++;
-		if (counted[lll_ptr->val] > 1)
-		{
-			fprintf(stderr, "ERROR! Encountered %d twice!\n", lll_ptr->val);
-			exit(1);
-		}
-		lll = lll_ptr->next;
-	}
-
-	return;
-}
-
-int main(void)
-{
-	LLL mylll;
-
-	mylll = buildLLL(MAX);
-
-	testLLL(mylll);
-
-	return 0;
-}
Index: amples/gc_no_raii/test/gctest.c
===================================================================
--- examples/gc_no_raii/test/gctest.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,25 +1,0 @@
-#include <fstream.hfa>
-
-#include "gc.h"
-#include "internal/collector.h"
-
-#warning default test
-
-int main() {
-	sout | "Bonjour au monde!\n";
-
-	gcpointer(int) theInt;
-	gcmalloc(&theInt);
-
-	for(int i = 0; i < 10; i++) {
-		int a;
-		{
-			gcpointer(int) anInt;
-			gcmalloc(&anInt);
-		}
-		int p;
-	}
-
-	gc_collect(gc_get_state());
-	gc_conditional_collect();
-}
Index: amples/gc_no_raii/test/operators.c
===================================================================
--- examples/gc_no_raii/test/operators.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,22 +1,0 @@
-#include "gc.h"
-
-#include <assert.h>
-
-int main(int argc, char *argv[])
-{
-	gcpointer(int) test, test1;
-
-	if(test != test1) { return 1; }
-	if(test == test1) { return 1; }
-	// if(test == 0)  { return 1; }
-	// if(test != 0)  { return 1; }
-	// if(test) { return 1; }
-
-	// *test.internal.ptr = 3;
-	// int i = *test;
-
-	gcmalloc();
-	// test = gcmalloc();
-
-	return 0;
-}
Index: amples/hashtable.cfa
===================================================================
--- examples/hashtable.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,243 +1,0 @@
-
-#include <containers/list.hfa>
-
-#include <exception.hfa>
-TRIVIAL_EXCEPTION(ht_fill_limit_crossed);
-
-
-
-void defaultResumptionHandler(ht_fill_limit_crossed &) {
-    printf("default resumption ht_fill_limit_crossed\n");
-}
-
-void defaultTerminationHandler(ht_fill_limit_crossed &) = void;
-
-
-trait has_hash( otype K ) {
-    size_t hash(K);
-    int ?==?( K, K );
-};
-
-trait hkey( otype K, dtype tN | has_hash(K) ) {
-    K key(tN &);
-};
-
-forall( otype K, dtype tN, dtype tE | $dlistable(tN, tE) | hkey(K, tN) ) {
-
-    struct hashtable {
-
-        size_t n_buckets;
-        dlist(tN, tE) *buckets;
-        
-        size_t item_count;
-        float ff_next_warn_up;
-
-        void (*defaultResumptionHandler) (ht_fill_limit_crossed &);
-    };
-
-    void ?{}( hashtable(K, tN, tE) & this ) = void;
-}
-
-forall( otype K, dtype tN, dtype tE | $dlistable(tN, tE) | hkey(K, tN) | { void defaultResumptionHandler(ht_fill_limit_crossed &); } ) {
-
-    void ?{}( hashtable(K, tN, tE) & this, size_t n_buckets, dlist(tN, tE) *buckets ) {
-
-        this.n_buckets = n_buckets;
-        this.buckets = buckets;
-
-        this.item_count = 0;
-        this.ff_next_warn_up = 0.5;
-
-        this.defaultResumptionHandler = defaultResumptionHandler;
-
-        for ( i; n_buckets ) {
-            ?{}( this.buckets[i] );
-        }
-    }
-}
-
-forall( otype K, dtype tN, dtype tE | $dlistable(tN, tE) | hkey(K, tN) ) {
-
-    float fill_frac( hashtable(K, tN, tE) & this ) with(this) {
-        return ((float)item_count) / n_buckets;
-    }
-
-    size_t bucket_of( hashtable(K, tN, tE) & this, K k ) {
-        return hash(k) % this.n_buckets;
-    }
-
-    tE & get( hashtable(K, tN, tE) & this, K k ) with (this) {
-
-        dlist(tN, tE) & bucket = buckets[ bucket_of(this, k) ];
-
-        for ( tN * item = & $tempcv_e2n(bucket`first);  item != 0p;  item = & $tempcv_e2n((*item)`next) ) {
-            if ( key(*item) == k ) {
-                return *item;
-            }
-        }
-
-        return *0p;
-    }
-
-    void check_ff_warning( hashtable(K, tN, tE) & this ) with (this) {
-        if (fill_frac(this) > ff_next_warn_up) {
-            throwResume (ht_fill_limit_crossed){};
-            ff_next_warn_up *= 2;
-        }
-    }
-
-    void put( hashtable(K, tN, tE) & this, tE & v ) with (this) {
-
-        check_ff_warning(this);
-
-        K k = key( $tempcv_e2n(v) );
-        dlist(tN, tE) & bucket = buckets[ bucket_of(this, k) ];
-
-        for ( tN * item = & $tempcv_e2n(bucket`first);  item != 0p;  item = & $tempcv_e2n((*item)`next) ) {
-            if ( key(*item) == k ) {
-                remove(*item);
-                break;
-            }
-        }
-
-        insert_first(bucket, v);
-        this.item_count ++;
-    }
-
-}
-
-// tactical usage:
-// HASHTABLE_STATIC(int, item_by_prority, item, n, ht)
-//
-// intended equivalent:
-// hashtable_static(int, item_by_prority, item, Z(n)) ht;
-#define HASHTABLE_STATIC(K, tN, tE, n_buckets, obj) \
-    struct __hashtable_static_ ## obj { \
-        inline hashtable(K, tN, tE); \
-        dlist(tN, tE) $items[n_buckets]; \
-    }; \
-    void ?{}( __hashtable_static_ ## obj & this )  { \
-        ((hashtable(K, tN, tE) &)this){ n_buckets, this.$items }; \
-    } \
-    __hashtable_static_ ## obj obj;
-
-
-
-trait heaped(dtype T) {
-    T * alloc( size_t );
-    void free( void * ); 
-};
-
-void __dynamic_defaultResumptionHandler(ht_fill_limit_crossed & ex) {
-    printf("dynamic limit crossed\n");
-}
-
-forall( otype K, dtype tN, dtype tE | $dlistable(tN, tE) | hkey(K, tN) | heaped( dlist(tN, tE) ) ) {
-
-    struct hashtable_dynamic { 
-        inline hashtable(K, tN, tE); 
-    };
-    void ?{}( hashtable_dynamic(K, tN, tE) & this, size_t n_buckets )  {
-        void (*defaultResumptionHandler) (ht_fill_limit_crossed &) = __dynamic_defaultResumptionHandler;
-        dlist(tN, tE) *buckets = alloc(n_buckets);
-        ((hashtable(K, tN, tE) &)this){ n_buckets, buckets };
-    }
-    void ^?{}( hashtable_dynamic(K, tN, tE) & this ) {
-        free(this.buckets);
-    }
-}
-
-
-
-
-struct request {
-
-    unsigned int src_id;
-    unsigned int tgt_id;
-
-    DLISTED_MGD_EXPL_IN(request, ht_by_src)
-    DLISTED_MGD_EXPL_IN(request, ht_by_tgt)
-};
-DLISTED_MGD_EXPL_OUT(request, ht_by_src)
-DLISTED_MGD_EXPL_OUT(request, ht_by_tgt)
-
-size_t hash( unsigned int k ) {
-    // not really a hash function, not really the point
-    return k;
-}
-
-unsigned int key( request_in_ht_by_src & v ) {
-    return v.src_id;
-}
-
-
-#include <stdlib.hfa>
-
-int main() {
-
-
-    HASHTABLE_STATIC(unsigned int, request_in_ht_by_src, request, 67, h_src)
-
-    request & wasnt_found = get(h_src, 17);
-    assert( &wasnt_found == 0p );
-
-    request r;
-    r.src_id = 117;
-    r.tgt_id = 998;
-
-    put(h_src, r);
-
-    request & found = get(h_src, 117);
-    assert( &found == &r );
-
-    & wasnt_found = & get(h_src, 998);
-    assert( &wasnt_found == 0p );
-
-    printf( "%f\n", fill_frac(h_src) );
-
-
-    request rs[500];
-    try {
-        for (i; 500) {
-            rs[i].src_id = 8000 * i;
-            put(h_src, rs[i]);
-        }
-    } catchResume(ht_fill_limit_crossed*) {
-        printf("fill limit tripped with h_src filled at %f\n", fill_frac(h_src));
-        throwResume;
-    }
-
-    assert(  & get(h_src, 117      ) );
-    assert(  & get(h_src, 8000*25  ) );
-    assert(! & get(h_src, 8000*25+1) );
-
-
-
-    dlist(request_in_ht_by_src, request) * (*old_alloc)( size_t ) = alloc;
-    dlist(request_in_ht_by_src, request) * alloc( size_t n ) {
-        dlist(request_in_ht_by_src, request) * ret = old_alloc(n);
-        printf("alloc'ed at %p\n", ret);
-        return ret;
-    }
-
-    void (*old_free)( void * ) = free;
-    void free( void * o ) {
-        printf("free'ing at %p\n", o);
-        old_free(o);
-    }
-
-    hashtable_dynamic(unsigned int, request_in_ht_by_src, request) ht2 = { 113 };
-    request rs2[500];
-    try {
-        for (i; 500) {
-            if (i % 10 == 0) {printf("%d(%f),", i, fill_frac(ht2));}
-            rs2[i].src_id = 8000 * i;
-            put(ht2, rs2[i]);
-        }
-    } catchResume(ht_fill_limit_crossed*) {
-        printf("fill limit tripped with ht2 filled at %f\n", fill_frac(ht2));
-        throwResume;
-    }
-
-
-}
Index: amples/hashtable2.cfa
===================================================================
--- examples/hashtable2.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,466 +1,0 @@
-
-#include <containers/list.hfa>
-
-typedef unsigned int K;
-
-// workaround type for trac#185; used here as the ticket's examples use a spontaneous float
-typedef struct {} t_unused;
-
-struct request {
-
-    unsigned int src_id;
-    unsigned int tgt_id;
-
-    DLISTED_MGD_EXPL_IN(request, ht_by_src)
-    DLISTED_MGD_EXPL_IN(request, ht_by_tgt)
-};
-DLISTED_MGD_EXPL_OUT(request, ht_by_src)
-DLISTED_MGD_EXPL_OUT(request, ht_by_tgt)
-
-size_t hash( unsigned int k ) {
-    // not really a hash function, not really the point
-    return k;
-}
-
-unsigned int key( request_in_ht_by_src & v ) {
-    return v.src_id;
-}
-
-
-#include <exception.hfa>
-
-DATA_EXCEPTION(ht_fill_limit_crossed)(
-	void * theHashtable;
-    bool want_throwResume_ht_auto_resize_pending;
-    size_t size_for_ht_auto_resize_pending;
-);
-
-void ?{}(ht_fill_limit_crossed & this, void * theHashtable) {
-	VTABLE_INIT(this, ht_fill_limit_crossed);
-	this.theHashtable = theHashtable;
-    this.want_throwResume_ht_auto_resize_pending = false;
-    this.size_for_ht_auto_resize_pending = 0;
-}
-
-const char * ht_fill_limit_crossed_msg(ht_fill_limit_crossed * this) {
-	return "ht_fill_limit_crossed";
-}
-
-VTABLE_INSTANCE(ht_fill_limit_crossed)(ht_fill_limit_crossed_msg);
-
-
-DATA_EXCEPTION(ht_auto_resize_pending)(
-	void * theHashtable;
-    size_t new_size;
-);
-
-void ?{}(ht_auto_resize_pending & this, void * theHashtable, size_t new_size) {
-	VTABLE_INIT(this, ht_auto_resize_pending);
-	this.theHashtable = theHashtable;
-    this.new_size = new_size;
-}
-
-const char * ht_auto_resize_pending_msg(ht_auto_resize_pending * this) {
-	return "ht_auto_resize_pending";
-}
-
-VTABLE_INSTANCE(ht_auto_resize_pending)(ht_auto_resize_pending_msg);
-
-
-
-trait pretendsToMatter( dtype TTT ) {
-    void actsmart(TTT &);
-};
-
-forall( dtype TTTx )
-void actsmart(TTTx &) {}
-
-// probable bug, wrt otype Tt_unused...
-//   1. changing to dtype Tt_unused crashes GenPoly
-//   2. declaring function check_ff_warning as concrete, i.e. operating on type hashtable_rbs(t_unused) makes cfa-cc generate bad C
-// in both cases, it's on the throwResume call
-// where it implicitly uses this.defaultResumptionHandler as a throwResume argument
-// whereas that, of course, has to be surrounded in a cast
-// at GenPoly breakpoint, type of said cast appears as CFA generic struct, even as the "this" parameter appears as C struct; casted type ...
-//   1. is hashtable_rbs(Tt_unused); assertion complains you don't need a type arg here
-//   2. shows up in -CFA output as hashtable_rbs(), which is bad C; expecting hashtable_rbs*
-
-forall( otype Tt_unused | pretendsToMatter(Tt_unused) ) {
-
-    // hashtable of request by source
-    struct hashtable_rbs {
-
-        size_t n_buckets;
-        dlist(request_in_ht_by_src, request) *buckets;
-        
-        size_t item_count;
-        float ff_next_warn_up;
-        float ff_warn_step_factor;
-
-        void (*defaultResumptionHandler) (ht_fill_limit_crossed &);
-    };
-
-    void ?{}( hashtable_rbs(Tt_unused) & this ) = void;
-}
-
-forall( otype Tt_unused | pretendsToMatter(Tt_unused) | { void defaultResumptionHandler(ht_fill_limit_crossed &); } ) {
-
-    void ?{}( hashtable_rbs(Tt_unused) & this, size_t n_buckets, dlist(request_in_ht_by_src, request) *buckets,
-        float ff_next_warn_up, float ff_warn_step_factor ) {
-
-        printf( "base hashtable ctor with %ld buckets at %p\n", n_buckets, buckets);
-
-        this.n_buckets = n_buckets;
-        this.buckets = buckets;
-
-        this.item_count = 0;
-        this.ff_next_warn_up = ff_next_warn_up;
-        this.ff_warn_step_factor = ff_warn_step_factor;
-
-        this.defaultResumptionHandler = defaultResumptionHandler;
-
-        for ( i; n_buckets ) {
-            ?{}( this.buckets[i] );
-        }
-    }
-
-    void ?{}( hashtable_rbs(Tt_unused) & this, size_t n_buckets, dlist(request_in_ht_by_src, request) *buckets ) {
-        printf( "base hashtable ctor with default warning steps\n" );
-        ( this ) { n_buckets, buckets, 0.5, 2 };
-    }
-
-}
-
-// this fwd declaration is artifact of workaround trac#192
-void defaultResumptionHandler( ht_auto_resize_pending & ex );
-
-forall( otype Tt_unused | pretendsToMatter(Tt_unused) ) {
-
-    float fill_frac( hashtable_rbs(Tt_unused) & this ) with(this) {
-        return ((float)item_count) / n_buckets;
-    }
-
-    size_t bucket_of( hashtable_rbs(Tt_unused) & this, K k ) {
-        return hash(k) % this.n_buckets;
-    }
-
-    request & get( hashtable_rbs(Tt_unused) & this, K k ) with (this) {
-
-        dlist(request_in_ht_by_src, request) & bucket = buckets[ bucket_of(this, k) ];
-
-        for ( request_in_ht_by_src * item = & $tempcv_e2n(bucket`first);  item != 0p;  item = & $tempcv_e2n((*item)`next) ) {
-            if ( key(*item) == k ) {
-                return *item;
-            }
-        }
-
-        return *0p;
-    }
-
-    void check_ff_warning( hashtable_rbs(Tt_unused) & this ) with (this) {
-        if (fill_frac(this) > ff_next_warn_up) {
-            ht_fill_limit_crossed ex1 = { &this };
-            throwResume ex1;
-            // workaround trac#192: want the second throwResume to be in __dynamic_defaultResumptionHandler
-            // ... want base hashtable decoupled from resize
-            if ( ex1.want_throwResume_ht_auto_resize_pending ) {
-                throwResume( (ht_auto_resize_pending) { & this, ex1.size_for_ht_auto_resize_pending } );
-            }
-        }
-    }
-
-    void put( hashtable_rbs(Tt_unused) & this, request & v ) with (this) {
-
-        check_ff_warning(this);
-
-        K k = key( $tempcv_e2n(v) );
-        dlist(request_in_ht_by_src, request) & bucket = buckets[ bucket_of(this, k) ];
-
-        for ( request_in_ht_by_src * item = & $tempcv_e2n(bucket`first);  item != 0p;  item = & $tempcv_e2n((*item)`next) ) {
-            if ( key(*item) == k ) {
-                remove(*item);
-                break;
-            }
-        }
-
-        insert_first(bucket, v);
-        this.item_count ++;
-    }
-}
-
-
-
-
-// tactical usage:
-// HASHTABLE_RBS_STATIC(n, ht)
-//
-// intended equivalent:
-// hashtable_rbs_static(Z(n)) ht;
-#define HASHTABLE_RBS_STATIC(n_buckets, obj) \
-    struct __hashtable_static_ ## obj { \
-        inline hashtable_rbs(t_unused); \
-        dlist(request_in_ht_by_src, request) $items[n_buckets]; \
-    }; \
-    void ?{}( __hashtable_static_ ## obj & this )  { \
-        ((hashtable_rbs(t_unused) &)this){ n_buckets, this.$items }; \
-    } \
-    __hashtable_static_ ## obj obj;
-
-
-
-void defaultResumptionHandler(ht_fill_limit_crossed & ex) {
-    hashtable_rbs(t_unused) & ht = *(hashtable_rbs(t_unused) *)ex.theHashtable;
-    printf("base default resumption handler ht_fill_limit_crossed with ht filled at %f\n", fill_frac(ht));
-    ht.ff_next_warn_up *= ht.ff_warn_step_factor;
-}
-
-void defaultTerminationHandler(ht_fill_limit_crossed &) = void;
-
-
-
-
-
-trait heaped(dtype T) {
-    T * alloc( size_t );
-    void free( void * ); 
-};
-
-void __dynamic_defaultResumptionHandler(ht_fill_limit_crossed &);
-
-forall( otype Tt_unused ) {
-
-    struct hashtable_rbs_dynamic { 
-        inline hashtable_rbs(Tt_unused);
-
-        struct resize_policy {
-            // When fill factor exceeds grow limit, grow big enough for
-            // resulting fill factor to be lower than grow_target.  Vice versa.
-            // Using different grow and shrink limits prevents noisy current
-            // size from triggering grow-shrink oscillation.  OK to use same
-            // grow and shrink targets.
-            float grow_limit, shrink_limit, grow_target, shrink_target;
-
-            // warn with exception but do nothing, this many -1 times, then actually resize
-            unsigned short int warns_per_grow, warns_per_shrink;
-
-            // Don't shrink below.
-            size_t nbuckets_floor;
-        } policy;
-
-        dlist(request_in_ht_by_src, request) * (*alloc)( size_t );
-        void (*free)( void * ); 
-    };
-}
-
-// will be in list api
-void splice_all_to_last( dlist(request_in_ht_by_src, request) & src_to_empty, dlist(request_in_ht_by_src, request) & snk_to_fill_at_last ) {
-
-    // will re-implement as an actual splice
-    while ( & src_to_empty`first != 0p ) {
-        insert_last( snk_to_fill_at_last, pop_first( src_to_empty ) );
-    }
-}
-
-
-forall( otype Tt_unused | heaped( dlist(request_in_ht_by_src, request) ) ) {
-
-    void ?{}( hashtable_rbs_dynamic(Tt_unused).resize_policy & this, size_t nbuckets_floor ) {
-        printf("default dynamic policy ctor\n");
-
-        (this.grow_limit)      {2.0};
-        (this.shrink_limit)    {0.5};
-        (this.grow_target)     {1.0};
-        (this.shrink_target)   {1.0};
-        (this.warns_per_grow)  {4};
-        (this.warns_per_shrink){4};
-        (this.nbuckets_floor)  {nbuckets_floor};
-    }
-
-    void ?{}( hashtable_rbs_dynamic(Tt_unused) & this, size_t n_buckets, hashtable_rbs_dynamic(Tt_unused).resize_policy rp )  {
-        printf("ctor hashtable_rbs_dynamic{ size_t, resize_policy }\n");
-
-        float first_first_warn_up = rp.grow_target;
-        float ff_warn_step_factor = (rp.grow_limit / rp.grow_target) \ ( 1. / rp.warns_per_grow );
-
-        void (*defaultResumptionHandler) (ht_fill_limit_crossed &) = __dynamic_defaultResumptionHandler;
-        dlist(request_in_ht_by_src, request) *buckets = alloc(n_buckets);
-        ( ( hashtable_rbs( Tt_unused ) & ) this ){ n_buckets, buckets, first_first_warn_up, ff_warn_step_factor };
-        ( this.policy ){ rp };
-        this.alloc = alloc;
-        this.free = free;
-    }
-    void ?{}( hashtable_rbs_dynamic(Tt_unused) & this, hashtable_rbs_dynamic(Tt_unused).resize_policy rp )  {
-        printf("ctor hashtable_rbs_dynamic{ resize_policy }\n");
-        ( this ) { rp.nbuckets_floor, rp };
-    }
-    void ?{}( hashtable_rbs_dynamic(Tt_unused) & this, size_t n_buckets )  {
-        printf("ctor hashtable_rbs_dynamic{ size_t }\n");
-        ( this ) { n_buckets, (hashtable_rbs_dynamic(Tt_unused).resize_policy){ n_buckets } };
-    }
-    void ^?{}( hashtable_rbs_dynamic(Tt_unused) & this ) {
-        free(this.buckets);
-    }
-    void rehashToLarger( hashtable_rbs_dynamic(Tt_unused) & this, size_t new_n_buckets ) with(this) {
-        printf("resizing from %ld to %ld, old buckets at %p\n", n_buckets, new_n_buckets, buckets);
-
-        // collect hash items from old buckets
-        dlist(request_in_ht_by_src, request) items;
-        for (i; n_buckets) {
-            splice_all_to_last( buckets[i], items );
-        }
-
-        // make empty hash table of new size
-        dlist(request_in_ht_by_src, request) *oldBuckets = buckets;
-        float oldFfWarnStepFactor = ff_warn_step_factor;
-        float newFfNextWarnUp = ((float)item_count) / ((float) new_n_buckets);
-        ^?{}( (hashtable_rbs(Tt_unused) &)this );
-        free( oldBuckets );
-        ?{}( (hashtable_rbs(Tt_unused) &)this, new_n_buckets, alloc(new_n_buckets), newFfNextWarnUp, oldFfWarnStepFactor );
-
-        // fill new table with old items
-        while ( & items`first != 0p ) {
-            put( this, pop_first( items ) );
-        }
-    }
-}
-
-forall( otype Tt_unused ) {
-    void rehashToLarger_STEP( hashtable_rbs_dynamic(Tt_unused) & this, size_t new_n_buckets ) with (this) {
-        rehashToLarger( this, new_n_buckets );
-    }
-}
-
-void defaultResumptionHandler( ht_auto_resize_pending & ex ) {
-    hashtable_rbs_dynamic(t_unused) & ht = *(hashtable_rbs_dynamic(t_unused) *)ex.theHashtable;
-    printf("auto-resize unhandled: proceeding with resize\n");
-    rehashToLarger_STEP( ht, ex.new_size );
-}
-
-void __dynamic_defaultResumptionHandler(ht_fill_limit_crossed & ex) {
-    hashtable_rbs_dynamic(t_unused) & ht = *(hashtable_rbs_dynamic(t_unused) *)ex.theHashtable;
-    printf("dynamic warning received with fill_frac = %f and buckets at %p\n", fill_frac(ht), ht.buckets);
-    if ( fill_frac( ht ) >= ht.policy.grow_limit ) {
-        float grow_amount =  ht.policy.grow_limit / ht.policy.grow_target;
-        ex.want_throwResume_ht_auto_resize_pending = true;
-        ex.size_for_ht_auto_resize_pending = ( size_t )( grow_amount * ht.n_buckets );
-    } else {
-        // base handler, not specialized for dynamic
-        defaultResumptionHandler( ex );
-    }
-}
-
-
-
-
-
-
-#include <stdlib.hfa>
-
-void basicFillingTestHelper( hashtable_rbs(t_unused) & ht, size_t n_elems ) {
-
-    request & wasnt_found = get(ht, 17);
-    assert( &wasnt_found == 0p );
-
-    request r;
-    r.src_id = 117;
-    r.tgt_id = 998;
-
-    put(ht, r);
-
-    request & found = get(ht, 117);
-    assert( &found == &r );
-
-    & wasnt_found = & get(ht, 998);
-    assert( &wasnt_found == 0p );
-
-    request rs[n_elems];
-    for (i; n_elems) {
-        rs[i].src_id = 8000 * i;
-        put(ht, rs[i]);
-    }
-
-    assert(  & get(ht, 117      ) );
-    assert(  & get(ht, 8000*25  ) );
-    assert(! & get(ht, 8000*25+1) );
-}
-
-void basicFillingTest_static() {
-
-    printf("---start basic fill test static ----\n");
-
-    HASHTABLE_RBS_STATIC(67, ht)
-
-    basicFillingTestHelper(ht, 500);
-}
-
-void basicFillingTest_dynamic() {
-
-    dlist(request_in_ht_by_src, request) * (*old_alloc)( size_t ) = alloc;
-    dlist(request_in_ht_by_src, request) * alloc( size_t n ) {
-        dlist(request_in_ht_by_src, request) * ret = old_alloc(n);
-        printf("alloc'ed at %p\n", ret);
-        return ret;
-    }
-
-    void (*old_free)( void * ) = free;
-    void free( void * o ) {
-        printf("free'ing at %p\n", o);
-        old_free(o);
-    }
-
-    printf("---start basic fill test dynamic ----\n");
-
-    hashtable_rbs_dynamic(t_unused) ht = { 113 };
-
-    basicFillingTestHelper(ht, 500);
-}
-
-// Demonstrates user-provided instrumentation monitoring a fixed-size hash table
-void logTest() {
-
-    printf("---start log test ----\n");
-
-    HASHTABLE_RBS_STATIC(67, ht)
-
-    try {
-        basicFillingTestHelper(ht, 500);
-    } catchResume( ht_fill_limit_crossed * ) {
-        printf("log test instrumentation runs\n");
-        throwResume;
-    }
-}
-
-// Demonstrates "snoozing" a growing hash table's auto-resize event,
-// in that that next call to put will get the resize exception instead.
-void snoozeTest() {
-
-    printf("---start snooze test ----\n");
-
-    hashtable_rbs_dynamic(t_unused) ht = { 113 };
-
-    bool lastResizeSnoozed = false;
-
-    try {
-        basicFillingTestHelper(ht, 500);
-    } catchResume( ht_auto_resize_pending * ) {
-
-        if ( lastResizeSnoozed == false ) {
-            lastResizeSnoozed = true;
-            printf("snooze test intervention decides to snooze this time\n");
-        } else {
-            lastResizeSnoozed = false;
-            printf("snooze test intervention decides to allow the resize\n");
-            throwResume;
-        }
-
-    }
-}
-
-int main() {
-
-    basicFillingTest_static();
-    basicFillingTest_dynamic();
-
-    logTest();
-    snoozeTest();
-}
Index: amples/huge.c
===================================================================
--- examples/huge.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,26 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// huge.c -- 
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Mar  8 22:16:32 2016
-// Update Count     : 2
-//
-
-int huge( int n, forall( otype T ) T (*f)( T ) ) {
-	if ( n <= 0 )
-		return f( 0 );
-	else
-		return huge( n - 1, f( f ) );
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa huge.c" //
-// End: //
Index: amples/includes.c
===================================================================
--- examples/includes.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,248 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// includes.c -- 
-//
-// Author           : Peter A. Buhr
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Nov 15 23:06:24 2017
-// Update Count     : 597
-//
-
-// ***********************************************
-// USE -std=c99 WITH gxx TO GET SAME OUTPUT AS cfa
-// ***********************************************
-
-#ifdef __CFA__
-extern "C" {
-#endif // __CFA__
-
-#if 1
-#define _GNU_SOURCE
-#include <a.out.h>
-#include <aio.h>
-#include <aliases.h>
-#include <alloca.h>
-#include <ansidecl.h>
-#include <ar.h>
-#include <argp.h>
-#include <argz.h>
-#include <assert.h>
-//#include <bfd.h>
-// #include <bfdlink.h>				// keyword with
-#include <byteswap.h>
-#include <bzlib.h>
-#include <cblas.h>
-#include <cblas_f77.h>
-#include <complex.h>
-#include <com_err.h>
-#include <cpio.h>
-#include <crypt.h>
-#include <ctype.h>
-#include <curses.h>
-#include <dialog.h>
-#include <dirent.h>
-#include <dis-asm.h>
-#include <dlfcn.h>
-#include <dlg_colors.h>
-#include <dlg_config.h>
-#include <dlg_keys.h>
-#include <elf.h>
-#include <endian.h>
-#include <envz.h>
-#include <err.h>
-#include <errno.h>
-#include <error.h>
-#include <eti.h>
-#include <evdns.h>
-#include <event.h>
-
-// #include <evhttp.h>
-// #include <sys/queue.h>
-// #include <evrpc.h>					// evrpc.h depends on sys/queue.h
-// #include <evutil.h>
-// #include <execinfo.h>
-// #include <expat.h>
-// #include <expat_config.h>
-// #include <expat_external.h>
-// #include <fcntl.h>
-// #include <features.h>
-// #include <fenv.h>
-// #include <fmtmsg.h>
-// #include <fnmatch.h>
-// #include <form.h>
-// #include <fpu_control.h>
-// #include <fstab.h>
-// #include <fts.h>
-// #include <ftw.h>
-// #include <gconv.h>
-// #include <getopt.h>
-// #include <gettext-po.h>
-// #include <glob.h>
-// #include <gmp.h>
-// #include <gnu-versions.h>
-// #include <grp.h>
-// #include <gshadow.h>
-// #include <gssapi.h>
-// #include <hwloc.h>					// keyword thread (setjmp)
-// #include <iconv.h>
-// #include <idna.h>
-// #include <idn-free.h>
-// #include <idn-int.h>
-// #include <idn-int.h>
-// #include <ifaddrs.h>
-// #include <inttypes.h>
-// #include <jerror.h>
-
-//#include <jmorecfg.h>
-//#include <jpegint.h>
-// #include <jpeglib.h>
-// #include <kdb.h>
-// #include <krb5.h>					// keyword enable
-// #include <langinfo.h>
-// #include <lastlog.h>
-// #include <lber.h>
-// #include <lber_types.h>
-// #include <ldap.h>
-// #include <ldap_cdefs.h>
-// #include <ldap_features.h>
-// #include <ldap_schema.h>
-// #include <ldap_utf8.h>
-// #include <ldif.h>
-// #include <libgen.h>
-// #include <libintl.h>
-// #include <libio.h>
-// #include <libtasn1.h>
-// #include <libudev.h>
-// #include <limits.h>
-// #include <link.h>
-// #include <locale.h>
-// #include <ltdl.h>
-// #include <lzma.h>
-// #include <malloc.h>
-// #include <math.h>
-// #include <mcheck.h>
-// #include <memory.h>
-// #include <menu.h>
-// #include <mntent.h>
-// #include <monetary.h>
-// #include <mqueue.h>
-// #include <ncurses.h>
-// #include <ncurses_dll.h>
-// #include <nc_tparm.h>
-// #include <netdb.h>
-// #include <nl_types.h>
-// #include <nss.h>
-// #include <numa.h>
-// #include <numacompat1.h>
-// #include <numaif.h>
-// #include <obstack.h>
-// #include <panel.h>
-// #include <paths.h>
-// #include <pciaccess.h>
-// #include <pcre.h>
-// //#include <pcreposix.h>			// conflicts with regex.h
-// #include <plugin-api.h>
-// #include <png.h>										// setjmp
-// #include <pngconf.h>									// setjmp
-// #include <poll.h>
-// #include <pr29.h>
-// #include <printf.h>
-// #include <profile.h>
-// #include <pthread.h>									// setjmp
-// #include <pty.h>
-// #include <punycode.h>
-// #include <pwd.h>
-// #define INIT ;						// needed for regex.h
-// #define GETC() 'a'
-// #define PEEKC() 'a'
-// #define UNGETC( c ) ;
-// #define RETURN( ptr ) ;
-// #define ERROR( val ) ;
-// #include <regex.h>
-// //#include <regexp.h>				// GNU C Library no longer implements
-// #include <resolv.h>
-// #include <re_comp.h>
-// #include <sched.h>
-// #include <search.h>
-// #include <semaphore.h>
-// #include <setjmp.h>
-// #include <sgtty.h>
-// #include <shadow.h>
-// #include <signal.h>
-// #include <spawn.h>
-// #include <stab.h>
-// #include <stdatomic.h>
-// #include <stdarg.h>
-// #include <stdbool.h>
-// #include <stdint.h>
-// #include <stddef.h>
-// #include <stdio.h>
-// #include <stdio_ext.h>
-// #include <stdlib.h>
-// #include <string.h>
-// #include <stringprep.h>
-// #include <strings.h>
-// #include <stropts.h>
-// #include <sudo_plugin.h>
-// #include <symcat.h>
-// #include <syscall.h>
-// #include <sysexits.h>
-// #include <syslog.h>
-// #include <tar.h>
-// #include <term.h>
-// #include <termcap.h>
-// #include <termio.h>
-// #include <termios.h>
-// //#include <term_entry.h>
-// #include <tgmath.h>
-// #include <thread_db.h>			// CFA bug
-// #include <tic.h>
-// #include <time.h>
-// #include <tld.h>
-// #include <ttyent.h>
-// #include <turbojpeg.h>
-// #include <ucontext.h>
-// #include <ulimit.h>
-// #include <unctrl.h>
-// #include <unistd.h>
-// #include <ustat.h>
-// #include <utime.h>
-// #include <utmp.h>
-// #include <utmpx.h>
-// #include <wait.h>
-// #include <wchar.h>
-// #include <wctype.h>
-// #include <wordexp.h>
-// #include <xlocale.h>
-// #include <values.h>
-// #include <zconf.h>
-// #include <zlib.h>
-// #include <_G_config.h>
-
-// #include <jpeglib.h>				// after stdlib.h/stdio.h
-// #include <jpegint.h>
-// #include <jmorecfg.h>
-#if 0
-#endif // 0
-
-#else
-
-#define _GNU_SOURCE
-
-#include <setjmp.h>
-
-#endif // 0
-
-#ifdef __CFA__
-} // extern "C"
-#endif // __CFA__
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa includes.c" //
-// End: //
Index: amples/index.h
===================================================================
--- examples/index.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,25 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// index.h -- 
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Mar  2 18:10:46 2016
-// Update Count     : 2
-//
-
-trait index( type T ) {
-	T ?+?( T, T );
-	T ?-?( T, T );
-	const T 0, 1;
-};
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa index.c" //
-// End: //
Index: amples/io/cat.c
===================================================================
--- examples/io/cat.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,78 +1,0 @@
-/*
-This is a simple "cat" example that uses io_uring in IORING_SETUP_IOPOLL mode.
-It demonstrates the bare minimum needed to use io_uring in polling mode.
-It uses liburing for simplicity.
-*/
-
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
-#include <fcntl.h>
-#include <liburing.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-struct io_uring ring;
-
-__attribute__((aligned(1024))) char data[1024];
-
-int main(int argc,  char * argv[]) {
-      if(argc != 2) {
-            printf("usage:   %s FILE - prints file to console.\n", argv[0]);
-            return 1;
-      }
-
-      int fd = open(argv[1], O_DIRECT);
-      if(fd < 0) {
-            printf("Could not open file %s.\n", argv[1]);
-            return 2;
-      }
-
-      /* prep the array */
-      struct iovec iov = { data, 1024 };
-
-      /* init liburing */
-      io_uring_queue_init(256, &ring, IORING_SETUP_IOPOLL);
-
-      /* declare required structs */
-      struct io_uring_sqe * sqe;
-      struct io_uring_cqe * cqe;
-
-      /* get an sqe and fill in a READV operation */
-      sqe = io_uring_get_sqe(&ring);
-      io_uring_prep_readv(sqe, fd, &iov, 1, 0);
-      // io_uring_prep_read(sqe, fd, data, 1024, 0);
-
-      sqe->user_data = (uint64_t)(uintptr_t)data;
-
-      /* tell the kernel we have an sqe ready for consumption */
-      io_uring_submit(&ring);
-
-      /* wait for the sqe to complete */
-      int ret = io_uring_wait_cqe(&ring, &cqe);
-
-      /* read and process cqe event */
-      if(ret == 0) {
-            char * out = (char *)(uintptr_t)cqe->user_data;
-            signed int len = cqe->res;
-            io_uring_cqe_seen(&ring, cqe);
-
-            if(len > 0) {
-                  printf("%.*s", len, out);
-            }
-            else if( len < 0 ) {
-                  fprintf(stderr, "readv/read returned error : %s\n", strerror(-len));
-            }
-      }
-      else {
-            printf("%d\n", ret);
-            io_uring_cqe_seen(&ring, cqe);
-      }
-
-      io_uring_queue_exit(&ring);
-
-      close(fd);
-}
Index: amples/io/filereader.c
===================================================================
--- examples/io/filereader.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,122 +1,0 @@
-/*
-This is a file reading example that users io_uring in non-blocking mode.
-It demonstrates the bare minimum needed to use io_uring.
-It also optionally pre-registers the file descriptors (and a pipe, just to show it works).
-It uses liburing for simplicity.
-*/
-
-
-#include <errno.h>
-#include <fcntl.h>
-#include <liburing.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-int main(int argc, char * argv[]) {
-	if(argc != 3 && argc != 4) {
-            printf("usage:   %s FILE TIMES [fixed] - read FILE from disk TIMES times\n", argv[0]);
-            return EXIT_FAILURE;
-      }
-
-	bool fixed = false;
-	if(argc == 4) {
-		fixed = 0 == strcmp(argv[3], "fixed");
-	}
-
-      int times = atoi( argv[2] );
-      if(times <= 0) {
-            printf("Invalid number of times %d (from %s).\n", times, argv[2]);
-            return EXIT_FAILURE;
-      }
-
-      int fd = open(argv[1], 0);
-      if(fd < 0) {
-            printf("Could not open file %s.\n", argv[1]);
-            return EXIT_FAILURE;
-      }
-
-	int rfd = fd;
-
-	/* prep the array */
-      char data[100];
-      struct iovec iov = { data, 100 };
-
-	/* init liburing */
-	struct io_uring ring;
-      io_uring_queue_init(256, &ring, 0);
-
-	int pipefds[2];
-	if(fixed) {
-		int ret = pipe(pipefds);
-		if( ret < 0 ) {
-			printf("Pipe Error : %s\n", strerror( errno ));
-			return EXIT_FAILURE;
-		}
-		rfd = 0;
-		int fds[] = {
-			fd, pipefds[0], pipefds[1]
-		};
-		int cnt = sizeof(fds) / sizeof(fds[0]);
-		printf("Registering %d files as fixed\n", cnt);
-		ret = io_uring_register_files(&ring, fds, cnt);
-		if( ret < 0 ) {
-			printf("Register Error : %s\n", strerror( -ret ));
-			return EXIT_FAILURE;
-		}
-	}
-
-      /* declare required structs */
-	printf("Reading %s(%d) %d times\n", argv[1], fd, times);
-	size_t counter = 0;
-	for(int i = 0; i < times; i++) {
-		/* get an sqe and fill in a READV operation */
-	      struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
-		io_uring_prep_readv(sqe, rfd, &iov, 1, 0);
-		if(fixed) {
-			sqe->flags = IOSQE_FIXED_FILE;
-		}
-
-		/* tell the kernel we have an sqe ready for consumption */
-      	io_uring_submit(&ring);
-
-		/* poll the cq and count how much polling we did */
-		while(true) {
-			struct io_uring_cqe * cqe = NULL;
-			/* wait for the sqe to complete */
-			int ret = io_uring_wait_cqe_nr(&ring, &cqe, 0);
-
-			/* read and process cqe event */
-			switch(ret) {
-			case 0:
-				if( cqe->res < 0 ) {
-					printf("Completion Error : %s\n", strerror( -cqe->res ));
-					return EXIT_FAILURE;
-				}
-				io_uring_cqe_seen(&ring, cqe);
-				goto LOOP;
-			case -EAGAIN:
-				counter++;
-				break;
-			default:
-				printf("Wait Error : %s\n", strerror( -ret ));
-				return EXIT_FAILURE;
-			}
-		}
-
-		LOOP:;
-	}
-
-	printf("%zu\n", counter);
-
-      io_uring_queue_exit(&ring);
-
-      close(fd);
-
-	if(fixed) {
-		close(pipefds[0]);
-		close(pipefds[1]);
-	}
-}
Index: amples/io/simple/client.c
===================================================================
--- examples/io/simple/client.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,85 +1,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <netdb.h>
-#include <unistd.h>
-
-#include <sys/types.h>
-#include <sys/socket.h>
-
-int main(int argc, char * argv[]) {
-      if(argc != 2) {
-            printf("usage:    %s portnumber\n", argv[0]);
-            exit( EXIT_FAILURE );
-      }
-      int port = atoi(argv[1]);
-      if(port < 1) {
-            printf("Invalid port : %d (from %s)\n", port, argv[1]);
-            exit( EXIT_FAILURE );
-      }
-
-      int sock = socket(AF_INET, SOCK_STREAM, 0);
-      if(sock < 0) {
-            perror( "socket" );
-            exit( EXIT_FAILURE );
-      }
-
-      struct hostent * server = gethostbyname("localhost");
-      if(server == NULL) {
-            perror("localhost not found");
-            exit( EXIT_FAILURE );
-      }
-
-      struct sockaddr_in serv_addr;
-      memset(&serv_addr, 0, sizeof(serv_addr));
-      serv_addr.sin_family = AF_INET;
-      memcpy(&serv_addr.sin_addr.s_addr, server->h_addr, server->h_length);
-      serv_addr.sin_port = htons(port);
-
-      int ret = connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr));
-      if(ret < 0) {
-            perror( "connect" );
-            exit( EXIT_FAILURE );
-      }
-
-      char buffer[256];
-      struct iovec iov = { buffer, 0 };
-      struct msghdr msg;
-      msg.msg_name = NULL;
-      msg.msg_namelen = 0;
-      msg.msg_control = NULL;
-      msg.msg_controllen = 0;
-      msg.msg_iov = &iov;
-      msg.msg_iovlen = 1;
-      msg.msg_flags = 0;
-
-
-      int rd;
-      while(0 != (rd = read(STDIN_FILENO, buffer, 256))) {
-            if(rd < 0) {
-                  perror( "read" );
-                  exit( EXIT_FAILURE );
-            }
-
-            iov.iov_len = rd;
-            int sent = sendmsg(sock, &msg, 0);
-            if( sent < 0 ) {
-                  perror( "read" );
-                  exit( EXIT_FAILURE );
-            }
-
-            if(sent != rd) {
-                  printf("Expected to send %d bytes, sent %d\n", rd, sent);
-                  exit( EXIT_FAILURE );
-            }
-      }
-
-      ret = close(sock);
-      if(ret < 0) {
-            perror( "close" );
-            exit( EXIT_FAILURE );
-      }
-
-      exit( EXIT_SUCCESS );
-}
Index: amples/io/simple/server.c
===================================================================
--- examples/io/simple/server.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,143 +1,0 @@
-/*
-This is a simple server that users io_uring in blocking mode.
-It demonstrates the bare minimum needed to use io_uring.
-It uses liburing for simplicity.
-*/
-
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <unistd.h>
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include <liburing.h>
-
-struct io_uring ring;
-
-char data[256];
-struct iovec iov = { data, 256 };
-struct msghdr msg = { (void *)"", 0, &iov, 1, NULL, 0, 0 };
-static void async_read(int sock) {
-	/* get an sqe and fill in a READ operation */
-      struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
-      io_uring_prep_recvmsg(sqe, sock, &msg, 0);
-      sqe->user_data = 0;
-
-      /* tell the kernel we have an sqe ready for consumption */
-      int ret = io_uring_submit(&ring);
-      assert(ret == 1);
-}
-
-int main(int argc, char *argv[]) {
-	if(argc != 2) {
-            printf("usage:    %s portnumber\n", argv[0]);
-            exit( EXIT_FAILURE );
-      }
-      int port = atoi(argv[1]);
-      if(port < 1) {
-            printf("Invalid port : %d (from %s)\n", port, argv[1]);
-            exit( EXIT_FAILURE );
-      }
-
-	int sock = socket(AF_INET, SOCK_STREAM, 0);
-	if(sock < 0) {
-		perror( "socket" );
-		exit( EXIT_FAILURE );
-	}
-
-	struct sockaddr_in serv_addr;
-      memset(&serv_addr, 0, sizeof(serv_addr));
-      serv_addr.sin_family = AF_INET;
-      serv_addr.sin_addr.s_addr = INADDR_ANY;
-      serv_addr.sin_port = htons(port);
-
-	int ret = bind(sock, (struct sockaddr *) &serv_addr, sizeof(serv_addr));
-	if(ret < 0) {
-		perror( "bind" );
-		exit( EXIT_FAILURE );
-	}
-
-
-     	listen(sock,1);
-
-	struct sockaddr_in cli_addr;
-     	__socklen_t clilen = sizeof(cli_addr);
-	int newsock = accept(sock, (struct sockaddr *) &cli_addr, &clilen);
-     	if (newsock < 0) {
-		perror( "accept" );
-		exit( EXIT_FAILURE );
-	}
-
-	io_uring_queue_init( 16, &ring, 0 );
-
-	async_read( newsock );
-
-	while(1) {
-		struct io_uring_cqe * cqe;
-		struct __kernel_timespec ts = { 2, 0 };
-		// int ret = io_uring_wait_cqes( &ring, &cqe, 1, &ts, NULL); // Requires Linux 5.4
-		int ret = io_uring_wait_cqe( &ring, &cqe );
-
-		if( ret < 0 ) {
-                  printf( "Main Loop Error : %s\n", strerror(-ret) );
-			close( sock );
-                  exit( EXIT_FAILURE );
-            }
-
-		switch(cqe->user_data) {
-                  // Read completed
-                  case 0:
-                        // If it is the end of file we are done
-                        if( cqe->res == 0 ) {
-                              goto END;
-                        }
-
-				if( cqe->res < 0 ) {
-					perror( "Main Loop Error" );
-					close( sock );
-					exit( EXIT_FAILURE );
-				}
-
-				printf("'%.*s'\n", cqe->res, data);
-
-				async_read( newsock );
-
-                        // otherwise prepare a new read
-                        break;
-                  // Wait timed out, time to print
-			// Requires Linux 5.4
-                  case LIBURING_UDATA_TIMEOUT:
-                  	printf(".");
-                        break;
-                  // Problem
-                  default:
-                        printf("Unexpected user data : %llu", cqe->user_data);
-                        exit( EXIT_FAILURE );
-            }
-
-     		io_uring_cqe_seen( &ring, cqe );
-	}
-END:
-
-	io_uring_queue_exit( &ring );
-
-	ret = close(newsock);
-      if(ret < 0) {
-            perror( "close new" );
-            exit( EXIT_FAILURE );
-      }
-
-	ret = close(sock);
-      if(ret < 0) {
-            perror( "close old" );
-            exit( EXIT_FAILURE );
-      }
-
-	return 0;
-}
Index: amples/io/simple/server.cfa
===================================================================
--- examples/io/simple/server.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,137 +1,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <unistd.h>
-
-extern "C" {
-	#include <sys/types.h>
-	#include <sys/socket.h>
-	#include <netinet/in.h>
-}
-
-#include <time.hfa>
-#include <thread.hfa>
-
-//----------
-monitor Printer {};
-
-void heartbeat( Printer & mutex ) {
-	fprintf(stderr, ".");
-}
-
-void message( Printer & mutex, char * msg, size_t len ) {
-	fprintf(stderr, "'%.*s'", len, msg);
-}
-
-void status( Printer & mutex, const char * st ) {
-	fprintf(stderr, "%s\n", st);
-}
-
-void error( Printer & mutex, const char * msg, int error) {
-	fprintf(stderr, "%s - %s\n", msg, strerror(error));
-}
-
-Printer printer;
-
-//----------
-thread HeartBeat {};
-
-void ^?{}( HeartBeat & mutex ) {}
-
-void main( HeartBeat & this ) {
-	while(true) {
-		waitfor( ^?{} : this ) { break; }
-		or else{
-			sleep( 5`s );
-			heartbeat( printer );
-		}
-	}
-}
-
-//----------
-extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags);
-extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags);
-extern int cfa_close(int fd);
-
-//----------
-thread Server { int port; };
-void main( Server & this ) {
-	char data[256];
-	struct iovec iov = { data, 256 };
-	struct msghdr msg = { "", 0, &iov, 1, NULL, 0, 0 };
-
-	int sock = socket(AF_INET, SOCK_STREAM, 0);
-	if(sock < 0) {
-		error( printer, "socket", -sock);
-		exit( EXIT_FAILURE );
-	}
-
-	status( printer, "Socket created" );
-
-	struct sockaddr_in serv_addr;
-      memset(&serv_addr, 0, sizeof(serv_addr));
-      serv_addr.sin_family = AF_INET;
-      serv_addr.sin_addr.s_addr = INADDR_ANY;
-      serv_addr.sin_port = htons(this.port);
-
-	int ret = bind(sock, (struct sockaddr *) &serv_addr, sizeof(serv_addr));
-	if(ret < 0) {
-		error( printer, "bind", -ret);
-		exit( EXIT_FAILURE );
-	}
-
-	status( printer, "Socket bound" );
-
-     	listen(sock,1);
-
-	struct sockaddr_in cli_addr;
-     	__socklen_t clilen = sizeof(cli_addr);
-	int newsock = cfa_accept4(sock, (struct sockaddr *) &cli_addr, &clilen, 0);
-     	if (newsock < 0) {
-		error( printer, "accept", -newsock);
-		exit( EXIT_FAILURE );
-	}
-
-	status( printer, "Socket accepted, looping" );
-
-	while(1) {
-		int res = cfa_recvmsg(newsock, &msg, 0);
-		if(res == 0) break;
-		if(res < 0) {
-			error( printer, "recvmsg", -res);
-			exit( EXIT_FAILURE );
-		}
-
-		message(printer, data, res);
-	}
-
-	ret = cfa_close(newsock);
-      if(ret < 0) {
-            error( printer, "close new", -ret);
-            exit( EXIT_FAILURE );
-      }
-
-	ret = cfa_close(sock);
-      if(ret < 0) {
-            error( printer, "close old", -ret);
-            exit( EXIT_FAILURE );
-      }
-}
-
-//----------
-int main(int argc, char * argv []) {
-	if(argc != 2) {
-            printf("usage:    %s portnumber\n", argv[0]);
-            exit( EXIT_FAILURE );
-      }
-      int port = atoi(argv[1]);
-      if(port < 1) {
-            printf("Invalid port : %d (from %s)\n", port, argv[1]);
-            exit( EXIT_FAILURE );
-      }
-
-	HeartBeat heartbeat;
-	Server server = { port };
-	// while(true);
-}
Index: amples/io/simple/server_epoll.c
===================================================================
--- examples/io/simple/server_epoll.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,177 +1,0 @@
-/*
-Similar to the server in servier.c, this is a simple server
-that instead uses epoll to block.
-It opens the door to have several polling user-thread per cluster.
-It uses liburing for simplicity.
-*/
-
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <unistd.h>
-
-#include <sys/epoll.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include <liburing.h>
-
-#define MAX_EVENTS 10
-struct epoll_event ev, events[MAX_EVENTS];
-
-struct io_uring ring;
-
-char data[256];
-struct iovec iov = { data, 256 };
-struct msghdr msg = { (void *)"", 0, &iov, 1, NULL, 0, 0 };
-static void async_read(int sock) {
-	/* get an sqe and fill in a READ operation */
-      struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
-      io_uring_prep_recvmsg(sqe, sock, &msg, 0);
-      sqe->user_data = 0;
-
-      /* tell the kernel we have an sqe ready for consumption */
-      int ret = io_uring_submit(&ring);
-      assert(ret == 1);
-}
-
-int main(int argc, char *argv[]) {
-	if(argc != 2) {
-            printf("usage:    %s portnumber\n", argv[0]);
-            exit( EXIT_FAILURE );
-      }
-      int port = atoi(argv[1]);
-      if(port < 1) {
-            printf("Invalid port : %d (from %s)\n", port, argv[1]);
-            exit( EXIT_FAILURE );
-      }
-
-	int sock = socket(AF_INET, SOCK_STREAM, 0);
-	if(sock < 0) {
-		perror( "socket" );
-		exit( EXIT_FAILURE );
-	}
-
-	struct sockaddr_in serv_addr;
-      memset(&serv_addr, 0, sizeof(serv_addr));
-      serv_addr.sin_family = AF_INET;
-      serv_addr.sin_addr.s_addr = INADDR_ANY;
-      serv_addr.sin_port = htons(port);
-
-	int ret = bind(sock, (struct sockaddr *) &serv_addr, sizeof(serv_addr));
-	if(ret < 0) {
-		perror( "bind" );
-		exit( EXIT_FAILURE );
-	}
-
-
-     	listen(sock,1);
-
-	struct sockaddr_in cli_addr;
-     	__socklen_t clilen = sizeof(cli_addr);
-	int newsock = accept(sock, (struct sockaddr *) &cli_addr, &clilen);
-     	if (newsock < 0) {
-		perror( "accept" );
-		exit( EXIT_FAILURE );
-	}
-
-	io_uring_queue_init( 16, &ring, 0 );
-
-      int epollfd = epoll_create1(0);
-      if (epollfd == -1) {
-            perror("epoll_create1");
-            exit(EXIT_FAILURE);
-      }
-
-      ev.events = EPOLLIN | EPOLLONESHOT;
-      ev.data.u64 = (uint64_t)&ring;
-      if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ring.ring_fd, &ev) == -1) {
-            perror("epoll_ctl: first");
-            exit(EXIT_FAILURE);
-      }
-
-
-	async_read( newsock );
-
-	while(1) {
-            BLOCK:
-            int nfds = epoll_wait(epollfd, events, MAX_EVENTS, -1);
-            if (nfds == -1) {
-                  perror("epoll_wait");
-                  exit(EXIT_FAILURE);
-            }
-
-
-		while(1) {
-                  struct io_uring_cqe * cqe;
-                  int ret = io_uring_peek_cqe( &ring, &cqe );
-
-                  if( ret < 0 ) {
-                        if(-ret == EAGAIN) {
-                              if (epoll_ctl(epollfd, EPOLL_CTL_MOD, ring.ring_fd, &ev) == -1) {
-                                    perror("epoll_ctl: loop");
-                                    exit(EXIT_FAILURE);
-                              }
-                              goto BLOCK;
-                        }
-                        printf( "Main Loop Error : %s\n", strerror(-ret) );
-                        close( sock );
-                        exit( EXIT_FAILURE );
-                  }
-
-                  switch(cqe->user_data) {
-                        // Read completed
-                        case 0:
-                              // If it is the end of file we are done
-                              if( cqe->res == 0 ) {
-                                    goto END;
-                              }
-
-                              if( cqe->res < 0 ) {
-                                    perror( "Main Loop Error" );
-                                    close( sock );
-                                    exit( EXIT_FAILURE );
-                              }
-
-                              printf("'%.*s'\n", cqe->res, data);
-
-                              async_read( newsock );
-
-                              // otherwise prepare a new read
-                              break;
-                        // Wait timed out, time to print
-                        // Requires Linux 5.4
-                        case LIBURING_UDATA_TIMEOUT:
-                              printf(".");
-                              break;
-                        // Problem
-                        default:
-                              printf("Unexpected user data : %llu", cqe->user_data);
-                              exit( EXIT_FAILURE );
-                  }
-
-                  io_uring_cqe_seen( &ring, cqe );
-            }
-	}
-END:
-
-	io_uring_queue_exit( &ring );
-
-	ret = close(newsock);
-      if(ret < 0) {
-            perror( "close new" );
-            exit( EXIT_FAILURE );
-      }
-
-	ret = close(sock);
-      if(ret < 0) {
-            perror( "close old" );
-            exit( EXIT_FAILURE );
-      }
-
-	return 0;
-}
Index: amples/io_uring.txt
===================================================================
--- examples/io_uring.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,1 +1,0 @@
-Hello World!
Index: amples/it_out.c
===================================================================
--- examples/it_out.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,69 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// it_out.c -- 
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Mar  8 22:14:39 2016
-// Update Count     : 8
-//
-
-typedef unsigned long streamsize_type;
-
-trait ostream( dtype os_type ) {
-	os_type *write( os_type *, const char *, streamsize_type );
-	int fail( os_type * );
-};
-
-trait writeable( otype T ) {
-	forall( dtype os_type | ostream( os_type ) ) os_type * ?<<?( os_type *, T );
-};
-
-forall( dtype os_type | ostream( os_type ) ) os_type * ?<<?( os_type *, char );
-forall( dtype os_type | ostream( os_type ) ) os_type * ?<<?( os_type *, int );
-forall( dtype os_type | ostream( os_type ) ) os_type * ?<<?( os_type *, const char * );
-
-trait istream( dtype is_type ) {
-	is_type *read( is_type *, char *, streamsize_type );
-	is_type *unread( is_type *, char );
-	int fail( is_type * );
-	int eof( is_type * );
-};
-
-trait readable( otype T ) {
-	forall( dtype is_type | istream( is_type ) ) is_type * ?<<?( is_type *, T );
-};
-
-forall( dtype is_type | istream( is_type ) ) is_type * ?>>?( is_type *, char* );
-forall( dtype is_type | istream( is_type ) ) is_type * ?>>?( is_type *, int* );
-
-trait iterator( otype iterator_type, otype elt_type ) {
-	iterator_type ?++( iterator_type* );
-	iterator_type ++?( iterator_type* );
-	int ?==?( iterator_type, iterator_type );
-	int ?!=?( iterator_type, iterator_type );
-
-	lvalue elt_type *?( iterator_type );
-};
-
-forall( otype elt_type | writeable( elt_type ),
-		otype iterator_type | iterator( iterator_type, elt_type ),
-		dtype os_type | ostream( os_type ) )
-void write_all( iterator_type begin, iterator_type end, os_type *os );
-
-forall( otype elt_type | writeable( elt_type ),
-		otype iterator_type | iterator( iterator_type, elt_type ),
-		dtype os_type | ostream( os_type ) )
-void write_all( elt_type begin, iterator_type end, os_type *os ) {
-	os << begin;
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa it_out.c" //
-// End: //
Index: amples/multicore.c
===================================================================
--- examples/multicore.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,25 +1,0 @@
-#include <kernel.hfa>
-#include <thread.hfa>
-
-struct MyThread { thread_desc __thrd; };
-
-DECL_THREAD(MyThread);
-
-void ?{}( MyThread * this ) {}
-
-void main( MyThread* this ) {
-	for(int i = 0; i < 1000000; i++) {
-		yield();
-	}
-}
-
-int main(int argc, char* argv[]) {
-	// sout | "User main begin";
-	{
-		processor p;
-		{
-			scoped(MyThread) f[4];
-		}
-	}
-	// sout | "User main end";
-}
Index: amples/new.c
===================================================================
--- examples/new.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,32 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// new.c -- 
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Mar  8 22:13:20 2016
-// Update Count     : 4
-//
-
-forall( otype T )
-void f( T *t ) {
-	t--;
-	*t;
-	++t;
-	t += 2;
-	t + 2;
-	--t;
-	t -= 2;
-	t - 4;
-	t[7];
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa new.c" //
-// End: //
Index: amples/poly-bench.c
===================================================================
--- examples/poly-bench.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,207 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// poly-bench.cc -- 
-//
-// Author           : Aaron Moss
-// Created On       : Sat May 16 07:26:30 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Wed May 27 18:25:19 2015
-// Update Count     : 5
-//
-
-extern "C" {
-#include <stdio.h>
-//#include "my_time.h"
-}
-
-#define N 200000000
-
-struct ipoint {
-	int x;
-	int y;
-};
-
-struct ipoint ?+?(struct ipoint a, struct ipoint b) {
-	struct ipoint r;
-	r.x = a.x + b.x;
-	r.y = a.y + b.y;
-	return r;
-}
-
-struct ipoint ?-?(struct ipoint a, struct ipoint b) {
-	struct ipoint r;
-	r.x = a.x - b.x;
-	r.y = a.y - b.y;
-	return r;
-}
-
-struct ipoint ?*?(struct ipoint a, struct ipoint b) {
-	struct ipoint r;
-	r.x = a.x * b.x;
-	r.y = a.y * b.y;
-	return r;
-}
-
-struct dpoint {
-	double x;
-	double y;
-};
-
-struct dpoint ?+?(struct dpoint a, struct dpoint b) {
-	struct dpoint r;
-	r.x = a.x + b.x;
-	r.y = a.y + b.y;
-	return r;
-}
-
-struct dpoint ?-?(struct dpoint a, struct dpoint b) {
-	struct dpoint r;
-	r.x = a.x - b.x;
-	r.y = a.y - b.y;
-	return r;
-}
-
-struct dpoint ?*?(struct dpoint a, struct dpoint b) {
-	struct dpoint r;
-	r.x = a.x * b.x;
-	r.y = a.y * b.y;
-	return r;
-}
-
-int a2b2_mono_int(int a, int b) {
-	return (a - b)*(a + b);
-}
-
-double a2b2_mono_double(double a, double b) {
-	return (a - b)*(a + b);
-}
-
-struct ipoint a2b2_mono_ipoint(struct ipoint a, struct ipoint b) {
-	return (a - b)*(a + b);
-}
-
-struct dpoint a2b2_mono_dpoint(struct dpoint a, struct dpoint b) {
-	return (a - b)*(a + b);
-}
-
-forall(type T | { T ?+?(T,T); T ?-?(T,T); T ?*?(T,T); })
-T a2b2_poly(T a, T b) {
-	return (a - b)*(a + b);
-}
-
-typedef int clock_t;
-long ms_between(clock_t start, clock_t end) {
-//	return (end - start) / (CLOCKS_PER_SEC / 1000);
-	return 0;
-}
-int clock() { return 3; }
-
-int main(int argc, char** argv) {
-	clock_t start, end;
-	int i;
-	
-	int a, b;
-	double c, d;
-	struct ipoint p, q;
-	struct dpoint r, s;
-	
-	printf("\n## a^2-b^2 ##\n");
-	
-	a = 5, b = 3;
-	start = clock();
-	for (i = 0; i < N/2; ++i) {
-		a = a2b2_mono_int(a, b);
-		b = a2b2_mono_int(b, a);
-	}
-	end = clock();
-	printf("mono_int:   %7ld  [%d,%d]\n", ms_between(start, end), a, b);
-	
-	a = 5, b = 3;
-	start = clock();
-	for (i = 0; i < N/2; ++i) {
-		a = a2b2_poly(a, b);
-		b = a2b2_poly(b, a);
-	}
-	end = clock();
-	printf("poly_int:   %7ld  [%d,%d]\n", ms_between(start, end), a, b);
-	
-/*	{
-	a = 5, b = 3;
-	// below doesn't actually work; a2b2_poly isn't actually assigned, just declared
-	* [int] (int, int) a2b2_poly = a2b2_mono_int;
-	start = clock();
-	for (i = 0; i < N/2; ++i) {
-//			printf("\t[%d,%d]\n", a, b);
-a = a2b2_poly(a, b);
-//			printf("\t[%d,%d]\n", a, b);
-b = a2b2_poly(b, a);
-}
-end = clock();
-printf("spec_int:   %7ld  [%d,%d]\n", ms_between(start, end), a, b);
-}
-*/	
-	c = 5.0, d = 3.0;
-	start = clock();
-	for (i = 0; i < N/2; ++i) {
-		c = a2b2_mono_double(c, d);
-		d = a2b2_mono_double(d, c);
-	}
-	end = clock();
-	printf("mono_double:%7ld  [%f,%f]\n", ms_between(start, end), c, d);
-		
-	c = 5.0, d = 3.0;
-	start = clock();
-	for (i = 0; i < N/2; ++i) {
-		c = a2b2_poly(c, d);
-		d = a2b2_poly(d, c);
-	}
-	end = clock();
-	printf("poly_double:%7ld  [%f,%f]\n", ms_between(start, end), c, d);
-	
-	p.x = 5, p.y = 5, q.x = 3, q.y = 3;
-	start = clock();
-	for (i = 0; i < N/2; ++i) {
-		p = a2b2_mono_ipoint(p, q);
-		q = a2b2_mono_ipoint(q, p);
-	}
-	end = clock();
-	printf("mono_ipoint:%7ld  [(%d,%d),(%d,%d)]\n", ms_between(start, end), p.x, p.y, q.x, q.y);
-		
-	p.x = 5, p.y = 5, q.x = 3, q.y = 3;
-	start = clock();
-	for (i = 0; i < N/2; ++i) {
-		p = a2b2_poly(p, q);
-		q = a2b2_poly(q, p);
-	}
-	end = clock();
-	printf("poly_ipoint:%7ld  [(%d,%d),(%d,%d)]\n", ms_between(start, end), p.x, p.y, q.x, q.y);
-	
-	r.x = 5.0, r.y = 5.0, s.x = 3.0, s.y = 3.0;
-	start = clock();
-	for (i = 0; i < N/2; ++i) {
-		r = a2b2_mono_dpoint(r, s);
-		s = a2b2_mono_dpoint(s, r);
-	}
-	end = clock();
-	printf("mono_dpoint:%7ld  [(%f,%f),(%f,%f)]\n", ms_between(start, end), r.x, r.y, s.x, s.y);
-		
-	r.x = 5.0, r.y = 5.0, s.x = 3.0, s.y = 3.0;
-	start = clock();
-	for (i = 0; i < N/2; ++i) {
-		r = a2b2_poly(r, s);
-		s = a2b2_poly(s, r);
-	}
-	end = clock();
-	printf("poly_dpoint:%7ld  [(%f,%f),(%f,%f)]\n", ms_between(start, end), r.x, r.y, s.x, s.y);
-
-	return 0;
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa poly-bench.c" //
-// End: //
Index: amples/prolog.c
===================================================================
--- examples/prolog.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,50 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// prolog.c --
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Dec 11 23:27:19 2018
-// Update Count     : 6
-//
-
-#include <fstream.hfa>
-
-void printResult( int x ) { sout | "int"; }
-void printResult( double x ) { sout | "double"; }
-void printResult( char * x ) { sout | "char*"; }
-
-void is_arithmetic( int x ) {}
-void is_arithmetic( double x ) {}
-
-void is_integer( int x ) {}
-
-trait ArithmeticType( otype T ) {
-	void is_arithmetic( T );
-};
-
-trait IntegralType( otype T | ArithmeticType( T ) ) {
-	void is_integer( T );
-};
-
-forall( otype T | IntegralType( T ) | { void printResult( T ); } )
-void hornclause( T param ) {
-	printResult( param );
-}
-
-int main() {
-	int x;
-	double x;
-	char * x;
-	hornclause( x );
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa prolog.c" //
-// End: //
Index: amples/quad.c
===================================================================
--- examples/quad.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,36 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// quad.c --
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Dec 11 23:26:58 2018
-// Update Count     : 9
-//
-
-#include <fstream.hfa>
-
-forall( otype T | { T ?*?( T, T ); } )
-T square( T t ) {
-	return t * t;
-}
-
-forall( otype U | { U square( U ); } )
-U quad( U u ) {
-	return square( square( u ) );
-}
-
-int main() {
-	int N = 2;
-	sout | "result of quad of" | N | "is" | quad( N );
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa quad.c" //
-// End: //
Index: amples/s.c
===================================================================
--- examples/s.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,28 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// s.c -- 
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Jan  3 22:38:45 2016
-// Update Count     : 3
-//
-
-//int ?!=?( int, int );
-
-void f() {
-	int a;
-	a ? 4 : 5;
-	1 ? 4 : 5;
-	0 ? 4 : 5;
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa s.c" //
-// End: //
Index: amples/simplePoly.c
===================================================================
--- examples/simplePoly.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,34 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// simplePoly.c -- 
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Mar  8 22:06:41 2016
-// Update Count     : 3
-//
-
-forall( otype T, otype U | { T f( T, U ); } )
-T q( T t, U u ) {
-	return f( t, u );
-//  return t;
-}
-
-int f( int, double* );
-
-void g( void ) {
-	int y;
-	double x;
-//  if ( y )
-	q( 3, &x );
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa simplePoly.c" //
-// End: //
Index: amples/simpler.c
===================================================================
--- examples/simpler.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,25 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// simpler.c -- 
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Mar  8 22:06:30 2016
-// Update Count     : 2
-//
-
-forall( otype T ) T id( T, T );
-
-int main() {
-	id( 0, 7 );
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa simpler.c" //
-// End: //
Index: amples/specialize.c
===================================================================
--- examples/specialize.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,59 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// specialize.c -- 
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Mar  8 22:06:17 2016
-// Update Count     : 3
-//
-
-/// void f( const int * );
-/// 
-/// void m()
-/// {
-///   f( 0 );
-/// }
-
-/// forall( dtype T ) T* f( T* );
-/// void g( int* (*)(int*) );
-/// 
-/// int m() {
-///   g( f );
-/// }
-
-/// void f1( void (*q)( forall( dtype U ) U* (*p)( U* ) ) );
-/// void g1( int* (*)(int*) );
-/// 
-/// int m1() {
-///   f1( g1 );
-/// }
-
-extern "C" {
-	int printf( const char*, ... );
-}
-
-forall( otype T ) T f( T t )
-{
-	printf( "in f; sizeof T is %d\n", sizeof( T ) );
-	return t;
-}
-
-void g( int (*p)(int) )
-{
-	printf( "g: f(7) returned %d\n", f(7) );
-}
-
-int main() {
-	g( f );
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa specialize.c" //
-// End: //
Index: amples/square.c
===================================================================
--- examples/square.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,71 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// square.c --
-//
-// Author           : Richard C. Bilson
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Dec 11 23:28:24 2018
-// Update Count     : 28
-//
-
-#include <fstream.hfa>
-
-forall( otype T | { T ?*?( T, T ); } )
-T square( T t ) {
-	return t * t;
-} // square
-
-int main() {
-#if 0
-	sout | "result of squaring 9 is ";
-
-	// char does not have multiplication.
-	char ?*?( char a1, char a2 ) {
-		return (char)((int)a1 * (int)a2);
-	} // ?*?
-	char c = 9;
-	sout | "char\t\t\t" | square( c );
-
-	sout | square( s );
-#endif
-	short s = 9;
-	square( s );
-#if 0
-	signed int i = 9;
-	sout | "signed int\t\t" | square( i );
-
-	unsigned int ui = 9;
-	sout | "unsigned int\t\t" | square( ui );
-
-	long int li = 9;
-	sout | "signed long int\t\t" | square( li );
-
-	unsigned long int uli = 9;
-	sout | "unsigned long int\t" | square( uli );
-
-	signed long long int lli = 9;
-	sout | "signed long long int\t" | square( lli );
-
-	unsigned long long int ulli = 9;
-	sout | "unsigned long long int\t" | square( ulli );
-
-	float f = 9.0;
-	sout | "float\t\t\t" | square( f );
-
-	double d = 9.0;
-	sout | "double\t\t\t" | square( d );
-
-	long double ld = 9.0;
-	sout | "long double\t\t" | square( ld );
-#endif
-} // main
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa square.c" //
-// End: //
Index: amples/twice.c
===================================================================
--- examples/twice.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,36 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// twice.c --
-//
-// Author           : Peter A. Buhr
-// Created On       : Wed May 27 17:56:53 2015
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Dec 11 23:28:08 2018
-// Update Count     : 47
-//
-
-#include <fstream.hfa>
-
-forall( otype T | { T ?+?( T, T ); } )
-T twice( const T t ) {
-	return t + t;
-}
-
-// char does not have addition
-char ?+?( char op1, char op2 ) { return (int)op1 + op2; } // cast forces integer addition or recursion
-
-// signed char does not have addition
-signed char ?+?( signed char op1, signed char op2 ) { return (int)op1 + op2; } // cast forces integer addition or recursion
-
-int main( void ) {
-	sout | twice( ' ' ) | ' ' | twice( (signed char)0 ) | twice( (int)1 ) | twice( 3.2 );
-}
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa twice.c" //
-// End: //
Index: amples/wrapper/.gitignore
===================================================================
--- examples/wrapper/.gitignore	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,3 +1,0 @@
-.tags
-build/
-test
Index: amples/wrapper/premake4.lua
===================================================================
--- examples/wrapper/premake4.lua	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,79 +1,0 @@
-#!lua
-
--- Additional Linux libs: "X11", "Xxf86vm", "Xi", "Xrandr", "stdc++"
-
-includeDirList = {
-	"src/",
-	"../",
-}
-
-libDirectories = {
-
-}
-
-
-if os.get() == "linux" then
-    linkLibs = {
-	"bsd"
-    }
-end
-
--- Build Options:
-buildOptions = {"\n  CC = cfa\n  CXX = cfa"}
-
-solution "strings"
-	configurations  { "debug", "release",
-				"cproc-debug", "cproc-release",
-				"cfa-debug", "cfa-release" }
-
-	project "test"
-		kind "ConsoleApp"
-		language "C"
-		location "build"
-		objdir "build"
-		targetdir "."
-		buildoptions (buildOptions)
-		defines {	"bool=_Bool",
-				"\"true=((_Bool)(const signed int)1)\"",
-				"\"false=((_Bool)(const signed int)0)\"",
-				"_GNU_SOURCE",
-				"__cforall",
-				"USE_BSD_LIB"
-			}
-		libdirs (libDirectories)
-		links (linkLibs)
-		linkoptions (linkOptionList)
-		includedirs (includeDirList)
-		files { "src/**.c" }
-
-	configuration "debug"
-		defines { "DEBUG" }
-		flags { "Symbols" }
-
-	configuration "release"
-		defines { "NDEBUG" }
-		flags { "Optimize" }
-
-	configuration "cproc-debug"
-		buildoptions ({"-E"})
-		linkoptions ({"-E"})
-	      defines { "DEBUG" }
-	      flags { "Symbols" }
-
-	configuration "cproc-release"
-		buildoptions ({"-E"})
-		linkoptions ({"-E"})
-	      defines { "DEBUG" }
-	      flags { "Symbols" }
-
-	configuration "cfa-debug"
-		linkoptions ({"-E"})
-		files { "build/cproc-debug/*.o" }
-	      defines { "DEBUG" }
-	      flags { "Symbols" }
-
-	configuration "cfa-release"
-		linkoptions ({"-E"})
-		files { "build/cproc-debug/*.o" }
-	      defines { "DEBUG" }
-	      flags { "Symbols" }
Index: amples/wrapper/src/main.c
===================================================================
--- examples/wrapper/src/main.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,16 +1,0 @@
-#include "pointer.h"
-
-wrapper_t make_copy(wrapper_t copy) {
-	return copy;
-}
-
-int main(int argc, char const *argv[]) {
-	wrapper_t p = wrap(6);
-	sout | nl | "test started";
-	wrapper_t p2 = p;
-	clear(&p);
-	p = p2;
-	wrapper_t p3 = make_copy(p2);
-	sout | nl | "test ended";
-	return 0;
-}
Index: amples/wrapper/src/pointer.h
===================================================================
--- examples/wrapper/src/pointer.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,122 +1,0 @@
-#pragma once
-
-#include <fstream.hfa>
-#include <stddef.h>
-#include <stdlib.hfa>
-
-//==============================================================================
-// type safe malloc / free
-
-forall(otype T)
-T* new()
-{
-	T* p = malloc();
-	p{};
-	return p;
-}
-
-forall(otype T)
-void delete(T* p)
-{
-	^p{};
-	free(p);
-}
-
-//==============================================================================
-// ref counter content
-
-struct content_t
-{
-	int value;
-	size_t count;
-};
-
-void ?{}(content_t* this)
-{
-	sout | "Constructing content";
-	this->count = 0;
-}
-
-void ^?{}(content_t* this)
-{
-	sout | "Destroying content";
-}
-
-//==============================================================================
-// ref counter wrapper
-
-struct wrapper_t
-{
-	content_t* ptr;
-};
-
-void ?{}(wrapper_t* this)
-{
-	sout | "Constructing empty ref pointer" | nl;
-	this->ptr = NULL;
-}
-
-void ?{}(wrapper_t* this, wrapper_t rhs)
-{
-	sout | "Constructing ref pointer from copy";
-	this->ptr = rhs.ptr;
-	this->ptr->count++;
-	sout | "Reference is " | this->ptr->count | nl;
-}
-
-void ^?{}(wrapper_t* this)
-{
-	if(this->ptr)
-	{
-		sout | "Destroying ref pointer";
-		this->ptr->count--;
-		sout | "Reference is " | this->ptr->count | nl;
-		if(!this->ptr->count) delete(this->ptr);
-	}
-	else
-	{
-		sout | "Destroying empty ref pointer" | nl;
-	}
-}
-
-wrapper_t ?=?(wrapper_t* this, wrapper_t rhs)
-{
-	sout | "Setting ref pointer";
-	if(this->ptr)
-	{
-		this->ptr->count--;
-		sout | "Reference is " | this->ptr->count | nl;
-		if(!this->ptr->count) delete(this->ptr);
-	}
-	this->ptr = rhs.ptr;
-	this->ptr->count++;
-	sout | "Reference is " | this->ptr->count | nl;
-}
-
-void set(wrapper_t* this, content_t* c)
-{
-	this->ptr = c;
-	this->ptr->count++;
-	sout | "Setting ref pointer";
-	sout | "Reference is " | this->ptr->count | nl;
-}
-
-void clear(wrapper_t* this)
-{
-	sout | "Clearing ref pointer";
-	this->ptr->count--;
-	sout | "Reference is " | this->ptr->count | nl;
-	if(!this->ptr->count) delete(this->ptr);
-	this->ptr = NULL;
-}
-
-
-wrapper_t wrap(int val)
-{
-	wrapper_t w;
-	content_t* c = malloc();
-	c{};
-	c->value = val;
-	set(&w, c);
-	return w;
-}
Index: amples/zero_one.c
===================================================================
--- examples/zero_one.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,24 +1,0 @@
-#include <fstream.hfa>
-
-void foo(zero_t o)
-{
-	sout | "It's a Zero!";
-}
-
-void foo(one_t o)
-{
-	sout | "It's a One!";
-}
-
-void foo(int o)
-{
-	sout | "It's a Number!";
-}
-
-int main()
-{
-	foo(0);
-	foo(1);
-	foo(2);
-	return 0;
-}
Index: libcfa/prelude/builtins.c
===================================================================
--- libcfa/prelude/builtins.c	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/prelude/builtins.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Fri Jul 21 16:21:03 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Jul 13 21:10:02 2020
-// Update Count     : 109
+// Last Modified On : Fri Oct  9 18:26:19 2020
+// Update Count     : 110
 //
 
@@ -94,4 +94,5 @@
 // universal typed pointer constant
 static inline forall( dtype DT ) DT * intptr( uintptr_t addr ) { return (DT *)addr; }
+static inline forall( ftype FT ) FT * intptr( uintptr_t addr ) { return (FT *)addr; }
 
 #if defined(__SIZEOF_INT128__)
Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/Makefile.am	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -88,4 +88,5 @@
 inst_thread_headers_nosrc = \
 	bits/random.hfa \
+	concurrency/clib/cfathread.h \
 	concurrency/invoke.h \
 	concurrency/kernel/fwd.hfa
@@ -103,4 +104,5 @@
 	concurrency/alarm.cfa \
 	concurrency/alarm.hfa \
+	concurrency/clib/cfathread.cfa \
 	concurrency/CtxSwitch-@ARCHITECTURE@.S \
 	concurrency/invoke.c \
Index: libcfa/src/bits/containers.hfa
===================================================================
--- libcfa/src/bits/containers.hfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/bits/containers.hfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -157,4 +157,15 @@
 			tail = &get_next( *val );
 			*tail = 1p;
+		}
+
+		T * peek( __queue(T) & this ) {
+			verify(*this.tail == 1p);
+			T * head = this.head;
+			if( head != 1p ) {
+				verify(*this.tail == 1p);
+				return head;
+			}
+			verify(*this.tail == 1p);
+			return 0p;
 		}
 
Index: libcfa/src/bits/locks.hfa
===================================================================
--- libcfa/src/bits/locks.hfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/bits/locks.hfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -164,6 +164,6 @@
 
 	struct $thread;
-	extern void park( __cfaabi_dbg_ctx_param );
-	extern void unpark( struct $thread * this __cfaabi_dbg_ctx_param2 );
+	extern void park( void );
+	extern void unpark( struct $thread * this );
 	static inline struct $thread * active_thread ();
 
@@ -191,5 +191,5 @@
 					/* paranoid */ verify( expected == 0p );
 					if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-						park( __cfaabi_dbg_ctx );
+						park();
 						return true;
 					}
@@ -210,5 +210,5 @@
 				else {
 					if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-						unpark( expected __cfaabi_dbg_ctx2 );
+						unpark( expected );
 						return true;
 					}
@@ -244,5 +244,5 @@
 				/* paranoid */ verify( expected == 0p );
 				if(__atomic_compare_exchange_n(&this.ptr, &expected, active_thread(), false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-					park( __cfaabi_dbg_ctx );
+					park();
 					/* paranoid */ verify( this.ptr == 1p );
 					return true;
@@ -256,5 +256,5 @@
 			struct $thread * got = __atomic_exchange_n( &this.ptr, 1p, __ATOMIC_SEQ_CST);
 			if( got == 0p ) return false;
-			unpark( got __cfaabi_dbg_ctx2 );
+			unpark( got );
 			return true;
 		}
Index: libcfa/src/concurrency/CtxSwitch-i386.S
===================================================================
--- libcfa/src/concurrency/CtxSwitch-i386.S	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/CtxSwitch-i386.S	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Tue Dec 6 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Aug 16 08:46:22 2020
-// Update Count     : 4
+// Last Modified On : Sun Sep  6 18:23:37 2020
+// Update Count     : 5
 //
 
@@ -35,5 +35,5 @@
 
 	// Copy the "from" context argument from the stack to register eax
-	// Return address is at 0(%esp), with parameters following
+	// Return address is at 0(%esp), with parameters following.
 
 	movl 4(%esp),%eax
@@ -50,7 +50,7 @@
 	movl %ebp,FP_OFFSET(%eax)
 
-	// Copy the "to" context argument from the stack to register eax
-	// Having pushed three words (= 12 bytes) on the stack, the
-	// argument is now at 8 + 12 = 20(%esp)
+	// Copy the "to" context argument from the stack to register eax. Having
+	// pushed 3 words (= 12 bytes) on the stack, the argument is now at
+	// 8 + 12 = 20(%esp).
 
 	movl 20(%esp),%eax
Index: libcfa/src/concurrency/alarm.cfa
===================================================================
--- libcfa/src/concurrency/alarm.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/alarm.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -130,5 +130,5 @@
 
 	register_self( &node );
-	park( __cfaabi_dbg_ctx );
+	park();
 
 	/* paranoid */ verify( !node.set );
Index: libcfa/src/concurrency/clib/cfathread.cfa
===================================================================
--- libcfa/src/concurrency/clib/cfathread.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ libcfa/src/concurrency/clib/cfathread.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,66 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// clib/cfathread.cfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Sep 22 15:31:20 2020
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include "kernel.hfa"
+#include "thread.hfa"
+
+thread CRunner {
+	void (*themain)( CRunner * );
+};
+
+static void ?{}( CRunner & this, void (*themain)( CRunner * ) ) {
+	this.themain = themain;
+}
+
+void main( CRunner & this ) {
+	this.themain( &this );
+}
+
+processor * procs = 0p;
+int proc_cnt = 1;
+
+extern "C" {
+	//--------------------
+	// Basic thread management
+	CRunner * cfathread_create( void (*main)( CRunner * ) ) {
+		return new( main );
+	}
+
+	void cfathread_join( CRunner * thrd ) {
+		delete( thrd );
+	}
+
+	void cfathread_park( void ) {
+		park();
+	}
+
+	void cfathread_unpark( CRunner * thrd ) {
+		unpark( *thrd );
+	}
+
+	void cfathread_yield( void ) {
+		yield();
+	}
+
+	//--------------------
+	// Basic kernel features
+	void cfathread_setproccnt( int ncnt ) {
+		assert( ncnt >= 1 );
+		adelete(proc_cnt, procs);
+
+		proc_cnt = ncnt - 1;
+		procs = anew(proc_cnt);
+	}
+}
Index: libcfa/src/concurrency/clib/cfathread.h
===================================================================
--- libcfa/src/concurrency/clib/cfathread.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ libcfa/src/concurrency/clib/cfathread.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,43 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// clib/cfathread.h --
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Sep 22 15:31:20 2020
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include "stddef.h"
+#include "invoke.h"
+
+#if defined(__cforall) || defined(__cplusplus)
+extern "C" {
+#endif
+	//--------------------
+	// Basic types
+	struct cfathread_CRunner_t;
+	typedef struct cfathread_CRunner_t * cfathread_t;
+
+	//--------------------
+	// Basic thread support
+	cfathread_t cfathread_create( void (*main)( cfathread_t ) );
+	void cfathread_join( cfathread_t );
+
+	void cfathread_park( void );
+	void cfathread_unpark( cfathread_t );
+	void cfathread_yield( void );
+
+	//--------------------
+	// Basic kernel features
+	void cfathread_setproccnt( int );
+
+
+#if defined(__cforall) || defined(__cplusplus)
+}
+#endif
Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/coroutine.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -47,6 +47,5 @@
 
 //-----------------------------------------------------------------------------
-FORALL_DATA_INSTANCE(CoroutineCancelled,
-		(dtype coroutine_t | sized(coroutine_t)), (coroutine_t))
+FORALL_DATA_INSTANCE(CoroutineCancelled, (dtype coroutine_t), (coroutine_t))
 
 struct __cfaehm_node {
@@ -59,5 +58,5 @@
 void mark_exception(CoroutineCancelled(T) *) {}
 
-forall(dtype T | sized(T))
+forall(dtype T)
 void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) {
 	dst->the_coroutine = src->the_coroutine;
@@ -77,5 +76,7 @@
 	exception_t * except = (exception_t *)(1 + (__cfaehm_node *)desc->cancellation);
 
+	// TODO: Remove explitate vtable set once trac#186 is fixed.
 	CoroutineCancelled(T) except;
+	except.virtual_table = &get_exception_vtable(&except);
 	except.the_coroutine = &cor;
 	except.the_exception = except;
@@ -91,5 +92,5 @@
 
 // minimum feasible stack size in bytes
-#define MinStackSize 1000
+static const size_t MinStackSize = 1000;
 extern size_t __page_size;				// architecture pagesize HACK, should go in proper runtime singleton
 
Index: libcfa/src/concurrency/coroutine.hfa
===================================================================
--- libcfa/src/concurrency/coroutine.hfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/coroutine.hfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -22,7 +22,5 @@
 //-----------------------------------------------------------------------------
 // Exception thrown from resume when a coroutine stack is cancelled.
-// Should not have to be be sized (see trac #196).
-FORALL_DATA_EXCEPTION(CoroutineCancelled,
-		(dtype coroutine_t | sized(coroutine_t)), (coroutine_t)) (
+FORALL_DATA_EXCEPTION(CoroutineCancelled, (dtype coroutine_t), (coroutine_t)) (
 	coroutine_t * the_coroutine;
 	exception_t * the_exception;
@@ -30,7 +28,4 @@
 
 forall(dtype T)
-void mark_exception(CoroutineCancelled(T) *);
-
-forall(dtype T | sized(T))
 void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src);
 
@@ -42,7 +37,5 @@
 // Anything that implements this trait can be resumed.
 // Anything that is resumed is a coroutine.
-trait is_coroutine(dtype T | sized(T)
-		| is_resumption_exception(CoroutineCancelled(T))
-		| VTABLE_ASSERTION(CoroutineCancelled, (T))) {
+trait is_coroutine(dtype T | IS_RESUMPTION_EXCEPTION(CoroutineCancelled, (T))) {
 	void main(T & this);
 	$coroutine * get_coroutine(T & this);
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/invoke.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -93,4 +93,6 @@
 
 	};
+	// Wrapper for gdb
+	struct cfathread_coroutine_t { struct $coroutine debug; };
 
 	static inline struct __stack_t * __get_stack( struct $coroutine * cor ) {
@@ -129,4 +131,6 @@
 		struct __condition_node_t * dtor_node;
 	};
+	// Wrapper for gdb
+	struct cfathread_monitor_t { struct $monitor debug; };
 
 	struct __monitor_group_t {
@@ -186,16 +190,10 @@
 		} node;
 
-		#ifdef __CFA_DEBUG__
-			// previous function to park/unpark the thread
-			const char * park_caller;
-			int park_result;
-			enum __Coroutine_State park_state;
-			bool park_stale;
-			const char * unpark_caller;
-			int unpark_result;
-			enum __Coroutine_State unpark_state;
-			bool unpark_stale;
+		#if defined( __CFA_WITH_VERIFY__ )
+			unsigned long long canary;
 		#endif
 	};
+	// Wrapper for gdb
+	struct cfathread_thread_t { struct $thread debug; };
 
 	#ifdef __CFA_DEBUG__
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/io.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -69,5 +69,5 @@
 		if( block ) {
 			enable_interrupts( __cfaabi_dbg_ctx );
-			park( __cfaabi_dbg_ctx );
+			park();
 			disable_interrupts();
 		}
@@ -97,5 +97,5 @@
 
 		if(nextt) {
-			unpark( nextt __cfaabi_dbg_ctx2 );
+			unpark( nextt );
 			enable_interrupts( __cfaabi_dbg_ctx );
 			return true;
@@ -134,5 +134,5 @@
 		int ret = 0;
 		if( need_sys_to_submit || need_sys_to_complete ) {
-			ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, 0, flags, 0p, _NSIG / 8);
+			ret = syscall( __NR_io_uring_enter, ring.fd, to_submit, 0, flags, (sigset_t *)0p, _NSIG / 8);
 			if( ret < 0 ) {
 				switch((int)errno) {
Index: libcfa/src/concurrency/io/call.cfa.in
===================================================================
--- libcfa/src/concurrency/io/call.cfa.in	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/io/call.cfa.in	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -47,37 +47,30 @@
 	#include "kernel/fwd.hfa"
 
-	#if defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_IO_DRAIN) && defined(CFA_HAVE_IOSQE_ASYNC)
-		#define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_DRAIN | IOSQE_ASYNC)
-	#elif defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_ASYNC)
-		#define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_ASYNC)
-	#elif defined(CFA_HAVE_IOSQE_FIXED_FILE) && defined(CFA_HAVE_IOSQE_IO_DRAIN)
-		#define REGULAR_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_DRAIN)
-	#elif defined(CFA_HAVE_IOSQE_IO_DRAIN) && defined(CFA_HAVE_IOSQE_ASYNC)
-		#define REGULAR_FLAGS (IOSQE_IO_DRAIN | IOSQE_ASYNC)
-	#elif defined(CFA_HAVE_IOSQE_FIXED_FILE)
-		#define REGULAR_FLAGS (IOSQE_FIXED_FILE)
-	#elif defined(CFA_HAVE_IOSQE_IO_DRAIN)
-		#define REGULAR_FLAGS (IOSQE_IO_DRAIN)
-	#elif defined(CFA_HAVE_IOSQE_ASYNC)
-		#define REGULAR_FLAGS (IOSQE_ASYNC)
-	#else
-		#define REGULAR_FLAGS (0)
-	#endif
-
-	#if defined(CFA_HAVE_IOSQE_IO_LINK) && defined(CFA_HAVE_IOSQE_IO_HARDLINK)
-		#define LINK_FLAGS (IOSQE_IO_LINK | IOSQE_IO_HARDLINK)
-	#elif defined(CFA_HAVE_IOSQE_IO_LINK)
-		#define LINK_FLAGS (IOSQE_IO_LINK)
-	#elif defined(CFA_HAVE_IOSQE_IO_HARDLINK)
-		#define LINK_FLAGS (IOSQE_IO_HARDLINK)
-	#else
-		#define LINK_FLAGS (0)
-	#endif
-
-	#if defined(CFA_HAVE_SPLICE_F_FD_IN_FIXED)
-		#define SPLICE_FLAGS (SPLICE_F_FD_IN_FIXED)
-	#else
-		#define SPLICE_FLAGS (0)
-	#endif
+	static const __u8 REGULAR_FLAGS = 0
+		#if defined(CFA_HAVE_IOSQE_FIXED_FILE)
+			| IOSQE_FIXED_FILE
+		#endif
+		#if defined(CFA_HAVE_IOSQE_IO_DRAIN)
+			| IOSQE_IO_DRAIN
+		#endif
+		#if defined(CFA_HAVE_IOSQE_ASYNC)
+			| IOSQE_ASYNC
+		#endif
+	;
+
+	static const __u32 LINK_FLAGS = 0
+		#if defined(CFA_HAVE_IOSQE_IO_LINK)
+			| IOSQE_IO_LINK
+		#endif
+		#if defined(CFA_HAVE_IOSQE_IO_HARDLINK)
+			| IOSQE_IO_HARDLINK
+		#endif
+	;
+
+	static const __u32 SPLICE_FLAGS = 0
+		#if defined(CFA_HAVE_SPLICE_F_FD_IN_FIXED)
+			| SPLICE_F_FD_IN_FIXED
+		#endif
+	;
 
 	extern [* struct io_uring_sqe, __u32] __submit_alloc( struct __io_data & ring, __u64 data );
@@ -347,5 +340,5 @@
 	# CFA_HAVE_IORING_OP_MADVISE
 	Call('MADVISE', 'int madvise(void *addr, size_t length, int advice)', {
-		'addr': 'addr',
+		'addr': '(__u64)addr',
 		'len': 'length',
 		'fadvise_advice': 'advice'
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -147,4 +147,5 @@
 	static void * iopoll_loop( __attribute__((unused)) void * args ) {
 		__processor_id_t id;
+		id.full_proc = false;
 		id.id = doregister(&id);
 		__cfaabi_dbg_print_safe( "Kernel : IO poller thread starting\n" );
@@ -246,5 +247,4 @@
 					thrd.link.next = 0p;
 					thrd.link.prev = 0p;
-					__cfaabi_dbg_debug_do( thrd.unpark_stale = true );
 
 					// Fixup the thread state
@@ -266,5 +266,5 @@
 
 				// unpark the fast io_poller
-				unpark( &thrd __cfaabi_dbg_ctx2 );
+				unpark( &thrd );
 			}
 			else {
@@ -275,5 +275,5 @@
 			}
 		} else {
-			unpark( &thrd __cfaabi_dbg_ctx2 );
+			unpark( &thrd );
 		}
 
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/kernel.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -237,7 +237,4 @@
 	$coroutine * proc_cor = get_coroutine(this->runner);
 
-	// Update global state
-	kernelTLS.this_thread = thrd_dst;
-
 	// set state of processor coroutine to inactive
 	verify(proc_cor->state == Active);
@@ -249,24 +246,29 @@
 		thrd_dst->state = Active;
 
-		__cfaabi_dbg_debug_do(
-			thrd_dst->park_stale   = true;
-			thrd_dst->unpark_stale = true;
-		)
+		// Update global state
+		kernelTLS.this_thread = thrd_dst;
 
 		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 		/* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
+		/* paranoid */ verify( thrd_dst->context.SP );
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst ); // add escape condition if we are setting up the processor
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst ); // add escape condition if we are setting up the processor
+		/* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_dst->canary );
+
+
 
 		// set context switch to the thread that the processor is executing
-		verify( thrd_dst->context.SP );
 		__cfactx_switch( &proc_cor->context, &thrd_dst->context );
 		// when __cfactx_switch returns we are back in the processor coroutine
 
+		/* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_dst->canary );
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst );
 		/* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
+		/* paranoid */ verify( thrd_dst->context.SP );
 		/* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
 		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 
+		// Reset global state
+		kernelTLS.this_thread = 0p;
 
 		// We just finished running a thread, there are a few things that could have happened.
@@ -286,5 +288,5 @@
 			// The thread has halted, it should never be scheduled/run again
 			// We may need to wake someone up here since
-			unpark( this->destroyer __cfaabi_dbg_ctx2 );
+			unpark( this->destroyer );
 			this->destroyer = 0p;
 			break RUNNING;
@@ -296,5 +298,4 @@
 		// set state of processor coroutine to active and the thread to inactive
 		int old_ticket = __atomic_fetch_sub(&thrd_dst->ticket, 1, __ATOMIC_SEQ_CST);
-		__cfaabi_dbg_debug_do( thrd_dst->park_result = old_ticket; )
 		switch(old_ticket) {
 			case 1:
@@ -313,5 +314,4 @@
 	// Just before returning to the processor, set the processor coroutine to active
 	proc_cor->state = Active;
-	kernelTLS.this_thread = 0p;
 
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
@@ -334,6 +334,8 @@
 			__x87_store;
 		#endif
-		verify( proc_cor->context.SP );
+		/* paranoid */ verify( proc_cor->context.SP );
+		/* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_src->canary );
 		__cfactx_switch( &thrd_src->context, &proc_cor->context );
+		/* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd_src->canary );
 		#if defined( __i386 ) || defined( __x86_64 )
 			__x87_load;
@@ -367,4 +369,6 @@
 	/* paranoid */ #endif
 	/* paranoid */ verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
+	/* paranoid */ verify( 0x0D15EA5E0D15EA5E == thrd->canary );
+
 
 	if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
@@ -403,10 +407,6 @@
 
 // KERNEL ONLY unpark with out disabling interrupts
-void __unpark(  struct __processor_id_t * id, $thread * thrd __cfaabi_dbg_ctx_param2 ) {
-	// record activity
-	__cfaabi_dbg_record_thrd( *thrd, false, caller );
-
+void __unpark(  struct __processor_id_t * id, $thread * thrd ) {
 	int old_ticket = __atomic_fetch_add(&thrd->ticket, 1, __ATOMIC_SEQ_CST);
-	__cfaabi_dbg_debug_do( thrd->unpark_result = old_ticket; thrd->unpark_state = thrd->state; )
 	switch(old_ticket) {
 		case 1:
@@ -426,20 +426,17 @@
 }
 
-void unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) {
+void unpark( $thread * thrd ) {
 	if( !thrd ) return;
 
 	disable_interrupts();
-	__unpark( (__processor_id_t*)kernelTLS.this_processor, thrd __cfaabi_dbg_ctx_fwd2 );
+	__unpark( (__processor_id_t*)kernelTLS.this_processor, thrd );
 	enable_interrupts( __cfaabi_dbg_ctx );
 }
 
-void park( __cfaabi_dbg_ctx_param ) {
+void park( void ) {
 	/* paranoid */ verify( kernelTLS.preemption_state.enabled );
 	disable_interrupts();
 	/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
 	/* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
-
-	// record activity
-	__cfaabi_dbg_record_thrd( *kernelTLS.this_thread, true, caller );
 
 	returnToKernel();
@@ -521,5 +518,5 @@
 	disable_interrupts();
 		/* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
-		bool ret = post( this->idle );
+		post( this->idle );
 	enable_interrupts( __cfaabi_dbg_ctx );
 }
@@ -649,5 +646,5 @@
 		// atomically release spin lock and block
 		unlock( lock );
-		park( __cfaabi_dbg_ctx );
+		park();
 		return true;
 	}
@@ -670,5 +667,5 @@
 
 	// make new owner
-	unpark( thrd __cfaabi_dbg_ctx2 );
+	unpark( thrd );
 
 	return thrd != 0p;
@@ -681,5 +678,5 @@
 	count += diff;
 	for(release) {
-		unpark( pop_head( waiting ) __cfaabi_dbg_ctx2 );
+		unpark( pop_head( waiting ) );
 	}
 
@@ -697,15 +694,4 @@
 			this.prev_thrd = kernelTLS.this_thread;
 		}
-
-		void __cfaabi_dbg_record_thrd($thread & this, bool park, const char prev_name[]) {
-			if(park) {
-				this.park_caller   = prev_name;
-				this.park_stale    = false;
-			}
-			else {
-				this.unpark_caller = prev_name;
-				this.unpark_stale  = false;
-			}
-		}
 	}
 )
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/kernel.hfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -48,5 +48,6 @@
 // Processor id, required for scheduling threads
 struct __processor_id_t {
-	unsigned id;
+	unsigned id:24;
+	bool full_proc:1;
 
 	#if !defined(__CFA_NO_STATISTICS__)
Index: libcfa/src/concurrency/kernel/fwd.hfa
===================================================================
--- libcfa/src/concurrency/kernel/fwd.hfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/kernel/fwd.hfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -118,6 +118,6 @@
 
 	extern "Cforall" {
-		extern void park( __cfaabi_dbg_ctx_param );
-		extern void unpark( struct $thread * this __cfaabi_dbg_ctx_param2 );
+		extern void park( void );
+		extern void unpark( struct $thread * this );
 		static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
 
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -451,4 +451,7 @@
 	link.next = 0p;
 	link.prev = 0p;
+	#if defined( __CFA_WITH_VERIFY__ )
+		canary = 0x0D15EA5E0D15EA5E;
+	#endif
 
 	node.next = 0p;
@@ -470,5 +473,5 @@
 	this.name = name;
 	this.cltr = &_cltr;
-	id = -1u;
+	full_proc = true;
 	destroyer = 0p;
 	do_terminate = false;
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -64,5 +64,5 @@
 
 // KERNEL ONLY unpark with out disabling interrupts
-void __unpark( struct __processor_id_t *, $thread * thrd __cfaabi_dbg_ctx_param2 );
+void __unpark( struct __processor_id_t *, $thread * thrd );
 
 static inline bool __post(single_sem & this, struct __processor_id_t * id) {
@@ -77,5 +77,5 @@
 		else {
 			if(__atomic_compare_exchange_n(&this.ptr, &expected, 0p, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
-				__unpark( id, expected __cfaabi_dbg_ctx2 );
+				__unpark( id, expected );
 				return true;
 			}
Index: libcfa/src/concurrency/locks.cfa
===================================================================
--- libcfa/src/concurrency/locks.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ libcfa/src/concurrency/locks.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,428 @@
+#include "locks.hfa"
+#include "kernel_private.hfa"
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <kernel.hfa>
+#include <stdlib.hfa>
+#include <thread.hfa>
+
+///////////////////////////////////////////////////////////////////
+//// info_thread
+///////////////////////////////////////////////////////////////////
+forall(dtype L | is_blocking_lock(L)) {
+	void ?{}( info_thread(L) & this, $thread * t ) {
+		this.t = t;
+		this.lock = 0p;
+	}
+
+	void ?{}( info_thread(L) & this, $thread * t, uintptr_t info ) {
+		this.t = t;
+		this.info = info;
+		this.lock = 0p;
+	}
+
+	void ^?{}( info_thread(L) & this ){
+		// default
+	}
+
+	info_thread(L) *& get_next( info_thread(L) & this ) {
+		return this.next;
+	}
+}
+///////////////////////////////////////////////////////////////////
+//// Blocking Locks
+///////////////////////////////////////////////////////////////////
+
+void ?{}( blocking_lock & this, bool multi_acquisition, bool strict_owner ) {
+	this.lock{};
+	this.blocked_threads{};
+	this.wait_count = 0;
+	this.multi_acquisition = multi_acquisition;
+	this.strict_owner = strict_owner;
+	this.owner = 0p;
+	this.recursion_count = 0;
+}
+
+void ^?{}( blocking_lock & this ) {
+	// default
+}
+
+void ?{}( mutex_lock & this ) {
+	((blocking_lock &)this){ false, false };
+}
+
+void ^?{}( mutex_lock & this ) {
+	// default
+}
+
+void ?{}( owner_lock & this ) {
+	((blocking_lock &)this){ true, true };
+}
+
+void ^?{}( owner_lock & this ) {
+	// default
+}
+
+void ?{}( recursive_mutex_lock & this ) {
+	((blocking_lock &)this){ true, false };
+}
+
+void ^?{}( recursive_mutex_lock & this ) {
+	// default
+}
+
+void lock( blocking_lock & this ) with( this ) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	if ( owner == kernelTLS.this_thread && !multi_acquisition) {
+		fprintf(stderr, "A single acquisition lock holder attempted to reacquire the lock resulting in a deadlock."); // Possibly throw instead
+		exit(EXIT_FAILURE);
+	} else if ( owner != 0p && owner != kernelTLS.this_thread ) {
+		append( blocked_threads, kernelTLS.this_thread );
+		wait_count++;
+		unlock( lock );
+		park( __cfaabi_dbg_ctx );
+	} else if ( owner == kernelTLS.this_thread && multi_acquisition ) {
+		recursion_count++;
+		unlock( lock );
+	} else {
+		owner = kernelTLS.this_thread;
+		recursion_count = 1;
+		unlock( lock );
+	}
+}
+
+bool try_lock( blocking_lock & this ) with( this ) {
+	bool ret = false;
+	lock( lock __cfaabi_dbg_ctx2 );
+	if ( owner == 0p ) {
+		owner = kernelTLS.this_thread;
+		if ( multi_acquisition ) recursion_count = 1;
+		ret = true;
+	} else if ( owner == kernelTLS.this_thread && multi_acquisition ) {
+		recursion_count++;
+		ret = true;
+	}
+	unlock( lock );
+	return ret;
+}
+
+void unlock( blocking_lock & this ) with( this ) {
+	lock( lock __cfaabi_dbg_ctx2 );
+	if ( owner == 0p ){ // no owner implies lock isn't held
+		fprintf( stderr, "There was an attempt to release a lock that isn't held" );
+		return;
+	} else if ( strict_owner && owner != kernelTLS.this_thread ) {
+		fprintf( stderr, "A thread other than the owner attempted to release an owner lock" );
+		return;
+	}
+	recursion_count--;
+	if ( recursion_count == 0 ) {
+		$thread * thrd = pop_head( blocked_threads );
+		owner = thrd;
+		recursion_count = ( thrd && multi_acquisition ? 1 : 0 );
+		wait_count--;
+		unpark( thrd __cfaabi_dbg_ctx2 );
+	}
+	unlock( lock );
+}
+
+size_t wait_count( blocking_lock & this ) with( this ) {
+	return wait_count;
+}
+
+
+void set_recursion_count( blocking_lock & this, size_t recursion ) with( this ) {
+	recursion_count = recursion;
+}
+
+size_t get_recursion_count( blocking_lock & this ) with( this ) {
+	return recursion_count;
+}
+
+void add_( blocking_lock & this, $thread * t ) with( this ) {
+    lock( lock __cfaabi_dbg_ctx2 );
+	if ( owner != 0p ) {
+		append( blocked_threads, t );
+		wait_count++;
+		unlock( lock );
+	} else {
+		owner = t;
+		if ( multi_acquisition ) recursion_count = 1;
+		unpark( t __cfaabi_dbg_ctx2 );
+		unlock( lock );
+	}
+}
+
+void remove_( blocking_lock & this ) with( this ) {
+    lock( lock __cfaabi_dbg_ctx2 );
+	if ( owner == 0p ){ // no owner implies lock isn't held
+		fprintf( stderr, "A lock that is not held was passed to a synchronization lock" );
+	} else if ( strict_owner && owner != kernelTLS.this_thread ) {
+		fprintf( stderr, "A thread other than the owner of a lock passed it to a synchronization lock" );
+	} else {
+		$thread * thrd = pop_head( blocked_threads );
+		owner = thrd;
+		recursion_count = ( thrd && multi_acquisition ? 1 : 0 );
+		wait_count--;
+		unpark( thrd __cfaabi_dbg_ctx2 );
+	}
+	unlock( lock );
+}
+
+///////////////////////////////////////////////////////////////////
+//// Overloaded routines for traits
+///////////////////////////////////////////////////////////////////
+
+// In an ideal world this may not be necessary
+// Is it possible for nominal inheritance to inherit traits??
+// If that occurs we would avoid all this extra code
+
+void lock( mutex_lock & this ){
+	lock( (blocking_lock &)this );
+}
+
+void unlock( mutex_lock & this ){
+	unlock( (blocking_lock &)this );
+}
+
+void add_( mutex_lock & this, struct $thread * t ){
+	add_( (blocking_lock &)this, t );
+}
+
+void remove_( mutex_lock & this ){
+	remove_( (blocking_lock &)this );
+}
+
+void set_recursion_count( mutex_lock & this, size_t recursion ){
+	set_recursion_count( (blocking_lock &)this, recursion );
+}
+
+size_t get_recursion_count( mutex_lock & this ){
+	get_recursion_count( (blocking_lock &)this );
+}
+
+void lock( recursive_mutex_lock & this ){
+	lock( (blocking_lock &)this );
+}
+
+void unlock( recursive_mutex_lock & this ){
+	unlock( (blocking_lock &)this );
+}
+
+void add_( recursive_mutex_lock & this, struct $thread * t ){
+	add_( (blocking_lock &)this, t );
+}
+
+void remove_( recursive_mutex_lock & this ){
+	remove_( (blocking_lock &)this );
+}
+
+void set_recursion_count( recursive_mutex_lock & this, size_t recursion ){
+	set_recursion_count( (blocking_lock &)this, recursion );
+}
+
+size_t get_recursion_count( recursive_mutex_lock & this ){
+	get_recursion_count( (blocking_lock &)this );
+}
+
+///////////////////////////////////////////////////////////////////
+//// Synchronization Locks
+///////////////////////////////////////////////////////////////////
+
+forall(dtype L | is_blocking_lock(L)) {
+	void ?{}( synchronization_lock(L) & this, bool reacquire_after_signal ){
+		this.lock{};
+		this.blocked_threads{};
+		this.count = 0;
+		this.reacquire_after_signal = reacquire_after_signal;
+	}
+
+	void ^?{}( synchronization_lock(L) & this ){
+		// default
+	}
+
+	void ?{}( condition_variable(L) & this ){
+		((synchronization_lock(L) &)this){ true };
+	}
+
+	void ^?{}( condition_variable(L) & this ){
+		// default
+	}
+
+	void ?{}( thread_queue(L) & this ){
+		((synchronization_lock(L) &)this){ false };
+	}
+
+	void ^?{}( thread_queue(L) & this ){
+		// default
+	}
+
+	bool notify_one( synchronization_lock(L) & this ) with( this ) {
+		lock( lock __cfaabi_dbg_ctx2 );
+		bool ret = !!blocked_threads;
+		info_thread(L) * popped = pop_head( blocked_threads );
+		if(popped != 0p) {
+			if( reacquire_after_signal ){
+				add_(*popped->lock, popped->t);
+			} else {
+				unpark(
+					popped->t __cfaabi_dbg_ctx2
+				);
+			}
+		}
+		unlock( lock );
+		return ret;
+	}
+
+	bool notify_all( synchronization_lock(L) & this ) with(this) {
+		lock( lock __cfaabi_dbg_ctx2 );
+		bool ret = blocked_threads ? true : false;
+		while( blocked_threads ) {
+			info_thread(L) * popped = pop_head( blocked_threads );
+			if(popped != 0p){
+				if( reacquire_after_signal ){
+					add_(*popped->lock, popped->t);
+				} else {
+					unpark(
+						popped->t __cfaabi_dbg_ctx2
+					);
+				}
+			}
+		}
+		unlock( lock );
+		return ret;
+	}
+
+	uintptr_t front( synchronization_lock(L) & this ) with(this) {
+		return (*peek(blocked_threads)).info;
+	}
+
+	bool empty( synchronization_lock(L) & this ) with(this) {
+		return blocked_threads ? false : true;
+	}
+
+	int counter( synchronization_lock(L) & this ) with(this) {
+		return count;
+	}
+
+	void queue_info_thread( synchronization_lock(L) & this, info_thread(L) & i ) with(this) {
+		lock( lock __cfaabi_dbg_ctx2 );
+		append( blocked_threads, &i );
+		count++;
+		unlock( lock );
+		park( __cfaabi_dbg_ctx );
+	}
+
+
+	void wait( synchronization_lock(L) & this ) with(this) {
+		info_thread( L ) i = { kernelTLS.this_thread };
+		queue_info_thread( this, i );
+	}
+
+	void wait( synchronization_lock(L) & this, uintptr_t info ) with(this) {
+		info_thread( L ) i = { kernelTLS.this_thread, info };
+		queue_info_thread( this, i );
+	}
+	// I still need to implement the time delay wait routines
+	bool wait( synchronization_lock(L) & this, Duration duration ) with(this) {
+		timeval tv = { time(0) };
+		Time t = { tv };
+		return wait( this, t + duration );
+	}
+
+	bool wait( synchronization_lock(L) & this, uintptr_t info, Duration duration ) with(this) {
+		// TODO: ADD INFO
+		return wait( this, duration );
+	}
+
+	bool wait( synchronization_lock(L) & this, Time time ) with(this) {
+		return false; //default
+	}
+
+	bool wait( synchronization_lock(L) & this, uintptr_t info, Time time ) with(this) {
+		// TODO: ADD INFO
+		return wait( this, time );
+	}
+
+	void queue_info_thread_unlock( synchronization_lock(L) & this, L & l, info_thread(L) & i ) with(this) {
+		lock( lock __cfaabi_dbg_ctx2 );
+		append( this.blocked_threads, &i );
+		count++;
+		i.lock = &l;
+		size_t recursion_count = get_recursion_count(l);
+		remove_( l );
+		unlock( lock );
+		park( __cfaabi_dbg_ctx ); // blocks here
+
+		set_recursion_count(l, recursion_count); // resets recursion count here after waking
+	}
+
+	void wait( synchronization_lock(L) & this, L & l ) with(this) {
+		info_thread(L) i = { kernelTLS.this_thread };
+		queue_info_thread_unlock( this, l, i );
+	}
+
+	void wait( synchronization_lock(L) & this, L & l, uintptr_t info ) with(this) {
+		info_thread(L) i = { kernelTLS.this_thread, info };
+		queue_info_thread_unlock( this, l, i );
+	}
+
+	bool wait( synchronization_lock(L) & this, L & l, Duration duration ) with(this) {
+		timeval tv = { time(0) };
+		Time t = { tv };
+		return wait( this, l, t + duration );
+	}
+
+	bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Duration duration ) with(this) {
+		// TODO: ADD INFO
+		return wait( this, l, duration );
+	}
+
+	bool wait( synchronization_lock(L) & this, L & l, Time time ) with(this) {
+		return false; //default
+	}
+
+	bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Time time ) with(this) {
+		// TODO: ADD INFO
+		return wait( this, l, time );
+	}
+}
+
+///////////////////////////////////////////////////////////////////
+//// condition lock alternative approach
+///////////////////////////////////////////////////////////////////
+
+// the solution below is less efficient but does not require the lock to have a specific add/remove routine
+
+///////////////////////////////////////////////////////////////////
+//// is_simple_lock
+///////////////////////////////////////////////////////////////////
+
+forall(dtype L | is_simple_lock(L)) {
+	void ?{}( condition_lock(L) & this ){
+		// default
+	}
+
+	void ^?{}( condition_lock(L) & this ){
+		// default
+	}
+
+	bool notify_one( condition_lock(L) & this ) with(this) {
+		return notify_one( c_var );
+	}
+
+	bool notify_all( condition_lock(L) & this ) with(this) {
+		return notify_all( c_var );
+	}
+
+	void wait( condition_lock(L) & this, L & l ) with(this) {
+		lock( m_lock );
+		size_t recursion = get_recursion_count( l );
+		unlock( l );
+		wait( c_var, m_lock );
+		lock( l );
+		set_recursion_count( l , recursion );
+		unlock( m_lock );
+	}
+}
Index: libcfa/src/concurrency/locks.hfa
===================================================================
--- libcfa/src/concurrency/locks.hfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ libcfa/src/concurrency/locks.hfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,211 @@
+#include <stdbool.h>
+
+#include "bits/algorithm.hfa"
+#include "bits/locks.hfa"
+#include "bits/containers.hfa"
+
+#include "invoke.h"
+
+#include "time_t.hfa"
+#include "time.hfa"
+#include <sys/time.h>
+
+///////////////////////////////////////////////////////////////////
+//// is_blocking_lock
+///////////////////////////////////////////////////////////////////
+
+trait is_blocking_lock(dtype L | sized(L)) {
+	void add_( L &, struct $thread * );		// For synchronization locks to use when acquiring
+	void remove_( L & );    // For synchronization locks to use when releasing
+	size_t get_recursion_count( L & ); // to get recursion count for cond lock to reset after waking
+	void set_recursion_count( L &, size_t recursion ); // to set recursion count after getting signalled;
+};
+
+///////////////////////////////////////////////////////////////////
+//// info_thread
+///////////////////////////////////////////////////////////////////
+
+forall(dtype L | is_blocking_lock(L)) {
+	struct info_thread {
+		struct $thread * t;
+		uintptr_t info;
+		info_thread(L) * next;
+		L * lock;
+	};
+
+
+	void ?{}( info_thread(L) & this, $thread * t );
+	void ?{}( info_thread(L) & this, $thread * t, uintptr_t info );
+	void ^?{}( info_thread(L) & this );
+
+	info_thread(L) *& get_next( info_thread(L) & this );
+}
+
+///////////////////////////////////////////////////////////////////
+//// Blocking Locks
+///////////////////////////////////////////////////////////////////
+struct blocking_lock {
+	// Spin lock used for mutual exclusion
+	__spinlock_t lock;
+
+	// List of blocked threads
+	__queue_t( struct $thread ) blocked_threads;
+
+	// Count of current blocked threads
+	size_t wait_count;
+
+	// Flag if the lock allows multiple acquisition
+	bool multi_acquisition;
+
+	// Flag if lock can be released by non owner
+	bool strict_owner;
+
+	// Current thread owning the lock
+	struct $thread * owner;
+
+	// Number of recursion level
+	size_t recursion_count;
+};
+
+struct mutex_lock {
+	inline blocking_lock;
+};
+
+struct owner_lock {
+	inline blocking_lock;
+};
+
+struct recursive_mutex_lock {
+	inline blocking_lock;
+};
+
+void ?{}( blocking_lock & this, bool multi_acquisition, bool strict_owner );
+void ^?{}( blocking_lock & this );
+
+void ?{}( mutex_lock & this );
+void ^?{}( mutex_lock & this );
+
+void ?{}( owner_lock & this );
+void ^?{}( owner_lock & this );
+
+void ?{}( recursive_mutex_lock & this );
+void ^?{}( recursive_mutex_lock & this );
+
+void lock( blocking_lock & this );
+bool try_lock( blocking_lock & this );
+void unlock( blocking_lock & this );
+void add_( blocking_lock & this, struct $thread * t );
+void remove_( blocking_lock & this );
+size_t wait_count( blocking_lock & this );
+void set_recursion_count( blocking_lock & this, size_t recursion );
+size_t get_recursion_count( blocking_lock & this );
+
+void lock( mutex_lock & this );
+void unlock( mutex_lock & this );
+void add_( mutex_lock & this, struct $thread * t );
+void remove_( mutex_lock & this );
+void set_recursion_count( mutex_lock & this, size_t recursion );
+size_t get_recursion_count( mutex_lock & this );
+
+void lock( recursive_mutex_lock & this );
+void unlock( recursive_mutex_lock & this );
+void add_( recursive_mutex_lock & this, struct $thread * t );
+void remove_( recursive_mutex_lock & this );
+void set_recursion_count( recursive_mutex_lock & this, size_t recursion );
+size_t get_recursion_count( recursive_mutex_lock & this );
+
+///////////////////////////////////////////////////////////////////
+//// Synchronization Locks
+///////////////////////////////////////////////////////////////////
+forall(dtype L | is_blocking_lock(L)) {
+	struct synchronization_lock {
+		// Spin lock used for mutual exclusion
+		__spinlock_t lock;
+
+		// List of blocked threads
+		__queue_t( info_thread(L) ) blocked_threads;
+
+		// Count of current blocked threads
+		int count;
+
+		// If true threads will reacquire the lock they block on upon waking
+		bool reacquire_after_signal;
+	};
+
+	struct condition_variable {
+		inline synchronization_lock(L);
+	};
+
+	struct thread_queue {
+		inline synchronization_lock(L);
+	};
+
+
+	void ?{}( synchronization_lock(L) & this, bool multi_acquisition, bool strict_owner );
+	void ^?{}( synchronization_lock(L) & this );
+
+	void ?{}( condition_variable(L) & this );
+	void ^?{}( condition_variable(L) & this );
+
+	void ?{}( thread_queue(L) & this );
+	void ^?{}( thread_queue(L) & this );
+
+	bool notify_one( synchronization_lock(L) & this );
+	bool notify_all( synchronization_lock(L) & this );
+
+	uintptr_t front( synchronization_lock(L) & this );
+
+	bool empty( synchronization_lock(L) & this );
+	int counter( synchronization_lock(L) & this );
+
+	// wait functions that are not passed a mutex lock
+	void wait( synchronization_lock(L) & this );
+	void wait( synchronization_lock(L) & this, uintptr_t info );
+	bool wait( synchronization_lock(L) & this, Duration duration );
+	bool wait( synchronization_lock(L) & this, uintptr_t info, Duration duration );
+	bool wait( synchronization_lock(L) & this, Time time );
+	bool wait( synchronization_lock(L) & this, uintptr_t info, Time time );
+
+	// wait functions that are passed a lock
+	bool notify_one( synchronization_lock(L) & this, L & l );
+	bool notify_all( synchronization_lock(L) & this, L & l );
+
+	void wait( synchronization_lock(L) & this, L & l );
+	void wait( synchronization_lock(L) & this, L & l, uintptr_t info );
+	bool wait( synchronization_lock(L) & this, L & l, Duration duration );
+	bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Duration duration );
+	bool wait( synchronization_lock(L) & this, L & l, Time time );
+	bool wait( synchronization_lock(L) & this, L & l, uintptr_t info, Time time );
+}
+
+///////////////////////////////////////////////////////////////////
+//// condition lock alternative approach
+///////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////
+//// is_simple_lock
+///////////////////////////////////////////////////////////////////
+
+trait is_simple_lock(dtype L | sized(L)) {
+	void lock( L & );		// For synchronization locks to use when acquiring
+	void unlock( L & );    // For synchronization locks to use when releasing
+	size_t get_recursion_count( L & ); // to get recursion count for cond lock to reset after waking
+	void set_recursion_count( L &, size_t recursion ); // to set recursion count after getting signalled;
+};
+
+forall(dtype L | is_simple_lock(L)) {
+	struct condition_lock {
+		// Spin lock used for mutual exclusion
+		mutex_lock m_lock;
+
+		condition_variable( mutex_lock ) c_var;
+	};
+
+	void ?{}( condition_lock(L) & this );
+	void ^?{}( condition_lock(L) & this );
+
+	bool notify_one( condition_lock(L) & this );
+	bool notify_all( condition_lock(L) & this );
+	void wait( condition_lock(L) & this, L & l );
+}
Index: libcfa/src/concurrency/monitor.cfa
===================================================================
--- libcfa/src/concurrency/monitor.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/monitor.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -122,5 +122,5 @@
 
 		unlock( this->lock );
-		park( __cfaabi_dbg_ctx );
+		park();
 
 		__cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
@@ -201,8 +201,8 @@
 		// Release the next thread
 		/* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
-		unpark( urgent->owner->waiting_thread __cfaabi_dbg_ctx2 );
+		unpark( urgent->owner->waiting_thread );
 
 		// Park current thread waiting
-		park( __cfaabi_dbg_ctx );
+		park();
 
 		// Some one was waiting for us, enter
@@ -222,5 +222,5 @@
 
 		// Park current thread waiting
-		park( __cfaabi_dbg_ctx );
+		park();
 
 		/* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
@@ -264,5 +264,5 @@
 	//We need to wake-up the thread
 	/* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
-	unpark( new_owner __cfaabi_dbg_ctx2 );
+	unpark( new_owner );
 }
 
@@ -493,9 +493,9 @@
 	// Wake the threads
 	for(int i = 0; i < thread_count; i++) {
-		unpark( threads[i] __cfaabi_dbg_ctx2 );
+		unpark( threads[i] );
 	}
 
 	// Everything is ready to go to sleep
-	park( __cfaabi_dbg_ctx );
+	park();
 
 	// We are back, restore the owners and recursions
@@ -575,8 +575,8 @@
 
 	// unpark the thread we signalled
-	unpark( signallee __cfaabi_dbg_ctx2 );
+	unpark( signallee );
 
 	//Everything is ready to go to sleep
-	park( __cfaabi_dbg_ctx );
+	park();
 
 
@@ -679,8 +679,8 @@
 
 				// unpark the thread we signalled
-				unpark( next __cfaabi_dbg_ctx2 );
+				unpark( next );
 
 				//Everything is ready to go to sleep
-				park( __cfaabi_dbg_ctx );
+				park();
 
 				// We are back, restore the owners and recursions
@@ -724,5 +724,5 @@
 
 	//Everything is ready to go to sleep
-	park( __cfaabi_dbg_ctx );
+	park();
 
 
Index: libcfa/src/concurrency/mutex.cfa
===================================================================
--- libcfa/src/concurrency/mutex.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/mutex.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -42,5 +42,5 @@
 		append( blocked_threads, kernelTLS.this_thread );
 		unlock( lock );
-		park( __cfaabi_dbg_ctx );
+		park();
 	}
 	else {
@@ -65,5 +65,5 @@
 	this.is_locked = (this.blocked_threads != 0);
 	unpark(
-		pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
+		pop_head( this.blocked_threads )
 	);
 	unlock( this.lock );
@@ -97,5 +97,5 @@
 		append( blocked_threads, kernelTLS.this_thread );
 		unlock( lock );
-		park( __cfaabi_dbg_ctx );
+		park();
 	}
 }
@@ -124,5 +124,5 @@
 		owner = thrd;
 		recursion_count = (thrd ? 1 : 0);
-		unpark( thrd __cfaabi_dbg_ctx2 );
+		unpark( thrd );
 	}
 	unlock( lock );
@@ -142,5 +142,5 @@
 	lock( lock __cfaabi_dbg_ctx2 );
 	unpark(
-		pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
+		pop_head( this.blocked_threads )
 	);
 	unlock( lock );
@@ -151,5 +151,5 @@
 	while(this.blocked_threads) {
 		unpark(
-			pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
+			pop_head( this.blocked_threads )
 		);
 	}
@@ -161,5 +161,5 @@
 	append( this.blocked_threads, kernelTLS.this_thread );
 	unlock( this.lock );
-	park( __cfaabi_dbg_ctx );
+	park();
 }
 
@@ -170,5 +170,5 @@
 	unlock(l);
 	unlock(this.lock);
-	park( __cfaabi_dbg_ctx );
+	park();
 	lock(l);
 }
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/preemption.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -274,5 +274,5 @@
 		kernelTLS.this_stats = this->curr_cluster->stats;
 	#endif
-	__unpark( id, this __cfaabi_dbg_ctx2 );
+	__unpark( id, this );
 }
 
@@ -411,4 +411,5 @@
 static void * alarm_loop( __attribute__((unused)) void * args ) {
 	__processor_id_t id;
+	id.full_proc = false;
 	id.id = doregister(&id);
 
Index: libcfa/src/concurrency/snzi.hfa
===================================================================
--- libcfa/src/concurrency/snzi.hfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/snzi.hfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -36,5 +36,5 @@
 static inline void depart( __snzi_node_t & );
 
-#define __snzi_half -1
+static const int __snzi_half = -1;
 
 //--------------------------------------------------
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/thread.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -39,4 +39,7 @@
 	link.prev = 0p;
 	link.preferred = -1;
+	#if defined( __CFA_WITH_VERIFY__ )
+		canary = 0x0D15EA5E0D15EA5E;
+	#endif
 
 	node.next = 0p;
@@ -48,4 +51,7 @@
 
 void ^?{}($thread& this) with( this ) {
+	#if defined( __CFA_WITH_VERIFY__ )
+		canary = 0xDEADDEADDEADDEAD;
+	#endif
 	unregister(curr_cluster, this);
 	^self_cor{};
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/concurrency/thread.hfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -88,13 +88,13 @@
 //----------
 // Park thread: block until corresponding call to unpark, won't block if unpark is already called
-void park( __cfaabi_dbg_ctx_param );
+void park( void );
 
 //----------
 // Unpark a thread, if the thread is already blocked, schedule it
 //                  if the thread is not yet block, signal that it should rerun immediately
-void unpark( $thread * this __cfaabi_dbg_ctx_param2 );
+void unpark( $thread * this );
 
 forall( dtype T | is_thread(T) )
-static inline void unpark( T & this __cfaabi_dbg_ctx_param2 ) { if(!&this) return; unpark( get_thread( this ) __cfaabi_dbg_ctx_fwd2 );}
+static inline void unpark( T & this ) { if(!&this) return; unpark( get_thread( this ) );}
 
 //----------
Index: libcfa/src/exception.h
===================================================================
--- libcfa/src/exception.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/exception.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -76,21 +76,21 @@
 // implemented in the .c file either so they all have to be inline.
 
-trait is_exception(dtype exceptT) {
+trait is_exception(dtype exceptT, dtype virtualT) {
 	/* The first field must be a pointer to a virtual table.
-	 * That virtual table must be a decendent of the base exception virtual tab$
+	 * That virtual table must be a decendent of the base exception virtual table.
 	 */
-	void mark_exception(exceptT *);
-	// This is never used and should be a no-op.
+	virtualT const & get_exception_vtable(exceptT *);
+	// Always returns the virtual table for this type (associated types hack).
 };
 
-trait is_termination_exception(dtype exceptT | is_exception(exceptT)) {
+trait is_termination_exception(dtype exceptT, dtype virtualT | is_exception(exceptT, virtualT)) {
 	void defaultTerminationHandler(exceptT &);
 };
 
-trait is_resumption_exception(dtype exceptT | is_exception(exceptT)) {
+trait is_resumption_exception(dtype exceptT, dtype virtualT | is_exception(exceptT, virtualT)) {
 	void defaultResumptionHandler(exceptT &);
 };
 
-forall(dtype exceptT | is_termination_exception(exceptT))
+forall(dtype exceptT, dtype virtualT | is_termination_exception(exceptT, virtualT))
 static inline void $throw(exceptT & except) {
 	__cfaehm_throw_terminate(
@@ -100,5 +100,5 @@
 }
 
-forall(dtype exceptT | is_resumption_exception(exceptT))
+forall(dtype exceptT, dtype virtualT | is_resumption_exception(exceptT, virtualT))
 static inline void $throwResume(exceptT & except) {
 	__cfaehm_throw_resume(
@@ -108,15 +108,15 @@
 }
 
-forall(dtype exceptT | is_exception(exceptT))
+forall(dtype exceptT, dtype virtualT | is_exception(exceptT, virtualT))
 static inline void cancel_stack(exceptT & except) __attribute__((noreturn)) {
 	__cfaehm_cancel_stack( (exception_t *)&except );
 }
 
-forall(dtype exceptT | is_exception(exceptT))
+forall(dtype exceptT, dtype virtualT | is_exception(exceptT, virtualT))
 static inline void defaultTerminationHandler(exceptT & except) {
 	return cancel_stack( except );
 }
 
-forall(dtype exceptT | is_exception(exceptT))
+forall(dtype exceptT, dtype virtualT | is_exception(exceptT, virtualT))
 static inline void defaultResumptionHandler(exceptT & except) {
 	throw except;
Index: libcfa/src/exception.hfa
===================================================================
--- libcfa/src/exception.hfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/exception.hfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -95,5 +95,5 @@
 // visible anywhere you use the instantiation of the exception is used.
 #define POLY_VTABLE_DECLARATION(exception_name, ...) \
-	void mark_exception(exception_name(__VA_ARGS__) *); \
+	VTABLE_TYPE(exception_name)(__VA_ARGS__) const & get_exception_vtable(exception_name(__VA_ARGS__) *); \
 	extern VTABLE_TYPE(exception_name)(__VA_ARGS__) VTABLE_NAME(exception_name)
 
@@ -125,4 +125,13 @@
 #define VTABLE_ASSERTION(exception_name, parameters) \
 	{ VTABLE_TYPE(exception_name) parameters VTABLE_NAME(exception_name); }
+
+// IS_EXCEPTION(exception_name [, (...parameters)])
+// IS_RESUMPTION_EXCEPTION(exception_name [, (parameters...)])
+// IS_TERMINATION_EXCEPTION(exception_name [, (parameters...)])
+// Create an assertion that exception_name, possibly with the qualifing parameters, is the given
+// kind of exception with the standard vtable with the same parameters if applicable.
+#define IS_EXCEPTION(...) _IS_EXCEPTION(is_exception, __VA_ARGS__, , ~)
+#define IS_RESUMPTION_EXCEPTION(...) _IS_EXCEPTION(is_resumption_exception, __VA_ARGS__, , ~)
+#define IS_TERMINATION_EXCEPTION(...) _IS_EXCEPTION(is_termination_exception, __VA_ARGS__, , ~)
 
 // All internal helper macros begin with an underscore.
@@ -160,10 +169,11 @@
 
 #define _FORALL_CTOR0_DECLARATION(exception_name, assertions, parameters) \
-	forall(_UNPACK assertions | VTABLE_ASSERTION(exception_name, parameters) ) \
+	forall(_UNPACK assertions | \
+		is_exception(exception_name parameters, VTABLE_TYPE(exception_name) parameters)) \
 	void ?{}(exception_name parameters & this)
 
 #define _FORALL_CTOR0_INSTANCE(exception_name, assertions, parameters) \
 	_FORALL_CTOR0_DECLARATION(exception_name, assertions, parameters) { \
-		VTABLE_INIT(this, exception_name); \
+		(this).virtual_table = &get_exception_vtable(&this); \
 	}
 
@@ -185,6 +195,6 @@
 #define _VTABLE_DECLARATION(exception_name, parent_name, ...) \
 	struct exception_name; \
-	void mark_exception(exception_name *); \
 	VTABLE_TYPE(exception_name); \
+	VTABLE_TYPE(exception_name) const & get_exception_vtable(exception_name *); \
 	extern VTABLE_TYPE(exception_name) VTABLE_NAME(exception_name); \
 	VTABLE_TYPE(exception_name) { \
@@ -197,5 +207,7 @@
 
 #define _VTABLE_INSTANCE(exception_name, parent_name, ...) \
-	void mark_exception(exception_name *) {} \
+	VTABLE_TYPE(exception_name) const & get_exception_vtable(exception_name *) { \
+		return VTABLE_NAME(exception_name); \
+	} \
 	void _GLUE2(exception_name,_copy)(exception_name * this, exception_name * other) { \
 		*this = *other; \
@@ -218,5 +230,9 @@
 
 #define _POLY_VTABLE_INSTANCE(exception_name, parent_name, ...) \
-	void mark_exception(exception_name(__VA_ARGS__) *) {} \
+	extern VTABLE_TYPE(exception_name)(__VA_ARGS__) VTABLE_NAME(exception_name); \
+	VTABLE_TYPE(exception_name)(__VA_ARGS__) const & get_exception_vtable( \
+			exception_name(__VA_ARGS__) *) { \
+		return VTABLE_NAME(exception_name); \
+	} \
 	void _GLUE2(exception_name,_copy)( \
 			exception_name(__VA_ARGS__) * this, exception_name(__VA_ARGS__) * other) { \
@@ -227,2 +243,5 @@
 		_GLUE2(exception_name,_copy), ^?{}, \
 		_CLOSE
+
+#define _IS_EXCEPTION(kind, exception_name, parameters, ...) \
+	kind(exception_name parameters, VTABLE_TYPE(exception_name) parameters)
Index: libcfa/src/parseargs.cfa
===================================================================
--- libcfa/src/parseargs.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ libcfa/src/parseargs.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -25,12 +25,18 @@
 #include "limits.hfa"
 
-extern int cfa_args_argc;
-extern char ** cfa_args_argv;
-extern char ** cfa_args_envp;
+extern int cfa_args_argc __attribute__((weak));
+extern char ** cfa_args_argv __attribute__((weak));
+extern char ** cfa_args_envp __attribute__((weak));
 
 static void usage(char * cmd, cfa_option options[], size_t opt_count, const char * usage, FILE * out)  __attribute__ ((noreturn));
 
 void parse_args( cfa_option options[], size_t opt_count, const char * usage, char ** & left ) {
-	parse_args(cfa_args_argc, cfa_args_argv, options, opt_count, usage, left );
+	if( 0p != &cfa_args_argc ) {
+		parse_args(cfa_args_argc, cfa_args_argv, options, opt_count, usage, left );
+	}
+	else {
+		char * temp = "";
+		parse_args(0, &temp, options, opt_count, usage, left );
+	}
 }
 
Index: src/AST/Convert.cpp
===================================================================
--- src/AST/Convert.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/AST/Convert.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -177,9 +177,25 @@
 	const ast::DeclWithType * visit( const ast::FunctionDecl * node ) override final {
 		if ( inCache( node ) ) return nullptr;
+
+		// function decl contains real variables that the type must use.
+		// the structural change means function type in and out of decl
+		// must be handled **differently** on convert back to old.
+		auto ftype = new FunctionType(
+			cv(node->type),
+			(bool)node->type->isVarArgs
+		);
+		ftype->returnVals = get<DeclarationWithType>().acceptL(node->returns);
+		ftype->parameters = get<DeclarationWithType>().acceptL(node->params);
+
+		ftype->forall = get<TypeDecl>().acceptL( node->type->forall );
+
+		visitType(node->type, ftype);
+
 		auto decl = new FunctionDecl(
 			node->name,
 			Type::StorageClasses( node->storage.val ),
 			LinkageSpec::Spec( node->linkage.val ),
-			get<FunctionType>().accept1( node->type ),
+			ftype,
+			//get<FunctionType>().accept1( node->type ),
 			{},
 			get<Attribute>().acceptL( node->attributes ),
@@ -1152,10 +1168,28 @@
 
 	const ast::Type * visit( const ast::FunctionType * node ) override final {
+		static std::string dummy_paramvar_prefix = "__param_";
+		static std::string dummy_returnvar_prefix = "__retval_";
+
 		auto ty = new FunctionType {
 			cv( node ),
 			(bool)node->isVarArgs
 		};
-		ty->returnVals = get<DeclarationWithType>().acceptL( node->returns );
-		ty->parameters = get<DeclarationWithType>().acceptL( node->params );
+		auto returns = get<Type>().acceptL(node->returns);
+		auto params = get<Type>().acceptL(node->params);
+
+		int ret_index = 0;
+		for (auto t: returns) {
+			// xxx - LinkageSpec shouldn't matter but needs to be something
+			ObjectDecl * dummy = new ObjectDecl(dummy_returnvar_prefix + std::to_string(ret_index++), {}, LinkageSpec::C, nullptr, t, nullptr);
+			ty->returnVals.push_back(dummy);
+		}
+		int param_index = 0;
+		for (auto t: params) {
+			ObjectDecl * dummy = new ObjectDecl(dummy_paramvar_prefix + std::to_string(param_index++), {}, LinkageSpec::C, nullptr, t, nullptr);
+			ty->parameters.push_back(dummy);
+		}
+
+		// ty->returnVals = get<DeclarationWithType>().acceptL( node->returns );
+		// ty->parameters = get<DeclarationWithType>().acceptL( node->params );
 		ty->forall = get<TypeDecl>().acceptL( node->forall );
 		return visitType( node, ty );
@@ -1374,5 +1408,9 @@
 	ast::Node * node = nullptr;
 	/// cache of nodes that might be referenced by readonly<> for de-duplication
-	std::unordered_map< const BaseSyntaxNode *, ast::Node * > cache = {};
+	/// in case that some nodes are dropped by conversion (due to possible structural change)
+	/// use smart pointers in cache value to prevent accidental invalidation.
+	/// at conversion stage, all created nodes are guaranteed to be unique, therefore
+	/// const_casting out of smart pointers is permitted.
+	std::unordered_map< const BaseSyntaxNode *, ast::ptr<ast::Node> > cache = {};
 
 	// Local Utilities:
@@ -1447,5 +1485,5 @@
 		auto it = cache.find( old );
 		if ( it == cache.end() ) return false;
-		node = it->second;
+		node = const_cast<ast::Node *>(it->second.get());
 		return true;
 	}
@@ -1486,8 +1524,28 @@
 	virtual void visit( const FunctionDecl * old ) override final {
 		if ( inCache( old ) ) return;
+		auto paramVars = GET_ACCEPT_V(type->parameters, DeclWithType);
+		auto returnVars = GET_ACCEPT_V(type->returnVals, DeclWithType);
+		auto forall = GET_ACCEPT_V(type->forall, TypeDecl);
+
+		// function type is now derived from parameter decls instead of storing them
+		auto ftype = new ast::FunctionType((ast::ArgumentFlag)old->type->isVarArgs, cv(old->type));
+		ftype->params.reserve(paramVars.size());
+		ftype->returns.reserve(returnVars.size());
+
+		for (auto & v: paramVars) {
+			ftype->params.emplace_back(v->get_type());
+		}
+		for (auto & v: returnVars) {
+			ftype->returns.emplace_back(v->get_type());
+		}
+		ftype->forall = std::move(forall);
+		visitType(old->type, ftype);
+
 		auto decl = new ast::FunctionDecl{
 			old->location,
 			old->name,
-			GET_ACCEPT_1(type, FunctionType),
+			// GET_ACCEPT_1(type, FunctionType),
+			std::move(paramVars),
+			std::move(returnVars),
 			{},
 			{ old->storageClasses.val },
@@ -1496,5 +1554,8 @@
 			{ old->get_funcSpec().val }
 		};
+
+		decl->type = ftype;
 		cache.emplace( old, decl );
+
 		decl->withExprs = GET_ACCEPT_V(withExprs, Expr);
 		decl->stmts = GET_ACCEPT_1(statements, CompoundStmt);
@@ -2515,6 +2576,14 @@
 			cv( old )
 		};
-		ty->returns = GET_ACCEPT_V( returnVals, DeclWithType );
-		ty->params = GET_ACCEPT_V( parameters, DeclWithType );
+		auto returnVars = GET_ACCEPT_V(returnVals, DeclWithType);
+		auto paramVars = GET_ACCEPT_V(parameters, DeclWithType);
+		// ty->returns = GET_ACCEPT_V( returnVals, DeclWithType );
+		// ty->params = GET_ACCEPT_V( parameters, DeclWithType );
+		for (auto & v: returnVars) {
+			ty->returns.emplace_back(v->get_type());
+		}
+		for (auto & v: paramVars) {
+			ty->params.emplace_back(v->get_type());
+		}
 		ty->forall = GET_ACCEPT_V( forall, TypeDecl );
 		visitType( old, ty );
Index: src/AST/Decl.hpp
===================================================================
--- src/AST/Decl.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/AST/Decl.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -124,12 +124,16 @@
 class FunctionDecl : public DeclWithType {
 public:
+	std::vector<ptr<DeclWithType>> params;
+	std::vector<ptr<DeclWithType>> returns;
+	// declared type, derived from parameter declarations
 	ptr<FunctionType> type;
 	ptr<CompoundStmt> stmts;
 	std::vector< ptr<Expr> > withExprs;
 
-	FunctionDecl( const CodeLocation & loc, const std::string & name, FunctionType * type,
+	FunctionDecl( const CodeLocation & loc, const std::string & name, 
+		std::vector<ptr<DeclWithType>>&& params, std::vector<ptr<DeclWithType>>&& returns,
 		CompoundStmt * stmts, Storage::Classes storage = {}, Linkage::Spec linkage = Linkage::C,
 		std::vector<ptr<Attribute>>&& attrs = {}, Function::Specs fs = {})
-	: DeclWithType( loc, name, storage, linkage, std::move(attrs), fs ), type( type ),
+	: DeclWithType( loc, name, storage, linkage, std::move(attrs), fs ), params(std::move(params)), returns(std::move(returns)),
 	  stmts( stmts ) {}
 
Index: src/AST/ForallSubstitutor.hpp
===================================================================
--- src/AST/ForallSubstitutor.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/AST/ForallSubstitutor.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -33,4 +33,14 @@
 	}
 
+	template<typename node_t > 
+	std::vector<ptr<node_t>> operator() (const std::vector<ptr<node_t>> & o) {
+		std::vector<ptr<node_t>> n;
+		n.reserve(o.size());
+		for (const node_t * d : o) { n.emplace_back(d->accept(*visitor)); }
+		return n;
+	}
+	
+	/*
+
 	/// Substitute parameter/return type
 	std::vector< ptr< DeclWithType > > operator() ( const std::vector< ptr< DeclWithType > > & o ) {
@@ -48,4 +58,6 @@
 		return n;
 	}
+
+	*/
 };
 
Index: src/AST/Pass.impl.hpp
===================================================================
--- src/AST/Pass.impl.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/AST/Pass.impl.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -465,4 +465,8 @@
 			__pass::symtab::addId( core, 0, func );
 			VISIT(
+				// parameter declarations are now directly here
+				maybe_accept( node, &FunctionDecl::params );
+				maybe_accept( node, &FunctionDecl::returns );
+				// foralls are still in function type
 				maybe_accept( node, &FunctionDecl::type );
 				// function body needs to have the same scope as parameters - CompoundStmt will not enter
Index: src/AST/SymbolTable.cpp
===================================================================
--- src/AST/SymbolTable.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/AST/SymbolTable.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -335,4 +335,5 @@
 }
 
+/*
 void SymbolTable::addFunctionType( const FunctionType * ftype ) {
 	addTypes( ftype->forall );
@@ -340,4 +341,5 @@
 	addIds( ftype->params );
 }
+*/
 
 void SymbolTable::lazyInitScope() {
@@ -368,5 +370,5 @@
 		assert( ! params.empty() );
 		// use base type of pointer, so that qualifiers on the pointer type aren't considered.
-		const Type * base = InitTweak::getPointerBase( params.front()->get_type() );
+		const Type * base = InitTweak::getPointerBase( params.front() );
 		assert( base );
 		return Mangle::mangle( base );
Index: src/AST/SymbolTable.hpp
===================================================================
--- src/AST/SymbolTable.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/AST/SymbolTable.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -145,5 +145,5 @@
 
 	/// convenience function for adding all of the declarations in a function type to the indexer
-	void addFunctionType( const FunctionType * ftype );
+	// void addFunctionType( const FunctionType * ftype );
 
 private:
Index: src/AST/Type.cpp
===================================================================
--- src/AST/Type.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/AST/Type.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -102,4 +102,5 @@
 // --- FunctionType
 
+
 FunctionType::FunctionType( const FunctionType & o )
 : ParameterizedType( o.qualifiers, copy( o.attributes ) ), returns(), params(),
@@ -112,7 +113,7 @@
 
 namespace {
-	bool containsTtype( const std::vector<ptr<DeclWithType>> & l ) {
+	bool containsTtype( const std::vector<ptr<Type>> & l ) {
 		if ( ! l.empty() ) {
-			return Tuples::isTtype( l.back()->get_type() );
+			return Tuples::isTtype( l.back() );
 		}
 		return false;
Index: src/AST/Type.hpp
===================================================================
--- src/AST/Type.hpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/AST/Type.hpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -302,6 +302,9 @@
 class FunctionType final : public ParameterizedType {
 public:
-	std::vector<ptr<DeclWithType>> returns;
-	std::vector<ptr<DeclWithType>> params;
+//	std::vector<ptr<DeclWithType>> returns;
+//	std::vector<ptr<DeclWithType>> params;
+
+	std::vector<ptr<Type>> returns;
+	std::vector<ptr<Type>> params;
 
 	/// Does the function accept a variable number of arguments following the arguments specified
Index: src/Concurrency/Keywords.cc
===================================================================
--- src/Concurrency/Keywords.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/Concurrency/Keywords.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -66,5 +66,6 @@
 			bool needs_main, AggregateDecl::Aggregate cast_target ) :
 		  type_name( type_name ), field_name( field_name ), getter_name( getter_name ),
-		  context_error( context_error ), vtable_name( getVTableName( exception_name ) ),
+		  context_error( context_error ), exception_name( exception_name ),
+		  vtable_name( getVTableName( exception_name ) ),
 		  needs_main( needs_main ), cast_target( cast_target ) {}
 
@@ -89,4 +90,5 @@
 		const std::string getter_name;
 		const std::string context_error;
+		const std::string exception_name;
 		const std::string vtable_name;
 		bool needs_main;
@@ -95,4 +97,5 @@
 		StructDecl   * type_decl = nullptr;
 		FunctionDecl * dtor_decl = nullptr;
+		StructDecl * except_decl = nullptr;
 		StructDecl * vtable_decl = nullptr;
 	};
@@ -376,4 +379,7 @@
 		else if ( is_target(decl) ) {
 			handle( decl );
+		}
+		else if ( !except_decl && exception_name == decl->name && decl->body ) {
+			except_decl = decl;
 		}
 		else if ( !vtable_decl && vtable_name == decl->name && decl->body ) {
@@ -398,7 +404,11 @@
 			assert( struct_type );
 
-			declsToAddAfter.push_back( Virtual::makeVtableInstance( vtable_decl, {
-				new TypeExpr( struct_type->clone() ),
-			}, struct_type, nullptr ) );
+			std::list< Expression * > poly_args = { new TypeExpr( struct_type->clone() ) };
+			ObjectDecl * vtable_object = Virtual::makeVtableInstance(
+				vtable_decl->makeInst( poly_args ), struct_type, nullptr );
+			declsToAddAfter.push_back( vtable_object );
+			declsToAddAfter.push_back( Virtual::makeGetExceptionFunction(
+				vtable_object, except_decl->makeInst( std::move( poly_args ) )
+			) );
 		}
 
@@ -434,7 +444,13 @@
 	void ConcurrentSueKeyword::addVtableForward( StructDecl * decl ) {
 		if ( vtable_decl ) {
-			declsToAddBefore.push_back( Virtual::makeVtableForward( vtable_decl, {
+			std::list< Expression * > poly_args = {
 				new TypeExpr( new StructInstType( noQualifiers, decl ) ),
-			} ) );
+			};
+			declsToAddBefore.push_back( Virtual::makeGetExceptionForward(
+				vtable_decl->makeInst( poly_args ),
+				except_decl->makeInst( poly_args )
+			) );
+			declsToAddBefore.push_back( Virtual::makeVtableForward(
+				vtable_decl->makeInst( move( poly_args ) ) ) );
 		// Its only an error if we want a vtable and don't have one.
 		} else if ( ! vtable_name.empty() ) {
Index: src/GenPoly/InstantiateGeneric.cc
===================================================================
--- src/GenPoly/InstantiateGeneric.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/GenPoly/InstantiateGeneric.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -172,5 +172,5 @@
 		InstantiationMap< AggregateDecl, AggregateDecl > instantiations;
 		/// Set of types which are dtype-only generic (and therefore have static layout)
-		ScopedSet< AggregateDecl* > dtypeStatics;
+		std::set<AggregateDecl *> dtypeStatics;
 		/// Namer for concrete types
 		UniqueName typeNamer;
@@ -505,10 +505,8 @@
 	void GenericInstantiator::beginScope() {
 		instantiations.beginScope();
-		dtypeStatics.beginScope();
 	}
 
 	void GenericInstantiator::endScope() {
 		instantiations.endScope();
-		dtypeStatics.endScope();
 	}
 
Index: src/InitTweak/FixGlobalInit.cc
===================================================================
--- src/InitTweak/FixGlobalInit.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/InitTweak/FixGlobalInit.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -112,4 +112,20 @@
 			} // if
 			if ( Statement * ctor = ctorInit->ctor ) {
+				// Translation 1: Add this attribute on the global declaration:
+				//    __attribute__((section (".data#")))
+				// which makes gcc put the global in the data section,
+				// so that the global is writeable (via a const cast) in the init function.
+				// The trailing # is an injected assembly comment, to suppress the "a" in
+				//    .section .data,"a"
+				//    .section .data#,"a"
+				// to avoid assembler warning "ignoring changed section attributes for .data"
+				Type *strLitT = new PointerType( Type::Qualifiers( ),
+					new BasicType( Type::Qualifiers( ), BasicType::Char ) );
+				std::list< Expression * > attr_params;
+				attr_params.push_back( 
+					new ConstantExpr( Constant( strLitT, "\".data#\"", std::nullopt ) ) );
+				objDecl->attributes.push_back(new Attribute("section", attr_params));
+				// Translation 2: Move the initizliation off the global declaration,
+				// into the startup function.
 				initStatements.push_back( ctor );
 				objDecl->init = nullptr;
Index: src/InitTweak/InitTweak.cc
===================================================================
--- src/InitTweak/InitTweak.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/InitTweak/InitTweak.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -1026,7 +1026,7 @@
 		if ( ftype->params.size() != 2 ) return false;
 
-		const ast::Type * t1 = getPointerBase( ftype->params.front()->get_type() );
+		const ast::Type * t1 = getPointerBase( ftype->params.front() );
 		if ( ! t1 ) return false;
-		const ast::Type * t2 = ftype->params.back()->get_type();
+		const ast::Type * t2 = ftype->params.back();
 
 		return ResolvExpr::typesCompatibleIgnoreQualifiers( t1, t2, ast::SymbolTable{} );
Index: src/Parser/DeclarationNode.cc
===================================================================
--- src/Parser/DeclarationNode.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/Parser/DeclarationNode.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Sat May 16 12:34:05 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Jun  9 20:26:55 2020
-// Update Count     : 1134
+// Last Modified On : Thu Oct  8 08:03:38 2020
+// Update Count     : 1135
 //
 
@@ -1016,5 +1016,5 @@
 			if ( DeclarationWithType * dwt = dynamic_cast< DeclarationWithType * >( decl ) ) {
 				dwt->location = cur->location;
-				* out++ = dwt;
+				*out++ = dwt;
 			} else if ( StructDecl * agg = dynamic_cast< StructDecl * >( decl ) ) {
 				// e.g., int foo(struct S) {}
@@ -1022,5 +1022,5 @@
 				auto obj = new ObjectDecl( "", Type::StorageClasses(), linkage, nullptr, inst, nullptr );
 				obj->location = cur->location;
-				* out++ = obj;
+				*out++ = obj;
 				delete agg;
 			} else if ( UnionDecl * agg = dynamic_cast< UnionDecl * >( decl ) ) {
@@ -1029,5 +1029,5 @@
 				auto obj = new ObjectDecl( "", Type::StorageClasses(), linkage, nullptr, inst, nullptr );
 				obj->location = cur->location;
-				* out++ = obj;
+				*out++ = obj;
 			} else if ( EnumDecl * agg = dynamic_cast< EnumDecl * >( decl ) ) {
 				// e.g., int foo(enum E) {}
@@ -1035,5 +1035,5 @@
 				auto obj = new ObjectDecl( "", Type::StorageClasses(), linkage, nullptr, inst, nullptr );
 				obj->location = cur->location;
-				* out++ = obj;
+				*out++ = obj;
 			} // if
 		} catch( SemanticErrorException & e ) {
Index: src/Parser/lex.ll
===================================================================
--- src/Parser/lex.ll	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/Parser/lex.ll	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
  * Created On       : Sat Sep 22 08:58:10 2001
  * Last Modified By : Peter A. Buhr
- * Last Modified On : Sat Feb 15 11:05:50 2020
- * Update Count     : 737
+ * Last Modified On : Tue Oct  6 18:15:41 2020
+ * Update Count     : 743
  */
 
@@ -62,5 +62,5 @@
 #define IDENTIFIER_RETURN()	RETURN_VAL( typedefTable.isKind( yytext ) )
 
-#ifdef HAVE_KEYWORDS_FLOATXX								// GCC >= 7 => keyword, otherwise typedef
+#ifdef HAVE_KEYWORDS_FLOATXX							// GCC >= 7 => keyword, otherwise typedef
 #define FLOATXX(v) KEYWORD_RETURN(v);
 #else
@@ -292,5 +292,5 @@
 __restrict__	{ KEYWORD_RETURN(RESTRICT); }			// GCC
 return			{ KEYWORD_RETURN(RETURN); }
-	/* resume			{ KEYWORD_RETURN(RESUME); }				// CFA */
+ /* resume			{ KEYWORD_RETURN(RESUME); }				// CFA */
 short			{ KEYWORD_RETURN(SHORT); }
 signed			{ KEYWORD_RETURN(SIGNED); }
Index: src/Parser/parser.yy
===================================================================
--- src/Parser/parser.yy	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/Parser/parser.yy	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu May 28 12:11:45 2020
-// Update Count     : 4500
+// Last Modified On : Fri Oct  9 18:09:09 2020
+// Update Count     : 4614
 //
 
@@ -204,8 +204,8 @@
 			return forCtrl( type, new string( identifier->name ), start, compop, comp, inc );
 		} else {
-			SemanticError( yylloc, "Expression disallowed. Only loop-index name allowed" ); return nullptr;
+			SemanticError( yylloc, "Expression disallowed. Only loop-index name allowed." ); return nullptr;
 		} // if
 	} else {
-		SemanticError( yylloc, "Expression disallowed. Only loop-index name allowed" ); return nullptr;
+		SemanticError( yylloc, "Expression disallowed. Only loop-index name allowed." ); return nullptr;
 	} // if
 } // forCtrl
@@ -278,6 +278,6 @@
 %token OTYPE FTYPE DTYPE TTYPE TRAIT					// CFA
 %token SIZEOF OFFSETOF
-// %token RESUME									// CFA
-%token SUSPEND									// CFA
+// %token RESUME											// CFA
+%token SUSPEND											// CFA
 %token ATTRIBUTE EXTENSION								// GCC
 %token IF ELSE SWITCH CASE DEFAULT DO WHILE FOR BREAK CONTINUE GOTO RETURN
@@ -329,5 +329,5 @@
 %type<en> conditional_expression		constant_expression			assignment_expression		assignment_expression_opt
 %type<en> comma_expression				comma_expression_opt
-%type<en> argument_expression_list_opt		argument_expression			default_initialize_opt
+%type<en> argument_expression_list_opt	argument_expression			default_initialize_opt
 %type<ifctl> if_control_expression
 %type<fctl> for_control_expression		for_control_expression_list
@@ -370,5 +370,5 @@
 %type<decl> assertion assertion_list assertion_list_opt
 
-%type<en>   bit_subrange_size_opt bit_subrange_size
+%type<en> bit_subrange_size_opt bit_subrange_size
 
 %type<decl> basic_declaration_specifier basic_type_name basic_type_specifier direct_type indirect_type
@@ -793,5 +793,4 @@
 	| '(' aggregate_control '&' ')' cast_expression		// CFA
 		{ $$ = new ExpressionNode( build_keyword_cast( $2, $5 ) ); }
-		// VIRTUAL cannot be opt because of look ahead issues
 	| '(' VIRTUAL ')' cast_expression					// CFA
 		{ $$ = new ExpressionNode( new VirtualCastExpr( maybeMoveBuild< Expression >( $4 ), maybeMoveBuildType( nullptr ) ) ); }
@@ -920,9 +919,9 @@
 	| unary_expression assignment_operator assignment_expression
 		{
-			if ( $2 == OperKinds::AtAssn ) {
-				SemanticError( yylloc, "C @= assignment is currently unimplemented." ); $$ = nullptr;
-			} else {
+//			if ( $2 == OperKinds::AtAssn ) {
+//				SemanticError( yylloc, "C @= assignment is currently unimplemented." ); $$ = nullptr;
+//			} else {
 				$$ = new ExpressionNode( build_binary_val( $2, $1, $3 ) );
-			} // if
+//			} // if
 		}
 	| unary_expression '=' '{' initializer_list_opt comma_opt '}'
@@ -1676,38 +1675,14 @@
 
 typedef_expression:
-		// GCC, naming expression type: typedef name = exp; gives a name to the type of an expression
+		// deprecated GCC, naming expression type: typedef name = exp; gives a name to the type of an expression
 	TYPEDEF identifier '=' assignment_expression
 		{
-			// $$ = DeclarationNode::newName( 0 );			// unimplemented
-			SemanticError( yylloc, "Typedef expression is currently unimplemented." ); $$ = nullptr;
+			SemanticError( yylloc, "Typedef expression is deprecated, use typeof(...) instead." ); $$ = nullptr;
 		}
 	| typedef_expression pop ',' push identifier '=' assignment_expression
 		{
-			// $$ = DeclarationNode::newName( 0 );			// unimplemented
-			SemanticError( yylloc, "Typedef expression is currently unimplemented." ); $$ = nullptr;
-		}
-	;
-
-//c_declaration:
-//	declaring_list pop ';'
-//	| typedef_declaration pop ';'
-//	| typedef_expression pop ';'						// GCC, naming expression type
-//	| sue_declaration_specifier pop ';'
-//	;
-//
-//declaring_list:
-//		// A semantic check is required to ensure asm_name only appears on declarations with implicit or explicit static
-//		// storage-class
-//	 declarator asm_name_opt initializer_opt
-//		{
-//			typedefTable.addToEnclosingScope( IDENTIFIER );
-//			$$ = ( $2->addType( $1 ))->addAsmName( $3 )->addInitializer( $4 );
-//		}
-//	| declaring_list ',' attribute_list_opt declarator asm_name_opt initializer_opt
-//		{
-//			typedefTable.addToEnclosingScope( IDENTIFIER );
-//			$$ = $1->appendList( $1->cloneBaseType( $4->addAsmName( $5 )->addInitializer( $6 ) ) );
-//		}
-//	;
+			SemanticError( yylloc, "Typedef expression is deprecated, use typeof(...) instead." ); $$ = nullptr;
+		}
+	;
 
 c_declaration:
@@ -1715,5 +1690,5 @@
 		{ $$ = distAttr( $1, $2 ); }
 	| typedef_declaration
-	| typedef_expression								// GCC, naming expression type
+	| typedef_expression								// deprecated GCC, naming expression type
 	| sue_declaration_specifier
 	;
@@ -2094,5 +2069,6 @@
 		{ yyy = true; $$ = AggregateDecl::Union; }
 	| EXCEPTION											// CFA
-		{ yyy = true; $$ = AggregateDecl::Exception; }
+		// { yyy = true; $$ = AggregateDecl::Exception; }
+		{ SemanticError( yylloc, "exception aggregate is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
 	;
 
@@ -2436,5 +2412,5 @@
 // Overloading: function, data, and operator identifiers may be overloaded.
 //
-// Type declarations: "type" is used to generate new types for declaring objects. Similarly, "dtype" is used for object
+// Type declarations: "otype" is used to generate new types for declaring objects. Similarly, "dtype" is used for object
 //     and incomplete types, and "ftype" is used for function types. Type declarations with initializers provide
 //     definitions of new types. Type declarations with storage class "extern" provide opaque types.
@@ -2465,5 +2441,5 @@
 	type_class identifier_or_type_name
 		{ typedefTable.addToScope( *$2, TYPEDEFname, "9" ); }
-	  type_initializer_opt assertion_list_opt
+	type_initializer_opt assertion_list_opt
 		{ $$ = DeclarationNode::newTypeParam( $1, $2 )->addTypeInitializer( $4 )->addAssertions( $5 ); }
 	| type_specifier identifier_parameter_declarator
@@ -2492,5 +2468,5 @@
 	assertion
 	| assertion_list assertion
-		{ $$ = $1 ? $1->appendList( $2 ) : $2; }
+		{ $$ = $1->appendList( $2 ); }
 	;
 
Index: src/ResolvExpr/CandidateFinder.cpp
===================================================================
--- src/ResolvExpr/CandidateFinder.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/ResolvExpr/CandidateFinder.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -188,10 +188,10 @@
 
 			// mark conversion cost and also specialization cost of param type
-			const ast::Type * paramType = (*param)->get_type();
+			// const ast::Type * paramType = (*param)->get_type();
 			cand->expr = ast::mutate_field_index(
 				appExpr, &ast::ApplicationExpr::args, i,
 				computeExpressionConversionCost(
-					args[i], paramType, symtab, cand->env, convCost ) );
-			convCost.decSpec( specCost( paramType ) );
+					args[i], *param, symtab, cand->env, convCost ) );
+			convCost.decSpec( specCost( *param ) );
 			++param;  // can't be in for-loop update because of the continue
 		}
@@ -698,5 +698,5 @@
 			if ( targetType && ! targetType->isVoid() && ! funcType->returns.empty() ) {
 				// attempt to narrow based on expected target type
-				const ast::Type * returnType = funcType->returns.front()->get_type();
+				const ast::Type * returnType = funcType->returns.front();
 				if ( ! unify(
 					returnType, targetType, funcEnv, funcNeed, funcHave, funcOpen, symtab )
@@ -712,12 +712,28 @@
 			std::size_t genStart = 0;
 
-			for ( const ast::DeclWithType * param : funcType->params ) {
-				auto obj = strict_dynamic_cast< const ast::ObjectDecl * >( param );
+			// xxx - how to handle default arg after change to ftype representation?
+			if (const ast::VariableExpr * varExpr = func->expr.as<ast::VariableExpr>()) {
+				if (const ast::FunctionDecl * funcDecl = varExpr->var.as<ast::FunctionDecl>()) {
+					// function may have default args only if directly calling by name
+					// must use types on candidate however, due to RenameVars substitution
+					auto nParams = funcType->params.size();
+
+					for (size_t i=0; i<nParams; ++i) {
+						auto obj = funcDecl->params[i].strict_as<ast::ObjectDecl>();
+						if (!instantiateArgument(
+							funcType->params[i], obj->init, args, results, genStart, symtab)) return;
+					}
+					goto endMatch;
+				}
+			}
+			for ( const auto & param : funcType->params ) {
 				// Try adding the arguments corresponding to the current parameter to the existing
 				// matches
+				// no default args for indirect calls
 				if ( ! instantiateArgument(
-					obj->type, obj->init, args, results, genStart, symtab ) ) return;
-			}
-
+					param, nullptr, args, results, genStart, symtab ) ) return;
+			}
+
+			endMatch:
 			if ( funcType->isVarArgs ) {
 				// append any unused arguments to vararg pack
Index: src/ResolvExpr/CurrentObject.cc
===================================================================
--- src/ResolvExpr/CurrentObject.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/ResolvExpr/CurrentObject.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -594,5 +594,5 @@
 	class SimpleIterator final : public MemberIterator {
 		CodeLocation location;
-		readonly< Type > type = nullptr;
+		const Type * type = nullptr;
 	public:
 		SimpleIterator( const CodeLocation & loc, const Type * t ) : location( loc ), type( t ) {}
@@ -630,6 +630,6 @@
 	class ArrayIterator final : public MemberIterator {
 		CodeLocation location;
-		readonly< ArrayType > array = nullptr;
-		readonly< Type > base = nullptr;
+		const ArrayType * array = nullptr;
+		const Type * base = nullptr;
 		size_t index = 0;
 		size_t size = 0;
Index: src/ResolvExpr/ResolveAssertions.cc
===================================================================
--- src/ResolvExpr/ResolveAssertions.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/ResolvExpr/ResolveAssertions.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -277,5 +277,12 @@
 			const DeclarationWithType * candidate = cdata.id;
 
-			// build independent unification context for candidate
+			// ignore deleted candidates.
+			// NOTE: this behavior is different from main resolver.
+			// further investigations might be needed to determine
+			// if we should implement the same rule here
+			// (i.e. error if unique best match is deleted)
+			if (candidate->isDeleted) continue;
+
+			// build independent unification context. for candidate
 			AssertionSet have, newNeed;
 			TypeEnvironment newEnv{ resn.alt.env };
Index: src/ResolvExpr/Resolver.cc
===================================================================
--- src/ResolvExpr/Resolver.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/ResolvExpr/Resolver.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -1223,5 +1223,5 @@
 		template<typename Iter>
 		inline bool nextMutex( Iter & it, const Iter & end ) {
-			while ( it != end && ! (*it)->get_type()->is_mutex() ) { ++it; }
+			while ( it != end && ! (*it)->is_mutex() ) { ++it; }
 			return it != end;
 		}
@@ -1638,8 +1638,8 @@
 								// Check if the argument matches the parameter type in the current
 								// scope
-								ast::ptr< ast::Type > paramType = (*param)->get_type();
+								// ast::ptr< ast::Type > paramType = (*param)->get_type();
 								if (
 									! unify(
-										arg->expr->result, paramType, resultEnv, need, have, open,
+										arg->expr->result, *param, resultEnv, need, have, open,
 										symtab )
 								) {
@@ -1648,5 +1648,5 @@
 									ss << "candidate function not viable: no known conversion "
 										"from '";
-									ast::print( ss, (*param)->get_type() );
+									ast::print( ss, *param );
 									ss << "' to '";
 									ast::print( ss, arg->expr->result );
Index: src/ResolvExpr/SatisfyAssertions.cpp
===================================================================
--- src/ResolvExpr/SatisfyAssertions.cpp	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/ResolvExpr/SatisfyAssertions.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -170,4 +170,11 @@
 			const ast::DeclWithType * candidate = cdata.id;
 
+			// ignore deleted candidates.
+			// NOTE: this behavior is different from main resolver.
+			// further investigations might be needed to determine
+			// if we should implement the same rule here
+			// (i.e. error if unique best match is deleted)
+			if (candidate->isDeleted) continue;
+
 			// build independent unification context for candidate
 			ast::AssertionSet have, newNeed;
@@ -318,6 +325,6 @@
 					if ( ! func ) continue;
 
-					for ( const ast::DeclWithType * param : func->params ) {
-						cost.decSpec( specCost( param->get_type() ) );
+					for ( const auto & param : func->params ) {
+						cost.decSpec( specCost( param ) );
 					}
 
Index: src/ResolvExpr/SpecCost.cc
===================================================================
--- src/ResolvExpr/SpecCost.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/ResolvExpr/SpecCost.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -178,6 +178,6 @@
 		void previsit( const ast::FunctionType * fty ) {
 			int minCount = std::numeric_limits<int>::max();
-			updateMinimumPresent( minCount, fty->params, decl_type );
-			updateMinimumPresent( minCount, fty->returns, decl_type );
+			updateMinimumPresent( minCount, fty->params, type_deref );
+			updateMinimumPresent( minCount, fty->returns, type_deref );
 			// Add another level to minCount if set.
 			count = toNoneOrInc( minCount );
Index: src/ResolvExpr/Unify.cc
===================================================================
--- src/ResolvExpr/Unify.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/ResolvExpr/Unify.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -395,5 +395,5 @@
 
 	template< typename Iterator1, typename Iterator2 >
-	bool unifyDeclList( Iterator1 list1Begin, Iterator1 list1End, Iterator2 list2Begin, Iterator2 list2End, TypeEnvironment &env, AssertionSet &needAssertions, AssertionSet &haveAssertions, const OpenVarSet &openVars, const SymTab::Indexer &indexer ) {
+	bool unifyTypeList( Iterator1 list1Begin, Iterator1 list1End, Iterator2 list2Begin, Iterator2 list2End, TypeEnvironment &env, AssertionSet &needAssertions, AssertionSet &haveAssertions, const OpenVarSet &openVars, const SymTab::Indexer &indexer ) {
 		auto get_type = [](DeclarationWithType * dwt){ return dwt->get_type(); };
 		for ( ; list1Begin != list1End && list2Begin != list2End; ++list1Begin, ++list2Begin ) {
@@ -489,6 +489,6 @@
 					|| flatOther->isTtype()
 			) {
-				if ( unifyDeclList( flatFunc->parameters.begin(), flatFunc->parameters.end(), flatOther->parameters.begin(), flatOther->parameters.end(), env, needAssertions, haveAssertions, openVars, indexer ) ) {
-					if ( unifyDeclList( flatFunc->returnVals.begin(), flatFunc->returnVals.end(), flatOther->returnVals.begin(), flatOther->returnVals.end(), env, needAssertions, haveAssertions, openVars, indexer ) ) {
+				if ( unifyTypeList( flatFunc->parameters.begin(), flatFunc->parameters.end(), flatOther->parameters.begin(), flatOther->parameters.end(), env, needAssertions, haveAssertions, openVars, indexer ) ) {
+					if ( unifyTypeList( flatFunc->returnVals.begin(), flatFunc->returnVals.end(), flatOther->returnVals.begin(), flatOther->returnVals.end(), env, needAssertions, haveAssertions, openVars, indexer ) ) {
 
 						// the original types must be used in mark assertions, since pointer comparisons are used
@@ -784,15 +784,15 @@
 
 		/// returns flattened version of `src`
-		static std::vector< ast::ptr< ast::DeclWithType > > flattenList(
-			const std::vector< ast::ptr< ast::DeclWithType > > & src, ast::TypeEnvironment & env
+		static std::vector< ast::ptr< ast::Type > > flattenList(
+			const std::vector< ast::ptr< ast::Type > > & src, ast::TypeEnvironment & env
 		) {
-			std::vector< ast::ptr< ast::DeclWithType > > dst;
+			std::vector< ast::ptr< ast::Type > > dst;
 			dst.reserve( src.size() );
-			for ( const ast::DeclWithType * d : src ) {
+			for ( const auto & d : src ) {
 				ast::Pass<TtypeExpander_new> expander{ env };
 				// TtypeExpander pass is impure (may mutate nodes in place)
 				// need to make nodes shared to prevent accidental mutation
-				ast::ptr<ast::DeclWithType> dc = d->accept(expander);
-				auto types = flatten( dc->get_type() );
+				ast::ptr<ast::Type> dc = d->accept(expander);
+				auto types = flatten( dc );
 				for ( ast::ptr< ast::Type > & t : types ) {
 					// outermost const, volatile, _Atomic qualifiers in parameters should not play
@@ -803,5 +803,5 @@
 					// requirements than a non-mutex function
 					remove_qualifiers( t, ast::CV::Const | ast::CV::Volatile | ast::CV::Atomic );
-					dst.emplace_back( new ast::ObjectDecl{ dc->location, "", t } );
+					dst.emplace_back( t );
 				}
 			}
@@ -811,10 +811,10 @@
 		/// Creates a tuple type based on a list of DeclWithType
 		template< typename Iter >
-		static ast::ptr< ast::Type > tupleFromDecls( Iter crnt, Iter end ) {
+		static ast::ptr< ast::Type > tupleFromTypes( Iter crnt, Iter end ) {
 			std::vector< ast::ptr< ast::Type > > types;
 			while ( crnt != end ) {
 				// it is guaranteed that a ttype variable will be bound to a flat tuple, so ensure
 				// that this results in a flat tuple
-				flatten( (*crnt)->get_type(), types );
+				flatten( *crnt, types );
 
 				++crnt;
@@ -825,5 +825,5 @@
 
 		template< typename Iter >
-		static bool unifyDeclList(
+		static bool unifyTypeList(
 			Iter crnt1, Iter end1, Iter crnt2, Iter end2, ast::TypeEnvironment & env,
 			ast::AssertionSet & need, ast::AssertionSet & have, const ast::OpenVarSet & open,
@@ -831,6 +831,6 @@
 		) {
 			while ( crnt1 != end1 && crnt2 != end2 ) {
-				const ast::Type * t1 = (*crnt1)->get_type();
-				const ast::Type * t2 = (*crnt2)->get_type();
+				const ast::Type * t1 = *crnt1;
+				const ast::Type * t2 = *crnt2;
 				bool isTuple1 = Tuples::isTtype( t1 );
 				bool isTuple2 = Tuples::isTtype( t2 );
@@ -840,10 +840,10 @@
 					// combine remainder of list2, then unify
 					return unifyExact(
-						t1, tupleFromDecls( crnt2, end2 ), env, need, have, open,
+						t1, tupleFromTypes( crnt2, end2 ), env, need, have, open,
 						noWiden(), symtab );
 				} else if ( ! isTuple1 && isTuple2 ) {
 					// combine remainder of list1, then unify
 					return unifyExact(
-						tupleFromDecls( crnt1, end1 ), t2, env, need, have, open,
+						tupleFromTypes( crnt1, end1 ), t2, env, need, have, open,
 						noWiden(), symtab );
 				}
@@ -860,15 +860,15 @@
 			if ( crnt1 != end1 ) {
 				// try unifying empty tuple with ttype
-				const ast::Type * t1 = (*crnt1)->get_type();
+				const ast::Type * t1 = *crnt1;
 				if ( ! Tuples::isTtype( t1 ) ) return false;
 				return unifyExact(
-					t1, tupleFromDecls( crnt2, end2 ), env, need, have, open,
+					t1, tupleFromTypes( crnt2, end2 ), env, need, have, open,
 					noWiden(), symtab );
 			} else if ( crnt2 != end2 ) {
 				// try unifying empty tuple with ttype
-				const ast::Type * t2 = (*crnt2)->get_type();
+				const ast::Type * t2 = *crnt2;
 				if ( ! Tuples::isTtype( t2 ) ) return false;
 				return unifyExact(
-					tupleFromDecls( crnt1, end1 ), t2, env, need, have, open,
+					tupleFromTypes( crnt1, end1 ), t2, env, need, have, open,
 					noWiden(), symtab );
 			}
@@ -877,11 +877,11 @@
 		}
 
-		static bool unifyDeclList(
-			const std::vector< ast::ptr< ast::DeclWithType > > & list1,
-			const std::vector< ast::ptr< ast::DeclWithType > > & list2,
+		static bool unifyTypeList(
+			const std::vector< ast::ptr< ast::Type > > & list1,
+			const std::vector< ast::ptr< ast::Type > > & list2,
 			ast::TypeEnvironment & env, ast::AssertionSet & need, ast::AssertionSet & have,
 			const ast::OpenVarSet & open, const ast::SymbolTable & symtab
 		) {
-			return unifyDeclList(
+			return unifyTypeList(
 				list1.begin(), list1.end(), list2.begin(), list2.end(), env, need, have, open,
 				symtab );
@@ -928,6 +928,6 @@
 			) return;
 
-			if ( ! unifyDeclList( params, params2, tenv, need, have, open, symtab ) ) return;
-			if ( ! unifyDeclList(
+			if ( ! unifyTypeList( params, params2, tenv, need, have, open, symtab ) ) return;
+			if ( ! unifyTypeList(
 				func->returns, func2->returns, tenv, need, have, open, symtab ) ) return;
 
@@ -1232,9 +1232,9 @@
 	ast::ptr<ast::Type> extractResultType( const ast::FunctionType * func ) {
 		if ( func->returns.empty() ) return new ast::VoidType{};
-		if ( func->returns.size() == 1 ) return func->returns[0]->get_type();
+		if ( func->returns.size() == 1 ) return func->returns[0];
 
 		std::vector<ast::ptr<ast::Type>> tys;
-		for ( const ast::DeclWithType * decl : func->returns ) {
-			tys.emplace_back( decl->get_type() );
+		for ( const auto & decl : func->returns ) {
+			tys.emplace_back( decl );
 		}
 		return new ast::TupleType{ std::move(tys) };
Index: src/SymTab/Autogen.cc
===================================================================
--- src/SymTab/Autogen.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/SymTab/Autogen.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -339,5 +339,10 @@
 		} catch ( SemanticErrorException & ) {
 			// okay if decl does not resolve - that means the function should not be generated
-			delete dcl;
+			// delete dcl;
+			delete dcl->statements;
+			dcl->statements = nullptr;
+			dcl->isDeleted = true;
+			definitions.push_back( dcl );
+			indexer.addId( dcl );
 		}
 	}
Index: src/SymTab/Mangler.cc
===================================================================
--- src/SymTab/Mangler.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/SymTab/Mangler.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -551,10 +551,8 @@
 			GuardValue( inFunctionType );
 			inFunctionType = true;
-			std::vector< ast::ptr< ast::Type > > returnTypes = getTypes( functionType->returns );
-			if (returnTypes.empty()) mangleName << Encoding::void_t;
-			else accept_each( returnTypes, *visitor );
+			if (functionType->returns.empty()) mangleName << Encoding::void_t;
+			else accept_each( functionType->returns, *visitor );
 			mangleName << "_";
-			std::vector< ast::ptr< ast::Type > > paramTypes = getTypes( functionType->params );
-			accept_each( paramTypes, *visitor );
+			accept_each( functionType->params, *visitor );
 			mangleName << "_";
 		}
Index: src/SymTab/Validate.cc
===================================================================
--- src/SymTab/Validate.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/SymTab/Validate.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -1384,4 +1384,5 @@
 	/// Replaces enum types by int, and function/array types in function parameter and return
 	/// lists by appropriate pointers
+	/*
 	struct EnumAndPointerDecay_new {
 		const ast::EnumDecl * previsit( const ast::EnumDecl * enumDecl ) {
@@ -1434,4 +1435,5 @@
 		}
 	};
+	*/
 
 	/// expand assertions from a trait instance, performing appropriate type variable substitutions
@@ -1837,9 +1839,9 @@
 const ast::Type * validateType(
 		const CodeLocation & loc, const ast::Type * type, const ast::SymbolTable & symtab ) {
-	ast::Pass< EnumAndPointerDecay_new > epc;
+	// ast::Pass< EnumAndPointerDecay_new > epc;
 	ast::Pass< LinkReferenceToTypes_new > lrt{ loc, symtab };
 	ast::Pass< ForallPointerDecay_new > fpd{ loc };
 
-	return type->accept( epc )->accept( lrt )->accept( fpd );
+	return type->accept( lrt )->accept( fpd );
 }
 
Index: src/SynTree/AggregateDecl.cc
===================================================================
--- src/SynTree/AggregateDecl.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/SynTree/AggregateDecl.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -21,4 +21,5 @@
 #include "Common/utility.h"      // for printAll, cloneAll, deleteAll
 #include "Declaration.h"         // for AggregateDecl, TypeDecl, Declaration
+#include "Expression.h"
 #include "Initializer.h"
 #include "LinkageSpec.h"         // for Spec, linkageName, Cforall
@@ -88,4 +89,17 @@
 const char * StructDecl::typeString() const { return aggrString( kind ); }
 
+StructInstType * StructDecl::makeInst( std::list< Expression * > const & new_parameters ) {
+	std::list< Expression * > copy_parameters;
+	cloneAll( new_parameters, copy_parameters );
+	return makeInst( move( copy( copy_parameters ) ) );
+}
+
+StructInstType * StructDecl::makeInst( std::list< Expression * > && new_parameters ) {
+	assert( parameters.size() == new_parameters.size() );
+	StructInstType * type = new StructInstType( noQualifiers, this );
+	type->parameters = std::move( new_parameters );
+	return type;
+}
+
 const char * UnionDecl::typeString() const { return aggrString( Union ); }
 
Index: src/SynTree/Declaration.h
===================================================================
--- src/SynTree/Declaration.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/SynTree/Declaration.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -306,4 +306,8 @@
 	bool is_thread   () { return kind == Thread   ; }
 
+	// Make a type instance of this declaration.
+	StructInstType * makeInst( std::list< Expression * > const & parameters );
+	StructInstType * makeInst( std::list< Expression * > && parameters );
+
 	virtual StructDecl * clone() const override { return new StructDecl( *this ); }
 	virtual void accept( Visitor & v ) override { v.visit( this ); }
Index: src/SynTree/TypeDecl.cc
===================================================================
--- src/SynTree/TypeDecl.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/SynTree/TypeDecl.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Mon May 18 07:44:20 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Dec 13 15:26:14 2019
-// Update Count     : 21
+// Last Modified On : Thu Oct  8 18:18:55 2020
+// Update Count     : 22
 //
 
@@ -21,5 +21,6 @@
 #include "Type.h"            // for Type, Type::StorageClasses
 
-TypeDecl::TypeDecl( const std::string & name, Type::StorageClasses scs, Type * type, Kind kind, bool sized, Type * init ) : Parent( name, scs, type ), kind( kind ), sized( kind == Ttype || sized ), init( init ) {
+TypeDecl::TypeDecl( const std::string & name, Type::StorageClasses scs, Type * type, Kind kind, bool sized, Type * init ) :
+	Parent( name, scs, type ), kind( kind ), sized( kind == Ttype || sized ), init( init ) {
 }
 
Index: src/Virtual/Tables.cc
===================================================================
--- src/Virtual/Tables.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/Virtual/Tables.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -14,6 +14,8 @@
 //
 
+#include <SynTree/Attribute.h>
 #include <SynTree/Declaration.h>
 #include <SynTree/Expression.h>
+#include <SynTree/Statement.h>
 #include <SynTree/Type.h>
 
@@ -38,14 +40,4 @@
 }
 
-// Fuse base polymorphic declaration and forall arguments into a new type.
-static StructInstType * vtableInstType(
-		StructDecl * polyDecl, std::list< Expression * > && parameters ) {
-	assert( parameters.size() == polyDecl->parameters.size() );
-	StructInstType * type = new StructInstType(
-			Type::Qualifiers( /* Type::Const */ ), polyDecl );
-	type->parameters = std::move( parameters );
-	return type;
-}
-
 static ObjectDecl * makeVtableDeclaration(
 		StructInstType * type, Initializer * init ) {
@@ -66,14 +58,12 @@
 
 ObjectDecl * makeVtableForward( StructInstType * type ) {
+	assert( type );
 	return makeVtableDeclaration( type, nullptr );
 }
 
-ObjectDecl * makeVtableForward(
-		StructDecl * polyDecl, std::list< Expression * > && parameters ) {
-	return makeVtableForward( vtableInstType( polyDecl, std::move( parameters ) ) );
-}
-
 ObjectDecl * makeVtableInstance(
-		StructInstType * vtableType, Type * vobject_type, Initializer * init ) {
+		StructInstType * vtableType, Type * objectType, Initializer * init ) {
+	assert( vtableType );
+	assert( objectType );
 	StructDecl * vtableStruct = vtableType->baseStruct;
 	// Build the initialization
@@ -92,7 +82,7 @@
 						new SingleInit( new AddressExpr( new NameExpr( parentInstance ) ) ) );
 			} else if ( std::string( "size" ) == field->name ) {
-				inits.push_back( new SingleInit( new SizeofExpr( vobject_type->clone() ) ) );
+				inits.push_back( new SingleInit( new SizeofExpr( objectType->clone() ) ) );
 			} else if ( std::string( "align" ) == field->name ) {
-				inits.push_back( new SingleInit( new AlignofExpr( vobject_type->clone() ) ) );
+				inits.push_back( new SingleInit( new AlignofExpr( objectType->clone() ) ) );
 			} else {
 				inits.push_back( new SingleInit( new NameExpr( field->name ) ) );
@@ -108,9 +98,51 @@
 }
 
-ObjectDecl * makeVtableInstance(
-		StructDecl * polyDecl, std::list< Expression * > && parameters,
-		Type * vobject, Initializer * init ) {
-	return makeVtableInstance(
-		vtableInstType( polyDecl, std::move( parameters ) ), vobject, init );
+namespace {
+	std::string const functionName = "get_exception_vtable";
+}
+
+FunctionDecl * makeGetExceptionForward(
+		Type * vtableType, Type * exceptType ) {
+	assert( vtableType );
+	assert( exceptType );
+	FunctionType * type = new FunctionType( noQualifiers, false );
+	vtableType->tq.is_const = true;
+	type->returnVals.push_back( new ObjectDecl(
+		"_retvalue",
+		noStorageClasses,
+		LinkageSpec::Cforall,
+		nullptr,
+		new ReferenceType( noQualifiers, vtableType ),
+		nullptr,
+        { new Attribute("unused") }
+	) );
+	type->parameters.push_back( new ObjectDecl(
+		"__unused",
+		noStorageClasses,
+		LinkageSpec::Cforall,
+		nullptr,
+		new PointerType( noQualifiers, exceptType ),
+		nullptr,
+		{ new Attribute("unused") }
+	) );
+	return new FunctionDecl(
+		functionName,
+		noStorageClasses,
+		LinkageSpec::Cforall,
+		type,
+		nullptr
+	);
+}
+
+FunctionDecl * makeGetExceptionFunction(
+		ObjectDecl * vtableInstance, Type * exceptType ) {
+	assert( vtableInstance );
+	assert( exceptType );
+	FunctionDecl * func = makeGetExceptionForward(
+		vtableInstance->type->clone(), exceptType );
+	func->statements = new CompoundStmt( {
+		new ReturnStmt( new VariableExpr( vtableInstance ) ),
+	} );
+	return func;
 }
 
Index: src/Virtual/Tables.h
===================================================================
--- src/Virtual/Tables.h	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/Virtual/Tables.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -27,25 +27,25 @@
 bool isVTableInstanceName( std::string const & name );
 
-/// Converts exceptions into regular structures.
-//void ( std::list< Declaration * > & translationUnit );
-
-ObjectDecl * makeVtableForward( StructInstType * );
-ObjectDecl * makeVtableForward( StructDecl *, std::list< Expression * > && );
-/* Create a forward definition of a vtable of the given type.
- *
- * Instead of the virtual table type you may provide the declaration and all
- * the forall parameters.
+ObjectDecl * makeVtableForward( StructInstType * vtableType );
+/* Create a forward declaration of a vtable of the given type.
+ * vtableType node is consumed.
  */
 
-ObjectDecl * makeVtableInstance( StructInstType *, Type *, Initializer * );
-ObjectDecl * makeVtableInstance(
-	StructDecl *, std::list< Expression * > &&, Type *, Initializer * );
+ObjectDecl * makeVtableInstance( StructInstType * vtableType, Type * objectType,
+	Initializer * init = nullptr );
 /* Create an initialized definition of a vtable.
- *
- * The parameters are the virtual table type (or the base declaration and the
- * forall parameters), the object type and optionally an initializer.
- *
- * Instead of the virtual table type you may provide the declaration and all
- * the forall parameters.
+ * vtableType and init (if provided) nodes are consumed.
+ */
+
+// Some special code for how exceptions interact with virtual tables.
+FunctionDecl * makeGetExceptionForward( Type * vtableType, Type * exceptType );
+/* Create a forward declaration of the exception virtual function
+ * linking the vtableType to the exceptType. Both nodes are consumed.
+ */
+
+FunctionDecl * makeGetExceptionFunction(
+	ObjectDecl * vtableInstance, Type * exceptType );
+/* Create the definition of the exception virtual function.
+ * exceptType node is consumed.
  */
 
Index: src/main.cc
===================================================================
--- src/main.cc	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ src/main.cc	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -9,7 +9,7 @@
 // Author           : Peter Buhr and Rob Schluntz
 // Created On       : Fri May 15 23:12:02 2015
-// Last Modified By : Andrew Beach
-// Last Modified On : Tue May 19 12:03:00 2020
-// Update Count     : 634
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Thu Oct  8 18:17:46 2020
+// Update Count     : 637
 //
 
@@ -451,5 +451,5 @@
 
 
-static const char optstring[] = ":c:ghlLmNnpdP:S:twW:D:";
+static const char optstring[] = ":c:ghlLmNnpdOAP:S:twW:D:";
 
 enum { PreludeDir = 128 };
@@ -478,23 +478,23 @@
 
 static const char * description[] = {
-	"diagnostic color: never, always, or auto.",          // -c
-	"wait for gdb to attach",                             // -g
-	"print help message",                                 // -h
-	"generate libcfa.c",                                  // -l
-	"generate line marks",                                // -L
-	"do not replace main",                                // -m
-	"do not generate line marks",                         // -N
-	"do not read prelude",                                // -n
+	"diagnostic color: never, always, or auto.",		// -c
+	"wait for gdb to attach",							// -g
+	"print help message",								// -h
+	"generate libcfa.c",								// -l
+	"generate line marks",								// -L
+	"do not replace main",								// -m
+	"do not generate line marks",						// -N
+	"do not read prelude",								// -n
 	"generate prototypes for prelude functions",		// -p
-	"don't print output that isn't deterministic",        // -d
-	"Use the old-ast",                                    // -O
-	"Use the new-ast",                                    // -A
-	"print",                                              // -P
+	"only print deterministic output",                  // -d
+	"Use the old-ast",									// -O
+	"Use the new-ast",									// -A
+	"print",											// -P
 	"<directory> prelude directory for debug/nodebug",	// no flag
 	"<option-list> enable profiling information:\n          counters,heap,time,all,none", // -S
-	"building cfa standard lib",                          // -t
-	"",                                                   // -w
-	"",                                                   // -W
-	"",                                                   // -D
+	"building cfa standard lib",						// -t
+	"",													// -w
+	"",													// -W
+	"",													// -D
 }; // description
 
Index: tests/.expect/array.txt
===================================================================
--- tests/.expect/array.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/array.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+array.cfa: In function '_X4mainFi___1':
+array.cfa:55:9: note: #pragma message: Compiled
Index: tests/.expect/cast.txt
===================================================================
--- tests/.expect/cast.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/cast.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+cast.cfa: In function '_X4mainFi_iPPKc__1':
+cast.cfa:18:9: note: #pragma message: Compiled
Index: tests/.expect/const-init.txt
===================================================================
--- tests/.expect/const-init.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/.expect/const-init.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/.expect/enum.txt
===================================================================
--- tests/.expect/enum.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/enum.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/.expect/expression.txt
===================================================================
--- tests/.expect/expression.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/expression.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+expression.cfa: In function '_X4mainFi___1':
+expression.cfa:89:9: note: #pragma message: Compiled
Index: tests/.expect/forall.txt
===================================================================
--- tests/.expect/forall.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/forall.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+forall.cfa: In function '_X4mainFi___1':
+forall.cfa:218:9: note: #pragma message: Compiled
Index: tests/.expect/heap.txt
===================================================================
--- tests/.expect/heap.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/heap.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/.expect/identFuncDeclarator.txt
===================================================================
--- tests/.expect/identFuncDeclarator.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/identFuncDeclarator.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+identFuncDeclarator.cfa: In function '_X4mainFi___1':
+identFuncDeclarator.cfa:116:9: note: #pragma message: Compiled
Index: tests/.expect/identParamDeclarator.txt
===================================================================
--- tests/.expect/identParamDeclarator.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/identParamDeclarator.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/.expect/init1-ERROR.txt
===================================================================
--- tests/.expect/init1-ERROR.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/.expect/init1-ERROR.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,47 @@
+error: No reasonable alternatives for expression Untyped Init Expression
+  Name: rx  InitAlternative: reference to signed int
+error: No reasonable alternatives for expression Untyped Init Expression
+  Name: px  InitAlternative: pointer to signed int
+error: No reasonable alternatives for expression Untyped Init Expression
+  Name: crx  InitAlternative: reference to float
+error: No reasonable alternatives for expression Untyped Init Expression
+  Name: cpx  InitAlternative: pointer to float
+init1.cfa:104:1 error: No reasonable alternatives for expression Generated Cast of:
+  Name: rx
+... to:
+  reference to signed int
+init1.cfa:107:1 error: No reasonable alternatives for expression Applying untyped:
+  Name: ?{}
+...to:
+  Generated Cast of:
+    Variable Expression: _retval_f_py: pointer to signed int
+  ... to:
+    reference to pointer to signed int
+  Name: px
+
+init1.cfa:114:1 error: No reasonable alternatives for expression Generated Cast of:
+  Name: crx
+... to:
+  reference to float
+init1.cfa:117:1 error: No reasonable alternatives for expression Applying untyped:
+  Name: ?{}
+...to:
+  Generated Cast of:
+    Variable Expression: _retval_f_py2: pointer to float
+  ... to:
+    reference to pointer to float
+  Name: cpx
+
+init1.cfa:124:1 error: No reasonable alternatives for expression Generated Cast of:
+  Name: s
+... to:
+  reference to instance of type T (not function type)
+init1.cfa:128:1 error: No reasonable alternatives for expression Applying untyped:
+  Name: ?{}
+...to:
+  Generated Cast of:
+    Variable Expression: _retval_anycvt: pointer to instance of type T (not function type)
+  ... to:
+    reference to pointer to instance of type T (not function type)
+  Name: s
+
Index: tests/.expect/init1.txt
===================================================================
--- tests/.expect/init1.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/init1.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -1,47 +1,2 @@
-error: No reasonable alternatives for expression Untyped Init Expression
-  Name: rx  InitAlternative: reference to signed int
-error: No reasonable alternatives for expression Untyped Init Expression
-  Name: px  InitAlternative: pointer to signed int
-error: No reasonable alternatives for expression Untyped Init Expression
-  Name: crx  InitAlternative: reference to float
-error: No reasonable alternatives for expression Untyped Init Expression
-  Name: cpx  InitAlternative: pointer to float
-init1.cfa:94:1 error: No reasonable alternatives for expression Generated Cast of:
-  Name: rx
-... to:
-  reference to signed int
-init1.cfa:97:1 error: No reasonable alternatives for expression Applying untyped:
-  Name: ?{}
-...to:
-  Generated Cast of:
-    Variable Expression: _retval_f_py: pointer to signed int
-  ... to:
-    reference to pointer to signed int
-  Name: px
-
-init1.cfa:104:1 error: No reasonable alternatives for expression Generated Cast of:
-  Name: crx
-... to:
-  reference to float
-init1.cfa:107:1 error: No reasonable alternatives for expression Applying untyped:
-  Name: ?{}
-...to:
-  Generated Cast of:
-    Variable Expression: _retval_f_py2: pointer to float
-  ... to:
-    reference to pointer to float
-  Name: cpx
-
-init1.cfa:114:1 error: No reasonable alternatives for expression Generated Cast of:
-  Name: s
-... to:
-  reference to instance of type T (not function type)
-init1.cfa:118:1 error: No reasonable alternatives for expression Applying untyped:
-  Name: ?{}
-...to:
-  Generated Cast of:
-    Variable Expression: _retval_anycvt: pointer to instance of type T (not function type)
-  ... to:
-    reference to pointer to instance of type T (not function type)
-  Name: s
-
+init1.cfa: In function '_X4mainFi___1':
+init1.cfa:136:9: note: #pragma message: Compiled
Index: tests/.expect/labelledExit.txt
===================================================================
--- tests/.expect/labelledExit.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/labelledExit.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+labelledExit.cfa: In function '_X4mainFi_iPPKc__1':
+labelledExit.cfa:183:9: note: #pragma message: Compiled
Index: tests/.expect/limits.txt
===================================================================
--- tests/.expect/limits.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/limits.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+limits.cfa: In function '_X4mainFi_iPPKc__1':
+limits.cfa:154:9: note: #pragma message: Compiled
Index: tests/.expect/maybe.txt
===================================================================
--- tests/.expect/maybe.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/maybe.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/.expect/nested-types.txt
===================================================================
--- tests/.expect/nested-types.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/nested-types.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+nested-types.cfa: In function '_X4mainFi___1':
+nested-types.cfa:102:9: note: #pragma message: Compiled
Index: tests/.expect/numericConstants.txt
===================================================================
--- tests/.expect/numericConstants.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/numericConstants.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+numericConstants.cfa: In function '_X4mainFi___1':
+numericConstants.cfa:68:9: note: #pragma message: Compiled
Index: tests/.expect/operators.txt
===================================================================
--- tests/.expect/operators.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/operators.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: sts/.expect/poly-cycle.txt
===================================================================
--- tests/.expect/poly-cycle.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,1 +1,0 @@
-Success!
Index: tests/.expect/poly-d-cycle.txt
===================================================================
--- tests/.expect/poly-d-cycle.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/.expect/poly-d-cycle.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+Success!
Index: tests/.expect/poly-o-cycle.txt
===================================================================
--- tests/.expect/poly-o-cycle.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/.expect/poly-o-cycle.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+Success!
Index: tests/.expect/result.txt
===================================================================
--- tests/.expect/result.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/result.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/.expect/stdincludes.txt
===================================================================
--- tests/.expect/stdincludes.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/stdincludes.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+stdincludes.cfa: In function '_X4mainFi___1':
+stdincludes.cfa:52:9: note: #pragma message: Compiled
Index: tests/.expect/switch.txt
===================================================================
--- tests/.expect/switch.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/switch.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+switch.cfa: In function '_X4mainFi___1':
+switch.cfa:105:9: note: #pragma message: Compiled
Index: tests/.expect/typedefRedef-ERR1.txt
===================================================================
--- tests/.expect/typedefRedef-ERR1.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/typedefRedef-ERR1.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -1,2 +1,2 @@
 typedefRedef.cfa:4:1 error: Cannot redefine typedef: Foo
-typedefRedef.cfa:60:1 error: Cannot redefine typedef: ARR
+typedefRedef.cfa:59:1 error: Cannot redefine typedef: ARR
Index: tests/.expect/typedefRedef.txt
===================================================================
--- tests/.expect/typedefRedef.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/typedefRedef.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+typedefRedef.cfa: In function '_X4mainFi___1':
+typedefRedef.cfa:71:9: note: #pragma message: Compiled
Index: tests/.expect/typeof.txt
===================================================================
--- tests/.expect/typeof.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/typeof.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/.expect/variableDeclarator.txt
===================================================================
--- tests/.expect/variableDeclarator.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/variableDeclarator.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+variableDeclarator.cfa: In function '_X4mainFi_iPPKc__1':
+variableDeclarator.cfa:182:9: note: #pragma message: Compiled
Index: tests/.expect/voidPtr.txt
===================================================================
--- tests/.expect/voidPtr.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/.expect/voidPtr.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/Makefile.am
===================================================================
--- tests/Makefile.am	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/Makefile.am	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -11,6 +11,6 @@
 ## Created On       : Sun May 31 09:08:15 2015
 ## Last Modified By : Peter A. Buhr
-## Last Modified On : Tue Nov 20 11:18:51 2018
-## Update Count     : 68
+## Last Modified On : Fri Oct  9 23:13:07 2020
+## Update Count     : 86
 ###############################################################################
 
@@ -40,4 +40,5 @@
 	-fdebug-prefix-map=$(abspath ${abs_srcdir})= \
 	-fdebug-prefix-map=/tmp= \
+	-fno-diagnostics-show-caret \
 	-g \
 	-Wall \
@@ -52,5 +53,5 @@
 
 # adjust CC to current flags
-CC = $(if $(DISTCC_CFA_PATH),distcc $(DISTCC_CFA_PATH) ${ARCH_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
+CC = LC_ALL=C $(if $(DISTCC_CFA_PATH),distcc $(DISTCC_CFA_PATH) ${ARCH_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
 CFACC = $(CC)
 
@@ -133,41 +134,53 @@
 	$(CFACOMPILETEST) -CFA -XCFA -p -c -fsyntax-only -o $(abspath ${@})
 
-# Use for tests where the make command is expected to succeed but the expected.txt should be compared to stderr
-EXPECT_STDERR = builtins/sync warnings/self-assignment
-$(EXPECT_STDERR): % : %.cfa $(CFACCBIN)
-	$(CFACOMPILETEST) -c -fsyntax-only 2> $(abspath ${@})
-
 #------------------------------------------------------------------------------
 # CUSTOM TARGET
 #------------------------------------------------------------------------------
-# tests that just validate syntax
-expression : expression.cfa $(CFACCBIN)
-	$(CFACOMPILETEST) -c -fsyntax-only 2> $(abspath ${@})
+# tests that just validate syntax and compiler output should be compared to stderr
+CFACOMPILE_SYNTAX = $(CFACOMPILETEST) -Wno-unused-variable -Wno-unused-label -c -fsyntax-only -o $(abspath ${@})
+
+SYNTAX_ONLY_CODE = expression typedefRedef variableDeclarator switch numericConstants identFuncDeclarator forall \
+	init1 limits nested-types stdincludes cast labelledExit array builtins/sync warnings/self-assignment
+$(SYNTAX_ONLY_CODE): % : %.cfa $(CFACCBIN)
+	$(CFACOMPILE_SYNTAX)
+	$(if $(test), cp $(test) $(abspath ${@}), )
 
 # expected failures
-# use custom target since they require a custom define and custom dependencies
+# use custom target since they require a custom define *and* have a name that doesn't match the file
 alloc-ERROR : alloc.cfa $(CFACCBIN)
-	$(CFACOMPILETEST) -DERR1 -c -fsyntax-only -o $(abspath ${@})
+	$(CFACOMPILE_SYNTAX) -DERR1
+	-cp $(test) $(abspath ${@})
+
+init1-ERROR : init1.cfa $(CFACCBIN)
+	$(CFACOMPILE_SYNTAX) -DERR1
+	-cp $(test) $(abspath ${@})
 
 typedefRedef-ERR1 : typedefRedef.cfa $(CFACCBIN)
-	$(CFACOMPILETEST) -DERR1 -c -fsyntax-only -o $(abspath ${@})
+	$(CFACOMPILE_SYNTAX) -DERR1
+	-cp $(test) $(abspath ${@})
 
 nested-types-ERR1 : nested-types.cfa $(CFACCBIN)
-	$(CFACOMPILETEST) -DERR1 -c -fsyntax-only -o $(abspath ${@})
+	$(CFACOMPILE_SYNTAX) -DERR1
+	-cp $(test) $(abspath ${@})
 
 nested-types-ERR2 : nested-types.cfa $(CFACCBIN)
-	$(CFACOMPILETEST) -DERR2 -c -fsyntax-only -o $(abspath ${@})
+	$(CFACOMPILE_SYNTAX) -DERR2
+	-cp $(test) $(abspath ${@})
 
 raii/memberCtors-ERR1 : raii/memberCtors.cfa $(CFACCBIN)
-	$(CFACOMPILETEST) -DERR1 -c -fsyntax-only -o $(abspath ${@})
+	$(CFACOMPILE_SYNTAX) -DERR1
+	-cp $(test) $(abspath ${@})
 
 raii/ctor-autogen-ERR1 : raii/ctor-autogen.cfa $(CFACCBIN)
-	$(CFACOMPILETEST) -DERR1 -c -fsyntax-only -o $(abspath ${@})
+	$(CFACOMPILE_SYNTAX) -DERR1
+	-cp $(test) $(abspath ${@})
 
 raii/dtor-early-exit-ERR1 : raii/dtor-early-exit.cfa $(CFACCBIN)
-	$(CFACOMPILETEST) -DERR1 -c -fsyntax-only -o $(abspath ${@})
+	$(CFACOMPILE_SYNTAX) -DERR1
+	-cp $(test) $(abspath ${@})
 
 raii/dtor-early-exit-ERR2 : raii/dtor-early-exit.cfa $(CFACCBIN)
-	$(CFACOMPILETEST) -DERR2 -c -fsyntax-only -o $(abspath ${@})
+	$(CFACOMPILE_SYNTAX) -DERR2
+	-cp $(test) $(abspath ${@})
 
 # Exception Tests
Index: tests/alloc.cfa
===================================================================
--- tests/alloc.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/alloc.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Wed Feb  3 07:56:22 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Aug 14 16:59:59 2020
-// Update Count     : 430
+// Last Modified On : Fri Oct  9 23:03:11 2020
+// Update Count     : 431
 //
 
@@ -362,5 +362,5 @@
 	ip = memset( stp, 10 );
 	ip = memcpy( &st1, &st );
-#endif
+#endif // ERR1
 } // main
 
Index: tests/array.cfa
===================================================================
--- tests/array.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/array.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -1,23 +1,23 @@
-//                               -*- Mode: C -*- 
-// 
+//                               -*- Mode: C -*-
+//
 // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
 //
 // The contents of this file are covered under the licence agreement in the
 // file "LICENCE" distributed with Cforall.
-// 
+//
 // array.cfa -- test array declarations
-// 
+//
 // Author           : Peter A. Buhr
 // Created On       : Tue Feb 19 21:18:06 2019
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Feb 19 21:18:46 2019
-// Update Count     : 1
-// 
+// Last Modified On : Sun Sep 27 09:05:40 2020
+// Update Count     : 4
+//
 
-int a1[];
+int a1[0];
 //int a2[*];
 //double a4[3.0];
 
-int m1[][3];
+int m1[0][3];
 //int m2[*][*];
 int m4[3][3];
@@ -49,5 +49,9 @@
 }
 
-int main() {}
+int main() {
+	#if !defined(NO_COMPILED_PRAGMA)
+		#pragma message( "Compiled" )	// force non-empty .expect file
+	#endif
+}
 
 // Local Variables: //
Index: tests/bugs/196.cfa
===================================================================
--- tests/bugs/196.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/bugs/196.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,29 @@
+// Trac ticket
+// https://cforall.uwaterloo.ca/trac/ticket/196
+
+forall(dtype T)
+struct link;
+
+forall(dtype T)
+struct link {
+	link(T) * next;
+};
+
+// -----
+
+forall(dtype T)
+struct foo;
+
+forall(dtype U)
+struct bar {
+	foo(U) * data;
+};
+
+forall(dtype T)
+struct foo {};
+
+// -----
+
+int main(int argc, char * argv[]) {
+	return 0;
+}
Index: tests/builtins/.expect/sync.txt
===================================================================
--- tests/builtins/.expect/sync.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/builtins/.expect/sync.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,2 @@
+builtins/sync.cfa: In function '_X4mainFi___1':
+builtins/sync.cfa:358:9: note: #pragma message: Compiled
Index: tests/builtins/sync.cfa
===================================================================
--- tests/builtins/sync.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/builtins/sync.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -66,5 +66,4 @@
 	#if defined(__SIZEOF_INT128__)
 	{ __int128 ret; ret = __sync_fetch_and_nand(vplll, vlll); }
-	{ __int128 ret; ret = __sync_fetch_and_nand_16(vplll, vlll); }
 	#endif
 
@@ -355,4 +354,4 @@
 
 int main() {
-	return 0;
+	#pragma message( "Compiled" )			// force non-empty .expect file
 }
Index: tests/cast.cfa
===================================================================
--- tests/cast.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/cast.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -13,6 +13,5 @@
 
 //Dummy main
-int main(int argc, char const *argv[])
-{
-	return 0;
+int main( int argc, char const * argv[] ) {
+	#pragma message( "Compiled" )			// force non-empty .expect file
 }
Index: tests/concurrent/.expect/clib.txt
===================================================================
--- tests/concurrent/.expect/clib.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/concurrent/.expect/clib.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,21 @@
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done
+Done Unparker
Index: tests/concurrent/.expect/cluster.txt
===================================================================
--- tests/concurrent/.expect/cluster.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/concurrent/.expect/cluster.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/concurrent/clib.c
===================================================================
--- tests/concurrent/clib.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/concurrent/clib.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,67 @@
+#include <clib/cfathread.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+thread_local struct drand48_data buffer = { 0 };
+int myrand() {
+	long int result;
+	lrand48_r(&buffer, &result);
+	return result;
+}
+
+
+enum Constants { blocked_size = 20 };
+cfathread_t volatile blocked[blocked_size];
+
+void Worker( cfathread_t this ) {
+	for(int i = 0; i < 1000; i++) {
+		int idx = myrand() % blocked_size;
+		if(blocked[idx]) {
+			cfathread_t thrd = __atomic_exchange_n(&blocked[idx], NULL, __ATOMIC_SEQ_CST);
+			cfathread_unpark( thrd );
+		} else {
+			cfathread_t thrd = __atomic_exchange_n(&blocked[idx], this, __ATOMIC_SEQ_CST);
+			cfathread_unpark( thrd );
+			cfathread_park();
+		}
+	}
+	printf("Done\n");
+}
+
+volatile bool stop;
+void Unparker( cfathread_t this ) {
+	while(!stop) {
+		int idx = myrand() % blocked_size;
+		cfathread_t thrd = __atomic_exchange_n(&blocked[idx], NULL, __ATOMIC_SEQ_CST);
+		cfathread_unpark( thrd );
+		int r = myrand() % 20;
+		for( int i = 0; i < r; i++ ) {
+			cfathread_yield();
+		}
+	}
+	printf("Done Unparker\n");
+}
+
+
+int main() {
+	stop = false;
+	for(int i = 0; i < blocked_size; i++) {
+		blocked[i] = NULL;
+	}
+
+	cfathread_setproccnt( 4 );
+	cfathread_t u = cfathread_create( Unparker );
+	{
+		cfathread_t t[20];
+		for(int i = 0; i < 20; i++) {
+			t[i] = cfathread_create( Worker );
+		}
+		for(int i = 0; i < 20; i++) {
+			cfathread_join( t[i] );
+		}
+	}
+	stop = true;
+	cfathread_join(u);
+	cfathread_setproccnt( 1 );
+}
Index: tests/concurrent/cluster.cfa
===================================================================
--- tests/concurrent/cluster.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/concurrent/cluster.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -32,4 +32,4 @@
 		}
 	}
-	return 0;
+	printf( "done\n" );				// non-empty .expect file
 }
Index: tests/concurrent/examples/.expect/datingService.txt
===================================================================
--- tests/concurrent/examples/.expect/datingService.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/concurrent/examples/.expect/datingService.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/concurrent/examples/datingService.cfa
===================================================================
--- tests/concurrent/examples/datingService.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/concurrent/examples/datingService.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Mon Oct 30 12:56:20 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Jun 21 11:32:34 2019
-// Update Count     : 38
+// Last Modified On : Sun Sep 27 15:42:25 2020
+// Update Count     : 40
 //
 
@@ -108,4 +108,6 @@
 		if ( girlck[ boyck[i] ] != boyck[ girlck[i] ] ) abort();
 	} // for
+
+	printf( "done\n" );									// non-empty .expect file
 } // main
 
Index: tests/concurrent/futures/.expect/basic.txt
===================================================================
--- tests/concurrent/futures/.expect/basic.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/concurrent/futures/.expect/basic.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/concurrent/futures/basic.cfa
===================================================================
--- tests/concurrent/futures/basic.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/concurrent/futures/basic.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -91,3 +91,5 @@
 		}
 	}
+	printf( "done\n" );				// non-empty .expect file
+
 }
Index: tests/concurrent/park/.expect/force_preempt.txt
===================================================================
--- tests/concurrent/park/.expect/force_preempt.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/concurrent/park/.expect/force_preempt.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/concurrent/park/.expect/start_parked.txt
===================================================================
--- tests/concurrent/park/.expect/start_parked.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/concurrent/park/.expect/start_parked.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/concurrent/park/contention.cfa
===================================================================
--- tests/concurrent/park/contention.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/concurrent/park/contention.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -21,9 +21,9 @@
 		if(blocked[idx]) {
 			Thread * thrd = __atomic_exchange_n(&blocked[idx], 0p, __ATOMIC_SEQ_CST);
-			unpark( *thrd __cfaabi_dbg_ctx2 );
+			unpark( *thrd );
 		} else {
 			Thread * thrd = __atomic_exchange_n(&blocked[idx], &this, __ATOMIC_SEQ_CST);
-			unpark( *thrd __cfaabi_dbg_ctx2 );
-			park( __cfaabi_dbg_ctx );
+			unpark( *thrd );
+			park();
 		}
 	}
@@ -41,5 +41,5 @@
 			int idx = myrand() % blocked_size;
 			Thread * thrd = __atomic_exchange_n(&blocked[idx], 0p, __ATOMIC_SEQ_CST);
-			unpark( *thrd __cfaabi_dbg_ctx2 );
+			unpark( *thrd );
 			yield( myrand() % 20 );
 		}
Index: tests/concurrent/park/force_preempt.cfa
===================================================================
--- tests/concurrent/park/force_preempt.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/concurrent/park/force_preempt.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -30,5 +30,5 @@
 
 		// Unpark this thread, don't force a yield
-		unpark( this __cfaabi_dbg_ctx2 );
+		unpark( this );
 		assert(mask == 0xCAFEBABA);
 
@@ -43,5 +43,5 @@
 		// Park this thread,
 		assert(mask == (id_hash ^ 0xCAFEBABA));
-		park( __cfaabi_dbg_ctx );
+		park();
 		assert(mask == (id_hash ^ 0xCAFEBABA));
 
@@ -57,3 +57,4 @@
 		Waiter waiters[5];
 	}
+	printf( "done\n" );				// non-empty .expect file
 }
Index: tests/concurrent/park/start_parked.cfa
===================================================================
--- tests/concurrent/park/start_parked.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/concurrent/park/start_parked.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -3,11 +3,12 @@
 thread Parker {};
 void main( Parker & ) {
-      park( __cfaabi_dbg_ctx );
+	park();
 }
 
 int main() {
-      for(1000) {
-            Parker parker;
-            unpark( parker __cfaabi_dbg_ctx2 );
-      }
+	for(1000) {
+		Parker parker;
+		unpark( parker );
+	}
+	printf( "done\n" );									// non-empty .expect file
 }
Index: tests/const-init.cfa
===================================================================
--- tests/const-init.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/const-init.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,48 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2020 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// const-init.cfa -- tests of initializing constants
+//
+// Author           : Michael Brooks
+// Created On       : Tue Oct 06 22:00:00 2020
+// Last Modified By : Michael Brooks
+// Last Modified On : Tue Oct 06 22:00:00 2020
+// Update Count     : 1
+//
+
+/*
+
+This test shows non-crashing of generated code for constants with interesting initizers.
+The potential for these to crash is compiler dependent.
+
+There are two cases:
+1. static constants in one compilation unit (tested here)
+2. extern constants across compilation units (tested by libcfa being loadable, specifically
+   the constant declarations in libcfa/src/limits.cfa, which almost every test exercises,
+   including "hello;" but notably, the "limits" test does not exercise it because that test
+   is compile-only)
+
+Crashes that we have obsrved (#182 and build failures September 2020) are because the libcfa
+initialization is writing to a global variable (which the declaring program wants typed as 
+constant), while the compiler has placed this global in a read-only section.
+
+Compiler dependence includes:
+
+                          Case 1           Case 2
+GCC-6  on Ubuntu 16.04    Never crashed    Never crashed
+GCC-8  on both            Has crashed      Never crashed
+GCC-10 on Ubuntu 20.04    Has crashed      Has crashed
+
+For this test case to fail, with most other tests passing, would be a situation only ever
+observed with GCC-8.
+
+*/
+
+static const char foo = -1;
+
+int main() {
+    printf("done\n");
+}
Index: tests/enum.cfa
===================================================================
--- tests/enum.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/enum.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -26,3 +26,4 @@
 //Dummy main
 int main(int argc, char const *argv[]) {
+	printf( "done\n" );				// non-empty .expect file
 }
Index: tests/errors/.expect/completeType.x64.txt
===================================================================
--- tests/errors/.expect/completeType.x64.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/errors/.expect/completeType.x64.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -132,8 +132,8 @@
 ?=?: pointer to function
         ... with parameters
-          reference to instance of type _109_0_T (not function type)
-          instance of type _109_0_T (not function type)
+          reference to instance of type _110_0_T (not function type)
+          instance of type _110_0_T (not function type)
         ... returning
-          _retval__operator_assign: instance of type _109_0_T (not function type)
+          _retval__operator_assign: instance of type _110_0_T (not function type)
           ... with attributes:
             Attribute with name: unused
Index: tests/errors/.expect/completeType.x86.txt
===================================================================
--- tests/errors/.expect/completeType.x86.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/errors/.expect/completeType.x86.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -132,8 +132,8 @@
 ?=?: pointer to function
         ... with parameters
-          reference to instance of type _108_0_T (not function type)
-          instance of type _108_0_T (not function type)
+          reference to instance of type _109_0_T (not function type)
+          instance of type _109_0_T (not function type)
         ... returning
-          _retval__operator_assign: instance of type _108_0_T (not function type)
+          _retval__operator_assign: instance of type _109_0_T (not function type)
           ... with attributes:
             Attribute with name: unused
Index: tests/exceptions/.expect/virtual-cast.txt
===================================================================
--- tests/exceptions/.expect/virtual-cast.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/exceptions/.expect/virtual-cast.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/exceptions/.expect/virtual-poly.txt
===================================================================
--- tests/exceptions/.expect/virtual-poly.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/exceptions/.expect/virtual-poly.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/exceptions/defaults.cfa
===================================================================
--- tests/exceptions/defaults.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/exceptions/defaults.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -55,5 +55,5 @@
 
 void unhandled_test(void) {
-	forall(dtype T | is_exception(T))
+	forall(dtype T, dtype V | is_exception(T, V))
 	void defaultTerminationHandler(T &) {
 		throw (unhandled_exception){};
Index: tests/exceptions/virtual-cast.cfa
===================================================================
--- tests/exceptions/virtual-cast.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/exceptions/virtual-cast.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -74,3 +74,4 @@
 	free(tri);
 	free(top);
+	printf( "done\n" );				// non-empty .expect file
 }
Index: tests/exceptions/virtual-poly.cfa
===================================================================
--- tests/exceptions/virtual-poly.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/exceptions/virtual-poly.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -77,3 +77,4 @@
 	mono_poly_test();
 	poly_poly_test();
+	printf( "done\n" );				// non-empty .expect file
 }
Index: tests/expression.cfa
===================================================================
--- tests/expression.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/expression.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -8,78 +8,82 @@
 
 int main() {
-    int a[3] = { 0, 0, 0 };
-    S s = { 3 }, * ps = &s;
-    [int] t = { 3 };
-    * [int] pt = &t;
-    int i = 1, j = 2;
+	int a[3] = { 0, 0, 0 };
+	S s = { 3 }, * ps = &s;
+	[int] t = { 3 };
+	* [int] pt = &t;
+	int i = 1, j = 2;
 
-    // operators
+	// operators
 
-    !i;
-    ~i;
-    +i;
-    -i;
-    *ps;
-    ++ps;
-    --ps;
-    ps++;
-    ps--;
+	!i;
+	~i;
+	+i;
+	-i;
+	*ps;
+	++ps;
+	--ps;
+	ps++;
+	ps--;
 
-    i + j;
-    i - j;
-    i * j;
+	i + j;
+	i - j;
+	i * j;
 
-    i / j;
-    i % j;
-    i ^ j;
-    i & j;
-    i | j;
-    i < j;
-    i > j;
-    i = j;
+	i / j;
+	i % j;
+	i ^ j;
+	i & j;
+	i | j;
+	i < j;
+	i > j;
+	i = j;
 
-    i == j;
-    i != j;
-    i << j;
-    i >> j;
-    i <= j;
-    i >= j;
-    i && j;
-    i || j;
-    ps->i;
+	i == j;
+	i != j;
+	i << j;
+	i >> j;
+	i <= j;
+	i >= j;
+	i && j;
+	i || j;
+	ps->i;
 
-    i *= j;
-    i /= j;
-    i %= j;
-    i += j;
-    i -= j;
-    i &= j;
-    i |= j;
-    i ^= j;
-    i <<= j;
-    i >>= j;
+	i *= j;
+	i /= j;
+	i %= j;
+	i += j;
+	i -= j;
+	i &= j;
+	i |= j;
+	i ^= j;
+	i <<= j;
+	i >>= j;
 
-    i ? i : j;
+	i ? i : j;
 
-    // postfix function call
+	// postfix function call
 
-    (3 + 4)`mary;
-    ({3 + 4;})`mary;
-    [3, 4]`mary;
-    3`mary;
-    a[0]`mary;
-    a[0]`mary`mary;
-    s{0}`mary;
-    a[3]`jane++;
-    jack(3)`mary;
-    s.i`mary;
-    t.0`mary;
-    s.[i]`mary;
-    ps->i`mary;
-    pt->0`mary;
-    ps->[i]`mary;
-    i++`mary;
-    i--`mary;
-    (S){2}`mary;
-    (S)@{2}`mary;
+	(3 + 4)`mary;
+	({3 + 4;})`mary;
+	[3, 4]`mary;
+	3`mary;
+	a[0]`mary;
+	a[0]`mary`mary;
+	s{0}`mary;
+	a[3]`jane++;
+	jack(3)`mary;
+	s.i`mary;
+	t.0`mary;
+	s.[i]`mary;
+	ps->i`mary;
+	pt->0`mary;
+	ps->[i]`mary;
+	i++`mary;
+	i--`mary;
+	(S){2}`mary;
+	(S)@{2}`mary;
+
+	#if !defined(NO_COMPILED_PRAGMA)
+		#pragma message( "Compiled" )	// force non-empty .expect file
+	#endif
 } // main
Index: tests/forall.cfa
===================================================================
--- tests/forall.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/forall.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Wed May  9 08:48:15 2018
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Mar 19 08:29:38 2019
-// Update Count     : 32
+// Last Modified On : Sun Sep 27 08:43:20 2020
+// Update Count     : 35
 // 
 
@@ -158,5 +158,5 @@
 }
 forall( otype T ) inline static {
-	int RT9( T ) { T t; }
+	int RT9( T ) { T t; return 3; }
 }
 
@@ -213,5 +213,7 @@
 // w3 g3;
 
-int main( void ) {}
+int main( void ) {
+	#pragma message( "Compiled" )			// force non-empty .expect file
+}
 
 // Local Variables: //
Index: tests/heap.cfa
===================================================================
--- tests/heap.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/heap.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Tue Nov  6 17:54:56 2018
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Sep  7 18:37:41 2020
-// Update Count     : 72
+// Last Modified On : Fri Sep 25 15:21:52 2020
+// Update Count     : 73
 // 
 
@@ -485,4 +485,5 @@
 	// checkFreeOn();
 	// malloc_stats();
+	printf( "done\n" );									// non-empty .expect file
 }
 
Index: tests/identFuncDeclarator.cfa
===================================================================
--- tests/identFuncDeclarator.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/identFuncDeclarator.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Wed Aug 17 08:36:34 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Nov  6 17:56:33 2018
-// Update Count     : 3
+// Last Modified On : Sun Sep 27 08:20:46 2020
+// Update Count     : 5
 // 
 
@@ -111,4 +111,6 @@
 	int (* (* const f80)(int))();
 	int (* const(* const f81)(int))();
+
+	#pragma message( "Compiled" )			// force non-empty .expect file
 }
 
Index: tests/identParamDeclarator.cfa
===================================================================
--- tests/identParamDeclarator.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/identParamDeclarator.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Wed Aug 17 08:37:56 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Nov  6 17:56:44 2018
-// Update Count     : 3
+// Last Modified On : Fri Sep 25 14:31:08 2020
+// Update Count     : 4
 // 
 
@@ -158,5 +158,5 @@
 
 int main( int argc, char const *argv[] ) {				// dummy main
-	return 0;
+	printf( "done\n" );									// non-empty .expect file
 }
 
Index: tests/init1.cfa
===================================================================
--- tests/init1.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/init1.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -9,7 +9,7 @@
 // Author           : Michael Brooks
 // Created On       : Thu Jul 16 22:00:00 2020
-// Last Modified By : Michael Brooks
-// Last Modified On : Thu Jul 16 22:00:00 2020
-// Update Count     : 1
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Sun Oct 11 10:26:50 2020
+// Update Count     : 8
 //
 
@@ -41,8 +41,16 @@
     const float * cpx2 = cpx;
 
+    // FIX ME: Code gen not producing correct cast.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wincompatible-pointer-types"
+    int (* fp)( int ) = 0p;
+    fp = 0p;
+#pragma GCC diagnostic pop
+
     //
     // unsound initializations
     //
 
+    #ifdef ERR1
     // mismatched referenced type
     int & ry = rx;
@@ -52,4 +60,5 @@
     float & ry2 = crx;
     float * py2 = cpx;
+    #endif // ERR1
 }
 
@@ -90,4 +99,5 @@
 //
 
+#ifdef ERR1
 int & f_ry() { 
     float & rx = *0p;
@@ -119,2 +129,7 @@
     return s;               // mismatched referenced type
 }
+#endif // ERR1
+
+int main() {
+    #pragma message( "Compiled" )			// force non-empty .expect file
+}
Index: tests/labelledExit.cfa
===================================================================
--- tests/labelledExit.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/labelledExit.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Wed Aug 10 07:29:39 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Feb  5 16:49:48 2020
-// Update Count     : 9
+// Last Modified On : Sun Sep 27 09:01:34 2020
+// Update Count     : 12
 // 
 
@@ -179,5 +179,5 @@
 
 int main( int argc, char const *argv[] ) {
-	/* code */
+	#pragma message( "Compiled" )						// force non-empty .expect file
 }
 
Index: tests/limits.cfa
===================================================================
--- tests/limits.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/limits.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,7 +10,10 @@
 // Created On       : Tue May 10 20:44:20 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Nov  6 17:57:55 2018
-// Update Count     : 8
+// Last Modified On : Sun Sep 27 08:45:43 2020
+// Update Count     : 10
 //
+
+// Note: For testing the ability to load the constants defined in libcfa/src/limits.cfa,
+// see discussion in test const-init.
 
 #include <limits.hfa>
@@ -147,6 +150,5 @@
 
 int main(int argc, char const *argv[]) {
-	//DUMMY
-	return 0;
+	#pragma message( "Compiled" )						// force non-empty .expect file
 }
 
Index: sts/linking/.expect/linkerror.txt
===================================================================
--- tests/linking/.expect/linkerror.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,3 +1,0 @@
-linking/linkerror.o: In function `_X4mainFi___1':
-linking/linkerror.cfa:6: undefined reference to `_X18this_doesnot_existFv_i__1'
-collect2: error: ld returned 1 exit status
Index: sts/linking/linkerror.cfa
===================================================================
--- tests/linking/linkerror.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,7 +1,0 @@
-// This is more of a meta test, to confirm the test suite handles link errors correctly.
-
-extern void this_doesnot_exist(int);
-
-int main() {
-	this_doesnot_exist( 6 );
-}
Index: tests/maybe.cfa
===================================================================
--- tests/maybe.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/maybe.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Thr May 25 16:02:00 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Jul 20 15:24:07 2017
-// Update Count     : 1
+// Last Modified On : Fri Sep 25 15:13:28 2020
+// Update Count     : 2
 //
 
@@ -65,3 +65,4 @@
 	//checkNamedConstructors();
 	checkSetters();
+	printf( "done\n" );				// non-empty .expect file
 }
Index: tests/nested-types.cfa
===================================================================
--- tests/nested-types.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/nested-types.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,26 +10,26 @@
 // Created On       : Mon Jul 9 10:20:03 2018
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Feb 12 18:21:15 2020
-// Update Count     : 3
+// Last Modified On : Sun Sep 27 08:48:59 2020
+// Update Count     : 6
 //
 
 typedef int N;
 struct A {
-  forall(otype T)
-  struct N {
-    T x;
-  };
+	forall(otype T)
+	struct N {
+		T x;
+	};
 };
 
 struct S {
-  struct T {
-    int i;
-    typedef int Bar;
-  };
-  T x;
+	struct T {
+		int i;
+		typedef int Bar;
+	};
+	T x;
 
-  // struct U;
-  typedef T Bar;
-  typedef int Baz;
+	// struct U;
+	typedef T Bar;
+	typedef int Baz;
 };
 
@@ -65,36 +65,38 @@
 
 int main() {
-  // access nested struct
-  S.T x;
+	// access nested struct
+	S.T x;
 
-  {
-    struct S {
-      int i;
-      struct Z {
-        double d;
-      };
-    };
+	{
+		struct S {
+		  int i;
+		  struct Z {
+		    double d;
+		  };
+		};
 
-    S.Z z;   // gets local S
-    .S.T y;  // lookup at global scope only
+		S.Z z;											// gets local S
+		.S.T y;											// lookup at global scope only
 
-    const volatile .S.T q;
+		const volatile .S.T q;
 #if ERR1
-    T err1;           // error: no T in scope
+		T err1;											// error: no T in scope
 #endif
 #if ERR2
-    .Z err2;          // error: no Z in global scope
-    .S.Baz.Bar err3;  // error: .S.Baz => int, int is not aggregate and should not appear left of the dot
-    .S.Z err4;        // error: no Z in global S
+		.Z err2;										// error: no Z in global scope
+		.S.Baz.Bar err3;								// error: .S.Baz => int, int is not aggregate and should not appear left of the dot
+		.S.Z err4;										// error: no Z in global S
 #endif
-  }
+	}
 
-  // U.S un;
+	// U.S un;
 
-  S.Bar y;
-  S.Baz x;
-  S.T.Bar z;
+	S.Bar y;
+	S.Baz x;
+	S.T.Bar z;
 
-  // A.N(int) x;  // xxx - should not be an error, but currently is.
+	// A.N(int) x;  // xxx - should not be an error, but currently is.
+
+	#pragma message( "Compiled" )			// force non-empty .expect file
 }
 
Index: tests/numericConstants.cfa
===================================================================
--- tests/numericConstants.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/numericConstants.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Wed May 24 22:10:36 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Feb  5 08:58:16 2019
-// Update Count     : 5
+// Last Modified On : Sun Sep 27 07:55:22 2020
+// Update Count     : 7
 // 
 
@@ -63,4 +63,6 @@
 	0x_ff.ffp0;					// hex real
 	0x_1.ffff_ffff_p_128_l;
+
+	#pragma message( "Compiled" )	// force non-empty .expect file
 } // main
 
Index: tests/operators.cfa
===================================================================
--- tests/operators.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/operators.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -31,5 +31,5 @@
 int main(int argc, char const *argv[]) {
 	/* code */
-	return 0;
+	printf( "done\n" );				// non-empty .expect file
 }
 
Index: sts/poly-cycle.cfa
===================================================================
--- tests/poly-cycle.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ 	(revision )
@@ -1,28 +1,0 @@
-// Check that a cycle of polymorphic data structures can be instancated.
-
-#include <stdio.h>
-
-forall(otype T)
-struct func_table;
-
-forall(otype U)
-struct object {
-	func_table(U) * virtual_table;
-};
-
-forall(otype T)
-struct func_table {
-	void (*object_func)(object(T) *);
-};
-
-void func(object(int) *) {
-	printf("Success!\n");
-}
-
-func_table(int) an_instance = { func };
-
-int main(int argc, char * argv[]) {
-	object(int) x = { 0p };
-	an_instance.object_func( &x );
-	return 0;
-}
Index: tests/poly-d-cycle.cfa
===================================================================
--- tests/poly-d-cycle.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/poly-d-cycle.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,28 @@
+// Check that a cycle of polymorphic dtype structures can be instancated.
+
+#include <stdio.h>
+
+forall(dtype T)
+struct func_table;
+
+forall(dtype U)
+struct object {
+	func_table(U) * virtual_table;
+};
+
+forall(dtype T)
+struct func_table {
+	void (*object_func)(object(T) *);
+};
+
+void func(object(int) *) {
+	printf("Success!\n");
+}
+
+func_table(int) an_instance = { func };
+
+int main(int argc, char * argv[]) {
+	object(int) x = { 0p };
+	an_instance.object_func( &x );
+	return 0;
+}
Index: tests/poly-o-cycle.cfa
===================================================================
--- tests/poly-o-cycle.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/poly-o-cycle.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,28 @@
+// Check that a cycle of polymorphic otype structures can be instancated.
+
+#include <stdio.h>
+
+forall(otype T)
+struct func_table;
+
+forall(otype U)
+struct object {
+	func_table(U) * virtual_table;
+};
+
+forall(otype T)
+struct func_table {
+	void (*object_func)(object(T) *);
+};
+
+void func(object(int) *) {
+	printf("Success!\n");
+}
+
+func_table(int) an_instance = { func };
+
+int main(int argc, char * argv[]) {
+	object(int) x = { 0p };
+	an_instance.object_func( &x );
+	return 0;
+}
Index: tests/pybin/tools.py
===================================================================
--- tests/pybin/tools.py	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/pybin/tools.py	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -88,22 +88,34 @@
 		raise
 
+def is_empty(fname):
+	if not os.path.isfile(fname):
+		return True
+
+	if os.stat(fname).st_size == 0:
+		return True
+
+	return False
+
 def is_ascii(fname):
 	if settings.dry_run:
 		print("is_ascii: %s" % fname)
-		return True
+		return (True, "")
 
 	if not os.path.isfile(fname):
-		return False
-
-	code, out = sh("file %s" % fname, output_file=subprocess.PIPE)
+		return (False, "No file")
+
+	code, out = sh("file", fname, output_file=subprocess.PIPE)
 	if code != 0:
-		return False
+		return (False, "'file EXPECT' failed with code {}".format(code))
 
 	match = re.search(".*: (.*)", out)
 
 	if not match:
-		return False
-
-	return match.group(1).startswith("ASCII text")
+		return (False, "Unreadable file type: '{}'".format(out))
+
+	if "ASCII text" in match.group(1):
+		return (True, "")
+
+	return (False, "File type should be 'ASCII text', was '{}'".format(match.group(1)))
 
 def is_exe(fname):
Index: tests/raii/.expect/ctor-autogen.txt
===================================================================
--- tests/raii/.expect/ctor-autogen.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/raii/.expect/ctor-autogen.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/raii/.expect/init_once.txt
===================================================================
--- tests/raii/.expect/init_once.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/raii/.expect/init_once.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+done
Index: tests/raii/ctor-autogen.cfa
===================================================================
--- tests/raii/ctor-autogen.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/raii/ctor-autogen.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -151,3 +151,4 @@
 	identity(gcs);
 	identity(gcu);
+	printf( "done\n" );				// non-empty .expect file
 }
Index: tests/raii/init_once.cfa
===================================================================
--- tests/raii/init_once.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/raii/init_once.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jun 14 15:43:35 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Mar 22 13:41:26 2019
-// Update Count     : 4
+// Last Modified On : Fri Sep 25 15:36:39 2020
+// Update Count     : 5
 //
 
@@ -188,4 +188,5 @@
 		static_variable();
 	}
+	printf( "done\n" );									// non-empty .expect file
 }
 
Index: tests/result.cfa
===================================================================
--- tests/result.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/result.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Thr May 25 16:50:00 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Jul 20 15:24:12 2017
-// Update Count     : 1
+// Last Modified On : Fri Sep 25 15:22:59 2020
+// Update Count     : 2
 //
 
@@ -66,3 +66,4 @@
 	checkGetters();
 	checkSetters();
+	printf( "done\n" );				// non-empty .expect file
 }
Index: tests/stdincludes.cfa
===================================================================
--- tests/stdincludes.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/stdincludes.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Tue Aug 29 08:26:14 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Nov  6 18:00:53 2018
-// Update Count     : 6
+// Last Modified On : Sun Sep 27 08:51:38 2020
+// Update Count     : 8
 // 
 
@@ -47,5 +47,7 @@
 #include <wctype.h>
 
-int main() {}
+int main() {
+	#pragma message( "Compiled" )			// force non-empty .expect file
+}
 
 // Local Variables: //
Index: tests/switch.cfa
===================================================================
--- tests/switch.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/switch.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jul 12 06:50:22 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Nov  6 18:01:34 2018
-// Update Count     : 37
+// Last Modified On : Sun Sep 27 08:35:02 2020
+// Update Count     : 43
 // 
 
@@ -100,4 +100,6 @@
 		j = 5;
 	} // choose
+
+	#pragma message( "Compiled" )						// force non-empty .expect file
 } // main
 
Index: tests/test.py
===================================================================
--- tests/test.py	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/test.py	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -173,10 +173,16 @@
 	test.prepare()
 
+	# ----------
+	# MAKE
+	# ----------
 	# build, skipping to next test on error
 	with Timed() as comp_dur:
 		make_ret, _ = make( test.target(), output_file=subprocess.DEVNULL, error=out_file, error_file = err_file )
 
+	# ----------
+	# RUN
+	# ----------
+	# run everything in a temp directory to make sure core file are handled properly
 	run_dur = None
-	# run everything in a temp directory to make sure core file are handled properly
 	with tempdir():
 		# if the make command succeeds continue otherwise skip to diff
@@ -256,20 +262,8 @@
 	make('clean', output_file=subprocess.DEVNULL, error=subprocess.DEVNULL)
 
-	# since python prints stacks by default on a interrupt, redo the interrupt handling to be silent
-	def worker_init():
-		def sig_int(signal_num, frame):
-			pass
-
-		signal.signal(signal.SIGINT, sig_int)
-
-	# create the executor for our jobs and handle the signal properly
-	pool = multiprocessing.Pool(jobs, worker_init)
+	# create the executor for our jobs
+	pool = multiprocessing.Pool(jobs)
 
 	failed = False
-
-	def stop(x, y):
-		print("Tests interrupted by user", file=sys.stderr)
-		sys.exit(1)
-	signal.signal(signal.SIGINT, stop)
 
 	# for each test to run
@@ -360,4 +354,10 @@
 		failed = 0
 
+		# check if the expected files aren't empty
+		if not options.regenerate_expected:
+			for t in tests:
+				if is_empty(t.expect()):
+					print('WARNING: test "{}" has empty .expect file'.format(t.target()), file=sys.stderr)
+
 		# for each build configurations, run the test
 		with Timed() as total_dur:
Index: tests/typedefRedef.cfa
===================================================================
--- tests/typedefRedef.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/typedefRedef.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -27,9 +27,8 @@
 typedef int ARR[];
 typedef int ARR[];
-// #ifdef ERR1
-// if a typedef has an array dimension,
-// it can only be redefined to the same dimension
+#ifdef ERR1
+// if a typedef has an array dimension, it can only be redefined to the same dimension
 typedef int ARR[2];
-// #endif
+#endif
 
 typedef int X;
@@ -54,16 +53,18 @@
 
 int main() {
-  typedef int ARR[sz];
+	typedef int ARR[sz];
 
-  // can't redefine typedef which is VLA
+	// can't redefine typedef which is VLA
 #if ERR1
-  typedef int ARR[sz];
+	typedef int ARR[sz];
 #endif
 
-  Foo *x;
+	Foo * x;
 
-  typedef struct Bar Foo;
-  Foo *y;
+	typedef struct Bar Foo;
+	Foo * y;
 
-  typedef int *** pt;
+	typedef int *** pt;
+
+	#pragma message( "Compiled" )			// force non-empty .expect file
 }
Index: tests/typeof.cfa
===================================================================
--- tests/typeof.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/typeof.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -1,11 +1,12 @@
 int main() {
-    int *v1;
-    typeof(v1) v2;
-    typeof(*v1) v3[4];
-    char *v4[4];
-    typeof(typeof(char *)[4]) v5;
-    typeof (int *) v6;
-    typeof( int ( int, int p ) ) *v7;
-    typeof( [int] ( int, int p ) ) *v8;
-    (typeof(v1)) v2; // cast with typeof
+	int *v1;
+	typeof(v1) v2;
+	typeof(*v1) v3[4];
+	char *v4[4];
+	typeof(typeof(char *)[4]) v5;
+	typeof (int *) v6;
+	typeof( int ( int, int p ) ) *v7;
+	typeof( [int] ( int, int p ) ) *v8;
+	(typeof(v1)) v2; // cast with typeof
+	printf( "done\n" );				// non-empty .expect file
 }
Index: tests/variableDeclarator.cfa
===================================================================
--- tests/variableDeclarator.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/variableDeclarator.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Wed Aug 17 08:41:42 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Nov  6 18:02:16 2018
-// Update Count     : 2
+// Last Modified On : Sun Sep 27 07:46:17 2020
+// Update Count     : 13
 // 
 
@@ -18,74 +18,74 @@
 int (f2);
 
-int *f3;
-int **f4;
-int * const *f5;
+int * f3;
+int ** f4;
+int * const * f5;
 int * const * const f6;
 
-int *(f7);
-int **(f8);
-int * const *(f9);
+int * (f7);
+int ** (f8);
+int * const * (f9);
 int * const * const (f10);
 
-int (*f11);
-int (**f12);
-int (* const *f13);
+int (* f11);
+int (** f12);
+int (* const * f13);
 int (* const * const f14);
 
-int f15[];
+int f15[0];
 int f16[10];
-int (f17[]);
+int (f17[0]);
 int (f18[10]);
 
-int *f19[];
-int *f20[10];
-int **f21[];
-int **f22[10];
-int * const *f23[];
-int * const *f24[10];
-int * const * const f25[];
+int * f19[0];
+int * f20[10];
+int ** f21[0];
+int ** f22[10];
+int * const * f23[0];
+int * const * f24[10];
+int * const * const f25[0];
 int * const * const f26[10];
 
-int *(f27[]);
+int *(f27[0]);
 int *(f28[10]);
-int **(f29[]);
+int **(f29[0]);
 int **(f30[10]);
-int * const *(f31[]);
+int * const *(f31[0]);
 int * const *(f32[10]);
-int * const * const (f33[]);
+int * const * const (f33[0]);
 int * const * const (f34[10]);
 
-int (*f35)[];
-int (*f36)[10];
-int (**f37)[];
-int (**f38)[10];
-int (* const *f39)[];
-int (* const *f40)[10];
+int (* f35)[];
+int (* f36)[10];
+int (** f37)[];
+int (** f38)[10];
+int (* const * f39)[];
+int (* const * f40)[10];
 int (* const * const f41)[];
 int (* const * const f42)[10];
 
-int f43[][3];
+int f43[0][3];
 int f44[3][3];
-int (f45[])[3];
+int (f45[0])[3];
 int (f46[3])[3];
-int ((f47[]))[3];
+int ((f47[0]))[3];
 int ((f48[3]))[3];
 
-int *f49[][3];
-int *f50[3][3];
-int **f51[][3];
-int **f52[3][3];
-int * const *f53[][3];
-int * const *f54[3][3];
-int * const * const f55[][3];
+int * f49[0][3];
+int * f50[3][3];
+int ** f51[0][3];
+int ** f52[3][3];
+int * const * f53[0][3];
+int * const * f54[3][3];
+int * const * const f55[0][3];
 int * const * const f56[3][3];
 
-int (*f57[][3]);
-int (*f58[3][3]);
-int (**f59[][3]);
-int (**f60[3][3]);
-int (* const *f61[][3]);
-int (* const *f62[3][3]);
-int (* const * const f63[][3]);
+int (* f57[0][3]);
+int (* f58[3][3]);
+int (** f59[0][3]);
+int (** f60[3][3]);
+int (* const * f61[0][3]);
+int (* const * f62[3][3]);
+int (* const * const f63[0][3]);
 int (* const * const f64[3][3]);
 
@@ -93,7 +93,7 @@
 int (f66)(int);
 
-int *f67(int);
-int **f68(int);
-int * const *f69(int);
+int * f67(int);
+int ** f68(int);
+int * const * f69(int);
 int * const * const f70(int);
 
@@ -104,10 +104,10 @@
 int * const * const (f74)(int);
 
-int (*f75)(int);
-int (**f76)(int);
-int (* const *f77)(int);
+int (* f75)(int);
+int (** f76)(int);
+int (* const * f77)(int);
 int (* const * const f78)(int);
 
-int (*(*f79)(int))();
+int (*(* f79)(int))();
 int (*(* const f80)(int))();
 int (* const(* const f81)(int))();
@@ -119,7 +119,8 @@
 //int fe2()[];				// returning an array
 //int fe3()();				// returning a function
-//int (*fe4)()();				// returning a function
-//int ((*fe5())())[];			// returning an array
+//int (* fe4)()();				// returning a function
+//int ((* fe5())())[];			// returning an array
 
+#ifdef __CFA__
 // Cforall extensions
 
@@ -129,14 +130,14 @@
 const * const * int cf6;
 
-[] int cf15;
+[0] int cf15;
 [10] int cf16;
 
-[] * int cf19;
+[0] * int cf19;
 [10] * int cf20;
-int **cf21[];
+int ** cf21[0];
 [10] * * int cf22;
-[] * const * int cf23;
+[0] * const * int cf23;
 [10] * const * int cf24;
-[] const * const * int cf25;
+[0] const * const * int cf25;
 [10] const * const * int cf26;
 
@@ -150,14 +151,14 @@
 const * const * [10] int cf42;
 
-[][3] int cf43;
+[0][3] int cf43;
 [3][3] int cf44;
 
-[][3] * int cf49;
+[0][3] * int cf49;
 [3][3] * int cf50;
-[][3] * * int cf51;
+[0][3] * * int cf51;
 [3][3] * * int cf52;
-[][3] const * int cf53;
+[0][3] const * int cf53;
 [3][3] * const * int cf54;
-[][3] const * const * int cf55;
+[0][3] const * const * int cf55;
 [3][3] const * const * int cf56;
 
@@ -173,9 +174,9 @@
 
 *[]*[]* [ *[]*[] int ]( *[]*[] int, *[]*[] int ) v3;
+#endif // __CFA__
 
 //Dummy main
-int main(int argc, char const *argv[])
-{
-	return 0;
+int main( int argc, char const * argv[] ) {
+	#pragma message( "Compiled" )						// force non-empty .expect file
 }
 
Index: tests/voidPtr.cfa
===================================================================
--- tests/voidPtr.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/voidPtr.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -13,5 +13,6 @@
 	if ( ! a ) {
 		abort();
-	}	
+	}
+	printf( "done\n" );				// non-empty .expect file
 }
 
Index: tests/warnings/.expect/self-assignment.txt
===================================================================
--- tests/warnings/.expect/self-assignment.txt	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/warnings/.expect/self-assignment.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -24,2 +24,4 @@
 ... to:
   reference to signed int
+warnings/self-assignment.cfa: In function '_X4mainFi___1':
+warnings/self-assignment.cfa:36:9: note: #pragma message: Compiled
Index: tests/warnings/self-assignment.cfa
===================================================================
--- tests/warnings/self-assignment.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/warnings/self-assignment.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -10,6 +10,6 @@
 // Created On       : Thu Mar 1 13:53:57 2018
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Feb 20 07:56:17 2019
-// Update Count     : 3
+// Last Modified On : Sun Sep 27 09:24:34 2020
+// Update Count     : 6
 //
 
@@ -31,8 +31,10 @@
 	s.i = s.i;
 	t.s.i = t.s.i;
+
+	#pragma message( "Compiled" )			// force non-empty .expect file
 }
 
 // Local Variables: //
 // tab-width: 4 //
-// compile-command: "cfa dtor-early-exit" //
+// compile-command: "cfa self-assignment.cfa" //
 // End: //
Index: tests/zombies/ArrayN.c
===================================================================
--- tests/zombies/ArrayN.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/ArrayN.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,23 @@
+#include <fstream.hfa>
+
+// [unsigned, unsigned] offset_to_index(unsigned offset, unsigned sx, unsigned sy)
+// {
+//     return [offset / sx, offset % sy];
+// }
+
+forall(otype index_t)
+index_t offset_to_index(unsigned offset, index_t size) {
+    return [offset / size.0, offset % size.1];
+}
+
+int main(int argc, char* argv[]) {
+    unsigned x = 0, y = 0, i = 0;
+    unsigned sx = 4, sy = 4;
+
+    i = 6;
+    [x, y] = offset_to_index(6, [sx, sy]);
+
+    sout | x | ' ' | y;
+
+    return 0;
+}
Index: tests/zombies/Initialization.c
===================================================================
--- tests/zombies/Initialization.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/Initialization.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,41 @@
+// Cforall extensions
+
+int * x11 = 0, x12 = 0;
+int * x21 = 0, x22 = 0;
+
+[20] int y1, y2 = { 1, 2, 3 };
+
+// designators
+
+struct {
+	[int] w;
+} a = { .w : [2] };
+
+struct { int a[3], b; } w [] = { [0].a : {1}, [0].b : 3, [1].a[0] : 2 };
+
+struct {
+	int f1, f2, f3;
+	struct { int g1, g2, g3; } f4[4];
+} v7 = {
+  .f1 : 4,
+  f2 : 3,
+  .f4[2] : {
+	  .g1 : 3,
+	  g3 : 0,
+	},
+  .f4[3].g3 : 7,
+};
+
+struct point { int x; int z; struct {int y1, y2, y3;} y; int w;};
+struct quintet { int v, w, x, y, z;};
+
+int main() {
+	struct point p1 = { x : 3 };
+	struct point p2 = { 3, 4 };
+	struct point p3 = { .[x,z] : 5, y : { .[y3,y1] : 6, 17 } };
+	struct point p4 = { w : 5, 4 };
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: tests/zombies/Initialization2.c
===================================================================
--- tests/zombies/Initialization2.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/Initialization2.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,15 @@
+int a = 3;
+struct { int x; int y; } z = { 3, 7 };      /* OK */
+struct { int x; int y; } z1 = { .[x,y]:3 }; /* OK */
+struct { int x; int y; } z2 = { y:3, x:4 }; /* OK */
+struct { int x; struct { int y1; int y2; } y; } z3 = { x:3, y:{y1:4, y2:5} };  /* OK */
+struct { int x; struct { int y1; int y2; } y; } z3 = { y:{y2:9, y1:8}, x:7 };  /* OK */
+struct { int x; struct { int y1; int y2; } y; } z3 = { x:7, {y2:9, y1:8} };  /* OK */
+struct { int x; struct { int y1; int y2; } y; } z3 = { 3, {4, 5} };   /* OK */
+//struct { int x; struct { int y1; int y2; } } z3 = {4, {5,6}};
+//struct { int x; struct { int y1; int y2; } y; } z4 = { y:{4,5}, a:3 };
+//struct { int x; struct { int y1; int y2; } y; } z5 = { a:3, {4,5}};
+//int x[20] = { [10]: 4 };
+struct t { int a, b; };
+struct t x = { b:4, a:3 };
+struct { int x; int y; } z6= {5,6,4};  /* (should be an) error */
Index: tests/zombies/Makefile.example
===================================================================
--- tests/zombies/Makefile.example	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/Makefile.example	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,42 @@
+CFA ?= ../driver/cfa-cpp
+CFAOPT ?= -a
+OUTPUT ?= Output
+EXPECT ?= Expect
+OUTPUTDIR ?= ${OUTPUT}${CFAOPT}
+EXPECTDIR ?= ${EXPECT}${CFAOPT}
+EXAMPLES = ${wildcard *.c}
+OUTPUTS = ${addprefix ${OUTPUTDIR}/,${EXAMPLES:.c=.txt}}
+
+#.SILENT :
+
+all :
+	+for opt in -a -e -f -r -s -v ; do \
+	    make test CFAOPT=$${opt} ; \
+	done ; \
+	rm -f core
+
+test : ${OUTPUTS} ${OUTPUTDIR}/report
+
+${OUTPUTDIR}/%.txt : %.c ${CFA} Makefile
+	-${CFA} -n ${CFAOPT} $< > $@ 2>&1
+
+${OUTPUTDIR}/report : ${OUTPUTS} ${EXPECTDIR}
+	rm -f $@
+	echo "===== regression test using cfa-cpp flag ${CFAOPT} ====="
+	@for i in ${OUTPUTS} ; do \
+	     echo "---"`basename $$i`"---" | tee -a $@; \
+	     diff -B -w ${EXPECTDIR}/`basename $$i` $$i | tee -a $@; \
+	done
+
+${OUTPUTS} : | ${OUTPUTDIR}		# order only prerequisite
+
+${OUTPUTDIR} :
+	mkdir -p $@
+
+# remove the expected results directories to generate new ones from the current output
+
+${EXPECTDIR} : | ${OUTPUTS}		# new Expected results ?
+	cp -pr ${OUTPUTDIR} $@
+
+clean :
+	rm -rf ${OUTPUT}-* core
Index: tests/zombies/Members.c
===================================================================
--- tests/zombies/Members.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/Members.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,72 @@
+char ?=?( char*, char );
+int ?=?( int*, int );
+float ?=?( float*, float );
+forall( dtype DT ) DT * ?=?( DT**, DT* );
+forall(otype T) lvalue T *?( T* );
+char *__builtin_memcpy();
+
+void a( char );
+void b( int );
+void c( int* );
+void d( float* );
+
+struct a_struct {
+	int a;
+	char a;
+	float a;
+};
+
+union b_struct {
+	int *a;
+	char *a;
+	float *a;
+};
+
+void f() {
+	struct a_struct the_struct;
+	union b_struct the_struct;
+  
+	a( the_struct.a );
+	b( the_struct.a );
+	c( the_struct.a );
+	d( the_struct.a );
+}
+
+struct c_struct {
+	int;
+	char;
+	float;
+};
+
+union d_struct {
+	int*;
+	char*;
+	float*;
+};
+
+void g() {
+	unsigned short x;
+	struct c_struct x;
+	union d_struct x;
+  
+	a( x );	// the 'a' and 'b' calls resolve to the ushort
+	b( x );	// it's debatable whether this is good
+	c( x );
+	d( x );
+}
+
+// make sure that forward declarations work
+
+struct forward;
+
+struct forward *q;
+
+struct forward { int y; };
+
+void h() {
+	q->y;
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: tests/zombies/Misc.c
===================================================================
--- tests/zombies/Misc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/Misc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,19 @@
+// interesting corner cases
+
+int a;
+int b;
+float b;
+
+void g( int );
+void g( unsigned );
+
+void f( void ) {
+	g( (a, b) );
+	g( (a, a, b) );
+	g( sizeof a );
+	g( sizeof( int ) );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: tests/zombies/MiscError.c
===================================================================
--- tests/zombies/MiscError.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/MiscError.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,16 @@
+int a;
+int b;
+float b;
+
+void g( int );
+
+void f( void ) {
+	g( (b, a) );
+	g( (b, a, b) );
+	g( (a, b, b) );
+	sizeof b;
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: tests/zombies/Rank2.c
===================================================================
--- tests/zombies/Rank2.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/Rank2.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,20 @@
+int ?=?( int &, int );
+forall(dtype DT) DT * ?=?( DT *&, DT * );
+
+void a() {
+	forall( otype T ) void f( T );
+	void g( forall( otype U ) void p( U ) );
+	g( f );
+}
+
+void g() {
+	void h( int *null );
+	forall( otype T ) T id( T );
+//	forall( dtype T ) T *0;
+//	int 0;
+	h( id( id( id( 0 ) ) ) );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: tests/zombies/Tuple.c
===================================================================
--- tests/zombies/Tuple.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/Tuple.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,73 @@
+int f( int, int );
+int g( int, int, int );
+static
+[ int, int *, * int, int ] h( int a, int b, * int c, [] char d );
+
+struct inner {
+	int f2, f3;
+};
+
+struct outer {
+	int f1;
+	struct inner i;
+	double f4;
+} s, *sp;
+
+const volatile [ int, int ] t1;
+static const [ int, int ] t2;
+const static [ int, const int ] t3;
+
+[ int rc ] printf( * char fmt, ... );
+int printf( char *fmt, ... );
+
+[ short x, unsigned y ] f1( int w ) {
+// 	return [ y, x ] = [ x, y ] = [ w, 23 ];
+}
+
+[ [ int, char, long, int ] r ] g1() {
+	short int x, p;
+	unsigned int y;
+	[ int, int ] z;
+
+	[ x, y, z ] = [ p, f( 17, 18 ), 4, 3 ];
+//	[ x, y, z ] = ([short, unsigned int, [int, int]])([ p, f( 17, 18 ), 4, 3 ]);
+	r = [ x, y, z ];
+}
+
+[ int rc ] main( int argc, ** char argv ) {
+	int a, b, c, d;
+//	struct outer t = { .[ f1, f4 ] : [ 1, 7.0 ] };
+	f( [ 3,5 ] );
+	g( [ 3,5 ], 3 );
+	f( t1 );
+	g( t1, 3 );
+
+//	[ , , , ];						/* empty tuple */
+	[ 3, 5 ];
+	[ a, b ] = 3;
+	[ a, b ] = [ 4.6 ];
+	[ a, b ] = 4.6;
+	[ a, b ] = [ c, d ] = [ 3, 5 ];
+//	[ a, b, [ c ] ] = [ 2, [ a, b ] ];
+	[ a, b, c ] = [ 2, [ a, b ] ];
+	[ a, b ] = 3 > 4 ? [ b, 6 ] : [ 7, 8 ];
+
+	t1 = [ a, b ];
+	t1 = t2 = [ a, b ];
+	[ a, b ] = [ c, d ] = d += c += 1;
+	[ a, b ] = [ c, d ] = t1;
+	[ a, b ] = t1 = [ c, d ];
+	[ a, b ] = t1 = t2 = [ c, d ];
+	t1 = [ 3, 4 ] = [ 3, 4 ] = t1 = [ 3, 4 ];
+
+	s.[ f1, i.[ f2, f3 ], f4 ] = [ 11, 12, 13, 3.14159 ];
+//	s.[ f1, i.[ f2, f3 ], f4 ] = h( 3, 3, (* int)0, "abc" );
+//	[ a, , b, ] = h( 3, 3, 0, "abc" );			/* ignore some results */
+	sp->[ f4, f1 ] = sp->[ f1, f4 ];
+	printf( "expecting 3, 17, 23, 4; got %g, %d, %d, %d\n", s.[ f4, i.[ f3, f2 ], f1 ] );
+	rc = 0;
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: tests/zombies/abstype.c
===================================================================
--- tests/zombies/abstype.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/abstype.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,43 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// abstype.c -- 
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Wed Sep 30 13:55:47 2020
+// Update Count     : 10
+//
+
+otype T | { T x( T ); };
+
+T y( T t ) {
+	T t_instance;
+	return x( t );
+}
+
+forall( otype T ) T *?( T * );
+int ?++( int * );
+int ?=?( int *, int );
+forall( dtype DT ) DT * ?=?( DT **, DT * );
+
+otype U = int *;
+
+U x( U u ) {
+	U u_instance = u;
+	(*u)++;
+	return u;
+}
+
+int *break_abstraction( U u ) {
+	return u;
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa abstype.c" //
+// End: //
Index: tests/zombies/constructors.c
===================================================================
--- tests/zombies/constructors.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/constructors.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,70 @@
+#include <fstream.hfa>
+#include <stdlib.hfa>
+
+int main() {
+    // initialize basic structure
+    struct S {
+	int i, j, k;
+    };
+    void ?{}( S & s ) { s.i = 1, s.k = 2; }		// default constructor
+    void ?{}( S & s, int i, int k ) { s.i = i, s.k = k; } // 2 parameter constructor
+    void ?{}( S & s, S c ) { /* s @= c */ s.[i,j,k] = c.[i,j,k]; } // copy constructor
+    void ^?{}( S & s ) { s.i = 0, s.k = 0; }		// default destructor
+    void ^?{}( S & s, int i ) { s.i = i, s.k = i; }	// 1 parameter destructor
+    {
+	S s1;						// default constructor
+	S s2 = { 3, 7 };				// 2 parameter constructor
+	S s3 @= { .k:3, .i:7 };				// 2 parameter C initialization
+	?{}( s3, 2, 5 );				// explicit 2 parameter constructor
+	^?{}( s1 );					// explicit call to default destructor
+    } // implicit call to default destructor for s2, explicit call s1, no call for s3
+    S s4 @= {};						// no default construction
+    (s4){ 2, 5 };					// explicit 2 parameter constructor
+    ^s4{ 3 };						// explicit call to 1 parameter destructor
+
+    // initialize pointer to a basic structure
+
+    void ?{}( S *& s ) { s = malloc(); s->i = 1, (*s).k = 2; } // default constructor
+    void ?{}( S *& s, int i, int k ) { s = malloc(); (*s).i = i, (*s).k = k; } // 2 parameter constructor
+    void ^?{}( S *& s ) { (*s).i = 0, (*s).k = 0; free( s ); &s = 0p; } // default destructor
+    {
+	S * ps1;					// default constructor
+	S * ps2 = { 3, 7 };				// 2 parameter constructor
+	sout | ps1 | ps2;
+
+	S * ps3 @= 0p;					// C initialization
+	S * ps4 @= { 3 };				// no default construction
+	sout | ps3 | ps4;
+
+	?{}( ps3, 2, 5 );				// explicit 2 parameter constructor
+	(ps4){ 2, 5 };					// explicit 2 parameter constructor
+	sout | ps3 | ps4;
+
+	^?{}( ps3 );					// explicit call to default destructor
+	^ps4{};						// explicit call to default destructor
+	sout | ps3 | ps4;
+    } // implicit call to default destructor for ps2 and ps1, checks ordering of explicit destructor calls
+
+    // initialize complex structure
+
+    struct T {
+	struct S s;
+    };
+
+    void ?{}( T & t ) {}	// default constructor => implicitly call constructor for field s
+    void ?{}( T & t, int i, int k ) { (t.s){ i, k }; } // 2 parameter constructor => explicitly call constructor for field s
+    void ?{}( T & t, S c ) { (t.s){ c }; }// 1 parameter constructor => explicitly call copy constructor for field s
+    void ^?{}( T & s ) {}	// destructor => implicitly call destructor for field s
+    void ^?{}( T & s, int i ) {}// destructor => implicitly call destructor for field s
+    {
+	S s;						// default constructor
+	T t1;						// default constructor
+	T t2 = { s };					// 1 parameter constructor
+	^?{}( t1, 3 );					// explicit call to default destructor => implicit call to t1.s's destructor
+	T t3;						// default constructor
+	T t4 @= { { 1, 3 } };				// C initialization
+	(t4){ 2, 5 };					// explicit 2 parameter constructor
+    } // implicit call to default destructor for t2 and implicit call for s;
+
+    T *pt = malloc(){ 3, 4 };	// common usage
+} // implicit call to default destructor for t3
Index: tests/zombies/forward.c
===================================================================
--- tests/zombies/forward.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/forward.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,29 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// forward.c -- 
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Wed May 27 18:11:57 2015
+// Update Count     : 2
+//
+
+forall(type T) lvalue T *?( T* );
+int ?=?( int*, int );
+
+struct q { int y; };
+struct q *x;
+
+void f() {
+	*x;
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa forward.c" //
+// End: //
Index: tests/zombies/gc_no_raii/.gitignore
===================================================================
--- tests/zombies/gc_no_raii/.gitignore	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/.gitignore	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,4 @@
+.tags
+.tags*
+gc-test
+build/
Index: tests/zombies/gc_no_raii/bug-repro/assert.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/assert.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/assert.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,16 @@
+struct gc_object_header{
+ int size;
+};
+
+struct gc_state;
+
+inline _Bool needs_collect(gc_state* state) {
+ return state->used_space > 0;
+}
+
+struct gc_object_header* gc_get_object_for_ref();
+
+inline gc_object_header* gc_get_object_ptr(void* ptr)
+{
+ return 0;
+}
Index: tests/zombies/gc_no_raii/bug-repro/blockers/explicit_cast.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/blockers/explicit_cast.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/blockers/explicit_cast.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,22 @@
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct gcpointer_t
+{
+	intptr_t ptr;
+	struct gcpointer_t* next;
+};
+
+forall(otype T)
+struct gcpointer
+{
+	gcpointer_t internal;
+};
+
+forall(otype T)
+static inline gcpointer(T) gcmalloc()
+{
+    gcpointer(T) test;
+    return test;
+}
Index: tests/zombies/gc_no_raii/bug-repro/blockers/file_scope.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/blockers/file_scope.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/blockers/file_scope.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,18 @@
+
+#include <stdbool.h>
+#include <stdlib.hfa>
+
+#define POOL_SIZE_EXP 24
+#define POOL_SIZE_BYTES 0x1 << POOL_SIZE_EXP
+#define POOL_PTR_MASK ~(POOL_SIZE_BYTES - 1)
+
+#define CARDS_SIZE_EXP 12
+#define CARDS_SIZE_BYTES 0x1 << CARDS_SIZE_EXP
+#define CARDS_OFFSET_MASK (~(CARDS_SIZE_BYTES - 1)) & (POOL_SIZE_BYTES - 1)
+#define CARDS_COUNT POOL_SIZE_BYTES / CARDS_SIZE_BYTES
+
+struct card_table_t
+{
+	size_t count;
+	void* cards_start[CARDS_COUNT];
+};
Index: tests/zombies/gc_no_raii/bug-repro/blockers/recursive_realloc.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/blockers/recursive_realloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/blockers/recursive_realloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,21 @@
+
+#include <stdbool.h>
+#include <stdlib.hfa>
+
+trait allocator_c(otype T, otype allocator_t)
+{
+	void realloc(allocator_t* const, size_t);
+};
+
+forall(otype T)
+struct heap_allocator
+{
+	T* storage;
+	size_t capacity;
+};
+
+forall(otype T)
+inline void realloc(heap_allocator(T) *const this, size_t size)
+{
+	this->storage = (T*)realloc((void*)this->storage, this->capacity);
+}
Index: tests/zombies/gc_no_raii/bug-repro/crash.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/crash.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/crash.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,6 @@
+
+void f()
+{
+ void* obj;
+ (void)obj;
+}
Index: tests/zombies/gc_no_raii/bug-repro/deref.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/deref.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/deref.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,19 @@
+    forall(otype T)
+    struct wrap
+    {
+        T val;
+    };
+
+    forall(otype T)
+    T *? (wrap(T) rhs)
+    {
+        return rhs.val;
+    }
+
+    int main(int argc, char const *argv[])
+    {
+        wrap(int) test;
+        test.val = 3;
+        int i = *test;
+        return 0;
+    }
Index: tests/zombies/gc_no_raii/bug-repro/field.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/field.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/field.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,130 @@
+extern "C" {
+#include <stdbool.h>
+#include <stdint.h>
+}
+
+#include <stdlib.hfa>
+
+//------------------------------------------------------------------------------
+//Declaration
+trait allocator_c(otype T, otype allocator_t)
+{
+	void ctor(allocator_t* const);
+	void dtor(allocator_t* const);
+	void realloc(allocator_t* const, size_t);
+	T* data(allocator_t* const);
+};
+
+forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
+struct vector
+{
+	allocator_t storage;
+	size_t size;
+};
+
+int global = 3;
+
+struct card_table_t
+{
+	size_t count;
+	void* cards_start[100];
+};
+
+static inline void ctor(card_table_t* const this)
+{
+	this->count = 0;
+}
+
+struct gc_memory_pool
+{
+	struct memory_pool* mirror;
+	struct memory_pool* next;
+
+	uint8_t type_code;
+
+	card_table_t* cards;
+
+	uint8_t* end_p;
+	uint8_t* free_p;
+	uint8_t start_p[1];
+};
+
+void ctor(	gc_memory_pool *const this,
+		size_t size,
+		gc_memory_pool* next,
+		gc_memory_pool* mirror,
+		uint8_t type
+	);
+
+void dtor(gc_memory_pool *const this);
+
+struct gc_pool_object_iterator
+{
+	struct gc_object_header* object;
+	#ifndef NDEBUG
+		intptr_t lower_limit;
+		intptr_t upper_limit;
+	#endif
+};
+
+void ctor(
+		gc_pool_object_iterator* const this,
+		void* start_object
+		#ifndef NDEBUG
+			, intptr_t pool_start
+			, intptr_t pool_end
+		#endif
+	);
+
+bool ?!=?(const gc_pool_object_iterator lhs, const gc_pool_object_iterator rhs);
+
+gc_pool_object_iterator begin(gc_memory_pool* const this);
+gc_pool_object_iterator end(gc_memory_pool* const);
+
+gc_pool_object_iterator* ++?(gc_pool_object_iterator* it);
+
+const void* *?(const gc_pool_object_iterator it);
+void* *?(gc_pool_object_iterator it);
+
+static inline bool gc_pool_is_from_space(const gc_memory_pool* pool)
+{
+	return false;
+}
+
+void gc_reset_pool(gc_memory_pool* const pool);
+
+static inline size_t gc_pool_size_used(const gc_memory_pool* pool)
+{
+	return pool->free_p - pool->start_p;
+}
+
+static inline size_t gc_pool_size_total(const gc_memory_pool* pool)
+{
+	return pool->end_p - pool->start_p;
+}
+
+static inline size_t gc_pool_size_left(const gc_memory_pool* pool)
+{
+	return pool->end_p - pool->free_p;
+}
+
+void* gc_pool_allocate(gc_memory_pool* const pool, size_t size, bool zero);
+
+gc_pool_object_iterator gc_pool_iterator_for(gc_memory_pool* const pool, void* member);
+
+void ctor(gc_memory_pool *const this, size_t size, gc_memory_pool* next, gc_memory_pool* mirror, uint8_t type)
+{
+	this->mirror = mirror;
+	this->next = next;
+	this->type_code = type;
+
+	this->cards = malloc();
+	ctor(this->cards);
+
+	this->end_p = ((uint8_t*)this) + size;
+	this->free_p = this->start_p;
+
+	// check(gc_pool_of(this) == this);
+	// check(this->cards);
+	// gc_reset_pool(this);
+}
Index: tests/zombies/gc_no_raii/bug-repro/find.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/find.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/find.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,10 @@
+
+void main()
+{
+	int a[3] = {1, 2, 3};
+	int* begin = a;
+	int *const end = begin + 3;
+
+	int* f = find(begin, &end, 2);
+
+}
Index: tests/zombies/gc_no_raii/bug-repro/inline.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/inline.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/inline.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,8 @@
+inline _Bool test(int t){
+	return t == 3;
+}
+
+int main()
+{
+	test(6);
+}
Index: tests/zombies/gc_no_raii/bug-repro/malloc.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/malloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/malloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,33 @@
+forall(otype T)
+struct wrapper
+{
+    T val;
+};
+
+forall(otype T)
+void ctor(wrapper(T)* this)
+{
+    this->val = 0;
+}
+
+forall(otype T)
+wrapper(T) gcmalloc()
+{
+    wrapper(T) w;
+    ctor(&w);
+    return w;
+}
+
+forall(otype T)
+wrapper(T)* ?=? (wrapper(T)* lhs, wrapper(T)* rhs)
+{
+    lhs->val = rhs->val;
+    return lhs;
+}
+
+int main(int argc, char *argv[])
+{
+    wrapper(int) test;
+    test = gcmalloc();
+    return 0;
+}
Index: tests/zombies/gc_no_raii/bug-repro/not_equal.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/not_equal.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/not_equal.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,10 @@
+
+struct pointer_t
+{
+	void* p;
+};
+
+_Bool operator_not_equal_p(pointer_t* lhs, pointer_t* rhs)
+{
+	return lhs->p == rhs->p;
+}
Index: tests/zombies/gc_no_raii/bug-repro/oddtype.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/oddtype.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/oddtype.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,13 @@
+forall(dtype T)
+struct wrap {
+	int i;
+};
+
+forall(otype T) void ?{}(wrap(T)* this) {}
+forall(otype T) void ?=?(wrap(T)* this) {}
+forall(otype T) void ^?{}(wrap(T)* this) {}
+
+struct List_t {
+	int val;
+	wrap(List_t) next;
+};
Index: tests/zombies/gc_no_raii/bug-repro/push_back.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/push_back.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/push_back.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,16 @@
+#include <stddef.h>
+#include <stdint.h>
+
+#include "push_back.h"
+
+typedef vector(intptr_t*, heap_allocator(intptr_t*)) worklist_t;
+
+void test()
+{
+	worklist_t w;
+	if(!empty(&w))
+	{
+		intptr_t zero = 0;
+		push_back(&w, &zero);
+	}
+}
Index: tests/zombies/gc_no_raii/bug-repro/push_back.h
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/push_back.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/push_back.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,72 @@
+//------------------------------------------------------------------------------
+//Declaration
+trait allocator_c(otype T, otype allocator_t) {
+	void ctor(allocator_t* const);
+	void dtor(allocator_t* const);
+	void realloc(allocator_t* const, size_t);
+	T* data(allocator_t* const);
+};
+
+forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
+struct vector
+{
+	allocator_t storage;
+	size_t size;
+};
+
+//------------------------------------------------------------------------------
+//Initialization
+forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
+void vector_ctor(vector(T, allocator_t) *const this);
+
+forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
+void dtor(vector(T, allocator_t) *const this);
+
+//------------------------------------------------------------------------------
+//Allocator
+forall(otype T)
+struct heap_allocator
+{
+	T* storage;
+	size_t capacity;
+};
+
+forall(otype T)
+void ctor(heap_allocator(T) *const this);
+
+forall(otype T)
+void dtor(heap_allocator(T) *const this);
+
+forall(otype T)
+void realloc(heap_allocator(T) *const this, size_t size);
+
+forall(otype T)
+inline T* data(heap_allocator(T) *const this)
+{
+	return this->storage;
+}
+
+//------------------------------------------------------------------------------
+//Capacity
+forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
+inline bool empty(vector(T, allocator_t) *const this)
+{
+	return this->size == 0;
+}
+
+forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
+inline bool size(vector(T, allocator_t) *const this)
+{
+	return this->size;
+}
+
+forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
+inline void reserve(vector(T, allocator_t) *const this, size_t size)
+{
+	realloc(&this->storage, this->size+1);
+}
+
+//------------------------------------------------------------------------------
+//Modifiers
+forall(otype T, otype allocator_t | allocator_c(T, allocator_t))
+void push_back(vector(T, allocator_t) *const this, T value);
Index: tests/zombies/gc_no_raii/bug-repro/realloc.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/realloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/realloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,13 @@
+void* realloc(void*, unsigned long int);
+
+forall(otype T)
+struct wrap
+{
+	T* val;
+};
+
+forall(otype T)
+static inline void realloc(wrap(T) *const this, unsigned long int size)
+{
+	this->val = (T*)realloc((void*)this->val, size);
+}
Index: tests/zombies/gc_no_raii/bug-repro/return.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/return.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/return.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,27 @@
+forall(otype T)
+struct wrapper
+{
+	T value;
+};
+
+forall(otype T)
+wrapper(T) create()
+{
+	wrapper(T) test;
+	return test;
+}
+
+forall(otype T)
+wrapper(T)* ?=?(wrapper(T)* lhs, wrapper(T)* rhs)
+{
+	lhs->value = rhs->value;
+	return lhs;
+}
+
+
+int main(int argc, char const *argv[])
+{
+	wrapper(int) test;
+	test = create();
+	return 0;
+}
Index: tests/zombies/gc_no_raii/bug-repro/return_template.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/return_template.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/return_template.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,17 @@
+forall(otype T)
+struct wrap
+{
+	T value;
+};
+
+forall(otype T) void ?{}(wrap(T)* this);
+forall(otype T) void ?{}(wrap(T)* this, wrap(T)* rhs);
+forall(otype T) void ^?{}(wrap(T)* this);
+forall(otype T) void ?=?(wrap(T)* this, wrap(T)* rhs);
+
+forall(otype T)
+wrap(T) test()
+{
+	wrap(T) tester;
+	return tester;
+}
Index: tests/zombies/gc_no_raii/bug-repro/slow_malloc.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/slow_malloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/slow_malloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,20 @@
+#include <stdlib.hfa>
+
+forall(otype T)
+struct heap_allocator
+{
+	T* storage;
+	size_t capacity;
+};
+
+struct card_table_t
+{
+	unsigned long int count;
+	void* cards_start[1000];
+};
+
+int main(int argc, char const *argv[])
+{
+	card_table_t* t = (card_table_t*)malloc(sizeof(card_table_t));
+	return 0;
+}
Index: tests/zombies/gc_no_raii/bug-repro/static_const_local.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/static_const_local.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/static_const_local.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,6 @@
+typedef unsigned long long size_t;
+
+int main(int argc, char const *argv[]) {
+	static const size_t GROWTH_RATE = 2;
+	return 0;
+}
Index: tests/zombies/gc_no_raii/bug-repro/test-assert.cpp
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/test-assert.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/test-assert.cpp	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,9 @@
+#include <cassert>
+#include "../src/tools/checks.h"
+
+int main(int argc, char* argv[])
+{
+	//check(false);
+	assert(false);
+	return 0;
+}
Index: tests/zombies/gc_no_raii/bug-repro/void_pointer.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/void_pointer.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/void_pointer.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,18 @@
+#include <stddef.h>
+#include <stdint.h>
+
+inline void* test(intptr_t address)
+{
+	return (void*)address;
+}
+
+//inline void* test2(void* address)
+//{
+//	return address & 0xFF;
+//}
+
+// inline int test()
+// {
+// 	void* d = 0;
+// 	return (int)d;
+// }
Index: tests/zombies/gc_no_raii/bug-repro/while.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/while.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/while.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,14 @@
+extern void* get_member();
+extern void* get_next();
+
+void main()
+{
+	void* member = get_member();
+	void* start_obj = get_next();
+
+	do
+	{
+		start_obj = (void*) ( ((unsigned long int)start_obj) + sizeof(void*) );
+	}
+	while(start_obj > member || !(start_obj) );
+}
Index: tests/zombies/gc_no_raii/bug-repro/zero.c
===================================================================
--- tests/zombies/gc_no_raii/bug-repro/zero.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/bug-repro/zero.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,25 @@
+forall(otype T)
+struct wrap
+{
+    T val;
+};
+
+forall(otype T)
+int ?==? (wrap(T) lhs, wrap(T) rhs)
+{
+    return 0;
+}
+
+/*
+struct wrap(int) 0;
+/*/
+forall(otype T)
+struct wrap(T) 0;
+//*/
+
+int main(int argc, char const *argv[])
+{
+    wrap(int) test;
+    if(test == 0) { return 1; }
+    return 0;
+}
Index: tests/zombies/gc_no_raii/pool-alloc/allocate-malign.c
===================================================================
--- tests/zombies/gc_no_raii/pool-alloc/allocate-malign.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/pool-alloc/allocate-malign.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,30 @@
+/*
+ * Allocation functions (posix_malign)
+ *
+ * Copyright (c) 2014, 2015 Gregor Richards
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+static void *allocPool(size_t size, int mustSucceed)
+{
+    void *ret;
+    if ((errno = posix_memalign(&ret, size, size))) {
+        if (mustSucceed) {
+            perror("posix_memalign");
+            abort();
+        }
+        return NULL;
+    }
+    return ret;
+}
Index: tests/zombies/gc_no_raii/pool-alloc/allocate-malloc.c
===================================================================
--- tests/zombies/gc_no_raii/pool-alloc/allocate-malloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/pool-alloc/allocate-malloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,53 @@
+/*
+ * Allocation functions (malloc)
+ *
+ * Copyright (c) 2014, 2015 Gregor Richards
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+static void *allocPool(int mustSucceed)
+{
+    static ggc_mutex_t poolLock = GGC_MUTEX_INITIALIZER;
+    static unsigned char *space = NULL, *spaceEnd = NULL;
+    void *ret;
+
+    /* do we already have some available space? */
+    ggc_mutex_lock_raw(&poolLock);
+    if (!space || space + GGGGC_POOL_BYTES > spaceEnd) {
+        ggc_size_t i;
+
+        /* since we can't pre-align, align by getting as much as we can manage */
+        for (i = 16; i >= 2; i /= 2) {
+            space = malloc(GGGGC_POOL_BYTES * i);
+            if (space) break;
+        }
+        if (!space) {
+            if (mustSucceed) {
+                perror("malloc");
+                abort();
+            }
+            return NULL;
+        }
+        spaceEnd = space + GGGGC_POOL_BYTES * i;
+
+        /* align it */
+        space = (unsigned char *) GGGGC_POOL_OF(space + GGGGC_POOL_BYTES - 1);
+    }
+
+    ret = (struct GGGGC_Pool *) space;
+    space += GGGGC_POOL_BYTES;
+    ggc_mutex_unlock(&poolLock);
+
+    return ret;
+}
Index: tests/zombies/gc_no_raii/pool-alloc/allocate-mmap.c
===================================================================
--- tests/zombies/gc_no_raii/pool-alloc/allocate-mmap.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/pool-alloc/allocate-mmap.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,44 @@
+/*
+ * Allocation functions (mmap)
+ *
+ * Copyright (c) 2014, 2015 Gregor Richards
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+static void *allocPool(int mustSucceed)
+{
+    unsigned char *space, *aspace;
+    struct GGGGC_Pool *ret;
+
+    /* allocate enough space that we can align it later */
+    space = mmap(NULL, GGGGC_POOL_BYTES*2, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+    if (space == NULL) {
+        if (mustSucceed) {
+            perror("mmap");
+            abort();
+        }
+        return NULL;
+    }
+
+    /* align it */
+    ret = GGGGC_POOL_OF(space + GGGGC_POOL_BYTES - 1);
+    aspace = (unsigned char *) ret;
+
+    /* free unused space */
+    if (aspace > space)
+        munmap(space, aspace - space);
+    munmap(aspace + GGGGC_POOL_BYTES, space + GGGGC_POOL_BYTES - aspace);
+
+    return ret;
+}
Index: tests/zombies/gc_no_raii/pool-alloc/allocate-win-valloc.c
===================================================================
--- tests/zombies/gc_no_raii/pool-alloc/allocate-win-valloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/pool-alloc/allocate-win-valloc.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,45 @@
+/*
+ * Allocation functions (mmap)
+ *
+ * Copyright (c) 2014, 2015 Gregor Richards
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+static void *allocPool(int mustSucceed)
+{
+    unsigned char *space, *aspace;
+    struct GGGGC_Pool *ret;
+
+    /* allocate enough space that we can align it later */
+    space = (unsigned char *)
+        VirtualAlloc(NULL, GGGGC_POOL_BYTES*2, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
+    if (space == NULL) {
+        if (mustSucceed) {
+            perror("mmap");
+            abort();
+        }
+        return NULL;
+    }
+
+    /* align it */
+    ret = GGGGC_POOL_OF(space + GGGGC_POOL_BYTES - 1);
+    aspace = (unsigned char *) ret;
+
+    /* free unused space */
+    if (aspace > space)
+        VirtualFree(space, aspace - space, MEM_RELEASE);
+    VirtualFree(aspace + GGGGC_POOL_BYTES, space + GGGGC_POOL_BYTES - aspace, MEM_RELEASE);
+
+    return ret;
+}
Index: tests/zombies/gc_no_raii/premake4.lua
===================================================================
--- tests/zombies/gc_no_raii/premake4.lua	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/premake4.lua	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,82 @@
+#!lua
+
+-- Additional Linux libs: "X11", "Xxf86vm", "Xi", "Xrandr", "stdc++"
+
+includeDirList = {
+	"src/",
+	"../"
+}
+
+libDirectories = {
+
+}
+
+
+if os.get() == "linux" then
+    linkLibs = {
+
+    }
+end
+
+-- Build Options:
+buildOptions = {
+      "-g",
+	"-DTEST_FILE=${test}",
+      "\n  test = gctest",
+	"\n  CC = cfa\n  CXX = cfa", }
+
+solution "GC-no-RAII"
+	configurations  { "debug", "release",
+				"cproc-debug", "cproc-release",
+				"cfa-debug", "cfa-release" }
+
+	project "gc-test"
+		kind "ConsoleApp"
+		language "C"
+		location "build"
+		objdir "build"
+		targetdir "."
+		buildoptions (buildOptions)
+		defines {	"bool=_Bool",
+				"\"true=((_Bool)(const signed int)1)\"",
+				"\"false=((_Bool)(const signed int)0)\"",
+				"_GNU_SOURCE",
+				"__cforall"
+			}
+		libdirs (libDirectories)
+		links (linkLibs)
+		linkoptions (linkOptionList)
+		includedirs (includeDirList)
+		files { "src/**.c", "containers/**.c" }
+
+	configuration "debug"
+		defines { "DEBUG" }
+		flags { "Symbols" }
+
+	configuration "release"
+		defines { "NDEBUG" }
+		flags { "Optimize" }
+
+	configuration "cproc-debug"
+		buildoptions ({"-E"})
+		linkoptions ({"-E"})
+	      defines { "DEBUG" }
+	      flags { "Symbols" }
+
+	configuration "cproc-release"
+		buildoptions ({"-E"})
+		linkoptions ({"-E"})
+	      defines { "DEBUG" }
+	      flags { "Symbols" }
+
+	configuration "cfa-debug"
+		linkoptions ({"-E"})
+		files { "build/cproc-debug/*.o" }
+	      defines { "DEBUG" }
+	      flags { "Symbols" }
+
+	configuration "cfa-release"
+		linkoptions ({"-E"})
+		files { "build/cproc-debug/*.o" }
+	      defines { "DEBUG" }
+	      flags { "Symbols" }
Index: tests/zombies/gc_no_raii/src/allocate-pool.c
===================================================================
--- tests/zombies/gc_no_raii/src/allocate-pool.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/allocate-pool.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,64 @@
+#define _BSD_SOURCE /* for MAP_ANON */
+#define _DARWIN_C_SOURCE /* for MAP_ANON on OS X */
+
+#ifdef __cforall
+extern "C"{
+#else
+#error missing cfa define
+#endif
+
+/* for standards info */
+#if defined(unix) || defined(__unix) || defined(__unix__) || \
+    (defined(__APPLE__) && defined(__MACH__))
+#include <unistd.h>
+#endif
+
+#if defined(_WIN32)
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+
+#if _POSIX_VERSION
+#include <sys/mman.h>
+#endif
+
+/* figure out which allocator to use */
+#if defined(GGGGC_USE_MALLOC)
+#define GGGGC_ALLOCATOR_MALLOC 1
+#include "../pool-alloc/allocate-malloc.c"
+
+#elif _POSIX_ADVISORY_INFO >= 200112L
+#define GGGGC_ALLOCATOR_POSIX_MEMALIGN 1
+#include "../pool-alloc/allocate-malign.c"
+
+#elif defined(MAP_ANON)
+#define GGGGC_ALLOCATOR_MMAP 1
+#include "../pool-alloc/allocate-mmap.c"
+
+#elif defined(_WIN32)
+#define GGGGC_ALLOCATOR_VIRTUALALLOC 1
+#include "../pool-alloc/allocate-win-valloc.c"
+
+#else
+#warning GGGGC: No allocator available other than malloc!
+#define GGGGC_ALLOCATOR_MALLOC 1
+#include "../pool-alloc/allocate-malloc.c"
+
+#endif
+
+void* pal_allocPool(size_t size, int mustSucceed)
+{
+      return allocPool(size, mustSucceed);
+}
+
+#ifdef __cforall
+}
+#endif
Index: tests/zombies/gc_no_raii/src/allocate-pool.h
===================================================================
--- tests/zombies/gc_no_raii/src/allocate-pool.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/allocate-pool.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,14 @@
+#ifndef _GGGGC_ALlOCATE_POOL_H_
+#define _GGGGC_ALlOCATE_POOL_H_
+
+#ifdef __cforall
+extern "C" {
+#endif
+
+void* pal_allocPool(size_t size, int mustSucceed);
+
+#ifdef __cforall
+}
+#endif
+
+#endif
Index: tests/zombies/gc_no_raii/src/gc.h
===================================================================
--- tests/zombies/gc_no_raii/src/gc.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/gc.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "gcpointers.h"
+#include "internal/collector.h"
+
+// forall(otype T)
+// static inline gcpointer(T) gcmalloc()
+// {
+//     gcpointer(T) ptr = { gc_allocate(sizeof(T)) };
+//     ptr{};
+//     gc_conditional_collect();
+//     return ptr;
+// }
+
+forall(otype T)
+static inline void gcmalloc(gcpointer(T)* ptr)
+{
+	ptr { gc_allocate(sizeof(T)) };
+	get(ptr) {};
+      gc_conditional_collect();
+}
Index: tests/zombies/gc_no_raii/src/gcpointers.c
===================================================================
--- tests/zombies/gc_no_raii/src/gcpointers.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/gcpointers.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,147 @@
+#include "gcpointers.h"
+
+// #include "gc.h"
+#include "internal/collector.h"
+#include "internal/object_header.h"
+#include "internal/state.h"
+
+void register_ptr(gcpointer_t* this)
+{
+	if(gcpointer_null(this)) return;
+
+	if(gc_is_managed(this))
+	{
+		gc_object_header* obj = gc_get_object_for_ref(gc_get_state(), (void*)this);
+		check(obj);
+		check(is_valid(obj));
+		check(gc_is_managed(this) == gc_is_managed(obj->type_chain) || !obj->type_chain);
+		this->next = obj->type_chain;
+		obj->type_chain = this;
+		check(is_valid(obj));
+	}
+	else
+	{
+		gc_object_header* obj = gc_get_object_ptr((void*)this->ptr);
+		check(obj);
+		check(is_valid(obj));
+		check(!obj->root_chain || this->ptr == obj->root_chain->ptr);
+		check(!obj->root_chain || gc_is_managed(this) == gc_is_managed(obj->root_chain));
+		this->next = obj->root_chain;
+		obj->root_chain = this;
+		check(is_valid(obj));
+	}
+}
+
+void unregister_ptr(gcpointer_t* this)
+{
+	if(gcpointer_null(this)) return;
+
+	gcpointer_t** prev_next_ptr = gc_find_previous_ref(this);
+	check((*prev_next_ptr) == this);
+
+	(*prev_next_ptr) = this->next;
+}
+
+void ?{}(gcpointer_t* this)
+{
+	this->ptr = (intptr_t)NULL;
+	this->next = NULL;
+}
+
+void ?{}(gcpointer_t* this, void* address)
+{
+	this->ptr = (intptr_t)address;
+	this->next = NULL;
+
+	register_ptr(this);
+}
+
+void ?{}(gcpointer_t* this, gcpointer_t other)
+{
+	this->ptr = other.ptr;
+	this->next = NULL;
+
+	register_ptr(this);
+}
+
+void ^?{}(gcpointer_t* this)
+{
+	unregister_ptr(this);
+}
+
+gcpointer_t ?=?(gcpointer_t* this, gcpointer_t rhs)
+{
+	unregister_ptr(this);
+	this->ptr = rhs.ptr;
+	register_ptr(this);
+
+	return *this;
+}
+
+//Logical operators
+bool gcpointer_equal(const gcpointer_t* this, const gcpointer_t* rhs)
+{
+	return this->ptr == rhs->ptr;
+}
+
+bool gcpointer_not_equal(const gcpointer_t* this, const gcpointer_t* rhs)
+{
+	return this->ptr != rhs->ptr;
+}
+
+bool gcpointer_null(const gcpointer_t* this)
+{
+	return this->ptr == (intptr_t)NULL;
+}
+
+#ifndef NDEBUG
+	bool is_valid(const gcpointer_t* this) {
+		if(gcpointer_null(this)) return true;
+
+		gc_object_header* obj = gc_get_object_ptr((void*)this->ptr);
+		check(obj);
+		check(is_valid(obj));
+		check(!obj->root_chain || this->ptr == obj->root_chain->ptr);
+
+		if( !gc_is_managed(this))
+		{
+			check( !(this->next) || this->ptr == this->next->ptr );
+		}
+
+		return true;
+	}
+#endif
+
+forall(otype T) void ?{}(gcpointer(T)* this) {
+	(&this->internal) {};
+}
+
+forall(otype T) void ?{}(gcpointer(T)* this, void* address) {
+	(&this->internal) { address };
+}
+
+forall(otype T) void ?{}(gcpointer(T)* this, gcpointer(T) other) {
+	(&this->internal) { other.internal };
+}
+
+forall(otype T) void ^?{}(gcpointer(T)* this) {
+	^?{}(&this->internal);
+}
+
+forall(otype T) gcpointer(T) ?=?(gcpointer(T)* this, gcpointer(T) rhs) {
+	this->internal = rhs.internal;
+	return *this;
+}
+//
+// forall(otype T) T *?(gcpointer(T) this);
+
+forall(otype T) T* get(gcpointer(T)* this) {
+	return (T*)this->internal.ptr;
+}
+//
+// //Logical operators
+forall(otype T) int ?!=?(gcpointer(T) this, int zero) {
+	return this.internal.ptr != 0;
+}
+// forall(otype T) int ?!=?(gcpointer(T) this, gcpointer(T) rhs);
+// forall(otype T) int ?==?(gcpointer(T) this, gcpointer(T) rhs);
Index: tests/zombies/gc_no_raii/src/gcpointers.h
===================================================================
--- tests/zombies/gc_no_raii/src/gcpointers.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/gcpointers.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+forall(dtype T)
+struct gcpointer;
+
+struct gcpointer_t
+{
+	intptr_t ptr;
+	struct gcpointer_t* next;
+};
+
+void ?{}(gcpointer_t* this);
+void ?{}(gcpointer_t* this, void* address);
+void ?{}(gcpointer_t* this, gcpointer_t other);
+void ^?{}(gcpointer_t* this);
+gcpointer_t ?=?(gcpointer_t* this, gcpointer_t rhs);
+
+//Logical operators
+bool gcpointer_equal(gcpointer_t* this, gcpointer_t* rhs);
+bool gcpointer_not_equal(gcpointer_t* this, gcpointer_t* rhs);
+bool gcpointer_null(const gcpointer_t* this);
+
+
+#ifndef NDEBUG
+	bool is_valid(const gcpointer_t* this);
+#endif
+
+forall(dtype T)
+struct gcpointer
+{
+	gcpointer_t internal;
+};
+
+//
+forall(otype T) void ?{}(gcpointer(T)* this);
+forall(otype T) void ?{}(gcpointer(T)* this, void* address);
+forall(otype T) void ?{}(gcpointer(T)* this, gcpointer(T) other);
+forall(otype T) void ^?{}(gcpointer(T)* this);
+forall(otype T) gcpointer(T) ?=?(gcpointer(T)* this, gcpointer(T) rhs);
+
+
+// forall(otype T) T *?(gcpointer(T) this);
+forall(otype T) T* get(gcpointer(T)* this);
+
+//Logical operators
+forall(otype T) int ?!=?(gcpointer(T) this, int zero);
+forall(otype T) int ?!=?(gcpointer(T) this, gcpointer(T) rhs);
+forall(otype T) int ?==?(gcpointer(T) this, gcpointer(T) rhs);
Index: tests/zombies/gc_no_raii/src/internal/card_table.h
===================================================================
--- tests/zombies/gc_no_raii/src/internal/card_table.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/internal/card_table.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "globals.h"
+#include "tools.h"
+
+static inline size_t card_of(void* address)
+{
+	size_t card = ( ((intptr_t)address) & CARDS_OFFSET_MASK ) >> CARDS_SIZE_EXP;
+	checkf(card < CARDS_COUNT, (const char*)"%lu %lu = (%lx & %lx) >> %lu\n", (size_t)CARDS_COUNT, (size_t)card, (size_t)address, (size_t)CARDS_OFFSET_MASK, (size_t)CARDS_SIZE_EXP);
+	check(card < CARDS_COUNT);
+	return card;
+}
+
+struct card_table_t
+{
+	size_t count;
+	void* cards_start[CARDS_COUNT];
+};
+
+static inline void ?{}(card_table_t* this)
+{
+	this->count = 0;
+}
+
+static inline void ^?{}(card_table_t* this)
+{
+
+}
+
+static inline void* object_at(card_table_t* const this, size_t card_number)
+{
+	return card_number < this->count ? this->cards_start[card_number] : NULL;
+}
+
+static inline void register_object(card_table_t* const this, void* object)
+{
+	size_t card = card_of(object);
+	if(card < this->count)
+	{
+		intptr_t card_obj_add = (intptr_t)object_at(this, card);
+		intptr_t obj_add = (intptr_t)object;
+		if(card_obj_add > obj_add)
+		{
+			this->cards_start[card] = object;
+		}
+	}
+	else
+	{
+		check(card == this->count);
+		this->count++;
+		this->cards_start[card] = object;
+	}
+}
+
+static inline void reset(card_table_t* const this)
+{
+	for(size_t i = 0; i < this->count; i++)
+	{
+		this->cards_start[i] = NULL;
+	}
+	this->count = 0;
+}
Index: tests/zombies/gc_no_raii/src/internal/collector.c
===================================================================
--- tests/zombies/gc_no_raii/src/internal/collector.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/internal/collector.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,152 @@
+#include "collector.h"
+
+#ifdef __cforall
+extern "C" {
+#endif
+#include <string.h>
+#ifdef __cforall
+}
+#endif
+
+#include <fstream.hfa>
+
+#include "state.h"
+#include "gcpointers.h"
+#include "memory_pool.h"
+
+void* gc_finish_alloc_block(void* block, size_t actual_size, size_t target_size);
+void gc_assign_reference(void** ref, gc_object_header* ptr);
+
+gcpointer_t** gc_find_previous_ref(gcpointer_t* target)
+{
+	if(!(target)) return NULL;
+
+	bool managed = gc_is_managed(target);
+	gc_object_header* obj = gc_get_object_ptr((void*)target->ptr);
+
+	check(is_valid(obj));
+
+	gcpointer_t** prev_next_ptr = managed ? &obj->type_chain : &obj->root_chain;
+	while((*prev_next_ptr) && (*prev_next_ptr) != target)
+	{
+		prev_next_ptr = &(*prev_next_ptr)->next;
+	}
+
+	return prev_next_ptr;
+}
+
+void* gc_allocate(size_t target_size)
+{
+	// sout | "Allocating " | target_size | " bytes";
+
+	size_t size = gc_compute_size(target_size + sizeof(gc_object_header));
+
+	// sout | "Object header size: " | sizeof(gc_object_header) | " bytes";
+	// sout | "Actual allocation size: " | size | " bytes";
+
+	check(size < POOL_SIZE_BYTES);
+
+	void* block = NULL;
+	gc_state* gc = gc_get_state();
+
+	if((intptr_t)(block = gc_try_allocate(gc, size))) return gc_finish_alloc_block(block, size, target_size);
+
+	gc_collect(gc);
+
+	if((intptr_t)(block = gc_try_allocate(gc, size))) return gc_finish_alloc_block(block, size, target_size);
+
+	gc_allocate_pool(gc);
+
+	if((intptr_t)(block = gc_try_allocate(gc, size))) return gc_finish_alloc_block(block, size, target_size);
+
+	checkf( (int) 0, "ERROR: allocation in new pool failed");
+
+	return NULL;
+}
+
+void* gc_finish_alloc_block(void* block, size_t actual_size, size_t target_size)
+{
+	intptr_t data = ((intptr_t)block) + sizeof(gc_object_header);
+	void* header = block;
+
+	check( data > ((intptr_t)block));
+	check( data >= ((intptr_t)header));
+	check( gc_is_aligned( (void*)data ) );
+	check( data + target_size <= ((intptr_t)block) + actual_size );
+
+	gc_object_header* obj = placement_ctor(header, actual_size);
+
+	(void)obj; //remove unsused warning since this is for debug
+	check(obj == gc_get_object_ptr( (void*)data ));
+
+	gc_register_allocation(gc_get_state(), actual_size);
+
+	return (void*)data;
+}
+
+void gc_process_reference(void** ref, worklist_t* worklist)
+{
+	check(!gc_is_in_heap(gc_get_state(), ref));
+
+	gc_object_header* ptr = gc_get_object_ptr(*ref);
+	if(ptr)
+	{
+		if(!ptr->is_forwarded)
+		{
+			gc_copy_object(ptr);
+
+			gc_scan_object(ptr->forward, worklist);
+
+			gc_assign_reference(ref, ptr->forward);
+		}
+		else
+		{
+			//duplication to help debug
+			gc_assign_reference(ref, ptr->forward);
+		}
+	}
+}
+
+void gc_assign_reference(void** ref, gc_object_header* ptr)
+{
+	void* address = (void*)(((intptr_t)ptr) + sizeof(gc_object_header));
+
+	gc_write_aligned_ptr(ref, address);
+}
+
+gc_object_header* gc_copy_object(gc_object_header* ptr)
+{
+	check(!ptr->forward);
+	check(!ptr->is_forwarded);
+	check(gc_pool_is_from_space(gc_pool_of(ptr)));
+
+	gc_memory_pool* pool = gc_pool_of(ptr)->mirror;
+
+	void* new_block = gc_pool_allocate(pool, ptr->size, true);
+
+	memcpy(new_block, ptr, ptr->size);
+
+	gc_object_header* fwd_ptr = placement_copy_ctor(new_block, ptr);
+
+	ptr->forward = fwd_ptr;
+	ptr->is_forwarded = true;
+
+	return fwd_ptr;
+}
+
+void gc_scan_object(gc_object_header* object, worklist_t* worklist)
+{
+	gcpointer_t* field = object->type_chain;
+	while(field)
+	{
+		check(((intptr_t)field) > ((intptr_t)object));
+		check(((intptr_t)field) < ((intptr_t)((intptr_t)object) + object->size));
+
+		check(gc_is_in_to_space(gc_get_state(), &field->ptr));
+
+		intptr_t* ref = &field->ptr;
+		push_back(worklist, ref);
+
+		field = field->next;
+	}
+}
Index: tests/zombies/gc_no_raii/src/internal/collector.h
===================================================================
--- tests/zombies/gc_no_raii/src/internal/collector.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/internal/collector.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <stdlib.h>
+
+#include "tools.h"
+//
+#include "gcpointers.h"
+#include "state.h"
+#include "internal/gc_tools.h"
+#include "internal/globals.h"
+#include "internal/object_header.h"
+#include "internal/state.h"
+#include "tools/worklist.h"
+
+static inline bool gc_is_managed(void* address)
+{
+	return gc_is_in_heap(gc_get_state(), address);
+}
+
+static inline gc_object_header* gc_get_object_ptr(void* ptr)
+{
+	void* clean = gc_get_aligned_ptr(ptr);
+	return ((gc_object_header*)clean) - 1;
+}
+
+static inline struct gc_memory_pool* gc_pool_of(void* address)
+{
+	return (struct gc_memory_pool*)(((intptr_t)address) & POOL_PTR_MASK);
+}
+
+static inline void gc_conditional_collect()
+{
+	if(gc_needs_collect(gc_get_state()))
+	{
+		gc_collect(gc_get_state());
+	}
+}
+
+gcpointer_t** gc_find_previous_ref(gcpointer_t* target);
+
+void* gc_allocate(size_t size);
+
+void gc_process_reference(void** ref, worklist_t* worklist);
+
+struct gc_object_header* gc_copy_object(struct gc_object_header* ptr);
+
+void gc_scan_object(struct gc_object_header* object, worklist_t* worklist);
Index: tests/zombies/gc_no_raii/src/internal/gc_tools.h
===================================================================
--- tests/zombies/gc_no_raii/src/internal/gc_tools.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/internal/gc_tools.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "tools.h"
+#include "globals.h"
+
+static inline bool gc_is_aligned(void* address)
+{
+	return (((intptr_t)address) & (~OBJECT_PTR_MASK)) == 0;
+}
+
+static inline void* gc_get_aligned_ptr(void* address)
+{
+	return (void*)(((intptr_t)address) & (OBJECT_PTR_MASK));
+}
+
+static inline void* gc_write_aligned_ptr(void** reference, void* address)
+{
+	size_t ref_last_bits = ((intptr_t)*reference) & (~OBJECT_PTR_MASK);
+
+      size_t new_val = ((intptr_t)address) & OBJECT_PTR_MASK;
+
+      (*reference) = (void*)(new_val | ref_last_bits);
+
+	return *reference;
+}
+
+static inline size_t gc_compute_size(size_t size)
+{
+	size_t word_size = ((size - 1) / OBJECT_ALLIGNMENT) + 1;
+	size_t ret = word_size * OBJECT_ALLIGNMENT;
+
+	check(ret >= size);
+	check((ret % OBJECT_ALLIGNMENT) == 0);
+	check( ((size % OBJECT_ALLIGNMENT) != 0) || (ret == size) );
+
+	return ret;
+}
Index: tests/zombies/gc_no_raii/src/internal/globals.h
===================================================================
--- tests/zombies/gc_no_raii/src/internal/globals.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/internal/globals.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,30 @@
+#pragma once
+
+// #include <stddef.h>
+// #include <stdint.h>
+//
+// static const size_t POOL_SIZE_EXP = 24;
+// static const size_t POOL_SIZE_BYTES = 0x1 << POOL_SIZE_EXP;
+// static const size_t POOL_PTR_MASK = ~(POOL_SIZE_BYTES - 1);
+//
+// static const size_t CARDS_SIZE_EXP = 12;
+// static const size_t CARDS_SIZE_BYTES = 0x1 << CARDS_SIZE_EXP;
+// static const size_t CARDS_OFFSET_MASK = (~(CARDS_SIZE_BYTES - 1)) & (POOL_SIZE_BYTES - 1);
+// static const size_t CARDS_COUNT = POOL_SIZE_BYTES / CARDS_SIZE_BYTES;
+//
+// static const size_t OBJECT_ALLIGNMENT = sizeof(size_t);
+// static const size_t OBJECT_PTR_MASK = ~(OBJECT_ALLIGNMENT - 1);
+
+enum {
+	POOL_SIZE_EXP 	= 24,
+	POOL_SIZE_BYTES 	= 0x1 << POOL_SIZE_EXP,
+	POOL_PTR_MASK 	= ~(POOL_SIZE_BYTES - 1),
+
+	CARDS_SIZE_EXP 	= 12,
+	CARDS_SIZE_BYTES 	= 0x1 << CARDS_SIZE_EXP,
+	CARDS_OFFSET_MASK	= (~(CARDS_SIZE_BYTES - 1)) & (POOL_SIZE_BYTES - 1),
+	CARDS_COUNT 	= POOL_SIZE_BYTES / CARDS_SIZE_BYTES,
+
+	OBJECT_ALLIGNMENT	= sizeof(size_t),
+	OBJECT_PTR_MASK 	= ~(OBJECT_ALLIGNMENT - 1),
+};
Index: tests/zombies/gc_no_raii/src/internal/memory_pool.c
===================================================================
--- tests/zombies/gc_no_raii/src/internal/memory_pool.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/internal/memory_pool.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,160 @@
+#include "memory_pool.h"
+
+extern "C" {
+	#include <stdlib.h>
+	#include <string.h>
+}
+
+#include "collector.h"
+#include "object_header.h"
+
+const size_t gc_pool_header_size = (size_t)(  &(((gc_memory_pool*)NULL)->start_p) );
+
+void ?{}(gc_memory_pool* this, size_t size, gc_memory_pool* next, gc_memory_pool* mirror, uint8_t type)
+{
+	this->mirror = mirror;
+	this->next = next;
+	this->type_code = type;
+
+	this->cards = ( (card_table_t*)malloc(sizeof(card_table_t)) ){};
+
+	this->end_p = ((uint8_t*)this) + size;
+	this->free_p = this->start_p;
+
+	check( gc_pool_of( (void*)this ) == this);
+	check(this->cards);
+	gc_reset_pool(this);
+}
+
+void ^?{}(gc_memory_pool* this)
+{
+	^(&this->cards){};
+	free(this->cards);
+}
+
+void gc_reset_pool(gc_memory_pool *const this)
+{
+	this->free_p = this->start_p;
+	#ifndef NDEBUG
+		memset(this->start_p, 0xCD, gc_pool_size_total(this));
+	#endif
+
+	check(this->cards);
+	reset(this->cards);
+
+	check(gc_pool_size_left(this) == gc_pool_size_total(this));
+}
+
+void* gc_pool_allocate(gc_memory_pool *const this, size_t size, bool zero)
+{
+	void* ret = this->free_p;
+
+	this->free_p += size;
+
+	if (zero) memset(ret, 0x00, size);
+
+	check(this->cards);
+	register_object(this->cards, ret);
+
+	return ret;
+}
+
+void ?{}(	gc_pool_object_iterator* this,
+		struct gc_object_header* start_object
+		#ifndef NDEBUG
+			, intptr_t pool_start
+			, intptr_t pool_end
+		#endif
+	)
+{
+	this->object = start_object;
+	#ifndef NDEBUG
+		this->lower_limit = pool_start;
+		this->upper_limit = pool_end;
+	#endif
+
+	check( ((intptr_t)start_object) >= this->lower_limit );
+	check( ((intptr_t)start_object) <= this->upper_limit );
+}
+
+void ^?{}( gc_pool_object_iterator* this ) {}
+
+gc_pool_object_iterator gc_pool_iterator_for(gc_memory_pool* const this, void* member)
+{
+	size_t card = card_of(member);
+	intptr_t member_add = (intptr_t)member;
+	intptr_t start_obj;
+
+	do
+	{
+		check(card < CARDS_COUNT);
+		start_obj = (intptr_t)object_at(this->cards, card);
+		check(card != 0 || start_obj);
+		card--;
+	}
+	while(start_obj > member_add || !(start_obj));
+
+	check( start_obj );
+
+	struct gc_object_header* start_obj_typed = (struct gc_object_header*)start_obj;
+
+	return (gc_pool_object_iterator) {
+		start_obj_typed
+		#ifndef NDEBUG
+			, (intptr_t)this->start_p
+			, (intptr_t)this->free_p
+		#endif
+	};
+}
+
+bool ?!=?(const gc_pool_object_iterator lhs, const gc_pool_object_iterator rhs)
+{
+	return lhs.object != rhs.object;
+}
+
+gc_pool_object_iterator begin(gc_memory_pool* const this)
+{
+	struct gc_object_header* start_obj = (struct gc_object_header*)this->start_p;
+	return (gc_pool_object_iterator) {
+		start_obj
+		#ifndef NDEBUG
+			, (intptr_t)this->start_p
+			, (intptr_t)this->free_p
+		#endif
+	};
+}
+
+gc_pool_object_iterator end(gc_memory_pool* const this)
+{
+	return (gc_pool_object_iterator) {
+		(struct gc_object_header*)this->free_p
+		#ifndef NDEBUG
+			, (intptr_t)this->start_p
+			, (intptr_t)this->free_p
+		#endif
+	};
+}
+
+gc_pool_object_iterator* ++?(gc_pool_object_iterator* it)
+{
+	struct gc_object_header* object = it->object;
+	intptr_t next_ptr = ((intptr_t)object) + object->size;
+	check(next_ptr > it->lower_limit);
+	check(next_ptr <= it->upper_limit);
+
+	struct gc_object_header* next_obj = ((struct gc_object_header*)next_ptr);
+	check(next_ptr == it->upper_limit || is_valid(next_obj));
+
+	it->object = next_obj;
+	return it;
+}
+
+const struct gc_object_header* *?(const gc_pool_object_iterator it)
+{
+	return it.object;
+}
+
+struct gc_object_header* *?(gc_pool_object_iterator it)
+{
+	return it.object;
+}
Index: tests/zombies/gc_no_raii/src/internal/memory_pool.h
===================================================================
--- tests/zombies/gc_no_raii/src/internal/memory_pool.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/internal/memory_pool.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,92 @@
+#pragma once
+
+extern "C" {
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+}
+
+#include "tools.h"
+
+#include "card_table.h"
+#include "globals.h"
+#include "state.h"
+
+struct gc_memory_pool
+{
+	struct memory_pool* mirror;
+	struct memory_pool* next;
+
+	uint8_t type_code;
+
+	card_table_t* cards;
+
+	uint8_t* end_p;
+	uint8_t* free_p;
+	uint8_t start_p[1];
+};
+
+void ?{}(	gc_memory_pool* this,
+		size_t size,
+		gc_memory_pool* next,
+		gc_memory_pool* mirror,
+		uint8_t type
+	);
+
+void ^?{}(gc_memory_pool* this);
+
+struct gc_pool_object_iterator
+{
+	struct gc_object_header* object;
+	#ifndef NDEBUG
+		intptr_t lower_limit;
+		intptr_t upper_limit;
+	#endif
+};
+
+
+void ?{}( 	gc_pool_object_iterator* this,
+		struct gc_object_header* start_object
+		#ifndef NDEBUG
+			, intptr_t pool_start
+			, intptr_t pool_end
+		#endif
+	);
+
+void ^?{}( gc_pool_object_iterator* this );
+
+bool ?!=?(const gc_pool_object_iterator lhs, const gc_pool_object_iterator rhs);
+
+gc_pool_object_iterator begin(gc_memory_pool* const this);
+gc_pool_object_iterator end(gc_memory_pool* const);
+
+gc_pool_object_iterator* ++?(gc_pool_object_iterator* it);
+
+const struct gc_object_header* *?(const gc_pool_object_iterator it);
+struct gc_object_header* *?(gc_pool_object_iterator it);
+
+static inline bool gc_pool_is_from_space(const gc_memory_pool* pool)
+{
+	return gc_from_space_code(gc_get_state()) == pool->type_code;
+}
+
+void gc_reset_pool(gc_memory_pool* const pool);
+
+static inline size_t gc_pool_size_used(const gc_memory_pool* pool)
+{
+	return pool->free_p - pool->start_p;
+}
+
+static inline size_t gc_pool_size_total(const gc_memory_pool* pool)
+{
+	return pool->end_p - pool->start_p;
+}
+
+static inline size_t gc_pool_size_left(const gc_memory_pool* pool)
+{
+	return pool->end_p - pool->free_p;
+}
+
+void* gc_pool_allocate(gc_memory_pool* const pool, size_t size, bool zero);
+
+gc_pool_object_iterator gc_pool_iterator_for(gc_memory_pool* const pool, void* member);
Index: tests/zombies/gc_no_raii/src/internal/object_header.c
===================================================================
--- tests/zombies/gc_no_raii/src/internal/object_header.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/internal/object_header.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,110 @@
+#include "object_header.h"
+
+#include <stdint.h>
+
+#include "collector.h"
+#include "globals.h"
+#include "gcpointers.h"
+
+void ctor(gc_object_header* const this, size_t inSize)
+{
+	#ifndef NDEBUG
+		this->canary_start = CANARY_VALUE;
+	#endif
+
+	this->size = inSize;
+	this->root_chain = NULL;
+	this->type_chain = NULL;
+	this->forward = NULL;
+	this->is_forwarded = false;
+
+	#ifndef NDEBUG
+		this->canary_end = CANARY_VALUE;
+	#endif
+}
+
+void copy_ctor(gc_object_header* const this, const gc_object_header* const other)
+{
+	#ifndef NDEBUG
+		this->canary_start = CANARY_VALUE;
+	#endif
+
+	this->size = other->size;
+	this->root_chain = other->root_chain;
+	this->type_chain = NULL;
+	this->forward = NULL;
+	this->is_forwarded = false;
+
+	#ifndef NDEBUG
+		this->canary_end = CANARY_VALUE;
+	#endif
+
+	gcpointer_t* root = this->root_chain;
+	while(root)
+	{
+		check(gc_get_object_ptr( (void*)root->ptr ) == other);
+		root->ptr = ((intptr_t)this) + sizeof(gc_object_header);
+
+		check(gc_get_object_ptr( (void*)root->ptr ) == this);
+		root = root->next;
+	}
+
+	gcpointer_t* type = other->type_chain;
+
+	while(type)
+	{
+		check((intptr_t)type < (intptr_t)((intptr_t)other + other->size));
+
+		size_t offset = (intptr_t)type - (intptr_t)other;
+		check(offset < this->size);
+
+		gcpointer_t* member_ptr = (gcpointer_t*)( (intptr_t)this + offset );
+
+		if(!this->type_chain) this->type_chain = member_ptr;
+
+		size_t next_offset = type->next ? (intptr_t)type->next - (intptr_t)other : 0;
+		check(next_offset < this->size);
+
+		gcpointer_t* next_ptr = type->next ? (gcpointer_t*)((intptr_t)this + next_offset) : NULL;
+
+		member_ptr->ptr = type->ptr;
+		member_ptr->next = next_ptr;
+
+		type = type->next;
+	}
+
+	check(is_valid(this));
+}
+
+#ifndef NDEBUG
+	bool is_valid(const gc_object_header* const this)
+	{
+		check((intptr_t)this->canary_start == (intptr_t)CANARY_VALUE);
+		check((intptr_t)this->canary_end == (intptr_t)CANARY_VALUE);
+
+		check(this->is_forwarded == ( (intptr_t)this->forward != (intptr_t)NULL));
+
+		check(this->size < POOL_SIZE_BYTES);
+
+		gcpointer_t* root = this->root_chain;
+		while(root)
+		{
+			checkf(gc_get_object_ptr( (void*)root->ptr ) == this, (const char*)"Expected %lX got %lX\n", gc_get_object_ptr( (void*)root->ptr ), this);
+
+			root = root->next;
+		}
+
+		gcpointer_t* type = this->type_chain;
+		while(type)
+		{
+			check((intptr_t)type > (intptr_t)this);
+			check((intptr_t)type < (intptr_t)(((intptr_t)this) + this->size));
+
+			type = type->next;
+		}
+
+		return true;
+	}
+	#else
+	#error blarg
+#endif
Index: tests/zombies/gc_no_raii/src/internal/object_header.h
===================================================================
--- tests/zombies/gc_no_raii/src/internal/object_header.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/internal/object_header.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "tools.h"
+
+#ifndef NDEBUG
+	static void* const CANARY_VALUE = (void*)0xCAFEBABACAFEBABA;
+#endif
+
+struct gcpointer_t;
+struct gc_object_header;
+
+struct gc_object_header
+{
+	#ifndef NDEBUG
+		void* canary_start;
+	#endif
+
+	size_t		size;
+	gcpointer_t* 	root_chain;
+	gcpointer_t*	type_chain;
+	gc_object_header*	forward;
+	bool			is_forwarded;
+
+	#ifndef NDEBUG
+		void* canary_end;
+	#endif
+};
+
+void ctor(gc_object_header* const this, size_t size);
+void copy_ctor(gc_object_header* const this, const gc_object_header* const other);
+
+static inline gc_object_header* placement_ctor(void* address, size_t size)
+{
+	gc_object_header* const this = (gc_object_header* const) address;
+	ctor(this, size);
+	return this;
+}
+
+static inline gc_object_header* placement_copy_ctor(void* address, const gc_object_header* const other)
+{
+	gc_object_header* const this = (gc_object_header* const) address;
+	copy_ctor(this, other);
+	return this;
+}
+
+#ifndef NDEBUG
+	bool is_valid(const gc_object_header* const this);
+#endif
Index: tests/zombies/gc_no_raii/src/internal/state.c
===================================================================
--- tests/zombies/gc_no_raii/src/internal/state.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/internal/state.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,312 @@
+#include "state.h"
+
+#include <stdlib.hfa>
+
+//general purpouse includes
+#include "tools.h"
+
+//platform abstraction includes
+#include "allocate-pool.h"
+
+//gc internal includes
+#include "collector.h"
+#include "globals.h"
+#include "memory_pool.h"
+#include "object_header.h"
+#include "tools/worklist.h"
+
+void gc_state_swap(gc_state *const this);
+void gc_state_sweep_roots(gc_state *const this, worklist_t* worklist);
+void gc_state_clear(gc_state *const this);
+void gc_state_calc_usage(gc_state *const this);
+
+#ifndef NDEBUG
+	bool gc_state_roots_match(gc_state *const this);
+	bool gc_state_no_from_space_ref(gc_state *const this);
+#endif
+
+static gc_state s;
+
+gc_state* gc_get_state()
+{
+	if(!s.is_initialized) ctor(&s);
+	return &s;
+}
+
+void ctor(gc_state *const this)
+{
+	this->from_code = 0;
+	this->to_space = NULL;
+	this->from_space = NULL;
+	this->total_space = 0;
+	this->used_space = 0;
+	ctor(&this->pools_table);
+
+	gc_allocate_pool(this);
+
+	this->is_initialized = true;
+}
+
+void dtor(gc_state *const this)
+{
+	dtor(&this->pools_table);
+	this->is_initialized = false;
+}
+
+bool gc_is_in_heap(const gc_state* const this, const void* const address)
+{
+	gc_memory_pool* target_pool = gc_pool_of(address);
+
+	gc_memory_pool** first = cbegin(&this->pools_table);
+	gc_memory_pool** last = cend(&this->pools_table);
+	gc_memory_pool** result = find(first, &last, target_pool);
+	return result != last && gc_pool_is_from_space(*result);
+}
+
+bool gc_is_in_to_space(const gc_state* const this, const void* const address)
+{
+	gc_memory_pool* target_pool = gc_pool_of(address);
+
+	gc_memory_pool** first = cbegin(&this->pools_table);
+	gc_memory_pool** last = cend(&this->pools_table);
+	gc_memory_pool** result = find(first, &last, target_pool);
+	return result != last && !gc_pool_is_from_space(*result);
+}
+
+gc_object_header* gc_get_object_for_ref(gc_state* state, void* member)
+{
+	volatile int stage = 0;
+	intptr_t target = ((intptr_t)member);
+	if(!gc_is_in_heap(state, member)) return NULL;
+	stage++;
+
+	gc_memory_pool* pool = gc_pool_of(member);
+	stage++;
+	gc_pool_object_iterator it = gc_pool_iterator_for(pool, member);
+	stage++;
+	gc_pool_object_iterator end = end(pool);
+	stage++;
+
+	while(it != end)
+	{
+		gc_object_header* object = *it;
+		check(object);
+		check( is_valid(object) );
+		{
+			intptr_t start = ((intptr_t)object);
+			intptr_t end = ((intptr_t)start + object->size);
+			if(start < target && end > target)
+			{
+				return object;
+			}
+		}
+		stage++;
+		++it;
+	}
+
+	checkf( (int) 0, "is_in_heap() and iterator_for() return inconsistent data");
+	abort();
+	return NULL;
+}
+
+void* gc_try_allocate(gc_state* const this, size_t size)
+{
+	gc_memory_pool* pool = this->from_space;
+	while(pool != (gc_memory_pool*)0)
+	{
+		if(gc_pool_size_left(pool) > size)
+		{
+			return gc_pool_allocate(pool, size, true);
+		}
+		pool = pool->next;
+	}
+
+	return (void*)0;
+}
+
+void gc_allocate_pool(gc_state *const this)
+{
+	gc_memory_pool* old_from_space = this->from_space;
+      gc_memory_pool* old_to_space = this->to_space;
+
+      this->from_space = (gc_memory_pool*)(pal_allocPool(POOL_SIZE_BYTES, 1));
+      this->to_space   = (gc_memory_pool*)(pal_allocPool(POOL_SIZE_BYTES, 1));
+
+      this->from_space{ POOL_SIZE_BYTES, old_from_space, this->to_space,   this->from_code };
+      this->to_space  { POOL_SIZE_BYTES, old_to_space,   this->from_space, (~this->from_code) & 0x01 };
+
+	this->total_space += gc_pool_size_used(this->from_space);
+
+	push_back(&this->pools_table, this->from_space);
+	push_back(&this->pools_table, this->to_space);
+}
+
+void gc_collect(gc_state* const this)
+{
+	// DEBUG("collecting");
+	// DEBUG("previous usage " << this->used_space << " / " << this->total_space);
+
+	worklist_t worklist;
+	ctor(&worklist);
+	gc_state_sweep_roots(this, &worklist);
+
+	while(!empty(&worklist))
+	{
+		intptr_t* ref = back(&worklist);
+		pop_back(&worklist);
+		gc_process_reference((void**)ref, &worklist);
+	}
+
+	check(gc_state_roots_match(this));
+	check(gc_state_no_from_space_ref(this));
+
+	gc_state_swap(this);
+
+	gc_state_calc_usage(this);
+
+	if(gc_needs_collect(this)) gc_allocate_pool(this);
+
+	// DEBUG("done");
+	dtor(&worklist);
+}
+
+void gc_state_swap(gc_state* const this)
+{
+	swap(&this->from_space, &this->to_space);
+
+	gc_memory_pool* pool = this->to_space;
+	while(pool)
+	{
+		gc_reset_pool(pool);
+		pool = pool->next;
+	}
+
+	this->from_code = (~this->from_code) & 0x01;
+
+	#ifndef NDEBUG
+		{
+			gc_memory_pool* pool = this->from_space;
+			while(pool)
+			{
+				check(gc_pool_is_from_space(pool));
+				pool = pool->next;
+			}
+
+			pool = this->to_space;
+			while(pool)
+			{
+				check(!gc_pool_is_from_space(pool));
+				pool = pool->next;
+			}
+		}
+	#endif
+}
+
+void gc_state_sweep_roots(gc_state* const this, worklist_t* worklist)
+{
+	gc_memory_pool* pool = this->from_space;
+	while(pool)
+	{
+		gc_pool_object_iterator it = begin(pool);
+		gc_pool_object_iterator end = end(pool);
+		for(;it != end; ++it)
+		{
+			gc_object_header* object = *it;
+			if(!object->root_chain) continue;
+
+			gc_copy_object(object);
+
+			gc_scan_object(object->forward, worklist);
+		}
+
+		pool = pool->next;
+	}
+}
+
+void gc_state_clear(gc_state* const this)
+{
+	gc_memory_pool* pool = this->from_space;
+	while(pool)
+	{
+		gc_reset_pool(pool);
+		pool = pool->next;
+	}
+
+	pool = this->to_space;
+	while(pool)
+	{
+		gc_reset_pool(pool);
+		pool = pool->next;
+	}
+}
+
+void gc_state_calc_usage(gc_state* const this)
+{
+	this->total_space = 0;
+	this->used_space = 0;
+
+	gc_memory_pool* pool = this->from_space;
+	while(pool)
+	{
+		size_t size = gc_pool_size_total(pool);
+		size_t used = gc_pool_size_used(pool);
+		check(used <= size);
+		this->total_space += size;
+		this->used_space += used;
+
+		pool = pool->next;
+	}
+}
+
+#ifndef NDEBUG
+	bool gc_state_roots_match(gc_state* const this)
+	{
+		gc_memory_pool* pool = this->to_space;
+		while(pool)
+		{
+			size_t size = 0;
+			gc_pool_object_iterator it = begin(pool);
+			gc_pool_object_iterator end = end(pool);
+			for(;it != end; ++it)
+			{
+				gc_object_header* object = *it;
+				size += object->size;
+
+				gcpointer_t* ptr = object->root_chain;
+				while(ptr)
+				{
+					check(gc_get_object_ptr( (void*)ptr->ptr ) == object);
+					ptr = ptr->next;
+				}
+			}
+
+			checkf(size + gc_pool_size_left(pool) == gc_pool_size_total(pool),
+				(const char*)"expected %lu + %lu == %lu\n",
+				(size_t)size,
+				(size_t)gc_pool_size_left(pool),
+				(size_t)gc_pool_size_total(pool));
+
+			pool = pool->next;
+		}
+
+		return true;
+	}
+
+	bool gc_state_no_from_space_ref(gc_state* const this)
+	{
+		gc_memory_pool* pool = this->to_space;
+		while(pool)
+		{
+			void** potential_ref = (void**)pool->start_p;
+			while(potential_ref < (void**)pool->free_p)
+			{
+				check(!gc_is_in_heap(this, *potential_ref));
+				potential_ref++;
+			}
+
+			pool = pool->next;
+		}
+
+		return true;
+	}
+#endif
Index: tests/zombies/gc_no_raii/src/internal/state.h
===================================================================
--- tests/zombies/gc_no_raii/src/internal/state.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/internal/state.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,64 @@
+#pragma once
+
+#ifdef __cforall
+extern "C" {
+#endif
+#include <stddef.h>
+#include <stdint.h>
+#ifdef __cforall
+}
+#endif
+#include <fstream.hfa>
+#include <vector>
+
+#include "tools.h"
+
+typedef vector(struct gc_memory_pool*, heap_allocator(struct gc_memory_pool*)) pools_table_t;
+
+struct gc_state
+{
+	bool is_initialized;
+	uint8_t from_code;
+	struct gc_memory_pool* to_space;
+	struct gc_memory_pool* from_space;
+
+	size_t total_space;
+	size_t used_space;
+
+	pools_table_t 	pools_table;
+	size_t 		pools_table_count;
+};
+
+void ctor(gc_state* const state);
+
+void dtor(gc_state* const state);
+
+gc_state* gc_get_state();
+
+static inline bool gc_needs_collect(gc_state* state)
+{
+	// sout | "Used Space: " | state->used_space | " bytes";
+	return state->used_space * 2 > state->total_space;
+}
+
+void gc_collect(gc_state* const this);
+
+void* gc_try_allocate(gc_state* const this, size_t size);
+
+void gc_allocate_pool(gc_state* const state);
+
+bool gc_is_in_heap(const gc_state* const state, const void* const address);
+
+bool gc_is_in_to_space(const gc_state* const state, const void* const address);
+
+static inline uint8_t gc_from_space_code(const gc_state *const this)
+{
+	return this->from_code;
+}
+
+struct gc_object_header* gc_get_object_for_ref(gc_state* state, void*);
+
+static inline void gc_register_allocation(gc_state* state, size_t size)
+{
+	state->used_space += size;
+}
Index: tests/zombies/gc_no_raii/src/test_include.c
===================================================================
--- tests/zombies/gc_no_raii/src/test_include.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/test_include.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,4 @@
+/* definition to expand macro for string conversion*/
+#define xstr(s) sstr(s)
+#define sstr(s) #s
+#include xstr(../test/TEST_FILE.c)
Index: tests/zombies/gc_no_raii/src/tools.h
===================================================================
--- tests/zombies/gc_no_raii/src/tools.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/tools.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "tools/checks.h"
+#include "tools/print.h"
+
+// forall(otype T)
+// inline void swap(T* const a, T* const b)
+// {
+// 	T* temp = a;
+// 	*a = *b;
+// 	*b = *temp;
+// }
+
+trait has_equal(otype T)
+{
+	signed int ?==?(T a, T b);
+};
+
+trait InputIterator_t(otype T, otype InputIterator)
+{
+	signed int ?==?(InputIterator a, InputIterator b);
+	signed int ?!=?(InputIterator a, InputIterator b);
+	T *?(InputIterator a);
+	InputIterator ++?(InputIterator* a);
+	InputIterator ?++(InputIterator* a);
+};
+
+forall(otype T | has_equal(T), otype InputIterator | InputIterator_t(T, InputIterator))
+static inline InputIterator find( InputIterator first, const InputIterator* const last, T val)
+{
+	while ( first != *last)
+	{
+		if(*first == val) return first;
+		++first;
+	}
+	return *last;
+}
Index: tests/zombies/gc_no_raii/src/tools/checks.h
===================================================================
--- tests/zombies/gc_no_raii/src/tools/checks.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/tools/checks.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,29 @@
+#pragma once
+
+#ifdef NDEBUG
+
+#define check(x)
+
+#define checkf(x, format, ...)
+
+#warning no debug checks
+
+#else
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#define check(x) do {\
+	if(!(x)) {\
+		printf("CHECK failed : %s at %s:%i\n", #x, __FILE__, __LINE__);\
+		abort();\
+	}}while( (int)0 )\
+
+#define checkf(x, ...) do {\
+	if(!(x)) {\
+		printf("CHECK failed : %s at %s:%i\n", #x, __FILE__, __LINE__);\
+		printf(__VA_ARGS__);\
+		abort();\
+	}}while( (int)0 )\
+
+#endif //NO_CHECKS
Index: tests/zombies/gc_no_raii/src/tools/print.c
===================================================================
--- tests/zombies/gc_no_raii/src/tools/print.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/tools/print.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,5 @@
+#include "tools.h"
+
+#ifndef NDEBUG
+	// ofstream *sout = ofstream_stdout();
+#endif
Index: tests/zombies/gc_no_raii/src/tools/print.h
===================================================================
--- tests/zombies/gc_no_raii/src/tools/print.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/tools/print.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,13 @@
+#pragma once
+
+// #ifndef NDEBUG
+//
+// #include <fstream.hfa>
+//
+// #define DEBUG_OUT(x) sout | x;
+//
+// #else
+
+#define DEBUG_OUT(x)
+
+// #endif //NO_CHECKS
Index: tests/zombies/gc_no_raii/src/tools/worklist.h
===================================================================
--- tests/zombies/gc_no_raii/src/tools/worklist.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/src/tools/worklist.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,14 @@
+#pragma once
+
+#ifdef __cforall
+extern "C" {
+#endif
+#include <stddef.h>
+#include <stdint.h>
+#ifdef __cforall
+}
+#endif
+
+#include <vector.hfa>
+
+typedef vector(intptr_t*, heap_allocator(intptr_t*)) worklist_t;
Index: tests/zombies/gc_no_raii/test/badlll.c
===================================================================
--- tests/zombies/gc_no_raii/test/badlll.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/test/badlll.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,71 @@
+#include "gc.h"
+
+#include <stdio.h>
+
+struct List_t
+{
+	gcpointer(List_t) next;
+	int val;
+};
+
+typedef gcpointer(List_t) LLL;
+
+#define MAX (1024 * 1)
+
+LLL buildLLL(int sz)
+{
+	int i = 0;
+	LLL ll0;
+
+	gcmalloc( &ll0 );
+	List_t* ll0_ptr = get( &ll0 );
+	ll0_ptr->val = i;
+	LLL lll = ll0;
+
+	for (i = 1; i < sz; i++)
+	{
+		LLL llc;
+		gcmalloc( &llc );
+		List_t* llc_ptr = get( &llc );
+		llc_ptr->val = i;
+		List_t* lll_ptr = get( &lll );
+		lll_ptr->next = llc;
+
+		lll = llc;
+	}
+
+	check(is_valid( &ll0.internal ));
+
+	return ll0;
+}
+
+void testLLL(LLL lll)
+{
+	unsigned char *counted;
+
+	counted = (unsigned char *) calloc(MAX, sizeof(unsigned char));
+	while (lll)
+	{
+		List_t* lll_ptr = get( &lll );
+		counted[lll_ptr->val]++;
+		if (counted[lll_ptr->val] > 1)
+		{
+			fprintf(stderr, "ERROR! Encountered %d twice!\n", lll_ptr->val);
+			exit(1);
+		}
+		lll = lll_ptr->next;
+	}
+
+	return;
+}
+
+int main(void)
+{
+	LLL mylll;
+
+	mylll = buildLLL(MAX);
+
+	testLLL(mylll);
+
+	return 0;
+}
Index: tests/zombies/gc_no_raii/test/gctest.c
===================================================================
--- tests/zombies/gc_no_raii/test/gctest.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/test/gctest.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,25 @@
+#include <fstream.hfa>
+
+#include "gc.h"
+#include "internal/collector.h"
+
+#warning default test
+
+int main() {
+	sout | "Bonjour au monde!\n";
+
+	gcpointer(int) theInt;
+	gcmalloc(&theInt);
+
+	for(int i = 0; i < 10; i++) {
+		int a;
+		{
+			gcpointer(int) anInt;
+			gcmalloc(&anInt);
+		}
+		int p;
+	}
+
+	gc_collect(gc_get_state());
+	gc_conditional_collect();
+}
Index: tests/zombies/gc_no_raii/test/operators.c
===================================================================
--- tests/zombies/gc_no_raii/test/operators.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/gc_no_raii/test/operators.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,22 @@
+#include "gc.h"
+
+#include <assert.h>
+
+int main(int argc, char *argv[])
+{
+	gcpointer(int) test, test1;
+
+	if(test != test1) { return 1; }
+	if(test == test1) { return 1; }
+	// if(test == 0)  { return 1; }
+	// if(test != 0)  { return 1; }
+	// if(test) { return 1; }
+
+	// *test.internal.ptr = 3;
+	// int i = *test;
+
+	gcmalloc();
+	// test = gcmalloc();
+
+	return 0;
+}
Index: tests/zombies/hashtable.cfa
===================================================================
--- tests/zombies/hashtable.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/hashtable.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,243 @@
+
+#include <containers/list.hfa>
+
+#include <exception.hfa>
+TRIVIAL_EXCEPTION(ht_fill_limit_crossed);
+
+
+
+void defaultResumptionHandler(ht_fill_limit_crossed &) {
+    printf("default resumption ht_fill_limit_crossed\n");
+}
+
+void defaultTerminationHandler(ht_fill_limit_crossed &) = void;
+
+
+trait has_hash( otype K ) {
+    size_t hash(K);
+    int ?==?( K, K );
+};
+
+trait hkey( otype K, dtype tN | has_hash(K) ) {
+    K key(tN &);
+};
+
+forall( otype K, dtype tN, dtype tE | $dlistable(tN, tE) | hkey(K, tN) ) {
+
+    struct hashtable {
+
+        size_t n_buckets;
+        dlist(tN, tE) *buckets;
+        
+        size_t item_count;
+        float ff_next_warn_up;
+
+        void (*defaultResumptionHandler) (ht_fill_limit_crossed &);
+    };
+
+    void ?{}( hashtable(K, tN, tE) & this ) = void;
+}
+
+forall( otype K, dtype tN, dtype tE | $dlistable(tN, tE) | hkey(K, tN) | { void defaultResumptionHandler(ht_fill_limit_crossed &); } ) {
+
+    void ?{}( hashtable(K, tN, tE) & this, size_t n_buckets, dlist(tN, tE) *buckets ) {
+
+        this.n_buckets = n_buckets;
+        this.buckets = buckets;
+
+        this.item_count = 0;
+        this.ff_next_warn_up = 0.5;
+
+        this.defaultResumptionHandler = defaultResumptionHandler;
+
+        for ( i; n_buckets ) {
+            ?{}( this.buckets[i] );
+        }
+    }
+}
+
+forall( otype K, dtype tN, dtype tE | $dlistable(tN, tE) | hkey(K, tN) ) {
+
+    float fill_frac( hashtable(K, tN, tE) & this ) with(this) {
+        return ((float)item_count) / n_buckets;
+    }
+
+    size_t bucket_of( hashtable(K, tN, tE) & this, K k ) {
+        return hash(k) % this.n_buckets;
+    }
+
+    tE & get( hashtable(K, tN, tE) & this, K k ) with (this) {
+
+        dlist(tN, tE) & bucket = buckets[ bucket_of(this, k) ];
+
+        for ( tN * item = & $tempcv_e2n(bucket`first);  item != 0p;  item = & $tempcv_e2n((*item)`next) ) {
+            if ( key(*item) == k ) {
+                return *item;
+            }
+        }
+
+        return *0p;
+    }
+
+    void check_ff_warning( hashtable(K, tN, tE) & this ) with (this) {
+        if (fill_frac(this) > ff_next_warn_up) {
+            throwResume (ht_fill_limit_crossed){};
+            ff_next_warn_up *= 2;
+        }
+    }
+
+    void put( hashtable(K, tN, tE) & this, tE & v ) with (this) {
+
+        check_ff_warning(this);
+
+        K k = key( $tempcv_e2n(v) );
+        dlist(tN, tE) & bucket = buckets[ bucket_of(this, k) ];
+
+        for ( tN * item = & $tempcv_e2n(bucket`first);  item != 0p;  item = & $tempcv_e2n((*item)`next) ) {
+            if ( key(*item) == k ) {
+                remove(*item);
+                break;
+            }
+        }
+
+        insert_first(bucket, v);
+        this.item_count ++;
+    }
+
+}
+
+// tactical usage:
+// HASHTABLE_STATIC(int, item_by_prority, item, n, ht)
+//
+// intended equivalent:
+// hashtable_static(int, item_by_prority, item, Z(n)) ht;
+#define HASHTABLE_STATIC(K, tN, tE, n_buckets, obj) \
+    struct __hashtable_static_ ## obj { \
+        inline hashtable(K, tN, tE); \
+        dlist(tN, tE) $items[n_buckets]; \
+    }; \
+    void ?{}( __hashtable_static_ ## obj & this )  { \
+        ((hashtable(K, tN, tE) &)this){ n_buckets, this.$items }; \
+    } \
+    __hashtable_static_ ## obj obj;
+
+
+
+trait heaped(dtype T) {
+    T * alloc( size_t );
+    void free( void * ); 
+};
+
+void __dynamic_defaultResumptionHandler(ht_fill_limit_crossed & ex) {
+    printf("dynamic limit crossed\n");
+}
+
+forall( otype K, dtype tN, dtype tE | $dlistable(tN, tE) | hkey(K, tN) | heaped( dlist(tN, tE) ) ) {
+
+    struct hashtable_dynamic { 
+        inline hashtable(K, tN, tE); 
+    };
+    void ?{}( hashtable_dynamic(K, tN, tE) & this, size_t n_buckets )  {
+        void (*defaultResumptionHandler) (ht_fill_limit_crossed &) = __dynamic_defaultResumptionHandler;
+        dlist(tN, tE) *buckets = alloc(n_buckets);
+        ((hashtable(K, tN, tE) &)this){ n_buckets, buckets };
+    }
+    void ^?{}( hashtable_dynamic(K, tN, tE) & this ) {
+        free(this.buckets);
+    }
+}
+
+
+
+
+struct request {
+
+    unsigned int src_id;
+    unsigned int tgt_id;
+
+    DLISTED_MGD_EXPL_IN(request, ht_by_src)
+    DLISTED_MGD_EXPL_IN(request, ht_by_tgt)
+};
+DLISTED_MGD_EXPL_OUT(request, ht_by_src)
+DLISTED_MGD_EXPL_OUT(request, ht_by_tgt)
+
+size_t hash( unsigned int k ) {
+    // not really a hash function, not really the point
+    return k;
+}
+
+unsigned int key( request_in_ht_by_src & v ) {
+    return v.src_id;
+}
+
+
+#include <stdlib.hfa>
+
+int main() {
+
+
+    HASHTABLE_STATIC(unsigned int, request_in_ht_by_src, request, 67, h_src)
+
+    request & wasnt_found = get(h_src, 17);
+    assert( &wasnt_found == 0p );
+
+    request r;
+    r.src_id = 117;
+    r.tgt_id = 998;
+
+    put(h_src, r);
+
+    request & found = get(h_src, 117);
+    assert( &found == &r );
+
+    & wasnt_found = & get(h_src, 998);
+    assert( &wasnt_found == 0p );
+
+    printf( "%f\n", fill_frac(h_src) );
+
+
+    request rs[500];
+    try {
+        for (i; 500) {
+            rs[i].src_id = 8000 * i;
+            put(h_src, rs[i]);
+        }
+    } catchResume(ht_fill_limit_crossed*) {
+        printf("fill limit tripped with h_src filled at %f\n", fill_frac(h_src));
+        throwResume;
+    }
+
+    assert(  & get(h_src, 117      ) );
+    assert(  & get(h_src, 8000*25  ) );
+    assert(! & get(h_src, 8000*25+1) );
+
+
+
+    dlist(request_in_ht_by_src, request) * (*old_alloc)( size_t ) = alloc;
+    dlist(request_in_ht_by_src, request) * alloc( size_t n ) {
+        dlist(request_in_ht_by_src, request) * ret = old_alloc(n);
+        printf("alloc'ed at %p\n", ret);
+        return ret;
+    }
+
+    void (*old_free)( void * ) = free;
+    void free( void * o ) {
+        printf("free'ing at %p\n", o);
+        old_free(o);
+    }
+
+    hashtable_dynamic(unsigned int, request_in_ht_by_src, request) ht2 = { 113 };
+    request rs2[500];
+    try {
+        for (i; 500) {
+            if (i % 10 == 0) {printf("%d(%f),", i, fill_frac(ht2));}
+            rs2[i].src_id = 8000 * i;
+            put(ht2, rs2[i]);
+        }
+    } catchResume(ht_fill_limit_crossed*) {
+        printf("fill limit tripped with ht2 filled at %f\n", fill_frac(ht2));
+        throwResume;
+    }
+
+
+}
Index: tests/zombies/hashtable2.cfa
===================================================================
--- tests/zombies/hashtable2.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/hashtable2.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,466 @@
+
+#include <containers/list.hfa>
+
+typedef unsigned int K;
+
+// workaround type for trac#185; used here as the ticket's examples use a spontaneous float
+typedef struct {} t_unused;
+
+struct request {
+
+    unsigned int src_id;
+    unsigned int tgt_id;
+
+    DLISTED_MGD_EXPL_IN(request, ht_by_src)
+    DLISTED_MGD_EXPL_IN(request, ht_by_tgt)
+};
+DLISTED_MGD_EXPL_OUT(request, ht_by_src)
+DLISTED_MGD_EXPL_OUT(request, ht_by_tgt)
+
+size_t hash( unsigned int k ) {
+    // not really a hash function, not really the point
+    return k;
+}
+
+unsigned int key( request_in_ht_by_src & v ) {
+    return v.src_id;
+}
+
+
+#include <exception.hfa>
+
+DATA_EXCEPTION(ht_fill_limit_crossed)(
+	void * theHashtable;
+    bool want_throwResume_ht_auto_resize_pending;
+    size_t size_for_ht_auto_resize_pending;
+);
+
+void ?{}(ht_fill_limit_crossed & this, void * theHashtable) {
+	VTABLE_INIT(this, ht_fill_limit_crossed);
+	this.theHashtable = theHashtable;
+    this.want_throwResume_ht_auto_resize_pending = false;
+    this.size_for_ht_auto_resize_pending = 0;
+}
+
+const char * ht_fill_limit_crossed_msg(ht_fill_limit_crossed * this) {
+	return "ht_fill_limit_crossed";
+}
+
+VTABLE_INSTANCE(ht_fill_limit_crossed)(ht_fill_limit_crossed_msg);
+
+
+DATA_EXCEPTION(ht_auto_resize_pending)(
+	void * theHashtable;
+    size_t new_size;
+);
+
+void ?{}(ht_auto_resize_pending & this, void * theHashtable, size_t new_size) {
+	VTABLE_INIT(this, ht_auto_resize_pending);
+	this.theHashtable = theHashtable;
+    this.new_size = new_size;
+}
+
+const char * ht_auto_resize_pending_msg(ht_auto_resize_pending * this) {
+	return "ht_auto_resize_pending";
+}
+
+VTABLE_INSTANCE(ht_auto_resize_pending)(ht_auto_resize_pending_msg);
+
+
+
+trait pretendsToMatter( dtype TTT ) {
+    void actsmart(TTT &);
+};
+
+forall( dtype TTTx )
+void actsmart(TTTx &) {}
+
+// probable bug, wrt otype Tt_unused...
+//   1. changing to dtype Tt_unused crashes GenPoly
+//   2. declaring function check_ff_warning as concrete, i.e. operating on type hashtable_rbs(t_unused) makes cfa-cc generate bad C
+// in both cases, it's on the throwResume call
+// where it implicitly uses this.defaultResumptionHandler as a throwResume argument
+// whereas that, of course, has to be surrounded in a cast
+// at GenPoly breakpoint, type of said cast appears as CFA generic struct, even as the "this" parameter appears as C struct; casted type ...
+//   1. is hashtable_rbs(Tt_unused); assertion complains you don't need a type arg here
+//   2. shows up in -CFA output as hashtable_rbs(), which is bad C; expecting hashtable_rbs*
+
+forall( otype Tt_unused | pretendsToMatter(Tt_unused) ) {
+
+    // hashtable of request by source
+    struct hashtable_rbs {
+
+        size_t n_buckets;
+        dlist(request_in_ht_by_src, request) *buckets;
+        
+        size_t item_count;
+        float ff_next_warn_up;
+        float ff_warn_step_factor;
+
+        void (*defaultResumptionHandler) (ht_fill_limit_crossed &);
+    };
+
+    void ?{}( hashtable_rbs(Tt_unused) & this ) = void;
+}
+
+forall( otype Tt_unused | pretendsToMatter(Tt_unused) | { void defaultResumptionHandler(ht_fill_limit_crossed &); } ) {
+
+    void ?{}( hashtable_rbs(Tt_unused) & this, size_t n_buckets, dlist(request_in_ht_by_src, request) *buckets,
+        float ff_next_warn_up, float ff_warn_step_factor ) {
+
+        printf( "base hashtable ctor with %ld buckets at %p\n", n_buckets, buckets);
+
+        this.n_buckets = n_buckets;
+        this.buckets = buckets;
+
+        this.item_count = 0;
+        this.ff_next_warn_up = ff_next_warn_up;
+        this.ff_warn_step_factor = ff_warn_step_factor;
+
+        this.defaultResumptionHandler = defaultResumptionHandler;
+
+        for ( i; n_buckets ) {
+            ?{}( this.buckets[i] );
+        }
+    }
+
+    void ?{}( hashtable_rbs(Tt_unused) & this, size_t n_buckets, dlist(request_in_ht_by_src, request) *buckets ) {
+        printf( "base hashtable ctor with default warning steps\n" );
+        ( this ) { n_buckets, buckets, 0.5, 2 };
+    }
+
+}
+
+// this fwd declaration is artifact of workaround trac#192
+void defaultResumptionHandler( ht_auto_resize_pending & ex );
+
+forall( otype Tt_unused | pretendsToMatter(Tt_unused) ) {
+
+    float fill_frac( hashtable_rbs(Tt_unused) & this ) with(this) {
+        return ((float)item_count) / n_buckets;
+    }
+
+    size_t bucket_of( hashtable_rbs(Tt_unused) & this, K k ) {
+        return hash(k) % this.n_buckets;
+    }
+
+    request & get( hashtable_rbs(Tt_unused) & this, K k ) with (this) {
+
+        dlist(request_in_ht_by_src, request) & bucket = buckets[ bucket_of(this, k) ];
+
+        for ( request_in_ht_by_src * item = & $tempcv_e2n(bucket`first);  item != 0p;  item = & $tempcv_e2n((*item)`next) ) {
+            if ( key(*item) == k ) {
+                return *item;
+            }
+        }
+
+        return *0p;
+    }
+
+    void check_ff_warning( hashtable_rbs(Tt_unused) & this ) with (this) {
+        if (fill_frac(this) > ff_next_warn_up) {
+            ht_fill_limit_crossed ex1 = { &this };
+            throwResume ex1;
+            // workaround trac#192: want the second throwResume to be in __dynamic_defaultResumptionHandler
+            // ... want base hashtable decoupled from resize
+            if ( ex1.want_throwResume_ht_auto_resize_pending ) {
+                throwResume( (ht_auto_resize_pending) { & this, ex1.size_for_ht_auto_resize_pending } );
+            }
+        }
+    }
+
+    void put( hashtable_rbs(Tt_unused) & this, request & v ) with (this) {
+
+        check_ff_warning(this);
+
+        K k = key( $tempcv_e2n(v) );
+        dlist(request_in_ht_by_src, request) & bucket = buckets[ bucket_of(this, k) ];
+
+        for ( request_in_ht_by_src * item = & $tempcv_e2n(bucket`first);  item != 0p;  item = & $tempcv_e2n((*item)`next) ) {
+            if ( key(*item) == k ) {
+                remove(*item);
+                break;
+            }
+        }
+
+        insert_first(bucket, v);
+        this.item_count ++;
+    }
+}
+
+
+
+
+// tactical usage:
+// HASHTABLE_RBS_STATIC(n, ht)
+//
+// intended equivalent:
+// hashtable_rbs_static(Z(n)) ht;
+#define HASHTABLE_RBS_STATIC(n_buckets, obj) \
+    struct __hashtable_static_ ## obj { \
+        inline hashtable_rbs(t_unused); \
+        dlist(request_in_ht_by_src, request) $items[n_buckets]; \
+    }; \
+    void ?{}( __hashtable_static_ ## obj & this )  { \
+        ((hashtable_rbs(t_unused) &)this){ n_buckets, this.$items }; \
+    } \
+    __hashtable_static_ ## obj obj;
+
+
+
+void defaultResumptionHandler(ht_fill_limit_crossed & ex) {
+    hashtable_rbs(t_unused) & ht = *(hashtable_rbs(t_unused) *)ex.theHashtable;
+    printf("base default resumption handler ht_fill_limit_crossed with ht filled at %f\n", fill_frac(ht));
+    ht.ff_next_warn_up *= ht.ff_warn_step_factor;
+}
+
+void defaultTerminationHandler(ht_fill_limit_crossed &) = void;
+
+
+
+
+
+trait heaped(dtype T) {
+    T * alloc( size_t );
+    void free( void * ); 
+};
+
+void __dynamic_defaultResumptionHandler(ht_fill_limit_crossed &);
+
+forall( otype Tt_unused ) {
+
+    struct hashtable_rbs_dynamic { 
+        inline hashtable_rbs(Tt_unused);
+
+        struct resize_policy {
+            // When fill factor exceeds grow limit, grow big enough for
+            // resulting fill factor to be lower than grow_target.  Vice versa.
+            // Using different grow and shrink limits prevents noisy current
+            // size from triggering grow-shrink oscillation.  OK to use same
+            // grow and shrink targets.
+            float grow_limit, shrink_limit, grow_target, shrink_target;
+
+            // warn with exception but do nothing, this many -1 times, then actually resize
+            unsigned short int warns_per_grow, warns_per_shrink;
+
+            // Don't shrink below.
+            size_t nbuckets_floor;
+        } policy;
+
+        dlist(request_in_ht_by_src, request) * (*alloc)( size_t );
+        void (*free)( void * ); 
+    };
+}
+
+// will be in list api
+void splice_all_to_last( dlist(request_in_ht_by_src, request) & src_to_empty, dlist(request_in_ht_by_src, request) & snk_to_fill_at_last ) {
+
+    // will re-implement as an actual splice
+    while ( & src_to_empty`first != 0p ) {
+        insert_last( snk_to_fill_at_last, pop_first( src_to_empty ) );
+    }
+}
+
+
+forall( otype Tt_unused | heaped( dlist(request_in_ht_by_src, request) ) ) {
+
+    void ?{}( hashtable_rbs_dynamic(Tt_unused).resize_policy & this, size_t nbuckets_floor ) {
+        printf("default dynamic policy ctor\n");
+
+        (this.grow_limit)      {2.0};
+        (this.shrink_limit)    {0.5};
+        (this.grow_target)     {1.0};
+        (this.shrink_target)   {1.0};
+        (this.warns_per_grow)  {4};
+        (this.warns_per_shrink){4};
+        (this.nbuckets_floor)  {nbuckets_floor};
+    }
+
+    void ?{}( hashtable_rbs_dynamic(Tt_unused) & this, size_t n_buckets, hashtable_rbs_dynamic(Tt_unused).resize_policy rp )  {
+        printf("ctor hashtable_rbs_dynamic{ size_t, resize_policy }\n");
+
+        float first_first_warn_up = rp.grow_target;
+        float ff_warn_step_factor = (rp.grow_limit / rp.grow_target) \ ( 1. / rp.warns_per_grow );
+
+        void (*defaultResumptionHandler) (ht_fill_limit_crossed &) = __dynamic_defaultResumptionHandler;
+        dlist(request_in_ht_by_src, request) *buckets = alloc(n_buckets);
+        ( ( hashtable_rbs( Tt_unused ) & ) this ){ n_buckets, buckets, first_first_warn_up, ff_warn_step_factor };
+        ( this.policy ){ rp };
+        this.alloc = alloc;
+        this.free = free;
+    }
+    void ?{}( hashtable_rbs_dynamic(Tt_unused) & this, hashtable_rbs_dynamic(Tt_unused).resize_policy rp )  {
+        printf("ctor hashtable_rbs_dynamic{ resize_policy }\n");
+        ( this ) { rp.nbuckets_floor, rp };
+    }
+    void ?{}( hashtable_rbs_dynamic(Tt_unused) & this, size_t n_buckets )  {
+        printf("ctor hashtable_rbs_dynamic{ size_t }\n");
+        ( this ) { n_buckets, (hashtable_rbs_dynamic(Tt_unused).resize_policy){ n_buckets } };
+    }
+    void ^?{}( hashtable_rbs_dynamic(Tt_unused) & this ) {
+        free(this.buckets);
+    }
+    void rehashToLarger( hashtable_rbs_dynamic(Tt_unused) & this, size_t new_n_buckets ) with(this) {
+        printf("resizing from %ld to %ld, old buckets at %p\n", n_buckets, new_n_buckets, buckets);
+
+        // collect hash items from old buckets
+        dlist(request_in_ht_by_src, request) items;
+        for (i; n_buckets) {
+            splice_all_to_last( buckets[i], items );
+        }
+
+        // make empty hash table of new size
+        dlist(request_in_ht_by_src, request) *oldBuckets = buckets;
+        float oldFfWarnStepFactor = ff_warn_step_factor;
+        float newFfNextWarnUp = ((float)item_count) / ((float) new_n_buckets);
+        ^?{}( (hashtable_rbs(Tt_unused) &)this );
+        free( oldBuckets );
+        ?{}( (hashtable_rbs(Tt_unused) &)this, new_n_buckets, alloc(new_n_buckets), newFfNextWarnUp, oldFfWarnStepFactor );
+
+        // fill new table with old items
+        while ( & items`first != 0p ) {
+            put( this, pop_first( items ) );
+        }
+    }
+}
+
+forall( otype Tt_unused ) {
+    void rehashToLarger_STEP( hashtable_rbs_dynamic(Tt_unused) & this, size_t new_n_buckets ) with (this) {
+        rehashToLarger( this, new_n_buckets );
+    }
+}
+
+void defaultResumptionHandler( ht_auto_resize_pending & ex ) {
+    hashtable_rbs_dynamic(t_unused) & ht = *(hashtable_rbs_dynamic(t_unused) *)ex.theHashtable;
+    printf("auto-resize unhandled: proceeding with resize\n");
+    rehashToLarger_STEP( ht, ex.new_size );
+}
+
+void __dynamic_defaultResumptionHandler(ht_fill_limit_crossed & ex) {
+    hashtable_rbs_dynamic(t_unused) & ht = *(hashtable_rbs_dynamic(t_unused) *)ex.theHashtable;
+    printf("dynamic warning received with fill_frac = %f and buckets at %p\n", fill_frac(ht), ht.buckets);
+    if ( fill_frac( ht ) >= ht.policy.grow_limit ) {
+        float grow_amount =  ht.policy.grow_limit / ht.policy.grow_target;
+        ex.want_throwResume_ht_auto_resize_pending = true;
+        ex.size_for_ht_auto_resize_pending = ( size_t )( grow_amount * ht.n_buckets );
+    } else {
+        // base handler, not specialized for dynamic
+        defaultResumptionHandler( ex );
+    }
+}
+
+
+
+
+
+
+#include <stdlib.hfa>
+
+void basicFillingTestHelper( hashtable_rbs(t_unused) & ht, size_t n_elems ) {
+
+    request & wasnt_found = get(ht, 17);
+    assert( &wasnt_found == 0p );
+
+    request r;
+    r.src_id = 117;
+    r.tgt_id = 998;
+
+    put(ht, r);
+
+    request & found = get(ht, 117);
+    assert( &found == &r );
+
+    & wasnt_found = & get(ht, 998);
+    assert( &wasnt_found == 0p );
+
+    request rs[n_elems];
+    for (i; n_elems) {
+        rs[i].src_id = 8000 * i;
+        put(ht, rs[i]);
+    }
+
+    assert(  & get(ht, 117      ) );
+    assert(  & get(ht, 8000*25  ) );
+    assert(! & get(ht, 8000*25+1) );
+}
+
+void basicFillingTest_static() {
+
+    printf("---start basic fill test static ----\n");
+
+    HASHTABLE_RBS_STATIC(67, ht)
+
+    basicFillingTestHelper(ht, 500);
+}
+
+void basicFillingTest_dynamic() {
+
+    dlist(request_in_ht_by_src, request) * (*old_alloc)( size_t ) = alloc;
+    dlist(request_in_ht_by_src, request) * alloc( size_t n ) {
+        dlist(request_in_ht_by_src, request) * ret = old_alloc(n);
+        printf("alloc'ed at %p\n", ret);
+        return ret;
+    }
+
+    void (*old_free)( void * ) = free;
+    void free( void * o ) {
+        printf("free'ing at %p\n", o);
+        old_free(o);
+    }
+
+    printf("---start basic fill test dynamic ----\n");
+
+    hashtable_rbs_dynamic(t_unused) ht = { 113 };
+
+    basicFillingTestHelper(ht, 500);
+}
+
+// Demonstrates user-provided instrumentation monitoring a fixed-size hash table
+void logTest() {
+
+    printf("---start log test ----\n");
+
+    HASHTABLE_RBS_STATIC(67, ht)
+
+    try {
+        basicFillingTestHelper(ht, 500);
+    } catchResume( ht_fill_limit_crossed * ) {
+        printf("log test instrumentation runs\n");
+        throwResume;
+    }
+}
+
+// Demonstrates "snoozing" a growing hash table's auto-resize event,
+// in that that next call to put will get the resize exception instead.
+void snoozeTest() {
+
+    printf("---start snooze test ----\n");
+
+    hashtable_rbs_dynamic(t_unused) ht = { 113 };
+
+    bool lastResizeSnoozed = false;
+
+    try {
+        basicFillingTestHelper(ht, 500);
+    } catchResume( ht_auto_resize_pending * ) {
+
+        if ( lastResizeSnoozed == false ) {
+            lastResizeSnoozed = true;
+            printf("snooze test intervention decides to snooze this time\n");
+        } else {
+            lastResizeSnoozed = false;
+            printf("snooze test intervention decides to allow the resize\n");
+            throwResume;
+        }
+
+    }
+}
+
+int main() {
+
+    basicFillingTest_static();
+    basicFillingTest_dynamic();
+
+    logTest();
+    snoozeTest();
+}
Index: tests/zombies/huge.c
===================================================================
--- tests/zombies/huge.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/huge.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,26 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// huge.c -- 
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Tue Mar  8 22:16:32 2016
+// Update Count     : 2
+//
+
+int huge( int n, forall( otype T ) T (*f)( T ) ) {
+	if ( n <= 0 )
+		return f( 0 );
+	else
+		return huge( n - 1, f( f ) );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa huge.c" //
+// End: //
Index: tests/zombies/includes.c
===================================================================
--- tests/zombies/includes.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/includes.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,250 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// includes.c -- 
+//
+// Author           : Peter A. Buhr
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Wed Sep 30 13:59:18 2020
+// Update Count     : 598
+//
+
+// ***********************************************
+// USE -std=c99 WITH gxx TO GET SAME OUTPUT AS cfa
+// ***********************************************
+
+#ifdef __CFA__
+extern "C" {
+#endif // __CFA__
+
+#if 1
+#define _GNU_SOURCE
+// #include <a.out.h>
+// #include <aio.h>
+// #include <aliases.h>
+// #include <alloca.h>
+// #include <ansidecl.h>
+// #include <ar.h>
+// #include <argp.h>
+// #include <argz.h>
+// #include <assert.h>
+// #include <bfd.h>
+// #include <bfdlink.h>									// keyword with
+// #include <byteswap.h>
+// #include <bzlib.h>
+// #include <cblas.h>
+// #include <cblas_f77.h>
+// #include <complex.h>
+// #include <com_err.h>
+// #include <cpio.h>
+
+// #include <crypt.h>
+// #include <ctype.h>
+// #include <curses.h>
+// #include <dialog.h>
+// #include <dirent.h>
+// #include <dis-asm.h>
+// #include <dlfcn.h>
+// #include <dlg_colors.h>
+// #include <dlg_config.h>
+// #include <dlg_keys.h>
+// #include <elf.h>
+// #include <endian.h>
+// #include <envz.h>
+// #include <err.h>
+// #include <errno.h>
+// #include <error.h>
+// #include <eti.h>
+// #include <evdns.h>
+// #include <event.h>
+
+// #include <evhttp.h>
+// #include <sys/queue.h>
+// #include <evrpc.h>										// evrpc.h depends on sys/queue.h
+// #include <evutil.h>
+// #include <execinfo.h>
+// #include <expat.h>
+// #include <expat_config.h>
+// #include <expat_external.h>
+// #include <fcntl.h>
+// #include <features.h>
+// #include <fenv.h>
+// #include <fmtmsg.h>
+// #include <fnmatch.h>
+// #include <form.h>
+// #include <fpu_control.h>
+// #include <fstab.h>
+// #include <fts.h>
+// #include <ftw.h>
+
+// #include <gconv.h>
+// #include <getopt.h>
+// #include <gettext-po.h>
+// #include <glob.h>
+// #include <gmp.h>
+// #include <gnu-versions.h>
+// #include <grp.h>
+// #include <gshadow.h>
+// #include <gssapi.h>
+#include <hwloc.h>										// keyword thread (setjmp)
+// #include <iconv.h>
+// #include <idna.h>
+// #include <idn-free.h>
+// #include <idn-int.h>
+// #include <idn-int.h>
+// #include <ifaddrs.h>
+// #include <inttypes.h>
+// #include <jerror.h>
+
+//#include <jmorecfg.h>
+//#include <jpegint.h>
+// #include <jpeglib.h>
+// #include <kdb.h>
+// #include <krb5.h>					// keyword enable
+// #include <langinfo.h>
+// #include <lastlog.h>
+// #include <lber.h>
+// #include <lber_types.h>
+// #include <ldap.h>
+// #include <ldap_cdefs.h>
+// #include <ldap_features.h>
+// #include <ldap_schema.h>
+// #include <ldap_utf8.h>
+// #include <ldif.h>
+// #include <libgen.h>
+// #include <libintl.h>
+// #include <libio.h>
+// #include <libtasn1.h>
+// #include <libudev.h>
+// #include <limits.h>
+// #include <link.h>
+// #include <locale.h>
+// #include <ltdl.h>
+// #include <lzma.h>
+// #include <malloc.h>
+// #include <math.h>
+// #include <mcheck.h>
+// #include <memory.h>
+// #include <menu.h>
+// #include <mntent.h>
+// #include <monetary.h>
+// #include <mqueue.h>
+// #include <ncurses.h>
+// #include <ncurses_dll.h>
+// #include <nc_tparm.h>
+// #include <netdb.h>
+// #include <nl_types.h>
+// #include <nss.h>
+// #include <numa.h>
+// #include <numacompat1.h>
+// #include <numaif.h>
+// #include <obstack.h>
+// #include <panel.h>
+// #include <paths.h>
+// #include <pciaccess.h>
+// #include <pcre.h>
+// //#include <pcreposix.h>			// conflicts with regex.h
+// #include <plugin-api.h>
+// #include <png.h>										// setjmp
+// #include <pngconf.h>									// setjmp
+// #include <poll.h>
+// #include <pr29.h>
+// #include <printf.h>
+// #include <profile.h>
+// #include <pthread.h>									// setjmp
+// #include <pty.h>
+// #include <punycode.h>
+// #include <pwd.h>
+// #define INIT ;						// needed for regex.h
+// #define GETC() 'a'
+// #define PEEKC() 'a'
+// #define UNGETC( c ) ;
+// #define RETURN( ptr ) ;
+// #define ERROR( val ) ;
+// #include <regex.h>
+// //#include <regexp.h>				// GNU C Library no longer implements
+// #include <resolv.h>
+// #include <re_comp.h>
+// #include <sched.h>
+// #include <search.h>
+// #include <semaphore.h>
+// #include <setjmp.h>
+// #include <sgtty.h>
+// #include <shadow.h>
+// #include <signal.h>
+// #include <spawn.h>
+// #include <stab.h>
+// #include <stdatomic.h>
+// #include <stdarg.h>
+// #include <stdbool.h>
+// #include <stdint.h>
+// #include <stddef.h>
+// #include <stdio.h>
+// #include <stdio_ext.h>
+// #include <stdlib.h>
+// #include <string.h>
+// #include <stringprep.h>
+// #include <strings.h>
+// #include <stropts.h>
+// #include <sudo_plugin.h>
+// #include <symcat.h>
+// #include <syscall.h>
+// #include <sysexits.h>
+// #include <syslog.h>
+// #include <tar.h>
+// #include <term.h>
+// #include <termcap.h>
+// #include <termio.h>
+// #include <termios.h>
+// //#include <term_entry.h>
+// #include <tgmath.h>
+// #include <thread_db.h>			// CFA bug
+// #include <tic.h>
+// #include <time.h>
+// #include <tld.h>
+// #include <ttyent.h>
+// #include <turbojpeg.h>
+// #include <ucontext.h>
+// #include <ulimit.h>
+// #include <unctrl.h>
+// #include <unistd.h>
+// #include <ustat.h>
+// #include <utime.h>
+// #include <utmp.h>
+// #include <utmpx.h>
+// #include <wait.h>
+// #include <wchar.h>
+// #include <wctype.h>
+// #include <wordexp.h>
+// #include <xlocale.h>
+// #include <values.h>
+// #include <zconf.h>
+// #include <zlib.h>
+// #include <_G_config.h>
+
+// #include <jpeglib.h>				// after stdlib.h/stdio.h
+// #include <jpegint.h>
+// #include <jmorecfg.h>
+#if 0
+#endif // 0
+
+#else
+
+#define _GNU_SOURCE
+
+#include <setjmp.h>
+
+#endif // 0
+
+#ifdef __CFA__
+} // extern "C"
+#endif // __CFA__
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa includes.c" //
+// End: //
Index: tests/zombies/index.h
===================================================================
--- tests/zombies/index.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/index.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,25 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// index.h -- 
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Wed Mar  2 18:10:46 2016
+// Update Count     : 2
+//
+
+trait index( type T ) {
+	T ?+?( T, T );
+	T ?-?( T, T );
+	const T 0, 1;
+};
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa index.c" //
+// End: //
Index: tests/zombies/io/cat.c
===================================================================
--- tests/zombies/io/cat.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/io/cat.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,78 @@
+/*
+This is a simple "cat" example that uses io_uring in IORING_SETUP_IOPOLL mode.
+It demonstrates the bare minimum needed to use io_uring in polling mode.
+It uses liburing for simplicity.
+*/
+
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <fcntl.h>
+#include <liburing.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+struct io_uring ring;
+
+__attribute__((aligned(1024))) char data[1024];
+
+int main(int argc,  char * argv[]) {
+      if(argc != 2) {
+            printf("usage:   %s FILE - prints file to console.\n", argv[0]);
+            return 1;
+      }
+
+      int fd = open(argv[1], O_DIRECT);
+      if(fd < 0) {
+            printf("Could not open file %s.\n", argv[1]);
+            return 2;
+      }
+
+      /* prep the array */
+      struct iovec iov = { data, 1024 };
+
+      /* init liburing */
+      io_uring_queue_init(256, &ring, IORING_SETUP_IOPOLL);
+
+      /* declare required structs */
+      struct io_uring_sqe * sqe;
+      struct io_uring_cqe * cqe;
+
+      /* get an sqe and fill in a READV operation */
+      sqe = io_uring_get_sqe(&ring);
+      io_uring_prep_readv(sqe, fd, &iov, 1, 0);
+      // io_uring_prep_read(sqe, fd, data, 1024, 0);
+
+      sqe->user_data = (uint64_t)(uintptr_t)data;
+
+      /* tell the kernel we have an sqe ready for consumption */
+      io_uring_submit(&ring);
+
+      /* wait for the sqe to complete */
+      int ret = io_uring_wait_cqe(&ring, &cqe);
+
+      /* read and process cqe event */
+      if(ret == 0) {
+            char * out = (char *)(uintptr_t)cqe->user_data;
+            signed int len = cqe->res;
+            io_uring_cqe_seen(&ring, cqe);
+
+            if(len > 0) {
+                  printf("%.*s", len, out);
+            }
+            else if( len < 0 ) {
+                  fprintf(stderr, "readv/read returned error : %s\n", strerror(-len));
+            }
+      }
+      else {
+            printf("%d\n", ret);
+            io_uring_cqe_seen(&ring, cqe);
+      }
+
+      io_uring_queue_exit(&ring);
+
+      close(fd);
+}
Index: tests/zombies/io/filereader.c
===================================================================
--- tests/zombies/io/filereader.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/io/filereader.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,122 @@
+/*
+This is a file reading example that users io_uring in non-blocking mode.
+It demonstrates the bare minimum needed to use io_uring.
+It also optionally pre-registers the file descriptors (and a pipe, just to show it works).
+It uses liburing for simplicity.
+*/
+
+
+#include <errno.h>
+#include <fcntl.h>
+#include <liburing.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(int argc, char * argv[]) {
+	if(argc != 3 && argc != 4) {
+            printf("usage:   %s FILE TIMES [fixed] - read FILE from disk TIMES times\n", argv[0]);
+            return EXIT_FAILURE;
+      }
+
+	bool fixed = false;
+	if(argc == 4) {
+		fixed = 0 == strcmp(argv[3], "fixed");
+	}
+
+      int times = atoi( argv[2] );
+      if(times <= 0) {
+            printf("Invalid number of times %d (from %s).\n", times, argv[2]);
+            return EXIT_FAILURE;
+      }
+
+      int fd = open(argv[1], 0);
+      if(fd < 0) {
+            printf("Could not open file %s.\n", argv[1]);
+            return EXIT_FAILURE;
+      }
+
+	int rfd = fd;
+
+	/* prep the array */
+      char data[100];
+      struct iovec iov = { data, 100 };
+
+	/* init liburing */
+	struct io_uring ring;
+      io_uring_queue_init(256, &ring, 0);
+
+	int pipefds[2];
+	if(fixed) {
+		int ret = pipe(pipefds);
+		if( ret < 0 ) {
+			printf("Pipe Error : %s\n", strerror( errno ));
+			return EXIT_FAILURE;
+		}
+		rfd = 0;
+		int fds[] = {
+			fd, pipefds[0], pipefds[1]
+		};
+		int cnt = sizeof(fds) / sizeof(fds[0]);
+		printf("Registering %d files as fixed\n", cnt);
+		ret = io_uring_register_files(&ring, fds, cnt);
+		if( ret < 0 ) {
+			printf("Register Error : %s\n", strerror( -ret ));
+			return EXIT_FAILURE;
+		}
+	}
+
+      /* declare required structs */
+	printf("Reading %s(%d) %d times\n", argv[1], fd, times);
+	size_t counter = 0;
+	for(int i = 0; i < times; i++) {
+		/* get an sqe and fill in a READV operation */
+	      struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_readv(sqe, rfd, &iov, 1, 0);
+		if(fixed) {
+			sqe->flags = IOSQE_FIXED_FILE;
+		}
+
+		/* tell the kernel we have an sqe ready for consumption */
+      	io_uring_submit(&ring);
+
+		/* poll the cq and count how much polling we did */
+		while(true) {
+			struct io_uring_cqe * cqe = NULL;
+			/* wait for the sqe to complete */
+			int ret = io_uring_wait_cqe_nr(&ring, &cqe, 0);
+
+			/* read and process cqe event */
+			switch(ret) {
+			case 0:
+				if( cqe->res < 0 ) {
+					printf("Completion Error : %s\n", strerror( -cqe->res ));
+					return EXIT_FAILURE;
+				}
+				io_uring_cqe_seen(&ring, cqe);
+				goto LOOP;
+			case -EAGAIN:
+				counter++;
+				break;
+			default:
+				printf("Wait Error : %s\n", strerror( -ret ));
+				return EXIT_FAILURE;
+			}
+		}
+
+		LOOP:;
+	}
+
+	printf("%zu\n", counter);
+
+      io_uring_queue_exit(&ring);
+
+      close(fd);
+
+	if(fixed) {
+		close(pipefds[0]);
+		close(pipefds[1]);
+	}
+}
Index: tests/zombies/io/simple/client.c
===================================================================
--- tests/zombies/io/simple/client.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/io/simple/client.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,85 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <netdb.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+int main(int argc, char * argv[]) {
+      if(argc != 2) {
+            printf("usage:    %s portnumber\n", argv[0]);
+            exit( EXIT_FAILURE );
+      }
+      int port = atoi(argv[1]);
+      if(port < 1) {
+            printf("Invalid port : %d (from %s)\n", port, argv[1]);
+            exit( EXIT_FAILURE );
+      }
+
+      int sock = socket(AF_INET, SOCK_STREAM, 0);
+      if(sock < 0) {
+            perror( "socket" );
+            exit( EXIT_FAILURE );
+      }
+
+      struct hostent * server = gethostbyname("localhost");
+      if(server == NULL) {
+            perror("localhost not found");
+            exit( EXIT_FAILURE );
+      }
+
+      struct sockaddr_in serv_addr;
+      memset(&serv_addr, 0, sizeof(serv_addr));
+      serv_addr.sin_family = AF_INET;
+      memcpy(&serv_addr.sin_addr.s_addr, server->h_addr, server->h_length);
+      serv_addr.sin_port = htons(port);
+
+      int ret = connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr));
+      if(ret < 0) {
+            perror( "connect" );
+            exit( EXIT_FAILURE );
+      }
+
+      char buffer[256];
+      struct iovec iov = { buffer, 0 };
+      struct msghdr msg;
+      msg.msg_name = NULL;
+      msg.msg_namelen = 0;
+      msg.msg_control = NULL;
+      msg.msg_controllen = 0;
+      msg.msg_iov = &iov;
+      msg.msg_iovlen = 1;
+      msg.msg_flags = 0;
+
+
+      int rd;
+      while(0 != (rd = read(STDIN_FILENO, buffer, 256))) {
+            if(rd < 0) {
+                  perror( "read" );
+                  exit( EXIT_FAILURE );
+            }
+
+            iov.iov_len = rd;
+            int sent = sendmsg(sock, &msg, 0);
+            if( sent < 0 ) {
+                  perror( "read" );
+                  exit( EXIT_FAILURE );
+            }
+
+            if(sent != rd) {
+                  printf("Expected to send %d bytes, sent %d\n", rd, sent);
+                  exit( EXIT_FAILURE );
+            }
+      }
+
+      ret = close(sock);
+      if(ret < 0) {
+            perror( "close" );
+            exit( EXIT_FAILURE );
+      }
+
+      exit( EXIT_SUCCESS );
+}
Index: tests/zombies/io/simple/server.c
===================================================================
--- tests/zombies/io/simple/server.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/io/simple/server.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,143 @@
+/*
+This is a simple server that users io_uring in blocking mode.
+It demonstrates the bare minimum needed to use io_uring.
+It uses liburing for simplicity.
+*/
+
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <liburing.h>
+
+struct io_uring ring;
+
+char data[256];
+struct iovec iov = { data, 256 };
+struct msghdr msg = { (void *)"", 0, &iov, 1, NULL, 0, 0 };
+static void async_read(int sock) {
+	/* get an sqe and fill in a READ operation */
+      struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
+      io_uring_prep_recvmsg(sqe, sock, &msg, 0);
+      sqe->user_data = 0;
+
+      /* tell the kernel we have an sqe ready for consumption */
+      int ret = io_uring_submit(&ring);
+      assert(ret == 1);
+}
+
+int main(int argc, char *argv[]) {
+	if(argc != 2) {
+            printf("usage:    %s portnumber\n", argv[0]);
+            exit( EXIT_FAILURE );
+      }
+      int port = atoi(argv[1]);
+      if(port < 1) {
+            printf("Invalid port : %d (from %s)\n", port, argv[1]);
+            exit( EXIT_FAILURE );
+      }
+
+	int sock = socket(AF_INET, SOCK_STREAM, 0);
+	if(sock < 0) {
+		perror( "socket" );
+		exit( EXIT_FAILURE );
+	}
+
+	struct sockaddr_in serv_addr;
+      memset(&serv_addr, 0, sizeof(serv_addr));
+      serv_addr.sin_family = AF_INET;
+      serv_addr.sin_addr.s_addr = INADDR_ANY;
+      serv_addr.sin_port = htons(port);
+
+	int ret = bind(sock, (struct sockaddr *) &serv_addr, sizeof(serv_addr));
+	if(ret < 0) {
+		perror( "bind" );
+		exit( EXIT_FAILURE );
+	}
+
+
+     	listen(sock,1);
+
+	struct sockaddr_in cli_addr;
+     	__socklen_t clilen = sizeof(cli_addr);
+	int newsock = accept(sock, (struct sockaddr *) &cli_addr, &clilen);
+     	if (newsock < 0) {
+		perror( "accept" );
+		exit( EXIT_FAILURE );
+	}
+
+	io_uring_queue_init( 16, &ring, 0 );
+
+	async_read( newsock );
+
+	while(1) {
+		struct io_uring_cqe * cqe;
+		struct __kernel_timespec ts = { 2, 0 };
+		// int ret = io_uring_wait_cqes( &ring, &cqe, 1, &ts, NULL); // Requires Linux 5.4
+		int ret = io_uring_wait_cqe( &ring, &cqe );
+
+		if( ret < 0 ) {
+                  printf( "Main Loop Error : %s\n", strerror(-ret) );
+			close( sock );
+                  exit( EXIT_FAILURE );
+            }
+
+		switch(cqe->user_data) {
+                  // Read completed
+                  case 0:
+                        // If it is the end of file we are done
+                        if( cqe->res == 0 ) {
+                              goto END;
+                        }
+
+				if( cqe->res < 0 ) {
+					perror( "Main Loop Error" );
+					close( sock );
+					exit( EXIT_FAILURE );
+				}
+
+				printf("'%.*s'\n", cqe->res, data);
+
+				async_read( newsock );
+
+                        // otherwise prepare a new read
+                        break;
+                  // Wait timed out, time to print
+			// Requires Linux 5.4
+                  case LIBURING_UDATA_TIMEOUT:
+                  	printf(".");
+                        break;
+                  // Problem
+                  default:
+                        printf("Unexpected user data : %llu", cqe->user_data);
+                        exit( EXIT_FAILURE );
+            }
+
+     		io_uring_cqe_seen( &ring, cqe );
+	}
+END:
+
+	io_uring_queue_exit( &ring );
+
+	ret = close(newsock);
+      if(ret < 0) {
+            perror( "close new" );
+            exit( EXIT_FAILURE );
+      }
+
+	ret = close(sock);
+      if(ret < 0) {
+            perror( "close old" );
+            exit( EXIT_FAILURE );
+      }
+
+	return 0;
+}
Index: tests/zombies/io/simple/server.cfa
===================================================================
--- tests/zombies/io/simple/server.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/io/simple/server.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,137 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <unistd.h>
+
+extern "C" {
+	#include <sys/types.h>
+	#include <sys/socket.h>
+	#include <netinet/in.h>
+}
+
+#include <time.hfa>
+#include <thread.hfa>
+
+//----------
+monitor Printer {};
+
+void heartbeat( Printer & mutex ) {
+	fprintf(stderr, ".");
+}
+
+void message( Printer & mutex, char * msg, size_t len ) {
+	fprintf(stderr, "'%.*s'", len, msg);
+}
+
+void status( Printer & mutex, const char * st ) {
+	fprintf(stderr, "%s\n", st);
+}
+
+void error( Printer & mutex, const char * msg, int error) {
+	fprintf(stderr, "%s - %s\n", msg, strerror(error));
+}
+
+Printer printer;
+
+//----------
+thread HeartBeat {};
+
+void ^?{}( HeartBeat & mutex ) {}
+
+void main( HeartBeat & this ) {
+	while(true) {
+		waitfor( ^?{} : this ) { break; }
+		or else{
+			sleep( 5`s );
+			heartbeat( printer );
+		}
+	}
+}
+
+//----------
+extern ssize_t cfa_recvmsg(int sockfd, struct msghdr *msg, int flags);
+extern int cfa_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags);
+extern int cfa_close(int fd);
+
+//----------
+thread Server { int port; };
+void main( Server & this ) {
+	char data[256];
+	struct iovec iov = { data, 256 };
+	struct msghdr msg = { "", 0, &iov, 1, NULL, 0, 0 };
+
+	int sock = socket(AF_INET, SOCK_STREAM, 0);
+	if(sock < 0) {
+		error( printer, "socket", -sock);
+		exit( EXIT_FAILURE );
+	}
+
+	status( printer, "Socket created" );
+
+	struct sockaddr_in serv_addr;
+      memset(&serv_addr, 0, sizeof(serv_addr));
+      serv_addr.sin_family = AF_INET;
+      serv_addr.sin_addr.s_addr = INADDR_ANY;
+      serv_addr.sin_port = htons(this.port);
+
+	int ret = bind(sock, (struct sockaddr *) &serv_addr, sizeof(serv_addr));
+	if(ret < 0) {
+		error( printer, "bind", -ret);
+		exit( EXIT_FAILURE );
+	}
+
+	status( printer, "Socket bound" );
+
+     	listen(sock,1);
+
+	struct sockaddr_in cli_addr;
+     	__socklen_t clilen = sizeof(cli_addr);
+	int newsock = cfa_accept4(sock, (struct sockaddr *) &cli_addr, &clilen, 0);
+     	if (newsock < 0) {
+		error( printer, "accept", -newsock);
+		exit( EXIT_FAILURE );
+	}
+
+	status( printer, "Socket accepted, looping" );
+
+	while(1) {
+		int res = cfa_recvmsg(newsock, &msg, 0);
+		if(res == 0) break;
+		if(res < 0) {
+			error( printer, "recvmsg", -res);
+			exit( EXIT_FAILURE );
+		}
+
+		message(printer, data, res);
+	}
+
+	ret = cfa_close(newsock);
+      if(ret < 0) {
+            error( printer, "close new", -ret);
+            exit( EXIT_FAILURE );
+      }
+
+	ret = cfa_close(sock);
+      if(ret < 0) {
+            error( printer, "close old", -ret);
+            exit( EXIT_FAILURE );
+      }
+}
+
+//----------
+int main(int argc, char * argv []) {
+	if(argc != 2) {
+            printf("usage:    %s portnumber\n", argv[0]);
+            exit( EXIT_FAILURE );
+      }
+      int port = atoi(argv[1]);
+      if(port < 1) {
+            printf("Invalid port : %d (from %s)\n", port, argv[1]);
+            exit( EXIT_FAILURE );
+      }
+
+	HeartBeat heartbeat;
+	Server server = { port };
+	// while(true);
+}
Index: tests/zombies/io/simple/server_epoll.c
===================================================================
--- tests/zombies/io/simple/server_epoll.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/io/simple/server_epoll.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,177 @@
+/*
+Similar to the server in servier.c, this is a simple server
+that instead uses epoll to block.
+It opens the door to have several polling user-thread per cluster.
+It uses liburing for simplicity.
+*/
+
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <unistd.h>
+
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <liburing.h>
+
+#define MAX_EVENTS 10
+struct epoll_event ev, events[MAX_EVENTS];
+
+struct io_uring ring;
+
+char data[256];
+struct iovec iov = { data, 256 };
+struct msghdr msg = { (void *)"", 0, &iov, 1, NULL, 0, 0 };
+static void async_read(int sock) {
+	/* get an sqe and fill in a READ operation */
+      struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
+      io_uring_prep_recvmsg(sqe, sock, &msg, 0);
+      sqe->user_data = 0;
+
+      /* tell the kernel we have an sqe ready for consumption */
+      int ret = io_uring_submit(&ring);
+      assert(ret == 1);
+}
+
+int main(int argc, char *argv[]) {
+	if(argc != 2) {
+            printf("usage:    %s portnumber\n", argv[0]);
+            exit( EXIT_FAILURE );
+      }
+      int port = atoi(argv[1]);
+      if(port < 1) {
+            printf("Invalid port : %d (from %s)\n", port, argv[1]);
+            exit( EXIT_FAILURE );
+      }
+
+	int sock = socket(AF_INET, SOCK_STREAM, 0);
+	if(sock < 0) {
+		perror( "socket" );
+		exit( EXIT_FAILURE );
+	}
+
+	struct sockaddr_in serv_addr;
+      memset(&serv_addr, 0, sizeof(serv_addr));
+      serv_addr.sin_family = AF_INET;
+      serv_addr.sin_addr.s_addr = INADDR_ANY;
+      serv_addr.sin_port = htons(port);
+
+	int ret = bind(sock, (struct sockaddr *) &serv_addr, sizeof(serv_addr));
+	if(ret < 0) {
+		perror( "bind" );
+		exit( EXIT_FAILURE );
+	}
+
+
+     	listen(sock,1);
+
+	struct sockaddr_in cli_addr;
+     	__socklen_t clilen = sizeof(cli_addr);
+	int newsock = accept(sock, (struct sockaddr *) &cli_addr, &clilen);
+     	if (newsock < 0) {
+		perror( "accept" );
+		exit( EXIT_FAILURE );
+	}
+
+	io_uring_queue_init( 16, &ring, 0 );
+
+      int epollfd = epoll_create1(0);
+      if (epollfd == -1) {
+            perror("epoll_create1");
+            exit(EXIT_FAILURE);
+      }
+
+      ev.events = EPOLLIN | EPOLLONESHOT;
+      ev.data.u64 = (uint64_t)&ring;
+      if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ring.ring_fd, &ev) == -1) {
+            perror("epoll_ctl: first");
+            exit(EXIT_FAILURE);
+      }
+
+
+	async_read( newsock );
+
+	while(1) {
+            BLOCK:
+            int nfds = epoll_wait(epollfd, events, MAX_EVENTS, -1);
+            if (nfds == -1) {
+                  perror("epoll_wait");
+                  exit(EXIT_FAILURE);
+            }
+
+
+		while(1) {
+                  struct io_uring_cqe * cqe;
+                  int ret = io_uring_peek_cqe( &ring, &cqe );
+
+                  if( ret < 0 ) {
+                        if(-ret == EAGAIN) {
+                              if (epoll_ctl(epollfd, EPOLL_CTL_MOD, ring.ring_fd, &ev) == -1) {
+                                    perror("epoll_ctl: loop");
+                                    exit(EXIT_FAILURE);
+                              }
+                              goto BLOCK;
+                        }
+                        printf( "Main Loop Error : %s\n", strerror(-ret) );
+                        close( sock );
+                        exit( EXIT_FAILURE );
+                  }
+
+                  switch(cqe->user_data) {
+                        // Read completed
+                        case 0:
+                              // If it is the end of file we are done
+                              if( cqe->res == 0 ) {
+                                    goto END;
+                              }
+
+                              if( cqe->res < 0 ) {
+                                    perror( "Main Loop Error" );
+                                    close( sock );
+                                    exit( EXIT_FAILURE );
+                              }
+
+                              printf("'%.*s'\n", cqe->res, data);
+
+                              async_read( newsock );
+
+                              // otherwise prepare a new read
+                              break;
+                        // Wait timed out, time to print
+                        // Requires Linux 5.4
+                        case LIBURING_UDATA_TIMEOUT:
+                              printf(".");
+                              break;
+                        // Problem
+                        default:
+                              printf("Unexpected user data : %llu", cqe->user_data);
+                              exit( EXIT_FAILURE );
+                  }
+
+                  io_uring_cqe_seen( &ring, cqe );
+            }
+	}
+END:
+
+	io_uring_queue_exit( &ring );
+
+	ret = close(newsock);
+      if(ret < 0) {
+            perror( "close new" );
+            exit( EXIT_FAILURE );
+      }
+
+	ret = close(sock);
+      if(ret < 0) {
+            perror( "close old" );
+            exit( EXIT_FAILURE );
+      }
+
+	return 0;
+}
Index: tests/zombies/io_uring.txt
===================================================================
--- tests/zombies/io_uring.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/io_uring.txt	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,1 @@
+Hello World!
Index: tests/zombies/it_out.c
===================================================================
--- tests/zombies/it_out.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/it_out.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,69 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// it_out.c -- 
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Tue Mar  8 22:14:39 2016
+// Update Count     : 8
+//
+
+typedef unsigned long streamsize_type;
+
+trait ostream( dtype os_type ) {
+	os_type *write( os_type *, const char *, streamsize_type );
+	int fail( os_type * );
+};
+
+trait writeable( otype T ) {
+	forall( dtype os_type | ostream( os_type ) ) os_type * ?<<?( os_type *, T );
+};
+
+forall( dtype os_type | ostream( os_type ) ) os_type * ?<<?( os_type *, char );
+forall( dtype os_type | ostream( os_type ) ) os_type * ?<<?( os_type *, int );
+forall( dtype os_type | ostream( os_type ) ) os_type * ?<<?( os_type *, const char * );
+
+trait istream( dtype is_type ) {
+	is_type *read( is_type *, char *, streamsize_type );
+	is_type *unread( is_type *, char );
+	int fail( is_type * );
+	int eof( is_type * );
+};
+
+trait readable( otype T ) {
+	forall( dtype is_type | istream( is_type ) ) is_type * ?<<?( is_type *, T );
+};
+
+forall( dtype is_type | istream( is_type ) ) is_type * ?>>?( is_type *, char* );
+forall( dtype is_type | istream( is_type ) ) is_type * ?>>?( is_type *, int* );
+
+trait iterator( otype iterator_type, otype elt_type ) {
+	iterator_type ?++( iterator_type* );
+	iterator_type ++?( iterator_type* );
+	int ?==?( iterator_type, iterator_type );
+	int ?!=?( iterator_type, iterator_type );
+
+	lvalue elt_type *?( iterator_type );
+};
+
+forall( otype elt_type | writeable( elt_type ),
+		otype iterator_type | iterator( iterator_type, elt_type ),
+		dtype os_type | ostream( os_type ) )
+void write_all( iterator_type begin, iterator_type end, os_type *os );
+
+forall( otype elt_type | writeable( elt_type ),
+		otype iterator_type | iterator( iterator_type, elt_type ),
+		dtype os_type | ostream( os_type ) )
+void write_all( elt_type begin, iterator_type end, os_type *os ) {
+	os << begin;
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa it_out.c" //
+// End: //
Index: tests/zombies/multicore.c
===================================================================
--- tests/zombies/multicore.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/multicore.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,25 @@
+#include <kernel.hfa>
+#include <thread.hfa>
+
+struct MyThread { thread_desc __thrd; };
+
+DECL_THREAD(MyThread);
+
+void ?{}( MyThread * this ) {}
+
+void main( MyThread* this ) {
+	for(int i = 0; i < 1000000; i++) {
+		yield();
+	}
+}
+
+int main(int argc, char* argv[]) {
+	// sout | "User main begin";
+	{
+		processor p;
+		{
+			scoped(MyThread) f[4];
+		}
+	}
+	// sout | "User main end";
+}
Index: tests/zombies/new.c
===================================================================
--- tests/zombies/new.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/new.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,32 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// new.c -- 
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Tue Mar  8 22:13:20 2016
+// Update Count     : 4
+//
+
+forall( otype T )
+void f( T *t ) {
+	t--;
+	*t;
+	++t;
+	t += 2;
+	t + 2;
+	--t;
+	t -= 2;
+	t - 4;
+	t[7];
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa new.c" //
+// End: //
Index: tests/zombies/poly-bench.c
===================================================================
--- tests/zombies/poly-bench.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/poly-bench.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,207 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// poly-bench.cc -- 
+//
+// Author           : Aaron Moss
+// Created On       : Sat May 16 07:26:30 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Wed May 27 18:25:19 2015
+// Update Count     : 5
+//
+
+extern "C" {
+#include <stdio.h>
+//#include "my_time.h"
+}
+
+#define N 200000000
+
+struct ipoint {
+	int x;
+	int y;
+};
+
+struct ipoint ?+?(struct ipoint a, struct ipoint b) {
+	struct ipoint r;
+	r.x = a.x + b.x;
+	r.y = a.y + b.y;
+	return r;
+}
+
+struct ipoint ?-?(struct ipoint a, struct ipoint b) {
+	struct ipoint r;
+	r.x = a.x - b.x;
+	r.y = a.y - b.y;
+	return r;
+}
+
+struct ipoint ?*?(struct ipoint a, struct ipoint b) {
+	struct ipoint r;
+	r.x = a.x * b.x;
+	r.y = a.y * b.y;
+	return r;
+}
+
+struct dpoint {
+	double x;
+	double y;
+};
+
+struct dpoint ?+?(struct dpoint a, struct dpoint b) {
+	struct dpoint r;
+	r.x = a.x + b.x;
+	r.y = a.y + b.y;
+	return r;
+}
+
+struct dpoint ?-?(struct dpoint a, struct dpoint b) {
+	struct dpoint r;
+	r.x = a.x - b.x;
+	r.y = a.y - b.y;
+	return r;
+}
+
+struct dpoint ?*?(struct dpoint a, struct dpoint b) {
+	struct dpoint r;
+	r.x = a.x * b.x;
+	r.y = a.y * b.y;
+	return r;
+}
+
+int a2b2_mono_int(int a, int b) {
+	return (a - b)*(a + b);
+}
+
+double a2b2_mono_double(double a, double b) {
+	return (a - b)*(a + b);
+}
+
+struct ipoint a2b2_mono_ipoint(struct ipoint a, struct ipoint b) {
+	return (a - b)*(a + b);
+}
+
+struct dpoint a2b2_mono_dpoint(struct dpoint a, struct dpoint b) {
+	return (a - b)*(a + b);
+}
+
+forall(type T | { T ?+?(T,T); T ?-?(T,T); T ?*?(T,T); })
+T a2b2_poly(T a, T b) {
+	return (a - b)*(a + b);
+}
+
+typedef int clock_t;
+long ms_between(clock_t start, clock_t end) {
+//	return (end - start) / (CLOCKS_PER_SEC / 1000);
+	return 0;
+}
+int clock() { return 3; }
+
+int main(int argc, char** argv) {
+	clock_t start, end;
+	int i;
+	
+	int a, b;
+	double c, d;
+	struct ipoint p, q;
+	struct dpoint r, s;
+	
+	printf("\n## a^2-b^2 ##\n");
+	
+	a = 5, b = 3;
+	start = clock();
+	for (i = 0; i < N/2; ++i) {
+		a = a2b2_mono_int(a, b);
+		b = a2b2_mono_int(b, a);
+	}
+	end = clock();
+	printf("mono_int:   %7ld  [%d,%d]\n", ms_between(start, end), a, b);
+	
+	a = 5, b = 3;
+	start = clock();
+	for (i = 0; i < N/2; ++i) {
+		a = a2b2_poly(a, b);
+		b = a2b2_poly(b, a);
+	}
+	end = clock();
+	printf("poly_int:   %7ld  [%d,%d]\n", ms_between(start, end), a, b);
+	
+/*	{
+	a = 5, b = 3;
+	// below doesn't actually work; a2b2_poly isn't actually assigned, just declared
+	* [int] (int, int) a2b2_poly = a2b2_mono_int;
+	start = clock();
+	for (i = 0; i < N/2; ++i) {
+//			printf("\t[%d,%d]\n", a, b);
+a = a2b2_poly(a, b);
+//			printf("\t[%d,%d]\n", a, b);
+b = a2b2_poly(b, a);
+}
+end = clock();
+printf("spec_int:   %7ld  [%d,%d]\n", ms_between(start, end), a, b);
+}
+*/	
+	c = 5.0, d = 3.0;
+	start = clock();
+	for (i = 0; i < N/2; ++i) {
+		c = a2b2_mono_double(c, d);
+		d = a2b2_mono_double(d, c);
+	}
+	end = clock();
+	printf("mono_double:%7ld  [%f,%f]\n", ms_between(start, end), c, d);
+		
+	c = 5.0, d = 3.0;
+	start = clock();
+	for (i = 0; i < N/2; ++i) {
+		c = a2b2_poly(c, d);
+		d = a2b2_poly(d, c);
+	}
+	end = clock();
+	printf("poly_double:%7ld  [%f,%f]\n", ms_between(start, end), c, d);
+	
+	p.x = 5, p.y = 5, q.x = 3, q.y = 3;
+	start = clock();
+	for (i = 0; i < N/2; ++i) {
+		p = a2b2_mono_ipoint(p, q);
+		q = a2b2_mono_ipoint(q, p);
+	}
+	end = clock();
+	printf("mono_ipoint:%7ld  [(%d,%d),(%d,%d)]\n", ms_between(start, end), p.x, p.y, q.x, q.y);
+		
+	p.x = 5, p.y = 5, q.x = 3, q.y = 3;
+	start = clock();
+	for (i = 0; i < N/2; ++i) {
+		p = a2b2_poly(p, q);
+		q = a2b2_poly(q, p);
+	}
+	end = clock();
+	printf("poly_ipoint:%7ld  [(%d,%d),(%d,%d)]\n", ms_between(start, end), p.x, p.y, q.x, q.y);
+	
+	r.x = 5.0, r.y = 5.0, s.x = 3.0, s.y = 3.0;
+	start = clock();
+	for (i = 0; i < N/2; ++i) {
+		r = a2b2_mono_dpoint(r, s);
+		s = a2b2_mono_dpoint(s, r);
+	}
+	end = clock();
+	printf("mono_dpoint:%7ld  [(%f,%f),(%f,%f)]\n", ms_between(start, end), r.x, r.y, s.x, s.y);
+		
+	r.x = 5.0, r.y = 5.0, s.x = 3.0, s.y = 3.0;
+	start = clock();
+	for (i = 0; i < N/2; ++i) {
+		r = a2b2_poly(r, s);
+		s = a2b2_poly(s, r);
+	}
+	end = clock();
+	printf("poly_dpoint:%7ld  [(%f,%f),(%f,%f)]\n", ms_between(start, end), r.x, r.y, s.x, s.y);
+
+	return 0;
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa poly-bench.c" //
+// End: //
Index: tests/zombies/prolog.c
===================================================================
--- tests/zombies/prolog.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/prolog.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,50 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// prolog.c --
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Tue Dec 11 23:27:19 2018
+// Update Count     : 6
+//
+
+#include <fstream.hfa>
+
+void printResult( int x ) { sout | "int"; }
+void printResult( double x ) { sout | "double"; }
+void printResult( char * x ) { sout | "char*"; }
+
+void is_arithmetic( int x ) {}
+void is_arithmetic( double x ) {}
+
+void is_integer( int x ) {}
+
+trait ArithmeticType( otype T ) {
+	void is_arithmetic( T );
+};
+
+trait IntegralType( otype T | ArithmeticType( T ) ) {
+	void is_integer( T );
+};
+
+forall( otype T | IntegralType( T ) | { void printResult( T ); } )
+void hornclause( T param ) {
+	printResult( param );
+}
+
+int main() {
+	int x;
+	double x;
+	char * x;
+	hornclause( x );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa prolog.c" //
+// End: //
Index: tests/zombies/quad.c
===================================================================
--- tests/zombies/quad.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/quad.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,36 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// quad.c --
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Tue Dec 11 23:26:58 2018
+// Update Count     : 9
+//
+
+#include <fstream.hfa>
+
+forall( otype T | { T ?*?( T, T ); } )
+T square( T t ) {
+	return t * t;
+}
+
+forall( otype U | { U square( U ); } )
+U quad( U u ) {
+	return square( square( u ) );
+}
+
+int main() {
+	int N = 2;
+	sout | "result of quad of" | N | "is" | quad( N );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa quad.c" //
+// End: //
Index: tests/zombies/s.c
===================================================================
--- tests/zombies/s.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/s.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,28 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// s.c -- 
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Sun Jan  3 22:38:45 2016
+// Update Count     : 3
+//
+
+//int ?!=?( int, int );
+
+void f() {
+	int a;
+	a ? 4 : 5;
+	1 ? 4 : 5;
+	0 ? 4 : 5;
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa s.c" //
+// End: //
Index: tests/zombies/simplePoly.c
===================================================================
--- tests/zombies/simplePoly.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/simplePoly.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,34 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// simplePoly.c -- 
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Tue Mar  8 22:06:41 2016
+// Update Count     : 3
+//
+
+forall( otype T, otype U | { T f( T, U ); } )
+T q( T t, U u ) {
+	return f( t, u );
+//  return t;
+}
+
+int f( int, double* );
+
+void g( void ) {
+	int y;
+	double x;
+//  if ( y )
+	q( 3, &x );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa simplePoly.c" //
+// End: //
Index: tests/zombies/simpler.c
===================================================================
--- tests/zombies/simpler.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/simpler.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,25 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// simpler.c -- 
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Tue Mar  8 22:06:30 2016
+// Update Count     : 2
+//
+
+forall( otype T ) T id( T, T );
+
+int main() {
+	id( 0, 7 );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa simpler.c" //
+// End: //
Index: tests/zombies/specialize.c
===================================================================
--- tests/zombies/specialize.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/specialize.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,59 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// specialize.c -- 
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Tue Mar  8 22:06:17 2016
+// Update Count     : 3
+//
+
+/// void f( const int * );
+/// 
+/// void m()
+/// {
+///   f( 0 );
+/// }
+
+/// forall( dtype T ) T* f( T* );
+/// void g( int* (*)(int*) );
+/// 
+/// int m() {
+///   g( f );
+/// }
+
+/// void f1( void (*q)( forall( dtype U ) U* (*p)( U* ) ) );
+/// void g1( int* (*)(int*) );
+/// 
+/// int m1() {
+///   f1( g1 );
+/// }
+
+extern "C" {
+	int printf( const char*, ... );
+}
+
+forall( otype T ) T f( T t )
+{
+	printf( "in f; sizeof T is %d\n", sizeof( T ) );
+	return t;
+}
+
+void g( int (*p)(int) )
+{
+	printf( "g: f(7) returned %d\n", f(7) );
+}
+
+int main() {
+	g( f );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa specialize.c" //
+// End: //
Index: tests/zombies/square.c
===================================================================
--- tests/zombies/square.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/square.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,71 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// square.c --
+//
+// Author           : Richard C. Bilson
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Tue Dec 11 23:28:24 2018
+// Update Count     : 28
+//
+
+#include <fstream.hfa>
+
+forall( otype T | { T ?*?( T, T ); } )
+T square( T t ) {
+	return t * t;
+} // square
+
+int main() {
+#if 0
+	sout | "result of squaring 9 is ";
+
+	// char does not have multiplication.
+	char ?*?( char a1, char a2 ) {
+		return (char)((int)a1 * (int)a2);
+	} // ?*?
+	char c = 9;
+	sout | "char\t\t\t" | square( c );
+
+	sout | square( s );
+#endif
+	short s = 9;
+	square( s );
+#if 0
+	signed int i = 9;
+	sout | "signed int\t\t" | square( i );
+
+	unsigned int ui = 9;
+	sout | "unsigned int\t\t" | square( ui );
+
+	long int li = 9;
+	sout | "signed long int\t\t" | square( li );
+
+	unsigned long int uli = 9;
+	sout | "unsigned long int\t" | square( uli );
+
+	signed long long int lli = 9;
+	sout | "signed long long int\t" | square( lli );
+
+	unsigned long long int ulli = 9;
+	sout | "unsigned long long int\t" | square( ulli );
+
+	float f = 9.0;
+	sout | "float\t\t\t" | square( f );
+
+	double d = 9.0;
+	sout | "double\t\t\t" | square( d );
+
+	long double ld = 9.0;
+	sout | "long double\t\t" | square( ld );
+#endif
+} // main
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa square.c" //
+// End: //
Index: tests/zombies/structMember.cfa
===================================================================
--- tests/zombies/structMember.cfa	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tests/zombies/structMember.cfa	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -53,4 +53,5 @@
 // C useless declarations
 
+#ifdef ERROR
 	int;
 	TD;
@@ -70,4 +71,5 @@
 	W(int);
 	W(int).X;
+#endif // ERROR
 };
 
Index: tests/zombies/twice.c
===================================================================
--- tests/zombies/twice.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/twice.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,36 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// twice.c --
+//
+// Author           : Peter A. Buhr
+// Created On       : Wed May 27 17:56:53 2015
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Tue Dec 11 23:28:08 2018
+// Update Count     : 47
+//
+
+#include <fstream.hfa>
+
+forall( otype T | { T ?+?( T, T ); } )
+T twice( const T t ) {
+	return t + t;
+}
+
+// char does not have addition
+char ?+?( char op1, char op2 ) { return (int)op1 + op2; } // cast forces integer addition or recursion
+
+// signed char does not have addition
+signed char ?+?( signed char op1, signed char op2 ) { return (int)op1 + op2; } // cast forces integer addition or recursion
+
+int main( void ) {
+	sout | twice( ' ' ) | ' ' | twice( (signed char)0 ) | twice( (int)1 ) | twice( 3.2 );
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "cfa twice.c" //
+// End: //
Index: tests/zombies/wrapper/.gitignore
===================================================================
--- tests/zombies/wrapper/.gitignore	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/wrapper/.gitignore	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,3 @@
+.tags
+build/
+test
Index: tests/zombies/wrapper/premake4.lua
===================================================================
--- tests/zombies/wrapper/premake4.lua	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/wrapper/premake4.lua	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,79 @@
+#!lua
+
+-- Additional Linux libs: "X11", "Xxf86vm", "Xi", "Xrandr", "stdc++"
+
+includeDirList = {
+	"src/",
+	"../",
+}
+
+libDirectories = {
+
+}
+
+
+if os.get() == "linux" then
+    linkLibs = {
+	"bsd"
+    }
+end
+
+-- Build Options:
+buildOptions = {"\n  CC = cfa\n  CXX = cfa"}
+
+solution "strings"
+	configurations  { "debug", "release",
+				"cproc-debug", "cproc-release",
+				"cfa-debug", "cfa-release" }
+
+	project "test"
+		kind "ConsoleApp"
+		language "C"
+		location "build"
+		objdir "build"
+		targetdir "."
+		buildoptions (buildOptions)
+		defines {	"bool=_Bool",
+				"\"true=((_Bool)(const signed int)1)\"",
+				"\"false=((_Bool)(const signed int)0)\"",
+				"_GNU_SOURCE",
+				"__cforall",
+				"USE_BSD_LIB"
+			}
+		libdirs (libDirectories)
+		links (linkLibs)
+		linkoptions (linkOptionList)
+		includedirs (includeDirList)
+		files { "src/**.c" }
+
+	configuration "debug"
+		defines { "DEBUG" }
+		flags { "Symbols" }
+
+	configuration "release"
+		defines { "NDEBUG" }
+		flags { "Optimize" }
+
+	configuration "cproc-debug"
+		buildoptions ({"-E"})
+		linkoptions ({"-E"})
+	      defines { "DEBUG" }
+	      flags { "Symbols" }
+
+	configuration "cproc-release"
+		buildoptions ({"-E"})
+		linkoptions ({"-E"})
+	      defines { "DEBUG" }
+	      flags { "Symbols" }
+
+	configuration "cfa-debug"
+		linkoptions ({"-E"})
+		files { "build/cproc-debug/*.o" }
+	      defines { "DEBUG" }
+	      flags { "Symbols" }
+
+	configuration "cfa-release"
+		linkoptions ({"-E"})
+		files { "build/cproc-debug/*.o" }
+	      defines { "DEBUG" }
+	      flags { "Symbols" }
Index: tests/zombies/wrapper/src/main.c
===================================================================
--- tests/zombies/wrapper/src/main.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/wrapper/src/main.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,16 @@
+#include "pointer.h"
+
+wrapper_t make_copy(wrapper_t copy) {
+	return copy;
+}
+
+int main(int argc, char const *argv[]) {
+	wrapper_t p = wrap(6);
+	sout | nl | "test started";
+	wrapper_t p2 = p;
+	clear(&p);
+	p = p2;
+	wrapper_t p3 = make_copy(p2);
+	sout | nl | "test ended";
+	return 0;
+}
Index: tests/zombies/wrapper/src/pointer.h
===================================================================
--- tests/zombies/wrapper/src/pointer.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/wrapper/src/pointer.h	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,122 @@
+#pragma once
+
+#include <fstream.hfa>
+#include <stddef.h>
+#include <stdlib.hfa>
+
+//==============================================================================
+// type safe malloc / free
+
+forall(otype T)
+T* new()
+{
+	T* p = malloc();
+	p{};
+	return p;
+}
+
+forall(otype T)
+void delete(T* p)
+{
+	^p{};
+	free(p);
+}
+
+//==============================================================================
+// ref counter content
+
+struct content_t
+{
+	int value;
+	size_t count;
+};
+
+void ?{}(content_t* this)
+{
+	sout | "Constructing content";
+	this->count = 0;
+}
+
+void ^?{}(content_t* this)
+{
+	sout | "Destroying content";
+}
+
+//==============================================================================
+// ref counter wrapper
+
+struct wrapper_t
+{
+	content_t* ptr;
+};
+
+void ?{}(wrapper_t* this)
+{
+	sout | "Constructing empty ref pointer" | nl;
+	this->ptr = NULL;
+}
+
+void ?{}(wrapper_t* this, wrapper_t rhs)
+{
+	sout | "Constructing ref pointer from copy";
+	this->ptr = rhs.ptr;
+	this->ptr->count++;
+	sout | "Reference is " | this->ptr->count | nl;
+}
+
+void ^?{}(wrapper_t* this)
+{
+	if(this->ptr)
+	{
+		sout | "Destroying ref pointer";
+		this->ptr->count--;
+		sout | "Reference is " | this->ptr->count | nl;
+		if(!this->ptr->count) delete(this->ptr);
+	}
+	else
+	{
+		sout | "Destroying empty ref pointer" | nl;
+	}
+}
+
+wrapper_t ?=?(wrapper_t* this, wrapper_t rhs)
+{
+	sout | "Setting ref pointer";
+	if(this->ptr)
+	{
+		this->ptr->count--;
+		sout | "Reference is " | this->ptr->count | nl;
+		if(!this->ptr->count) delete(this->ptr);
+	}
+	this->ptr = rhs.ptr;
+	this->ptr->count++;
+	sout | "Reference is " | this->ptr->count | nl;
+}
+
+void set(wrapper_t* this, content_t* c)
+{
+	this->ptr = c;
+	this->ptr->count++;
+	sout | "Setting ref pointer";
+	sout | "Reference is " | this->ptr->count | nl;
+}
+
+void clear(wrapper_t* this)
+{
+	sout | "Clearing ref pointer";
+	this->ptr->count--;
+	sout | "Reference is " | this->ptr->count | nl;
+	if(!this->ptr->count) delete(this->ptr);
+	this->ptr = NULL;
+}
+
+
+wrapper_t wrap(int val)
+{
+	wrapper_t w;
+	content_t* c = malloc();
+	c{};
+	c->value = val;
+	set(&w, c);
+	return w;
+}
Index: tests/zombies/zero_one.c
===================================================================
--- tests/zombies/zero_one.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
+++ tests/zombies/zero_one.c	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -0,0 +1,24 @@
+#include <fstream.hfa>
+
+void foo(zero_t o)
+{
+	sout | "It's a Zero!";
+}
+
+void foo(one_t o)
+{
+	sout | "It's a One!";
+}
+
+void foo(int o)
+{
+	sout | "It's a Number!";
+}
+
+int main()
+{
+	foo(0);
+	foo(1);
+	foo(2);
+	return 0;
+}
Index: tools/gdb/utils-gdb.py
===================================================================
--- tools/gdb/utils-gdb.py	(revision 33c3dedce60b8ef643a2e67759069cd5d6be774b)
+++ tools/gdb/utils-gdb.py	(revision 223a63306c486d5cffd07812bcf08afea940d3c4)
@@ -44,8 +44,4 @@
 STACK = []
 
-# A global variable to keep all system task name
-SysTask_Name = ["uLocalDebuggerReader", "uLocalDebugger", "uProcessorTask", "uBootTask", "uSystemTask",
-"uProcessorTask", "uPthread", "uProfiler"]
-
 not_supported_error_msg = "Not a supported command for this language"
 
@@ -101,4 +97,76 @@
 	return cluster_root
 
+def get_sched_lock():
+	"""
+	Return: gdb.Value of __scheduler_lock
+	"""
+	lock = gdb.parse_and_eval('_X16__scheduler_lockPS20__scheduler_RWLock_t_1')
+	if lock.address == 0x0:
+		print('No scheduler lock, program terminated')
+	return lock
+
+def all_clusters():
+	if not is_cforall():
+		return None
+
+	cluster_root = get_cluster_root()
+	if cluster_root.address == 0x0:
+		return
+
+	curr = cluster_root
+	ret = [curr]
+
+	while True:
+		curr = curr['_X4nodeS26__cluster____dbg_node_cltr_1']['_X4nextPS7cluster_1']
+		if curr == cluster_root:
+			break
+
+		ret.append(curr)
+
+	return ret
+
+def all_processors():
+	if not is_cforall():
+		return None
+
+	cfa_t = get_cfa_types()
+
+	# get processors from registration to the RWlock
+	lock = get_sched_lock()
+
+	#get number of elements
+	count = lock['_X5readyVj_1']
+
+	#find all the procs
+	raw_procs = [lock['_X4dataPS21__scheduler_lock_id_t_1'][i]['_X6handleVPS16__processor_id_t_1'] for i in range(count)]
+
+	# pre cast full procs
+	procs = [p.cast(cfa_t.processor_ptr) for p in raw_procs if p['_X9full_procb_1']]
+
+	# sort procs by clusters
+	return sorted(procs, key=lambda p: p['_X4cltrPS7cluster_1'])
+
+def tls_for_pthread(pthrd):
+	prev = gdb.selected_thread()
+	inf = gdb.selected_inferior()
+
+	thrd = inf.thread_from_thread_handle( pthrd )
+	thrd.switch()
+	tls = gdb.parse_and_eval('&_X9kernelTLSS16KernelThreadData_1')
+
+	prev.switch()
+	return tls
+
+def tls_for_proc(proc):
+	return tls_for_pthread(proc['_X13kernel_threadm_1'])
+
+def thread_for_pthread(pthrd):
+	return tls_for_pthread(pthrd)['_X11this_threadVPS7$thread_1']
+
+def thread_for_proc(proc):
+	return tls_for_proc(proc)['_X11this_threadVPS7$thread_1']
+
+
+
 def find_curr_thread():
 	# btstr = gdb.execute('bt', to_string = True).splitlines()
@@ -108,25 +176,4 @@
 	# return btstr[0].split('this=',1)[1].split(',')[0].split(')')[0]
 	return None
-
-def all_clusters():
-	if not is_cforall():
-		return None
-
-	cluster_root = get_cluster_root()
-	if cluster_root.address == 0x0:
-		return
-
-	curr = cluster_root
-	ret = [curr]
-
-	while True:
-		curr = curr['_X4nodeS26__cluster____dbg_node_cltr_1']['_X4nextPS7cluster_1']
-		if curr == cluster_root:
-			break
-
-		ret.append(curr)
-
-	return ret
-
 
 def lookup_cluster(name = None):
@@ -239,6 +286,5 @@
 	"""Cforall: Display currently known processors
 Usage:
-	info processors                 : print out all the processors in the Main Cluster
-	info processors all             : print out all processors in all clusters
+	info processors                 : print out all the processors
 	info processors <cluster_name>  : print out all processors in a given cluster
 """
@@ -247,32 +293,31 @@
 		super(Processors, self).__init__('info processors', gdb.COMMAND_USER)
 
-	def print_processor(self, name, status, pending, address):
-		print('{:>20}  {:>11}  {:>13}  {:>20}'.format(name, status, pending, address))
-
-	def iterate_procs(self, root, active):
-		if root == 0x0:
-			return
-
-		cfa_t = get_cfa_types()
-		curr = root
-
-		while True:
-			processor = curr
-			should_stop = processor['_X12do_terminateVb_1']
+	def print_processor(self, processor):
+		should_stop = processor['_X12do_terminateVb_1']
+		if not should_stop:
+			midle = processor['_X6$linksS7$dlinks_S9processor__1']['_X4nextS9$mgd_link_Y13__tE_generic___1']['_X4elemPY13__tE_generic__1'] != 0x0
+			end   = processor['_X6$linksS7$dlinks_S9processor__1']['_X4nextS9$mgd_link_Y13__tE_generic___1']['_X10terminatorPv_1'] != 0x0
+
+			status = 'Idle' if midle or end else 'Active'
+		else:
 			stop_count  = processor['_X10terminatedS9semaphore_1']['_X5counti_1']
-			if not should_stop:
-				status = 'Active' if active else 'Idle'
-			else:
-				status_str  = 'Last Thread' if stop_count >= 0 else 'Terminating'
-				status      = '{}({},{})'.format(status_str, should_stop, stop_count)
-
-			self.print_processor(processor['_X4namePKc_1'].string(),
-					status, str(processor['_X18pending_preemptionb_1']), str(processor)
-				)
-
-			curr = curr['_X4nodeS28__processor____dbg_node_proc_1']['_X4nextPS9processor_1']
-
-			if curr == root or curr == 0x0:
-				break
+			status_str  = 'Last Thread' if stop_count >= 0 else 'Terminating'
+			status      = '{}({},{})'.format(status_str, should_stop, stop_count)
+
+		print('{:>20}  {:>11}  {:<7}  {:<}'.format(
+			processor['_X4namePKc_1'].string(),
+			status,
+			str(processor['_X18pending_preemptionb_1']),
+			str(processor)
+		))
+		tls = tls_for_proc( processor )
+		thrd = tls['_X11this_threadVPS7$thread_1']
+		if thrd != 0x0:
+			tname = '{} {}'.format(thrd['self_cor']['name'].string(), str(thrd))
+		else:
+			tname = None
+
+		print('{:>20}  {}'.format('Thread', tname))
+		print('{:>20}  {}'.format('TLS', tls))
 
 	#entry point from gdb
@@ -282,6 +327,4 @@
 
 		if not arg:
-			clusters = [lookup_cluster(None)]
-		elif arg == "all":
 			clusters = all_clusters()
 		else:
@@ -292,24 +335,19 @@
 			return
 
-		cfa_t = get_cfa_types()
-		for cluster in clusters:
-			print('Cluster: "{}"({})'.format(cluster['_X4namePKc_1'].string(), cluster.cast(cfa_t.cluster_ptr)))
-
-			active_root = cluster.cast(cfa_t.cluster_ptr) \
-					['_X5procsS8__dllist_S9processor__1'] \
-					['_X4headPY15__TYPE_generic__1'] \
-					.cast(cfa_t.processor_ptr)
-
-			idle_root = cluster.cast(cfa_t.cluster_ptr) \
-					['_X5idlesS8__dllist_S9processor__1'] \
-					['_X4headPY15__TYPE_generic__1'] \
-					.cast(cfa_t.processor_ptr)
-
-			if idle_root != 0x0 or active_root != 0x0:
-				self.print_processor('Name', 'Status', 'Pending Yield', 'Address')
-				self.iterate_procs(active_root, True)
-				self.iterate_procs(idle_root, False)
-			else:
-				print("No processors on cluster")
+		procs = all_processors()
+
+		print('{:>20}  {:>11}  {:<7}  {}'.format('Processor', '', 'Pending', 'Object'))
+		print('{:>20}  {:>11}  {:<7}  {}'.format('Name', 'Status', 'Yield', 'Address'))
+		cl = None
+		for p in procs:
+			# if this is a different cluster print it
+			if cl != p['_X4cltrPS7cluster_1']:
+				if cl:
+					print()
+				cl = p['_X4cltrPS7cluster_1']
+				print('Cluster {}'.format(cl['_X4namePKc_1'].string()))
+
+			# print the processor information
+			self.print_processor(p)
 
 		print()
