Index: INSTALL
===================================================================
--- INSTALL	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ INSTALL	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1,32 +1,44 @@
-cfa-cc: The Cforall->C Compiler System
+cfa-cc: Cforall to C Trans-compiler
 ======================================
 
 Cforall is built using GNU Make and the GNU Autoconf system.  It also requires
-g++ version >= 4.6, bison and flex.  On systems where GNU Make is the default
+g++ version >= 6, bison and flex.  On systems where GNU Make is the default
 make, the system is built by entering the commands:
 
-For devs using the root git:
+For developers using the root git:
 
-  ./autogen.sh
-	./configure
-	make
-	make install
+  $ ./autogen.sh
+  $ ./configure [ --prefix=/some/directory ]
+  $ make -j 8 install
 
-For users using the distributed tarball:
+For users using the distributed tarball / GitHub:
 
-	./configure
-	make
-	make install
+  $ ./configure
+  $ make -j 8 install
 
-Options for 'configure'
------------------------
-The script 'configure' accepts many command line arguments.  Run './configure
---help' to see a list of all of them.  This document attempts to summarize the
-most useful arguments.
+where 8 is the number of CPUs on your computer.
 
---prefix=/some/directory controls the path prefix common to all installed
-  cfa-cc components.  Some components are installed in /some/directory/bin,
-  others in /some/directory/lib.  If unspecified, this defaults to /usr/local.
-  To use (a subdirectory of) your home directory, ${HOME}/some/dir works, but
-  it is important not to put quotes around the directory path; Cforall may
-  appear to build, but the installed version may not work properly.
+
+Options for configure
+======================================
+The script 'configure' accepts many command-line arguments.  Run
+
+  $ ./configure --help
+
+to list them.  The most common argument is:
+
+  --prefix=/some/directory controls the path prefix common to all installed
+    cfa-cc components.  Components are installed in directories bin and lib.
+    If unspecified, prefix defaults to /usr/local.  To use (a subdirectory of)
+    your home directory, ${HOME}/some/dir works, but do not put quotes around the
+    directory path; Cforall may appear to build, but the installed version may
+    not work properly.
+
+
+Build Test
+======================================
+
+  $ cd ./test
+  $ make -j 8 all-tests
+
+The tests take about 2-5 minutes and can be stopped at any time.
Index: Jenkins/Distribute
===================================================================
--- Jenkins/Distribute	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ Jenkins/Distribute	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -2,7 +2,4 @@
 
 import groovy.transform.Field
-
-// For skipping stages
-import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
 
 //===========================================================================================================
@@ -10,47 +7,45 @@
 //===========================================================================================================
 
-node('master') {
-	// Globals
-	BuildDir  = pwd tmp: true
-	SrcDir    = pwd tmp: false
-	Settings  = null
-	Version   = ''
+// Globals
+BuildDir  = null
+SrcDir    = null
+Settings  = null
+Version   = ''
 
-	// Local variables
-	def err = null
-	def log_needed = false
+// Local variables
+def err = null
+def log_needed = false
 
-	currentBuild.result = "SUCCESS"
+currentBuild.result = "SUCCESS"
+
+final commit, build
+node {
 
 	//Wrap build to add timestamp to command line
 	wrap([$class: 'TimestamperBuildWrapper']) {
+		(commit, build) = prepare_build()
+	}
+}
 
-		final commit, build
-		(commit, build) = prepare_build()
-
-		node('x64') {
-			BuildDir  = pwd tmp: true
-			SrcDir    = pwd tmp: false
-
-			Tools.Clean()
-
-			Tools.Checkout( commit )
-
-			Version = GetVersion( build )
-
-			Configure()
-
-			Package()
-
-			Test()
-
-			Archive()
-		}
-
-		// Update the build directories when exiting the node
+node('x64') {
+	//Wrap build to add timestamp to command line
+	wrap([$class: 'TimestamperBuildWrapper']) {
 		BuildDir  = pwd tmp: true
 		SrcDir    = pwd tmp: false
+
+		Tools.Clean()
+
+		Tools.Checkout( commit )
+
+		Version = GetVersion( build )
+
+		Configure()
+
+		Package()
+
+		Test()
+
+		Archive()
 	}
-
 }
 
Index: Jenkins/FullBuild
===================================================================
--- Jenkins/FullBuild	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ Jenkins/FullBuild	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -5,5 +5,5 @@
 //===========================================================================================================
 
-node ('master') {
+node {
 	def err = null
 
@@ -18,13 +18,13 @@
 
 				parallel (
-					gcc_8_x86_new: { trigger_build( 'gcc-8',   'x86' ) },
-					gcc_7_x86_new: { trigger_build( 'gcc-7',   'x86' ) },
-					gcc_6_x86_new: { trigger_build( 'gcc-6',   'x86' ) },
-					gcc_9_x64_new: { trigger_build( 'gcc-9',   'x64' ) },
-					gcc_8_x64_new: { trigger_build( 'gcc-8',   'x64' ) },
-					gcc_7_x64_new: { trigger_build( 'gcc-7',   'x64' ) },
-					gcc_6_x64_new: { trigger_build( 'gcc-6',   'x64' ) },
-					gcc_5_x64_new: { trigger_build( 'gcc-5',   'x64' ) },
-					clang_x64_new: { trigger_build( 'clang',   'x64' ) },
+					gcc_08_x86_new: { trigger_build( 'gcc-8',   'x86' ) },
+					gcc_07_x86_new: { trigger_build( 'gcc-7',   'x86' ) },
+					gcc_06_x86_new: { trigger_build( 'gcc-6',   'x86' ) },
+					gcc_10_x64_new: { trigger_build( 'gcc-10',  'x64' ) },
+					gcc_09_x64_new: { trigger_build( 'gcc-9',   'x64' ) },
+					gcc_08_x64_new: { trigger_build( 'gcc-8',   'x64' ) },
+					gcc_07_x64_new: { trigger_build( 'gcc-7',   'x64' ) },
+					gcc_06_x64_new: { trigger_build( 'gcc-6',   'x64' ) },
+					clang_x64_new:  { trigger_build( 'clang',   'x64' ) },
 				)
 			}
@@ -106,5 +106,5 @@
 
 	if(result.result != 'SUCCESS') {
-		sh("wget -q -O - http://localhost:8084/jenkins/job/Cforall/job/master/${result.number}/consoleText")
+		sh("wget -q -O - https://cforall.uwaterloo.ca/jenkins/job/Cforall/job/master/${result.number}/consoleText")
 		error(result.result)
 	}
@@ -144,31 +144,33 @@
 //Email notification on a full build failure
 def promote_email(boolean success) {
-	echo('notifying users')
+	node {
+		echo('notifying users')
 
-	def result = success ? "PROMOTE - SUCCESS" : "PROMOTE - FAILURE"
+		def result = success ? "PROMOTE - SUCCESS" : "PROMOTE - FAILURE"
 
-	//Since tokenizer doesn't work, figure stuff out from the environnement variables and command line
-	//Configurations for email format
-	def email_subject = "[cforall git][${result}]"
-	def email_body = """<p>This is an automated email from the Jenkins build machine. It was
-generated following the result of the C\u2200 nightly build.</p>
+		//Since tokenizer doesn't work, figure stuff out from the environment variables and command line
+		//Configurations for email format
+		def email_subject = "[cforall git][${result}]"
+		def email_body = """<p>This is an automated email from the Jenkins build machine. It was
+	generated following the result of the C\u2200 nightly build.</p>
 
-<p>Check console output at ${env.BUILD_URL} to view the results.</p>
+	<p>Check console output at ${env.BUILD_URL} to view the results.</p>
 
-<p>- Status --------------------------------------------------------------</p>
+	<p>- Status --------------------------------------------------------------</p>
 
-<p>${result}</p>
+	<p>${result}</p>
 
-<p>- Performance ---------------------------------------------------------</p>
+	<p>- Performance ---------------------------------------------------------</p>
 
-<img src="https://cforall.uwaterloo.ca/jenkins/job/Cforall/job/master/plot/Compilation/getPlot?index=0" >
-<img src="https://cforall.uwaterloo.ca/jenkins/job/Cforall/job/master/plot/Compilation/getPlot?index=1" >
+	<img src="https://cforall.uwaterloo.ca/jenkins/job/Cforall/job/master/plot/Compilation/getPlot?index=0" >
+	<img src="https://cforall.uwaterloo.ca/jenkins/job/Cforall/job/master/plot/Compilation/getPlot?index=1" >
 
-<p>- Logs ----------------------------------------------------------------</p>
-"""
+	<p>- Logs ----------------------------------------------------------------</p>
+	"""
 
-	def email_to = "cforall@lists.uwaterloo.ca"
+		def email_to = "cforall@lists.uwaterloo.ca"
 
-	//send email notification
-	emailext body: email_body, subject: email_subject, to: email_to, attachLog: !success
+		//send email notification
+		emailext body: email_body, subject: email_subject, to: email_to, attachLog: !success
+	}
 }
Index: Jenkins/Promote
===================================================================
--- Jenkins/Promote	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ Jenkins/Promote	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,76 @@
+#!groovy
+
+node { // Script entry point: clone the remote repository, unpack the latest archive into it, then push the result
+	// Globals
+	BuildDir   = pwd tmp: true
+	SrcDir     = pwd tmp: false
+	RemoteRepo = 'git@github.com:cforall/cforall.git' // cloned by PrepRepo()
+	ArchiveUrl = 'https://cforall.uwaterloo.ca/jenkins/job/Cforall_Distribute_Ref/lastSuccessfulBuild/artifact/*zip*/archive.zip' // downloaded by GetArchive()
+
+	// Local variables
+	def err = null // NOTE(review): err and log_needed are assigned here but never read in this file
+	def log_needed = false
+
+	currentBuild.result = "SUCCESS"
+
+	//Wrap build to add timestamp to command line
+	wrap([$class: 'TimestamperBuildWrapper']) {
+
+		PrepRepo(); // stage 'Clone'
+
+		def name = GetArchive(); // stage 'Unzip'; name is the tarball name without '.tar.gz'
+
+		PushRepo(name); // stage 'Push'; name is embedded in the commit message
+	}
+
+}
+
+def GetTarName() { // Returns the name of the first file matching archive/cfa-cc-*.tar.gz, with '.tar.gz' removed
+	def files = findFiles(glob: 'archive/cfa-cc-*.tar.gz') // glob is relative to the current dir() (BuildDir when called from GetArchive)
+	echo "found: ${files[0].name}" // NOTE(review): files[0] is used without checking that the glob matched anything
+	return files[0].name - '.tar.gz'; // String minus removes the first occurrence of '.tar.gz'
+}
+
+def PrepRepo() { // Empties BuildDir and clones RemoteRepo into BuildDir/repo with its contents moved under repo/.git
+	stage('Clone') { // for display purposes
+		dir (BuildDir) {
+		    sh 'rm -rf *' // clear out the build directory (the shell glob * does not match dotfiles)
+			sshagent (credentials: ['github_key_jun1']) {
+				sh "git clone --bare ${RemoteRepo} repo" // bare clone: repo/ contains only git metadata, no work tree
+			}
+			dir ('repo') {
+				sh "mkdir .git"
+				sh "mv * .git" // move the bare-clone contents into .git, leaving repo/ itself empty
+				sh "git init" // NOTE(review): presumably re-initialises repo/ as a regular (non-bare) repository - confirm
+			}
+		}
+	}
+}
+
+def GetArchive() { // Downloads and unpacks the archive from ArchiveUrl into BuildDir/repo; returns the tarball name without '.tar.gz'
+	def tarball
+	stage('Unzip') { // for display purposes
+		dir (BuildDir) {
+			sh "wget -q ${ArchiveUrl}" // expected to produce archive.zip, which the next step unpacks
+			sh "unzip archive.zip"
+			tarball = GetTarName(); // looks for archive/cfa-cc-*.tar.gz and strips the extension
+			sh "tar -xzf archive/${tarball}.tar.gz"
+			sh "mv ${tarball}/* repo/." // assumes the tarball unpacks into a directory named ${tarball} - TODO confirm; * skips dotfiles
+		}
+	}
+
+	return tarball
+}
+
+def PushRepo(name) { // Stages everything in BuildDir/repo, commits if anything changed, and pushes master; name goes in the commit message
+	stage('Push') { // for display purposes
+		dir ("${BuildDir}/repo") {
+			sh "git add -A"
+			sh "git status" // output only; shows in the build log what is about to be committed
+			sh "git diff-index --quiet HEAD || git commit -m 'Push from build machine: ${name}'" // commit only when the index differs from HEAD; NOTE(review): name is interpolated unescaped inside single quotes
+			sshagent (credentials: ['github_key_jun1']) {
+				sh "git push origin master"
+			}
+		}
+	}
+}
Index: Jenkins/tools.groovy
===================================================================
--- Jenkins/tools.groovy	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ Jenkins/tools.groovy	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -61,10 +61,5 @@
 }
 
-PrevGitOldRef = ''
-PrevGitNewRef = ''
-def GitLogMessage(String oldRef = '', String newRef = '') {
-	if (!oldRef) { if(!PrevGitOldRef) { return "\nERROR retrieveing current git information!\n"  } else { oldRef = PrevGitOldRef } }
-	if (!newRef) { if(!PrevGitNewRef) { return "\nERROR retrieveing previous git information!\n" } else { newRef = PrevGitNewRef } }
-
+def ConstructGitLogMessage(String oldRef, String newRef) {
 	def revText = sh(returnStdout: true, script: "git rev-list ${oldRef}..${newRef}").trim()
 	def revList = SplitLines( revText )
@@ -87,18 +82,11 @@
 	gitDiff = gitDiff.replace('[m', '</span>')
 
-	PrevGitOldRef = oldRef
-	PrevGitNewRef = newRef
+	return """
+<p>- Changes -------------------------------------------------------------</p>
 
-	return """
 <pre>
 The branch ${env.BRANCH_NAME} has been updated.
 ${gitUpdate}
 </pre>
-
-<p>Check console output at ${env.BUILD_URL} to view the results.</p>
-
-<p>- Status --------------------------------------------------------------</p>
-
-<p>BUILD# ${env.BUILD_NUMBER} - ${currentBuild.result}</p>
 
 <p>- Log -----------------------------------------------------------------</p>
@@ -116,3 +104,19 @@
 }
 
+EmailMessage = '' // cache: holds the message built by the first successful GitLogMessage call
+def GitLogMessage(String oldRef = '', String newRef = '') { // Returns the cached message if one exists, otherwise builds it from oldRef..newRef and caches it
+	if(!EmailMessage) {
+		if (!oldRef) { return "\nERROR retrieveing current git information!\n"  } // NOTE(review): "current" is reported for the missing OLD ref; the two messages look swapped - confirm
+		if (!newRef) { return "\nERROR retrieveing previous git information!\n" } // error strings are returned, not thrown, and are not cached
+
+		echo "Constructing new git message"
+
+		EmailMessage = ConstructGitLogMessage(oldRef, newRef)
+	}
+	else {
+		echo "Reusing previously constructed message" // arguments are ignored once a message has been cached
+	}
+	return EmailMessage;
+}
+
 return this;
Index: Jenkinsfile
===================================================================
--- Jenkinsfile	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ Jenkinsfile	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -7,75 +7,73 @@
 //===========================================================================================================
 
-node('master') {
-	// Globals
-	BuildDir  = pwd tmp: true
-	SrcDir    = pwd tmp: false
-	Settings  = null
-	Tools     = null
-
-	// Local variables
-	def err = null
-	def log_needed = false
-
-	currentBuild.result = "SUCCESS"
-
-	try {
+// Globals
+BuildDir  = null
+SrcDir    = null
+Settings  = null
+Tools     = null
+
+// Local variables
+def err = null
+def log_needed = false
+
+currentBuild.result = "SUCCESS"
+
+try {
+	node {
 		//Wrap build to add timestamp to command line
 		wrap([$class: 'TimestamperBuildWrapper']) {
-
 			Settings = prepare_build()
-
-			node(Settings.Architecture.node) {
-				BuildDir  = pwd tmp: true
-				SrcDir    = pwd tmp: false
-				currentBuild.description = "${currentBuild.description} on ${env.NODE_NAME}"
-
-				Tools.Clean()
-
-				Tools.Checkout()
-
-				build()
-
-				test()
-
-				benchmark()
-
-				build_doc()
-
-				publish()
-			}
-
-			// Update the build directories when exiting the node
+		}
+	}
+
+	node(Settings.Architecture.node) {
+		//Wrap build to add timestamp to command line
+		wrap([$class: 'TimestamperBuildWrapper']) {
 			BuildDir  = pwd tmp: true
 			SrcDir    = pwd tmp: false
-		}
-	}
-
-	//If an exception is caught we need to change the status and remember to
-	//attach the build log to the email
-	catch (Exception caughtError) {
-		// Store the result of the build log
-		currentBuild.result = "FAILURE"
-
-		// An error has occured, the build log is relevent
-		log_needed = true
-
-		// rethrow error later
-		err = caughtError
-
-		// print the error so it shows in the log
-		echo err.toString()
-	}
-
-	finally {
-		//Send email with final results if this is not a full build
-		email(log_needed)
-
-		echo 'Build Completed'
-
-		/* Must re-throw exception to propagate error */
-		if (err) {
-			throw err
-		}
+			currentBuild.description = "${currentBuild.description} on ${env.NODE_NAME}"
+
+			Tools.Clean()
+
+			Tools.Checkout()
+
+			build()
+
+			test()
+
+			benchmark()
+
+			build_doc()
+
+			publish()
+		}
+	}
+}
+
+//If an exception is caught we need to change the status and remember to
+//attach the build log to the email
+catch (Exception caughtError) {
+	// Store the result of the build log
+	currentBuild.result = "FAILURE"
+
+	// An error has occurred, the build log is relevant
+	log_needed = true
+
+	// rethrow error later
+	err = caughtError
+
+	// print the error so it shows in the log
+	echo err.toString()
+}
+
+finally {
+	//Send email with final results if this is not a full build
+	email(log_needed)
+
+	echo 'Build Completed'
+
+	/* Must re-throw exception to propagate error */
+	if (err) {
+		throw err
 	}
 }
@@ -228,24 +226,31 @@
 //Standard build email notification
 def email(boolean log) {
-	//Since tokenizer doesn't work, figure stuff out from the environnement variables and command line
-	//Configurations for email format
-	echo 'Notifying users of result'
-
-	def project_name = (env.JOB_NAME =~ /(.+)\/.+/)[0][1].toLowerCase()
-	def email_subject = "[${project_name} git][BUILD# ${env.BUILD_NUMBER} - ${currentBuild.result}] - branch ${env.BRANCH_NAME}"
-	def email_body = """<p>This is an automated email from the Jenkins build machine. It was
+	node {
+		//Since tokenizer doesn't work, figure stuff out from the environment variables and command line
+		//Configurations for email format
+		echo 'Notifying users of result'
+
+		def project_name = (env.JOB_NAME =~ /(.+)\/.+/)[0][1].toLowerCase()
+		def email_subject = "[${project_name} git][BUILD# ${env.BUILD_NUMBER} - ${currentBuild.result}] - branch ${env.BRANCH_NAME}"
+		def email_body = """<p>This is an automated email from the Jenkins build machine. It was
 generated because of a git hooks/post-receive script following
 a ref change which was pushed to the C\u2200 repository.</p>
+
+<p>- Status --------------------------------------------------------------</p>
+
+<p>BUILD# ${env.BUILD_NUMBER} - ${currentBuild.result}</p>
+<p>Check console output at ${env.BUILD_URL} to view the results.</p>
 """ + Tools.GitLogMessage()
 
-	def email_to = !Settings.IsSandbox ? "cforall@lists.uwaterloo.ca" : "tdelisle@uwaterloo.ca"
-
-	if( Settings && !Settings.Silent ) {
-		//send email notification
-		emailext body: email_body, subject: email_subject, to: email_to, attachLog: log
-	} else {
-		echo "Would send email to: ${email_to}"
-		echo "With title: ${email_subject}"
-		echo "Content: \n${email_body}"
+		def email_to = !Settings.IsSandbox ? "cforall@lists.uwaterloo.ca" : "tdelisle@uwaterloo.ca"
+
+		if( Settings && !Settings.Silent ) {
+			//send email notification
+			emailext body: email_body, subject: email_subject, to: email_to, attachLog: log
+		} else {
+			echo "Would send email to: ${email_to}"
+			echo "With title: ${email_subject}"
+			echo "Content: \n${email_body}"
+		}
 	}
 }
@@ -300,4 +305,10 @@
 	BuildSettings(java.util.Collections$UnmodifiableMap param, String branch) {
 		switch( param.Compiler ) {
+			case 'gcc-11':
+				this.Compiler = new CC_Desc('gcc-11', 'g++-11', 'gcc-11', '-flto=auto')
+			break
+			case 'gcc-10':
+				this.Compiler = new CC_Desc('gcc-10', 'g++-10', 'gcc-10', '-flto=auto')
+			break
 			case 'gcc-9':
 				this.Compiler = new CC_Desc('gcc-9', 'g++-9', 'gcc-9', '-flto=auto')
@@ -319,5 +330,5 @@
 			break
 			case 'clang':
-				this.Compiler = new CC_Desc('clang', 'clang++-10', 'gcc-9', '-flto=thin -flto-jobs=0')
+				this.Compiler = new CC_Desc('clang', 'clang++-10', 'gcc-10', '-flto=thin -flto-jobs=0')
 			break
 			default :
@@ -390,5 +401,5 @@
 					description: 'Which compiler to use',					\
 					name: 'Compiler',									\
-					choices: 'gcc-9\ngcc-8\ngcc-7\ngcc-6\ngcc-5\ngcc-4.9\nclang',	\
+					choices: 'gcc-11\ngcc-10\ngcc-9\ngcc-8\ngcc-7\ngcc-6\ngcc-5\ngcc-4.9\nclang',	\
 					defaultValue: 'gcc-8',								\
 				],												\
Index: README
===================================================================
--- README	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ README	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1,3 +1,3 @@
-cfa-cc: The Cforall->C Compiler System
+cfa-cc: Cforall to C Trans-compiler
 ======================================
 
@@ -6,4 +6,5 @@
 responsibility for the consequences of any malfunction of the software,
 including the malfunction of any programs compiled using the software.
+
 
 What is Cforall?
@@ -25,93 +26,77 @@
 into a modern programming language.
 
+
 What is cfa-cc?
 ---------------
-cfa-cc is a collection of programs centred around a translator that takes
-Cforall code as input and outputs corresponding C code.  This is complemented
-by a compiler driver in the style of "gcc", which handles preprocessing,
-compiling, assembling, and linking and invokes the translator at appropriate
-moments.
+cfa-cc is a collection of programs centred around a translator (trans-compiler)
+that takes Cforall code as input and outputs augmented C code that implements
+new features.  The translator is complemented by a compiler driver in the style
+of "gcc", which handles preprocessing (including cfa-cc after cpp), compiling,
+assembling, and linking.
 
-What is required in order to use cfa-cc?
+cfa-cc is currently written in C++, but will eventually be rewritten in Cforall.
+
+
+How to download and build cfa-cc?
 ----------------------------------------
-Building cfa-cc requires GNU Make and gcc/g++ 4.  cfa-cc is written in C++.
+Download cfa-cc using
 
-The compiler driver uses an installed version of gcc to handle all aspects of
-the compilation process except for the Cforall->C translation.  Currently, only
-gcc 4.x is supported.
+  $ git clone https://github.com/cforall/cforall.git
 
-How is cfa-cc used?
+Read the ./INSTALL file for build instructions.
+
+
+How to use cfa-cc?
 -------------------
-The compiler driver "cfa" accepts all of the arguments of gcc, and is used in
+The compiler driver "cfa" accepts all of the arguments for gcc, and is used in
 the same way.  For example:
 
-	cfa -c test.c
-	cfa test.o
+  cfa -c test.c
+  cfa test.o
 
-Cforall source files must end with '.c' in order to be compiled by the compiler
-driver.  In addition, the flag "-CFA" causes cfa to invoke the preprocessor and
-translator and send the translator output to standard output.
+Cforall source files may end with '.c' or '.cfa' in order to be compiled by the
+compiler driver.  In addition, the flag "-CFA" causes cfa to invoke the C
+preprocessor and Cforall translator and write the translator output to standard
+output.
 
-It is possible to invoke the translator directly.  The translator is installed
-by default as /usr/local/lib/cfa-cpp.  A typical invocation is:
 
-	/usr/local/lib/cfa-cpp -cp infile outfile
+How to use C code with cfa-cc?
+-----------------------------------
+cfa-cc should be able to compile and link most ANSI C programs with associated
+C standard libraries.
 
-If outfile is omitted, output goes to standard output; if infile is also
-omitted, input comes from standard input.  Options to the translator other than
-"-cp" will not produce valid C code and are only useful for debugging the
-translator.
+Like C++, Cforall supports overloading, resulting in duplicate names that are
+disambiguated using name mangling in the translated C code.  To prevent
+mangling of C names, it is necessary to wrap C declarations in an extern "C"
+block, as for C++.  For example:
 
-How can C code be used with cfa-cc?
------------------------------------
-cfa-cc should be able to compile most ANSI C programs.  It is also possible to
-link against C libraries in most cases.  Since Cforall supports overloading,
-however, names used in Cforall code are mangled in the output C code.  This
-caused linker failures when the names refer to functions and objects in code
-compiled with a standard C compiler.  For this reason, it is necessary to
-enclose the declarations of these functions and objects in extern "C" {}
-blocks.  For example:
-
-	extern "C" {
-	#include <stdio.h>
-	#include <stdlib.h>
-	}
+  extern "C" {
+  #include <curses.h>
+  #include <getopt.h>
+  }
 
 The extern "C" turns off name mangling for functions and objects declared
-within the block.  As a result, it is not possible to overload their names.
+within the block. All C standard headers are pre-wrapped, so most wrapping is
+unnecessary.
+
 
 What's wrong with cfa-cc?
 -------------------------
 
-The authors consider this software to be in an unstable state.  It is quite
-likely that there are many reasonable programs that will fail to compile.  We
-encourage users to report their experiences to cforall@plg.uwaterloo.ca, but we
-make no promises regarding support.
+The authors consider cfa-cc to be in a semi-stable state.  It is possible for
+reasonable Cforall programs to fail compilation.  A list of bugs and fixes is
+available here: https://cforall.uwaterloo.ca/trac.  We encourage users to
+report their experiences to cforall@plg.uwaterloo.ca, but we can make no
+promises regarding support.
 
-We have fixed most of the problems that we are aware of.  There are some
-exceptions:
+Also, the Cforall features web-page https://cforall.uwaterloo.ca/features lists
+small syntactic and semantic differences with standard C.
 
-- initializers are poorly implemented; in particular, file-scope initializers
-  may result in the generation of invalid C code
-
-- the ISO C99 designated initialization syntax '[n] = m' or '.n = m' is not
-  supported; use a colon in place of the equal sign
-
-- some legitimate programs will produce warnings from the C compiler; these are
-  harmless (in particular, the creation of libcfa.a in the build process should
-  cause four warnings from gcc)
-
-- abstract types introduced using the keyword 'type' are not implemented
-  (although 'type' can be used to introduce type parameters)
-
-- the implicit coercion of structure types to the type of their first member is
-  not implemented
 
 Who is responsible for cfa-cc?
 ------------------------------
-cfa-cc was written by Peter Buhr, Richard Bilson, and Rodolfo Esteves.
-Questions and comments can be sent to cforall@plg.uwaterloo.ca.
+Cforall was designed and implemented by Andrew Beach, Richard Bilson, Michael
+Brooks, Peter A. Buhr, Thierry Delisle, Glen Ditchfield, Rodolfo G. Esteves,
+Aaron Moss, Colby Parsons, Rob Schluntz, Fangren Yu, Mubeen Zulfiqar, and others.
 
-The Cforall project maintains a web page:
-
-	https://cforall.uwaterloo.ca
+Check the Cforall web site https://cforall.uwaterloo.ca for news and updates.
Index: benchmark/io/io_uring.h
===================================================================
--- benchmark/io/io_uring.h	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ benchmark/io/io_uring.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1,6 +1,3 @@
 extern "C" {
-	#ifndef _GNU_SOURCE         /* See feature_test_macros(7) */
-	#define _GNU_SOURCE         /* See feature_test_macros(7) */
-	#endif
 	#include <errno.h>
 	#include <stdio.h>
Index: doc/bibliography/pl.bib
===================================================================
--- doc/bibliography/pl.bib	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/bibliography/pl.bib	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1673,5 +1673,5 @@
     address	= {Waterloo Ontario, Canada},
     month	= sep,
-    year	= 2018,
+    year	= 2020,
     note	= {\href{https://plg.uwaterloo.ca/~usystem/pub/uSystem/uC++.pdf}{https://\-plg.uwaterloo.ca/\-$\sim$usystem/\-pub/\-uSystem/uC++.pdf}},
 }
@@ -4552,6 +4552,5 @@
     author	= {Martin Karsten},
     title	= {{libfibre:~User-Level Threading Runtime}},
-    howpublished= {\href{https://git.uwaterloo.ca/mkarsten/libfibre}
-    		  {https://\-git.uwaterloo.ca/\-mkarsten/\-libfibre}},
+    howpublished= {\href{https://git.uwaterloo.ca/mkarsten/libfibre}{https://\-git.uwaterloo.ca/\-mkarsten/\-libfibre}},
     note	= {[Online; accessed 2020-04-15]},
 }
Index: doc/theses/andrew_beach_MMath/cfalab.sty
===================================================================
--- doc/theses/andrew_beach_MMath/cfalab.sty	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/theses/andrew_beach_MMath/cfalab.sty	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -143,4 +143,13 @@
 }
 
+% These somehow control how much of a page can be a floating element before
+% the float is forced onto its own page.   
+\renewcommand{\topfraction}{0.8}
+\renewcommand{\bottomfraction}{0.8}
+\renewcommand{\floatpagefraction}{0.8}
+% Sort of the reverse, I think it is the minimum amount of text that can
+% be on a page before it is all removed. (0 for always fix what you can.)
+\renewcommand{\textfraction}{0.0}
+
 % common.tex Compatablity ===================================================
 % Below this line is for compatability with the old common.tex file.
Index: doc/theses/andrew_beach_MMath/existing.tex
===================================================================
--- doc/theses/andrew_beach_MMath/existing.tex	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/theses/andrew_beach_MMath/existing.tex	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1,3 +1,3 @@
-\chapter{\CFA Existing Features}
+\chapter{\CFA{} Existing Features}
 \label{c:existing}
 
@@ -9,7 +9,7 @@
 existing C code-base allowing programmers to learn \CFA on an as-needed basis.
 
-Only those \CFA features pertaining to this thesis are discussed.  Many of the
-\CFA syntactic and semantic features used in the thesis should be fairly
-obvious to the reader.
+Only those \CFA features pertaining to this thesis are discussed.
+Also, only new features of \CFA will be discussed; a familiarity with
+C or C-like languages is assumed.
 
 \section{Overloading and \lstinline{extern}}
@@ -29,7 +29,7 @@
 // name mangling on by default
 int i; // _X1ii_1
-@extern "C"@ {  // disables name mangling
+extern "C" {  // disables name mangling
 	int j; // j
-	@extern "Cforall"@ {  // enables name mangling
+	extern "Cforall" {  // enables name mangling
 		int k; // _X1ki_1
 	}
@@ -47,6 +47,12 @@
 Reference-types are written the same way as a pointer-type but each
 asterisk (@*@) is replaced with a ampersand (@&@);
-this includes cv-qualifiers and multiple levels of reference, \eg:
-
+this includes cv-qualifiers and multiple levels of reference.
+
+Generally, references act like pointers with an implicit dereferencing
+operation added to each use of the variable.
+These automatic dereferences may be disabled with the address-of operator
+(@&@).
+
+% Check to see if these are generating errors.
 \begin{minipage}{0,5\textwidth}
 With references:
@@ -56,5 +62,5 @@
 int && rri = ri;
 rri = 3;
-&ri = &j; // reference assignment
+&ri = &j;
 ri = 5;
 \end{cfa}
@@ -67,75 +73,78 @@
 int ** ppi = &pi;
 **ppi = 3;
-pi = &j; // pointer assignment
+pi = &j;
 *pi = 5;
 \end{cfa}
 \end{minipage}
 
-References are intended for cases where you would want to use pointers but would
+References are intended to be used when you would use pointers but would
 be dereferencing them (almost) every usage.
-In most cases a reference can just be thought of as a pointer that
-automatically puts a dereference in front of each of its uses (per-level of
-reference).
-The address-of operator (@&@) acts as an escape and removes one of the
-automatic dereference operations.
-Mutable references may be assigned by converting them to a pointer
-with a @&@ and then assigning a pointer to them, as in @&ri = &j;@ above.
+Mutable references may be assigned to by converting them to a pointer
+with a @&@ and then assigning a pointer to them, as in @&ri = &j;@ above.
 
 \section{Operators}
 
-In general, operator names in \CFA are constructed by bracketing an operator
-token with @?@, which indicates the position of the arguments. For example,
+\CFA implements operator overloading by providing special names.
+Operator uses are translated into function calls using these names.
+These names are created by taking the operator symbols and joining them with
+@?@s to show where the arguments go.
+For example,
 infixed multiplication is @?*?@ while prefix dereference is @*?@.
 This syntax make it easy to tell the difference between prefix operations
 (such as @++?@) and post-fix operations (@?++@).
 
-An operator name may describe any function signature (it is just a name) but
-only certain signatures may be called in operator form.
-\begin{cfa}
-int ?+?( int i, int j, int k ) { return i + j + k; }
-{
-	sout | ?+?( 3, 4, 5 ); // no infix form
-}
-\end{cfa}
-Some ``near-misses" for unary/binary operator prototypes generate warnings.
+\begin{cfa}
+point ?+?(point a, point b) { return point{a.x + b.x, a.y + b.y}; }
+bool ?==?(point a, point b) { return a.x == b.x && a.y == b.y; }
+{
+	assert(point{1, 2} + point{3, 4} == point{4, 6});
+}
+\end{cfa}
+Note that these special names are not limited to just being used for these
+operator functions, and may be used to name other declarations.
+Some ``near misses", that will not match an operator form but look like
+they may have been supposed to, will generate warnings but otherwise they are
+left alone.
+
+%\subsection{Constructors and Destructors}
 
 Both constructors and destructors are operators, which means they are
 functions with special operator names rather than type names in \Cpp. The
-special operator names may be used to call the functions explicitly (not
-allowed in \Cpp for constructors).
-
-The special name for a constructor is @?{}@, where the name @{}@ comes from the
-initialization syntax in C, \eg @Structure s = {...}@.
-% That initialization syntax is also the operator form.
-\CFA generates a constructor call each time a variable is declared,
-passing the initialization arguments to the constructor.
-\begin{cfa}
-struct Structure { ... };
-void ?{}(Structure & this) { ... }
-{
-	Structure a;
-	Structure b = {};
-}
-void ?{}(Structure & this, char first, int num) { ... }
-{
-	Structure c = {'a', 2};
-}
-\end{cfa}
-Both @a@ and @b@ are initialized with the first constructor,
-while @c@ is initialized with the second.
-Currently, there is no general way to skip initialization.
+special operator names may be used to call the functions explicitly.
+% Placement new means that this is actually equivalent to C++.
+
+The special name for a constructor is @?{}@, which comes from the
+initialization syntax in C, \eg @Example e = { ... }@.
+\CFA will generate a constructor call each time a variable is declared,
+passing the initialization arguments to the constructor.
+\begin{cfa}
+struct Example { ... };
+void ?{}(Example & this) { ... }
+{
+	Example a;
+	Example b = {};
+}
+void ?{}(Example & this, char first, int num) { ... }
+{
+	Example c = {'a', 2};
+}
+\end{cfa}
+Both @a@ and @b@ will be initialized with the first constructor,
+while @c@ will be initialized with the second.
+Currently, there is no general way to skip initialization.
 
 % I don't like the \^{} symbol but $^\wedge$ isn't better.
-Similarly, destructors use the special name @^?{}@ (the @^@ has no special
-meaning).  Normally, they are implicitly called on a variable when it goes out
-of scope but they can be called explicitly as well.
-\begin{cfa}
-void ^?{}(Structure & this) { ... }
-{
-	Structure d;
+Similarly destructors use the special name @^?{}@ (the @^@ has no special
+meaning).
+These are normally called implicitly on a variable when it goes out
+of scope. They can be called explicitly as well.
+\begin{cfa}
+void ^?{}(Example & this) { ... }
+{
+	Example d;
 } // <- implicit destructor call
 \end{cfa}
 
-Whenever a type is defined, \CFA creates a default zero-argument
+Whenever a type is defined, \CFA will create a default zero-argument
 constructor, a copy constructor, a series of argument-per-field constructors
 and a destructor. All user constructors are defined after this.
@@ -198,16 +207,18 @@
 void do_once(double y) { ... }
 int quadruple(int x) {
-	void do_once(int y) { y = y * 2; } // replace global do_once
-	do_twice(x); // use local do_once
-	do_twice(x + 1.5); // use global do_once
+	void do_once(int & y) { y = y * 2; }
+	do_twice(x);
 	return x;
 }
 \end{cfa}
 Specifically, the complier deduces that @do_twice@'s T is an integer from the
-argument @x@. It then looks for the most \emph{specific} definition matching the
+argument @x@. It then looks for the most specific definition matching the
 assertion, which is the nested integral @do_once@ defined within the
 function. The matched assertion function is then passed as a function pointer
-to @do_twice@ and called within it.  The global definition of @do_once@ is used
-for the second call because the float-point argument is a better match.
+to @do_twice@ and called within it.
+The global definition of @do_once@ is ignored; however, if @quadruple@ took a
+@double@ argument then the global definition would be used instead as it
+would be a better match.
+% Aaron's thesis might be a good reference here.
 
 To avoid typing long lists of assertions, constraints can be collect into
@@ -279,5 +290,6 @@
 Each coroutine has a @main@ function, which takes a reference to a coroutine
 object and returns @void@.
-\begin{cfa}[numbers=left]
+%[numbers=left] Why numbers on this one?
+\begin{cfa}
 void main(CountUp & this) {
 	for (unsigned int next = 0 ; true ; ++next) {
Index: doc/theses/andrew_beach_MMath/features.tex
===================================================================
--- doc/theses/andrew_beach_MMath/features.tex	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/theses/andrew_beach_MMath/features.tex	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -2,16 +2,16 @@
 \label{c:features}
 
-This chapter covers the design and user interface of the \CFA
-EHM, % or exception system.
+This chapter covers the design and user interface of the \CFA EHM
 and begins with a general overview of EHMs. It is not a strict
 definition of all EHMs nor an exhaustive list of all possible features.
-However it does cover the most common structures and features found in them.
-
+However it does cover the most common structure and features found in them.
+
+\section{Overview of EHMs}
 % We should cover what is an exception handling mechanism and what is an
 % exception before this. Probably in the introduction. Some of this could
 % move there.
-\section{Raise / Handle}
+\subsection{Raise / Handle}
 An exception operation has two main parts: raise and handle.
-These terms are sometimes also known as throw and catch but this work uses
+These terms are sometimes known as throw and catch but this work uses
 throw/catch as a particular kind of raise/handle.
 These are the two parts that the user writes and may
@@ -24,6 +24,6 @@
 
 Some well known examples include the @throw@ statements of \Cpp and Java and
-the \code{Python}{raise} statement from Python. A raise may
-perform some other work (such as memory management) but for the
+the \code{Python}{raise} statement from Python. In real systems a raise may
+perform some other work (such as memory management) but for the
 purposes of this overview that can be ignored.
 
@@ -33,10 +33,10 @@
 
 A handler has three common features: the previously mentioned user code, a
-region of code they guard, and an exception label/condition that matches
+region of code they guard and an exception label/condition that matches
 certain exceptions.
 Only raises inside the guarded region and raising exceptions that match the
 label can be handled by a given handler.
-Different EHMs have different rules to pick a handler,
-if multiple handlers could be used, such as ``best match" or ``first found".
+If multiple handlers can handle an exception,
+EHMs will define a rule to pick one, such as ``best match" or ``first found".
 
 The @try@ statements of \Cpp, Java and Python are common examples. All three
@@ -44,9 +44,9 @@
 region.
 
-\section{Propagation}
+\subsection{Propagation}
 After an exception is raised comes what is usually the biggest step for the
 EHM: finding and setting up the handler. The propagation from raise to
 handler can be broken up into three different tasks: searching for a handler,
-matching against the handler, and installing the handler.
+matching against the handler and installing the handler.
 
 \paragraph{Searching}
@@ -55,17 +55,17 @@
 thrown as it looks for handlers that have the raise site in their guarded
 region.
-This search includes handlers in the current function, as well as any in callers
-on the stack that have the function call in their guarded region.
+The search includes handlers in the current function, as well as any in
+callers on the stack that have the function call in their guarded region.
 
 \paragraph{Matching}
 Each handler found has to be matched with the raised exception. The exception
-label defines a condition that is used with the exception to decide if
+label defines a condition that is used with the exception and decides if
 there is a match or not.
 
 In languages where the first match is used, this step is intertwined with
-searching: a match check is performed immediately after the search finds
+searching; a match check is performed immediately after the search finds
 a possible handler.
 
-\section{Installing}
+\paragraph{Installing}
 After a handler is chosen it must be made ready to run.
 The implementation can vary widely to fit with the rest of the
@@ -74,13 +74,13 @@
 case when stack unwinding is involved.
 
-If a matching handler is not guarantied to be found, the EHM needs a
+If a matching handler is not guaranteed to be found, the EHM needs a
 different course of action for the case where no handler matches.
 This situation only occurs with unchecked exceptions as checked exceptions
 (such as in Java) can make the guarantee.
-This unhandled action can abort the program or install a very general handler.
+This unhandled action is usually very general, such as aborting the program.
 
 \paragraph{Hierarchy}
 A common way to organize exceptions is in a hierarchical structure.
-This organization is often used in object-orientated languages where the
+This pattern comes from object-orientated languages where the
 exception hierarchy is a natural extension of the object hierarchy.
 
@@ -90,5 +90,5 @@
 \end{center}
 
-A handler labelled with any given exception can handle exceptions of that
+A handler labeled with any given exception can handle exceptions of that
 type or any child type of that exception. The root of the exception hierarchy
 (here \code{C}{exception}) acts as a catch-all, leaf types catch single types
@@ -104,6 +104,6 @@
 % Could I cite the rational for the Python IO exception rework?
 
-\paragraph{Completion}
-After the handler has finished the entire exception operation has to complete
+\subsection{Completion}
+After the handler has finished, the entire exception operation has to complete
 and continue executing somewhere else. This step is usually simple,
 both logically and in its implementation, as the installation of the handler
@@ -111,27 +111,30 @@
 
 The EHM can return control to many different places,
-the most common are after the handler definition (termination) and after the raise (resumption).
-
-\paragraph{Communication}
+the most common are after the handler definition (termination)
+and after the raise (resumption).
+
+\subsection{Communication}
 For effective exception handling, additional information is often passed
 from the raise to the handler and back again.
 So far only communication of the exceptions' identity has been covered.
-A common communication method is putting fields into the exception instance and giving the
-handler access to them. References in the exception instance can push data back to the raise.
+A common communication method is putting fields into the exception instance
+and giving the handler access to them.
+Passing the exception by reference instead of by value can allow data to be
+passed in both directions.
 
 \section{Virtuals}
 Virtual types and casts are not part of \CFA's EHM nor are they required for
 any EHM.
-However, one of the best ways to support an exception hierarchy is via a virtual system
-among exceptions and used for exception matching.
+However, one of the best ways to support an exception hierarchy
+is via a virtual hierarchy and dispatch system.
 
 Ideally, the virtual system would have been part of \CFA before the work
 on exception handling began, but unfortunately it was not.
-Therefore, only the features and framework needed for the EHM were
+Hence, only the features and framework needed for the EHM were
 designed and implemented. Other features were considered to ensure that
-the structure could accommodate other desirable features in the future but they were not
-implemented.
-The rest of this section discusses the implemented subset of the
-virtual-system design.
+the structure could accommodate other desirable features in the future
+but they were not implemented.
+The rest of this section will only discuss the implemented subset of the
+virtual system design.
 
 The virtual system supports multiple ``trees" of types. Each tree is
@@ -143,5 +146,5 @@
 % A type's ancestors are its parent and its parent's ancestors.
 % The root type has no ancestors.
-% A type's decedents are its children and its children's decedents.
+% A type's descendants are its children and its children's descendants.
 
 Every virtual type also has a list of virtual members. Children inherit
@@ -150,10 +153,10 @@
 of object-orientated programming, and can be of any type.
 
-\PAB{I do not understand these sentences. Can you add an example? $\Rightarrow$
 \CFA still supports virtual methods as a special case of virtual members.
 Function pointers that take a pointer to the virtual type are modified
 with each level of inheritance so that refers to the new type.
 This means an object can always be passed to a function in its virtual table
-as if it were a method.}
+as if it were a method.
+\todo{Clarify (with an example) virtual methods.}
 
 Each virtual type has a unique id.
@@ -161,6 +164,5 @@
 into a virtual table type. Each virtual type has a pointer to a virtual table
 as a hidden field.
-
-\PAB{God forbid, maybe you need a UML diagram to relate these entities.}
+\todo{Might need a diagram for virtual structure.}
 
 Up until this point the virtual system is similar to ones found in
@@ -173,11 +175,11 @@
 types can begin to satisfy a trait, stop satisfying a trait or satisfy the same
 trait in a different way at any lexical location in the program.
-In this sense, they are ``open" as they can change at any time. This capability means it
-is impossible to pick a single set of functions that represent the type's
-implementation across the program.
+In this sense, they are ``open" as they can change at any time.
+This capability means it is impossible to pick a single set of functions
+that represent the type's implementation across the program.
 
 \CFA side-steps this issue by not having a single virtual table for each
 type. A user can define virtual tables that are filled in at their
-declaration and given a name. Anywhere that name is visible, even if
+declaration and given a name. Anywhere that name is visible, even if it is
 defined locally inside a function (although that means it does not have a
 static lifetime), it can be used.
@@ -186,14 +188,11 @@
 through the object.
 
-\PAB{The above explanation is very good!}
-
 While much of the virtual infrastructure is created, it is currently only used
 internally for exception handling. The only user-level feature is the virtual
-cast
+cast, which is the same as the \Cpp \code{C++}{dynamic_cast}.
 \label{p:VirtualCast}
 \begin{cfa}
 (virtual TYPE)EXPRESSION
 \end{cfa}
-which is the same as the \Cpp \code{C++}{dynamic_cast}.
 Note, the syntax and semantics matches a C-cast, rather than the function-like
 \Cpp syntax for special casts. Both the type of @EXPRESSION@ and @TYPE@ must be
@@ -218,9 +217,9 @@
 The trait is defined over two types, the exception type and the virtual table
 type. Each exception type should have a single virtual table type.
-There are no actual assertions in this trait because currently the trait system
-cannot express them (adding such assertions would be part of
+There are no actual assertions in this trait because the trait system
+cannot express them yet (adding such assertions would be part of
 completing the virtual system). The imaginary assertions would probably come
 from a trait defined by the virtual system, and state that the exception type
-is a virtual type, is a descendent of @exception_t@ (the base exception type)
+is a virtual type, is a descendant of @exception_t@ (the base exception type)
 and note its virtual table type.
 
@@ -241,5 +240,6 @@
 };
 \end{cfa}
-Both traits ensure a pair of types are an exception type and its virtual table,
+Both traits ensure a pair of types are an exception type, its virtual table
+type
 and defines one of the two default handlers. The default handlers are used
 as fallbacks and are discussed in detail in \vref{s:ExceptionHandling}.
@@ -269,24 +269,24 @@
 \section{Exception Handling}
 \label{s:ExceptionHandling}
-As stated, \CFA provides two kinds of exception handling: termination and resumption.
+As stated,
+\CFA provides two kinds of exception handling: termination and resumption.
 These twin operations are the core of \CFA's exception handling mechanism.
-This section covers the general patterns shared by the two operations and
-then go on to cover the details of each individual operation.
+This section will cover the general patterns shared by the two operations and
+then go on to cover the details of each individual operation.
 
 Both operations follow the same set of steps.
-Both start with the user performing a raise on an exception.
+Both start with the user performing a raise on an exception.
 Then the exception propagates up the stack.
 If a handler is found the exception is caught and the handler is run.
-After that control returns to a point specific to the kind of exception.
-If the search fails a default handler is run, and if it returns, control
-continues after the raise. Note, the default handler may further change control flow rather than return.
+After that control continues at a raise-dependent location.
+If the search fails a default handler is run and, if it returns, then control
+continues after the raise.
 
 This general description covers what the two kinds have in common.
-Differences include how propagation is performed, where exception continues
+Differences include how propagation is performed, where execution continues
 after an exception is caught and handled and which default handler is run.
 
 \subsection{Termination}
 \label{s:Termination}
-
 Termination handling is the familiar kind and used in most programming
 languages with exception handling.
@@ -313,12 +313,13 @@
 
 The throw copies the provided exception into managed memory to ensure
-the exception is not destroyed when the stack is unwound.
+the exception is not destroyed if the stack is unwound.
 It is the user's responsibility to ensure the original exception is cleaned
 up whether the stack is unwound or not. Allocating it on the stack is
 usually sufficient.
 
-Then propagation starts the search. \CFA uses a ``first match" rule so
-matching is performed with the copied exception as the search continues.
-It starts from the throwing function and proceeds towards the base of the stack,
+% How to say propagation starts, its first sub-step is the search.
+Then propagation starts with the search. \CFA uses a ``first match" rule so
+matching is performed with the copied exception as the search continues.
+It starts from the throwing function and proceeds towards the base of the stack,
 from callee to caller.
 At each stack frame, a check is made for resumption handlers defined by the
@@ -334,27 +335,30 @@
 \end{cfa}
 When viewed on its own, a try statement simply executes the statements
-in \snake{GUARDED_BLOCK} and when those are finished, the try statement finishes.
+in \snake{GUARDED_BLOCK} and when those are finished,
+the try statement finishes.
 
 However, while the guarded statements are being executed, including any
-invoked functions, all the handlers in these statements are included on the search
-path. Hence, if a termination exception is raised, the search includes the added handlers associated with the guarded block and those further up the
-stack from the guarded block.
+invoked functions, all the handlers in these statements are included in the
+search path.
+Hence, if a termination exception is raised these handlers may be matched
+against the exception and may handle it.
 
 Exception matching checks the handler in each catch clause in the order
 they appear, top to bottom. If the representation of the raised exception type
 is the same or a descendant of @EXCEPTION_TYPE@$_i$ then @NAME@$_i$
-(if provided) is bound to a pointer to the exception and the statements in
-@HANDLER_BLOCK@$_i$ are executed.
-If control reaches the end of the handler, the exception is
+(if provided) is
+bound to a pointer to the exception and the statements in @HANDLER_BLOCK@$_i$
+are executed. If control reaches the end of the handler, the exception is
 freed and control continues after the try statement.
 
-If no termination handler is found during the search, the default handler
-(\defaultTerminationHandler) visible at the raise statement is called.
-Through \CFA's trait system, the best match at the raise sight is used.
-This function is run and is passed the copied exception. If the default
-handler returns, control continues after the throw statement.
+If no termination handler is found during the search then the default handler
+(\defaultTerminationHandler) visible at the raise statement is run.
+Through \CFA's trait system the best match at the raise statement will be used.
+This function is run and is passed the copied exception.
+If the default handler is run control continues after the raise statement.
 
 There is a global @defaultTerminationHandler@ that is polymorphic over all
-termination exception types. Since it is so general, a more specific handler can be
+termination exception types.
+Since it is so general a more specific handler can be
 defined and is used for those types, effectively overriding the handler
 for a particular exception type.
@@ -370,11 +374,9 @@
 matched a closure is taken from up the stack and executed,
 after which the raising function continues executing.
-These are most often used when a potentially repairable error occurs, some handler is found on the stack to fix it, and
-the raising function can continue with the correction.
-Another common usage is dynamic event analysis, \eg logging, without disrupting control flow.
-Note, if an event is raised and there is no interest, control continues normally.
-
-\PAB{We also have \lstinline{report} instead of \lstinline{throwResume}, \lstinline{recover} instead of \lstinline{catch}, and \lstinline{fixup} instead of \lstinline{catchResume}.
-You may or may not want to mention it. You can still stick with \lstinline{catch} and \lstinline{throw/catchResume} in the thesis.}
+The common uses for resumption exceptions include
+potentially repairable errors, where execution can continue in the same
+function once the error is corrected, and
+ignorable events, such as logging where nothing needs to happen and control
+should always continue from the same place.
 
 A resumption raise is started with the @throwResume@ statement:
@@ -382,4 +384,5 @@
 throwResume EXPRESSION;
 \end{cfa}
+\todo{Decide on a final set of keywords and use them everywhere.}
 It works much the same way as the termination throw.
 The expression must return a reference to a resumption exception,
@@ -387,12 +390,13 @@
 @is_resumption_exception@ at the call site.
 The assertions from this trait are available to
-the exception system, while handling the exception.
-
-Resumption does not need to copy the raised exception, as the stack is not unwound.
-The exception and
-any values on the stack remain in scope, while the resumption is handled.
-
-The EHM then begins propogation. The search starts from the raise in the
-resuming function and proceeds towards the base of the stack, from callee to caller.
+the exception system while handling the exception.
+
+At run-time, no exception copy is made.
+Resumption does not unwind the stack nor otherwise remove values from the
+current scope, so there is no need to manage memory to keep things in scope.
+
+The EHM then begins propagation. The search starts from the raise in the
+resuming function and proceeds towards the base of the stack,
+from callee to caller.
 At each stack frame, a check is made for resumption handlers defined by the
 @catchResume@ clauses of a @try@ statement.
@@ -412,10 +416,11 @@
 kind of raise.
 When a try statement is executed, it simply executes the statements in the
-@GUARDED_BLOCK@ and then returns.
+@GUARDED_BLOCK@ and then finishes.
 
 However, while the guarded statements are being executed, including any
-invoked functions, all the handlers in these statements are included on the search
-path. Hence, if a resumption exception is raised the search includes the added handlers associated with the guarded block and those further up the
-stack from the guarded block.
+invoked functions, all the handlers in these statements are included in the
+search path.
+Hence, if a resumption exception is raised these handlers may be matched
+against the exception and may handle it.
 
 Exception matching checks the handler in each catch clause in the order
@@ -427,24 +432,23 @@
 the raise statement that raised the handled exception.
 
-Like termination, if no resumption handler is found during the search, the default handler
-(\defaultResumptionHandler) visible at the raise statement is called.
-It uses the best match at the
-raise sight according to \CFA's overloading rules. The default handler is
-passed the exception given to the throw. When the default handler finishes
+Like termination, if no resumption handler is found during the search,
+the default handler (\defaultResumptionHandler) visible at the raise
+statement is called. It will use the best match at the raise site according
+to \CFA's overloading rules. The default handler is
+passed the exception given to the raise. When the default handler finishes
 execution continues after the raise statement.
 
-There is a global \defaultResumptionHandler{} that is polymorphic over all
-resumption exception types and preforms a termination throw on the exception.
-The \defaultTerminationHandler{} can be
-customized by introducing a new or better match as well.
+There is a global \defaultResumptionHandler{} that is polymorphic over all
+resumption exceptions and performs a termination throw on the exception.
+The \defaultTerminationHandler{} can be overridden by providing a new
+function that is a better match.
 
 \subsubsection{Resumption Marking}
 \label{s:ResumptionMarking}
-
 A key difference between resumption and termination is that resumption does
 not unwind the stack. A side effect that is that when a handler is matched
-and run, its try block (the guarded statements) and every try statement
-searched before it are still on the stack. Their existence can lead to the recursive
-resumption problem.
+and run, its try block (the guarded statements) and every try statement
+searched before it are still on the stack. Their presence can lead to
+the recursive resumption problem.
 
 The recursive resumption problem is any situation where a resumption handler
@@ -459,18 +463,18 @@
 \end{cfa}
 When this code is executed, the guarded @throwResume@ starts a
-search and matchs the handler in the @catchResume@ clause. This
-call is placed on the top of stack above the try-block. The second throw
-searchs the same try block and puts call another instance of the
-same handler on the stack leading to an infinite recursion.
+search and matches the handler in the @catchResume@ clause. This
+call is placed on the stack above the try-block. The second raise then
+searches the same try block and puts another instance of the
+same handler on the stack leading to infinite recursion.
 
 While this situation is trivial and easy to avoid, much more complex cycles
 can form with multiple handlers and different exception types.
 
-To prevent all of these cases, the exception search marks the try statements it visits.
-A try statement is marked when a match check is preformed with it and an
-exception. The statement is unmarked when the handling of that exception
-is completed or the search completes without finding a handler.
-While a try statement is marked, its handlers are never matched, effectify
-skipping over them to the next try statement.
+To prevent all of these cases, each try statement is ``marked" from the
+time the exception search reaches it to either when the matching handler
+finishes handling the exception or when the search reaches the base
+of the stack.
+While a try statement is marked, its handlers are never matched, effectively
+skipping over it to the next try statement.
 
 \begin{center}
@@ -478,18 +482,19 @@
 \end{center}
 
-These rules mirror what happens with termination.
-When a termination throw happens in a handler, the search does not look at
-any handlers from the original throw to the original catch because that
-part of the stack is unwound.
-A resumption raise in the same situation wants to search the entire stack,
-but with marking, the search does match exceptions for try statements at equivalent sections
-that would have been unwound by termination.
-
-The symmetry between resumption termination is why this pattern is picked.
-Other patterns, such as marking just the handlers that caught the exception, also work but
-lack the symmetry, meaning there are more rules to remember.
+There are other sets of marking rules that could be used;
+for instance, marking just the handlers that caught the exception
+would also prevent recursive resumption.
+However, these rules mirror what happens with termination.
+
+The try statements that are marked are the ones that would be removed from
+the stack if this was a termination exception, that is those on the stack
+between the handler and the raise statement.
+This symmetry applies to the default handler as well, as both kinds of
+default handlers are run at the raise statement, rather than (physically
+or logically) at the bottom of the stack.
+% In early development having the default handler happen after
+% unmarking was just more useful. We assume that will continue.
 
 \section{Conditional Catch}
-
 Both termination and resumption handler clauses can be given an additional
 condition to further control which exceptions they handle:
@@ -504,5 +509,5 @@
 did not match.
 
-The condition matching allows finer matching to check
+The condition matching allows finer matching by checking
 more kinds of information than just the exception type.
 \begin{cfa}
@@ -519,5 +524,5 @@
 // Can't handle a failure relating to f2 here.
 \end{cfa}
-In this example, the file that experianced the IO error is used to decide
+In this example the file that experienced the IO error is used to decide
 which handler should be run, if any at all.
 
@@ -548,11 +553,11 @@
 
 \subsection{Comparison with Reraising}
-
 A more popular way to allow handlers to match in more detail is to reraise
 the exception after it has been caught, if it could not be handled here.
-On the surface these two features seem interchangable.
-
-If @throw@ is used to start a termination reraise then these two statements
-have the same behaviour:
+On the surface these two features seem interchangeable.
+
+If @throw;@ (no argument) starts a termination reraise,
+which is the same as a raise but reuses the last caught exception,
+then these two statements have the same behaviour:
 \begin{cfa}
 try {
@@ -574,36 +579,65 @@
 }
 \end{cfa}
-However, if there are further handlers after this handler only the first is
-check. For multiple handlers on a single try block that could handle the
-same exception, the equivalent translations to conditional catch becomes more complex, resulting is multiple nested try blocks for all possible reraises.
-So while catch-with-reraise is logically equivilant to conditional catch, there is a lexical explosion for the former.
-
-\PAB{I think the following discussion makes an incorrect assumption.
-A conditional catch CAN happen with the stack unwound.
-Roy talked about this issue in Section 2.3.3 here: \newline
-\url{http://plg.uwaterloo.ca/theses/KrischerThesis.pdf}}
-
-Specifically for termination handling, a
-conditional catch happens before the stack is unwound, but a reraise happens
-afterwards. Normally this might only cause you to loose some debug
-information you could get from a stack trace (and that can be side stepped
-entirely by collecting information during the unwind). But for \CFA there is
-another issue, if the exception is not handled the default handler should be
-run at the site of the original raise.
-
-There are two problems with this: the site of the original raise does not
-exist anymore and the default handler might not exist anymore. The site is
-always removed as part of the unwinding, often with the entirety of the
-function it was in. The default handler could be a stack allocated nested
-function removed during the unwind.
-
-This means actually trying to pretend the catch didn't happening, continuing
-the original raise instead of starting a new one, is infeasible.
-That is the expected behaviour for most languages and we can't replicate
-that behaviour.
+That is, they will have the same behaviour in isolation.
+Two things can expose differences between these cases.
+
+One is the existence of multiple handlers on a single try statement.
+A reraise skips all later handlers on this try statement but a conditional
+catch does not.
+Hence, if an earlier handler contains a reraise, later handlers are
+implicitly skipped; with a conditional catch they are not.
+Still, they are equivalently powerful,
+both can be used to mimic the behaviour of the other,
+as reraise can pack arbitrary code in the handler and conditional catches
+can put arbitrary code in the predicate.
+% I was struggling with a long explanation about some simple solutions,
+% like repeating a condition on later handlers, and the general solution of
+% merging everything together. I don't think it is useful though unless its
+% for a proof.
+% https://en.cppreference.com/w/cpp/language/throw
+
+The question then becomes ``Which is a better default?"
+We believe that not skipping possibly useful handlers is a better default.
+If a handler can handle an exception it should and if the handler can not
+handle the exception then it is probably safer to have that explicitly
+described in the handler itself instead of implicitly described by its
+ordering with other handlers.
+% Or you could just alter the semantics of the throw statement. The handler
+% index is in the exception so you could use it to know where to start
+% searching from in the current try statement.
+% No place for the `goto else;` metaphor.
+
+The other issue is all of the discussion above assumes that the only
+way to tell apart two raises is the exception being raised and the remaining
+search path.
+This is not true generally; the current state of the stack can matter in
+a number of cases, even if only for a stack trace after a program abort.
+But \CFA has a much more significant need of the rest of the stack, the
+default handlers for both termination and resumption.
+
+% For resumption it turns out it is possible continue a raise after the
+% exception has been caught, as if it hadn't been caught in the first place.
+This becomes a problem combined with the stack unwinding used in termination
+exception handling.
+The stack is unwound before the handler is installed, and hence before any
+reraises can run. So if a reraise happens the previous stack is gone,
+the place on the stack where the default handler was supposed to run is gone,
+if the default handler was a local function it may have been unwound too.
+There is no reasonable way to restore that information, so the reraise has
+to be considered as a new raise.
+This is the strongest advantage conditional catches have over reraising:
+they happen before stack unwinding and avoid this problem.
+
+% The one possible disadvantage of conditional catch is that it runs user
+% code during the exception search. While this is a new place that user code
+% can be run destructors and finally clauses are already run during the stack
+% unwinding.
+%
+% https://www.cplusplus.com/reference/exception/current_exception/
+%   `exception_ptr current_exception() noexcept;`
+% https://www.python.org/dev/peps/pep-0343/
 
 \section{Finally Clauses}
 \label{s:FinallyClauses}
-
 Finally clauses are used to preform unconditional clean-up when leaving a
 scope and are placed at the end of a try statement after any handler clauses:
@@ -618,5 +652,5 @@
 The @FINALLY_BLOCK@ is executed when the try statement is removed from the
 stack, including when the @GUARDED_BLOCK@ finishes, any termination handler
-finishes, or during an unwind.
+finishes or during an unwind.
 The only time the block is not executed is if the program is exited before
 the stack is unwound.
@@ -634,10 +668,15 @@
 
 Not all languages with unwinding have finally clauses. Notably \Cpp does
-without it as destructors with RAII serve a similar role. Although destructors and
-finally clauses have overlapping usage cases, they have their own
-specializations, like top-level functions and lambda functions with closures.
-Destructors take more work if a number of unrelated, local variables without destructors or dynamically allocated variables must be passed for de-intialization.
-Maintaining this destructor during local-block modification is a source of errors.
-A finally clause places local de-intialization inline with direct access to all local variables.
+without it as destructors, and the RAII design pattern, serve a similar role.
+Although destructors and finally clauses can be used in the same cases,
+they have their own strengths, similar to top-level functions and lambda
+functions with closures.
+Destructors take more work for their first use, but if there is clean-up code
+that needs to be run every time a type is used they soon become much easier
+to set-up.
+On the other hand, finally clauses capture the local context, so they are easy to
+use when the clean-up is not dependent on the type of a variable or requires
+information from multiple variables.
+% To Peter: I think these are the main points you were going for.
 
 \section{Cancellation}
@@ -652,10 +691,9 @@
 raise, this exception is not used in matching only to pass information about
 the cause of the cancellation.
-(This restriction also means matching cannot fail so there is no default handler.)
+(This also means matching cannot fail so there is no default handler.)
 
 After @cancel_stack@ is called the exception is copied into the EHM's memory
-and the current stack is
-unwound.
-The result of a cancellation depends on the kind of stack that is being unwound.
+and the current stack is unwound.
+The behaviour after that depends on the kind of stack being cancelled.
 
 \paragraph{Main Stack}
@@ -664,9 +702,10 @@
 After the main stack is unwound there is a program-level abort. 
 
-There are two reasons for this semantics. The first is that it obviously had to do the abort
+There are two reasons for these semantics.
+The first is that it had to do this abort
 in a sequential program as there is nothing else to notify and the simplicity
 of keeping the same behaviour in sequential and concurrent programs is good.
-\PAB{I do not understand this sentence. $\Rightarrow$ Also, even in concurrent programs, there is no stack that an innate connection
-to, so it would have be explicitly managed.}
+Also, even in concurrent programs there may not currently be any other stacks
+and even if other stacks do exist, main has no way to know where they are.
 
 \paragraph{Thread Stack}
@@ -680,13 +719,13 @@
 and an implicit join (from a destructor call). The explicit join takes the
 default handler (@defaultResumptionHandler@) from its calling context while
-the implicit join provides its own, which does a program abort if the
+the implicit join provides its own, which does a program abort if the
 @ThreadCancelled@ exception cannot be handled.
 
-\PAB{Communication can occur during the lifetime of a thread using shared variable and \lstinline{waitfor} statements.
-Are you sure you mean communication here? Maybe you mean synchronization (rendezvous) point. $\Rightarrow$ Communication is done at join because a thread only has two points of
-communication with other threads: start and join.}
+The communication and synchronization are done here because threads only have
+two structural points (not dependent on user-code) where
+communication/synchronization happens: start and join.
 Since a thread must be running to perform a cancellation (and cannot be
 cancelled from another stack), the cancellation must be after start and
-before the join, so join is use.
+before the join, so join is used.
 
 % TODO: Find somewhere to discuss unwind collisions.
@@ -695,5 +734,9 @@
 a destructor and prevents cascading the error across multiple threads if
 the user is not equipped to deal with it.
-Also you can always add an explicit join if that is the desired behaviour.
+It is always possible to add an explicit join if that is the desired behaviour.
+
+With explicit join and a default handler that triggers a cancellation, it is
+possible to cascade an error across any number of threads, cleaning up each
+in turn, until the error is handled or the main thread is reached.
 
 \paragraph{Coroutine Stack}
@@ -701,9 +744,9 @@
 satisfies the @is_coroutine@ trait.
 After a coroutine stack is unwound, control returns to the @resume@ function
-that most recently resumed it. The resume reports a
-@CoroutineCancelled@ exception, which contains references to the cancelled
+that most recently resumed it. @resume@ reports a
+@CoroutineCancelled@ exception, which contains references to the cancelled
 coroutine and the exception used to cancel it.
 The @resume@ function also takes the \defaultResumptionHandler{} from the
-caller's context and passes it to the internal cancellation.
+caller's context and passes it to the internal report.
 
 A coroutine knows of two other coroutines, its starter and its last resumer.
@@ -711,2 +754,6 @@
 (in terms of coroutine state) called resume on this coroutine, so the message
 is passed to the latter.
+
+With a default handler that triggers a cancellation, it is possible to
+cascade an error across any number of coroutines, cleaning up each in turn,
+until the error is handled or a thread stack is reached.
Index: doc/theses/andrew_beach_MMath/future.tex
===================================================================
--- doc/theses/andrew_beach_MMath/future.tex	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/theses/andrew_beach_MMath/future.tex	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -3,13 +3,18 @@
 
 \section{Language Improvements}
+\todo{Future/Language Improvements seems to have gotten mixed up. It is
+presented as ``waiting on language improvements" but really it's more
+non-research based improvements.}
 \CFA is a developing programming language. As such, there are partially or
 unimplemented features of the language (including several broken components)
 that I had to workaround while building an exception handling system largely in
 the \CFA language (some C components).  The following are a few of these
-issues, and once implemented/fixed, how this would affect the exception system.
+issues, and once implemented/fixed, how they would affect the exception system.
 \begin{itemize}
 \item
 The implementation of termination is not portable because it includes
-hand-crafted assembly statements. These sections must be ported by hand to
+hand-crafted assembly statements.
+The existing compilers cannot translate that for other platforms and those
+sections must be ported by hand to
 support more hardware architectures, such as the ARM processor.
 \item
@@ -17,5 +22,5 @@
 reference instead of a pointer. Since \CFA has a very general reference
 capability, programmers will want to use it. Once fixed, this capability should
-result in little or no change in the exception system.
+result in little or no change in the exception system but simplify usage.
 \item
 Termination handlers cannot use local control-flow transfers, \eg by @break@,
@@ -41,5 +46,5 @@
 The virtual system should be completed. It was not supposed to be part of this
 project, but was thrust upon it to do exception inheritance; hence, only
-minimal work was done. A draft for a complete virtual system is available but
+minimal work is done. A draft for a complete virtual system is available but
 it is not finalized.  A future \CFA project is to complete that work and then
 update the exception system that uses the current version.
@@ -67,7 +72,8 @@
 bad software engineering.
 
-Non-local/concurrent requires more coordination between the concurrency system
+Non-local/concurrent raise requires more
+coordination between the concurrency system
 and the exception system. Many of the interesting design decisions centre
-around masking (controlling which exceptions may be thrown at a stack). It
+around masking, \ie controlling which exceptions may be thrown at a stack. It
 would likely require more of the virtual system and would also effect how
 default handlers are set.
@@ -85,7 +91,7 @@
 
 \section{Checked Exceptions}
-Checked exceptions make exceptions part of a function's type by adding the
+Checked exceptions make exceptions part of a function's type by adding an
 exception signature. An exception signature must declare all checked
-exceptions that could propogate from the function (either because they were
+exceptions that could propagate from the function (either because they were
 raised inside the function or came from a sub-function). This improves safety
 by making sure every checked exception is either handled or consciously
@@ -93,18 +99,15 @@
 
 However checked exceptions were never seriously considered for this project
-for two reasons. The first is due to time constraints, even copying an
-existing checked exception system would be pushing the remaining time and
-trying to address the second problem would take even longer. The second
-problem is that checked exceptions have some real usability trade-offs in
+because they have significant trade-offs in usability and code reuse in
 exchange for the increased safety.
-
 These trade-offs are most problematic when trying to pass exceptions through
 higher-order functions from the functions the user passed into the
 higher-order function. There are no well known solutions to this problem
-that were statifactory for \CFA (which carries some of C's flexability
-over safety design) so one would have to be researched and developed.
+that were satisfactory for \CFA (which carries some of C's flexibility
+over safety design) so additional research is needed.
 
-Follow-up work might add checked exceptions to \CFA, possibly using
-polymorphic exception signatures, a form of tunneling\cite{Zhang19} or
+Follow-up work might add some form of checked exceptions to \CFA,
+possibly using polymorphic exception signatures,
+a form of tunneling\cite{Zhang19} or
 checked and unchecked raises.
 
@@ -150,8 +153,10 @@
 For instance, resumption could be extended to cover this use by allowing local
 control flow out of it. This approach would require an unwind as part of the
-transition as there are stack frames that have to be removed.  This approach
-means there is no notify raise, but because \CFA does not have exception
-signatures, a termination can be thrown from within any resumption handler so
-there is already a way to do mimic this in existing \CFA.
+transition as there are stack frames that have to be removed between where
+the resumption handler is installed and where it is defined.
+This approach would not require, but might benefit from, a special statement
+to leave the handler.
+Currently, mimicking this behaviour in \CFA is possible by throwing a
+termination inside a resumption handler.
 
 % Maybe talk about the escape; and escape CONTROL_STMT; statements or how
Index: doc/theses/andrew_beach_MMath/implement.tex
===================================================================
--- doc/theses/andrew_beach_MMath/implement.tex	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/theses/andrew_beach_MMath/implement.tex	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -2,5 +2,11 @@
 \label{c:implement}
 
-The implementation work for this thesis covers two components: the virtual
+% Local Helpers:
+\newcommand\transformline[1][becomes...]{
+  \hrulefill#1\hrulefill
+  \medskip
+}
+
+The implementation work for this thesis covers the two components: virtual
 system and exceptions. Each component is discussed in detail.
 
@@ -21,88 +27,106 @@
 \todo{Talk about constructors for virtual types (after they are working).}
 
-This is what binds an instance of a virtual type to a virtual table. This
-pointer can be used as an identity check. It can also be used to access the
+The virtual table pointer binds an instance of a virtual type
+to a virtual table.
+The pointer is also the table's id and how the system accesses the
 virtual table and the virtual members there.
 
 \subsection{Type Id}
 Every virtual type has a unique id.
-Type ids can be compared for equality (the types reperented are the same)
+Type ids can be compared for equality,
+which checks if the types represented are the same,
 or used to access the type's type information.
 The type information currently is only the parent's type id or, if the
-type has no parent, zero.
+type has no parent, the null pointer.
 
 The id's are implemented as pointers to the type's type information instance.
-Derefencing the pointer gets the type information.
-By going back-and-forth between the type id and
-the type info one can find every ancestor of a virtual type.
-It also pushes the issue of creating a unique value (for
+Dereferencing the pointer gets the type information.
+The ancestors of a virtual type are found by traversing type ids through
+the type information.
+The information pushes the issue of creating a unique value (for
 the type id) to the problem of creating a unique instance (for type
-information) which the linker can solve.
-
-Advanced linker support is required because there is no place that appears
-only once to attach the type information to. There should be one structure
-definition but it is included in multiple translation units. Each virtual
-table definition should be unique but there are an arbitrary number of thoses.
-So the special section prefix \texttt{.gnu.linkonce} is used.
-With a unique suffix (making the entire section name unique) the linker will
-remove multiple definition making sure only one version exists after linking.
-Then it is just a matter of making sure there is a unique name for each type.
-
-This is done in three phases.
-The first phase is to generate a new structure definition to store the type
+information), which the linker can solve.
+
+The advanced linker support is used here to avoid having to create
+a new declaration to attach this data to.
+With C/\CFA's header/implementation file divide, for something to appear
+exactly once it must come from a declaration that appears in exactly one
+implementation file; the declarations in header files may exist only once, but
+they can be included in many different translation units.
+Therefore, the structure's declaration will not work.
+Neither will attaching the type information to the virtual table -- although
+vtable declarations are in implementation files they are not unique, see
+\autoref{ss:VirtualTable}.
+Instead the same type information is generated multiple times and then
+the new attribute \snake{cfa_linkonce} is used to remove duplicates.
+
+Type information is constructed as follows:
+\begin{enumerate}
+\item
+Use the type's name to generate a name for the type information structure.
+This is saved so it may be reused.
+\item
+Generate a new structure definition to store the type
 information. The layout is the same in each case, just the parent's type id,
-but the types are changed.
-The structure's name is change, it is based off the virtual type's name, and
-the type of the parent's type id.
+but the types used change from instance to instance.
+The generated name is used for both this structure and, if relevant, the
+parent pointer.
 If the virtual type is polymorphic then the type information structure is
 polymorphic as well, with the same polymorphic arguments.
-
-The second phase is to generate an instance of the type information with a
-almost unique name, generated by mangling the virtual type name.
-
-The third phase is implicit with \CFA's overloading scheme. \CFA mangles
-names with type information so that all of the symbols exported to the linker
-are unique even if in \CFA code they are the same. Having two declarations
-with the same name and same type is forbidden because it is impossible for
-overload resolution to pick between them. This is why a unique type is
-generated for each virtual type.
-Polymorphic information is included in this mangling so polymorphic
-types will have seperate instances for each set of polymorphic arguments.
-
-\begin{cfa}
-struct TYPE_ID_TYPE {
-	PARENT_ID_TYPE const * parent;
+\item
+A separate name for instances is generated from the type's name.
+\item
+The definition is generated and initialised.
+The parent id is set to the null pointer or to the address of the parent's
+type information instance. Name resolution handles the rest.
+\item
+\CFA's name mangler does its regular name mangling encoding the type of
+the declaration into the instance name. This gives a completely unique name
+including different instances of the same polymorphic type.
+\end{enumerate}
+\todo{The list is making me realise, some of this isn't ordered.}
+
+Writing that code manually, with helper macros for the early name mangling,
+would look like this:
+\begin{cfa}
+struct INFO_TYPE(TYPE) {
+	INFO_TYPE(PARENT) const * parent;
 };
 
 __attribute__((cfa_linkonce))
-TYPE_ID_TYPE const TYPE_ID_NAME = {
-	&PARENT_ID_NAME,
+INFO_TYPE(TYPE) const INFO_NAME(TYPE) = {
+	&INFO_NAME(PARENT),
 };
 \end{cfa}
 
-\subsubsection{cfa\_linkonce Attribute}
+\subsubsection{\lstinline{cfa\_linkonce} Attribute}
+% I just realised: This is an extension of the inline keyword.
+% An extension of C's at least, it is very similar to C++'s.
 Another feature added to \CFA is a new attribute: \texttt{cfa\_linkonce}.
-This attribute can be put on an object or function definition
-(any global declaration with a name and a type).
-This allows you to define that object or function multiple times.
-All definitions should have the link-once attribute on them and all should
-be identical.
-
-The simplist way to use it is to put a definition in a header where the
-forward declaration would usually go.
-This is how it is used for type-id instances. There was is no unique location
-associated with a type except for the type definition which is in a header.
-This allows the unique type-id object to be generated there.
-
-Internally @cfa_linkonce@ removes all @section@ attributes
-from the declaration (as well as itself) and replaces them with
+This attribute is attached to an object or function definition
+(any global declaration with a name and a type)
+allowing it to be defined multiple times.
+All matching definitions must have the link-once attribute
+and their implementations should be identical as well.
+
+A single definition with the attribute can be included in a header
+file as if it was a forward declaration, except no definition is required.
+
+This technique is used for type-id instances. A link-once definition is
+generated each time the structure is seen. This will result in multiple
+copies but the link-once attribute ensures all but one are removed for a
+unique instance.
+
+Internally, @cfa_linkonce@ is replaced with
 @section(".gnu.linkonce.NAME")@ where \texttt{NAME} is replaced by the
 mangled name of the object.
+Any other @section@ attributes are removed from the declaration.
 The prefix \texttt{.gnu.linkonce} in section names is recognized by the
-linker. If two of these sections with the same name, including everything
-that comes after the special prefix, then only one will be used and the other
-will be discarded.
+linker. If two of these sections appear with the same name, including
+everything that comes after the special prefix, then only one is used
+and the other is discarded.
 
 \subsection{Virtual Table}
+\label{ss:VirtualTable}
 Each virtual type has a virtual table type that stores its type id and
 virtual members.
@@ -113,6 +137,8 @@
 
 The layout always comes in three parts.
+\todo{Add labels to the virtual table layout figure.}
 The first section is just the type id at the head of the table. It is always
-there to ensure that
+there to ensure that it can be found even when the accessing code does not
+know which virtual type it has.
 The second section are all the virtual members of the parent, in the same
 order as they appear in the parent's virtual table. Note that the type may
@@ -133,29 +159,33 @@
 prefix that has the same layout and types as its parent virtual table.
 This, combined with the fixed offset to the virtual table pointer, means that
-for any virtual type it doesn't matter if we have it or any of its
-descendants, it is still always safe to access the virtual table through
-the virtual table pointer.
-From there it is safe to check the type id to identify the exact type of the
+for any virtual type, it is always safe to access its virtual table and,
+from there, it is safe to check the type id to identify the exact type of the
 underlying object, access any of the virtual members and pass the object to
 any of the method-like virtual members.
 
-When a virtual table is declared the user decides where to declare it and its
+When a virtual table is declared, the user decides where to declare it and its
 name. The initialization of the virtual table is entirely automatic based on
 the context of the declaration.
 
-The type id is always fixed, each virtual table type will always have one
+The type id is always fixed; with each virtual table type having
 exactly one possible type id.
-The virtual members are usually filled in by resolution. The best match for
-a given name and type at the declaration site is filled in.
-There are two exceptions to that rule: the @size@ field is the type's size
-and is set to the result of a @sizeof@ expression, the @align@ field is the
-type's alignment and similarly uses an @alignof@ expression.
+The virtual members are usually filled in by type resolution.
+The best match for a given name and type at the declaration site is used.
+There are two exceptions to that rule: the @size@ field, the type's size,
+is set using a @sizeof@ expression and the @align@ field, the
+type's alignment, is set using an @alignof@ expression.
 
 \subsubsection{Concurrency Integration}
 Coroutines and threads need instances of @CoroutineCancelled@ and
 @ThreadCancelled@ respectively to use all of their functionality. When a new
-data type is declared with @coroutine@ or @thread@ the forward declaration for
+data type is declared with @coroutine@ or @thread@, a forward declaration for
 the instance is created as well. The definition of the virtual table is created
 at the definition of the main function.
+
+This is shown through code re-writing in
+\autoref{f:ConcurrencyTypeTransformation} and
+\autoref{f:ConcurrencyMainTransformation}.
+In both cases the original declaration is not modified,
+only new ones are added.
 
 \begin{figure}
@@ -165,4 +195,6 @@
 };
 \end{cfa}
+
+\transformline[appends...]
 
 \begin{cfa}
@@ -175,5 +207,9 @@
 extern CoroutineCancelled_vtable & _default_vtable;
 \end{cfa}
-
+\caption{Concurrency Type Transformation}
+\label{f:ConcurrencyTypeTransformation}
+\end{figure}
+
+\begin{figure}
 \begin{cfa}
 void main(Example & this) {
@@ -181,4 +217,6 @@
 }
 \end{cfa}
+
+\transformline[appends...]
 
 \begin{cfa}
@@ -191,8 +229,7 @@
 	&_default_vtable_object_declaration;
 \end{cfa}
-\caption{Concurrency Transformations}
-\label{f:ConcurrencyTransformations}
+\caption{Concurrency Main Transformation}
+\label{f:ConcurrencyMainTransformation}
 \end{figure}
-\todo{Improve Concurrency Transformations figure.}
 
 \subsection{Virtual Cast}
@@ -211,11 +248,12 @@
 the cast target is passed in as @child@.
 
-For C generation both arguments and the result are wrapped with type casts.
-There is also an internal store inside the compiler to make sure that the
+The generated C code wraps both arguments and the result with type casts.
+There is also an internal check inside the compiler to make sure that the
 target type is a virtual type.
 % It also checks for conflicting definitions.
 
-The virtual cast either returns the original pointer as a new type or null.
-So the function just does the parent check and returns the approprate value.
+The virtual cast either returns the original pointer or the null pointer
+as the new type.
+So the function does the parent check and returns the appropriate value.
 The parent check is a simple linear search of child's ancestors using the
 type information.
@@ -229,5 +267,5 @@
 % resumption doesn't as well.
 
-% Many modern languages work with an interal stack that function push and pop
+% Many modern languages work with an internal stack that function push and pop
 % their local data to. Stack unwinding removes large sections of the stack,
 % often across functions.
@@ -236,16 +274,16 @@
 stack. On function entry and return, unwinding is handled directly by the
 call/return code embedded in the function.
-In many cases the position of the instruction pointer (relative to parameter
+In many cases, the position of the instruction pointer (relative to parameter
 and local declarations) is enough to know the current size of the stack
 frame.
 
 Usually, the stack-frame size is known statically based on parameter and
-local variable declarations. Even with dynamic stack-size the information
-to determain how much of the stack has to be removed is still contained
+local variable declarations. Even with dynamic stack-size, the information
+to determine how much of the stack has to be removed is still contained
 within the function.
 Allocating/deallocating stack space is usually an $O(1)$ operation achieved by
 bumping the hardware stack-pointer up or down as needed.
-Constructing/destructing values on the stack takes longer put in terms of
-figuring out what needs to be done is of similar complexity.
+Constructing/destructing values within a stack frame has
+a similar complexity but can add additional work and take longer.
 
 Unwinding across multiple stack frames is more complex because that
@@ -261,16 +299,16 @@
 reseting to a snap-shot of an arbitrary but existing function frame on the
 stack. It is up to the programmer to ensure the snap-shot is valid when it is
-reset and that all required clean-up from the unwound stacks is preformed.
-This approach is fragile and forces a work onto the surounding code.
-
-With respect to that work forced onto the surounding code,
+reset and that all required clean-up from the unwound stacks is performed.
+This approach is fragile and requires extra work in the surrounding code.
+
+With respect to the extra work in the surrounding code,
 many languages define clean-up actions that must be taken when certain
 sections of the stack are removed. Such as when the storage for a variable
 is removed from the stack or when a try statement with a finally clause is
 (conceptually) popped from the stack.
-None of these should be handled by the user, that would contradict the
-intention of these features, so they need to be handled automatically.
-
-To safely remove sections of the stack the language must be able to find and
+None of these should be handled by the user --- that would contradict the
+intention of these features --- so they need to be handled automatically.
+
+To safely remove sections of the stack, the language must be able to find and
 run these clean-up actions even when removing multiple functions unknown at
 the beginning of the unwinding.
@@ -294,5 +332,5 @@
 current stack frame, and what handlers should be checked. Theoretically, the
 LSDA can contain any information but conventionally it is a table with entries
-representing regions of the function and what has to be done there during
+representing regions of a function and what has to be done there during
 unwinding. These regions are bracketed by instruction addresses. If the
 instruction pointer is within a region's start/end, then execution is currently
@@ -314,11 +352,12 @@
 int avar __attribute__(( cleanup(clean_up) ));
 \end{cfa}
-The attribue is used on a variable and specifies a function,
+The attribute is used on a variable and specifies a function,
 in this case @clean_up@, run when the variable goes out of scope.
-This is enough to mimic destructors, but not try statements which can effect
+This feature is enough to mimic destructors,
+but not try statements which can affect
 the unwinding.
 
-To get full unwinding support all of this has to be done directly with
-assembly and assembler directives. Partiularly the cfi directives
+To get full unwinding support, all of these features must be handled directly
+in assembly and assembler directives; particularly the cfi directives
 \snake{.cfi_lsda} and \snake{.cfi_personality}.
 
@@ -327,5 +366,5 @@
 section covers some of the important parts of the interface.
 
-A personality function can preform different actions depending on how it is
+A personality function can perform different actions depending on how it is
 called.
 \begin{lstlisting}
@@ -364,12 +403,12 @@
 
 The @exception_class@ argument is a copy of the
-\code{C}{exception}'s @exception_class@ field.
-This a number that identifies the exception handling mechanism that created
-the
-
-The \code{C}{exception} argument is a pointer to the user
+\code{C}{exception}'s @exception_class@ field,
+which is a number that identifies the exception handling mechanism
+that created the exception.
+
+The \code{C}{exception} argument is a pointer to a user
 provided storage object. It has two public fields: the @exception_class@,
 which is described above, and the @exception_cleanup@ function.
-The clean-up function is used by the EHM to clean-up the exception if it
+The clean-up function is used by the EHM to clean-up the exception, if it
 should need to be freed at an unusual time, it takes an argument that says
 why it had to be cleaned up.
@@ -382,8 +421,8 @@
 messages for special cases (some of which should never be used by the
 personality function) and error codes. However, unless otherwise noted, the
-personality function should always return @_URC_CONTINUE_UNWIND@.
+personality function always returns @_URC_CONTINUE_UNWIND@.
 
 \subsection{Raise Exception}
-Raising an exception is the central function of libunwind and it performs a
+Raising an exception is the central function of libunwind and it performs
 two-staged unwinding.
 \begin{cfa}
@@ -472,5 +511,5 @@
 % catches. Talk about GCC nested functions.
 
-\CFA termination exceptions use libunwind heavily because they match \Cpp
+\CFA termination exceptions use libunwind heavily because they match
 \Cpp exceptions closely. The main complication for \CFA is that the
 compiler generates C code, making it very difficult to generate the assembly to
@@ -485,11 +524,12 @@
 
 \begin{figure}
+\centering
 \input{exception-layout}
 \caption{Exception Layout}
 \label{f:ExceptionLayout}
 \end{figure}
-\todo*{Convert the exception layout to an actual diagram.}
-
-Exceptions are stored in variable-sized blocks (see \vref{f:ExceptionLayout}).
+
+Exceptions are stored in variable-sized blocks
+(see \autoref{f:ExceptionLayout}).
 The first component is a fixed-sized data structure that contains the
 information for libunwind and the exception system. The second component is an
@@ -498,13 +538,14 @@
 @_Unwind_Exception@ to the entire node.
 
-Multipe exceptions can exist at the same time because exceptions can be
+Multiple exceptions can exist at the same time because exceptions can be
 raised inside handlers, destructors and finally blocks.
 Figure~\vref{f:MultipleExceptions} shows a program that has multiple
 exceptions active at one time.
 Each time an exception is thrown and caught the stack unwinds and the finally
-clause runs. This will throw another exception (until @num_exceptions@ gets
-high enough) which must be allocated. The previous exceptions may not be
+clause runs. This handler throws another exception (until @num_exceptions@ gets
+high enough), which must be allocated. The previous exceptions may not be
 freed because the handler/catch clause has not been run.
-So the EHM must keep them alive while it allocates exceptions for new throws.
+Therefore, the EHM must keep all unhandled exceptions alive
+while it allocates exceptions for new throws.
 
 \begin{figure}
@@ -559,5 +600,5 @@
 \todo*{Work on multiple exceptions code sample.}
 
-All exceptions are stored in nodes which are then linked together in lists,
+All exceptions are stored in nodes, which are then linked together in lists,
 one list per stack, with the
 list head stored in the exception context. Within each linked list, the most
@@ -566,5 +607,5 @@
 exception is being handled. The exception at the head of the list is currently
 being handled, while other exceptions wait for the exceptions before them to be
-removed.
+handled and removed.
 
 The virtual members in the exception's virtual table provide the size of the
@@ -573,9 +614,10 @@
 exception into managed memory. After the exception is handled, the free
 function is used to clean up the exception and then the entire node is
-passed to free so the memory can be given back to the heap.
+passed to free, returning the memory back to the heap.
 
 \subsection{Try Statements and Catch Clauses}
 The try statement with termination handlers is complex because it must
-compensate for the lack of assembly-code generated from \CFA. Libunwind
+compensate for \CFA generating C code
+rather than assembly code. Libunwind
 requires an LSDA and personality function for control to unwind across a
 function. The LSDA in particular is hard to mimic in generated C code.
@@ -592,10 +634,10 @@
 embedded assembly. This assembly code is handcrafted using C @asm@ statements
 and contains
-enough information for the single try statement the function repersents.
+enough information for a single try statement the function represents.
 
 The three functions passed to try terminate are:
 \begin{description}
-\item[try function:] This function is the try block, all the code inside the
-try block is placed inside the try function. It takes no parameters and has no
+\item[try function:] This function is the try block; it is where all the code
+from inside the try block is placed. It takes no parameters and has no
 return value. This function is called during regular execution to run the try
 block.
@@ -609,11 +651,14 @@
 handler that matches the exception.
 
-\item[handler function:] This function handles the exception. It takes a
+\item[handler function:] This function handles the exception, and contains
+all the code from the handlers in the try statement, joined with a switch
+statement on the handler's id.
+It takes a
 pointer to the exception and the handler's id and returns nothing. It is called
-after the cleanup phase. It is constructed by stitching together the bodies of
-each handler and dispatches to the selected handler.
+after the cleanup phase.
 \end{description}
 All three functions are created with GCC nested functions. GCC nested functions
-can be used to create closures, functions that can refer to the state of other
+can be used to create closures,
+in other words functions that can refer to the state of other
 functions on the stack. This approach allows the functions to refer to all the
 variables in scope for the function containing the @try@ statement. These
@@ -623,4 +668,8 @@
 Using this pattern, \CFA implements destructors with the cleanup attribute.
 
+\autoref{f:TerminationTransformation} shows the pattern used to transform
+a \CFA try statement with catch clauses into the appropriate C functions.
+\todo{Explain the Termination Transformation figure.}
+
 \begin{figure}
 \begin{cfa}
@@ -633,4 +682,6 @@
 }
 \end{cfa}
+
+\transformline
 
 \begin{cfa}
@@ -683,5 +734,5 @@
 % The stack-local data, the linked list of nodes.
 
-Resumption simpler to implement than termination
+Resumption is simpler to implement than termination
 because there is no stack unwinding.
 Instead of storing the data in a special area using assembly,
@@ -692,23 +743,28 @@
 The nodes are stored in order, with the more recent try statements closer
 to the head of the list.
-Instead of traversing the stack resumption handling traverses the list.
-At each node the EHM checks to see if the try statement the node repersents
+Instead of traversing the stack, resumption handling traverses the list.
+At each node, the EHM checks to see if the try statement the node represents
 can handle the exception. If it can, then the exception is handled and
 the operation finishes, otherwise the search continues to the next node.
 If the search reaches the end of the list without finding a try statement
-that can handle the exception the default handler is executed and the
+that can handle the exception, the default handler is executed and the
 operation finishes.
 
-In each node is a handler function which does most of the work there.
-The handler function is passed the raised the exception and returns true
-if the exception is handled and false if it cannot be handled here.
-
-For each @catchResume@ clause the handler function will:
-check to see if the raised exception is a descendant type of the declared
-exception type, if it is and there is a conditional expression then it will
-run the test, if both checks pass the handling code for the clause is run
-and the function returns true, otherwise it moves onto the next clause.
-If this is the last @catchResume@ clause then instead of moving onto
-the next clause the function returns false as no handler could be found.
+Each node has a handler function that does most of the work.
+The handler function is passed the raised exception and returns true
+if the exception is handled and false otherwise.
+
+The handler function checks each of its internal handlers in order,
+top-to-bottom, until it finds a match. If a match is found, that handler is
+run, after which the function returns true, ignoring all remaining handlers.
+If no match is found the function returns false.
+The match is performed in two steps: first, a virtual cast is used to see
+if the thrown exception is an instance of the declared exception or one of
+its descendant types; then, it is checked against the custom predicate, if one
+is defined. This ordering gives the type guarantee used in the predicate.
+
+\autoref{f:ResumptionTransformation} shows the pattern used to transform
+a \CFA try statement with catch clauses into the appropriate C functions.
+\todo{Explain the Resumption Transformation figure.}
 
 \begin{figure}
@@ -722,4 +778,6 @@
 }
 \end{cfa}
+
+\transformline
 
 \begin{cfa}
@@ -753,5 +811,6 @@
 
 % Recursive Resumption Stuff:
-Search skipping (see \vpageref{s:ResumptionMarking}), which ignores parts of
+\autoref{f:ResumptionMarking} shows search skipping
+(see \vpageref{s:ResumptionMarking}), which ignores parts of
 the stack
 already examined, is accomplished by updating the front of the list as the
@@ -759,14 +818,14 @@
 is updated to the next node of the current node. After the search is complete,
 successful or not, the head of the list is reset.
-
+% No paragraph?
 This mechanism means the current handler and every handler that has already
 been checked are not on the list while a handler is run. If a resumption is
-thrown during the handling of another resumption the active handlers and all
+thrown during the handling of another resumption, the active handlers and all
 the other handler checked up to this point are not checked again.
-
-This structure also supports new handler added while the resumption is being
+% No paragraph?
+This structure also supports new handlers added while the resumption is being
 handled. These are added to the front of the list, pointing back along the
-stack -- the first one points over all the checked handlers -- and the ordering
-is maintained.
+stack --- the first one points over all the checked handlers ---
+and the ordering is maintained.
 
 \begin{figure}
@@ -774,10 +833,10 @@
 \caption{Resumption Marking}
 \label{f:ResumptionMarking}
-\todo*{Convert Resumption Marking into a line figure.}
+\todo*{Label Resumption Marking to aid clarity.}
 \end{figure}
 
 \label{p:zero-cost}
-Note, the resumption implementation has a cost for entering/exiting a @try@
-statement with @catchResume@ clauses, whereas a @try@ statement with @catch@
+Finally, the resumption implementation has a cost for entering/exiting a try
+statement with @catchResume@ clauses, whereas a try statement with @catch@
 clauses has zero-cost entry/exit. While resumption does not need the stack
 unwinding and cleanup provided by libunwind, it could use the search phase to
@@ -810,23 +869,23 @@
 
 The first step of cancellation is to find the cancelled stack and its type:
-coroutine or thread. Fortunately, the thread library stores the main thread
-pointer and the current thread pointer, and every thread stores a pointer to
-its main coroutine and the coroutine it is currently executing.
-\todo*{Consider adding a description of how threads are coroutines.}
-
-If a the current thread's main and current coroutines are the same then the
-current stack is a thread stack. Furthermore it is easy to compare the
-current thread to the main thread to see if they are the same. And if this
-is not a thread stack then it must be a coroutine stack.
+coroutine, thread or main thread.
+In \CFA, a thread (the construct the user works with) is a user-level thread
+(point of execution) paired with a coroutine, the thread's main coroutine.
+The thread library also stores pointers to the main thread and the current
+thread.
+If the current thread's main and current coroutines are the same then the
+current stack is a thread stack, otherwise it is a coroutine stack.
+If the current stack is a thread stack, it is also the main thread stack
+if and only if the main and current threads are the same.
 
 However, if the threading library is not linked, the sequential execution is on
 the main stack. Hence, the entire check is skipped because the weak-symbol
-function is loaded. Therefore, a main thread cancellation is unconditionally
+function is loaded. Therefore, main thread cancellation is unconditionally
 performed.
 
 Regardless of how the stack is chosen, the stop function and parameter are
 passed to the forced-unwind function. The general pattern of all three stop
-functions is the same: they continue unwinding until the end of stack and
-then preform their transfer.
+functions is the same: continue unwinding until the end of stack and
+then perform the appropriate transfer.
 
 For main stack cancellation, the transfer is just a program abort.
@@ -834,5 +893,5 @@
 For coroutine cancellation, the exception is stored on the coroutine's stack,
 and the coroutine context switches to its last resumer. The rest is handled on
-the backside of the resume, which check if the resumed coroutine is
+the backside of the resume, which checks if the resumed coroutine is
 cancelled. If cancelled, the exception is retrieved from the resumed coroutine,
 and a @CoroutineCancelled@ exception is constructed and loaded with the
Index: doc/theses/andrew_beach_MMath/intro.tex
===================================================================
--- doc/theses/andrew_beach_MMath/intro.tex	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/theses/andrew_beach_MMath/intro.tex	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1,19 +1,26 @@
 \chapter{Introduction}
 
-\PAB{Stay in the present tense. \newline
-\url{https://plg.uwaterloo.ca/~pabuhr/technicalWriting.shtml}}
-\newline
-\PAB{Note, \lstinline{lstlisting} normally bolds keywords. None of the keywords in your thesis are bolded.}
-
-% Talk about Cforall and exceptions generally.
-%This thesis goes over the design and implementation of the exception handling
-%mechanism (EHM) of
-%\CFA (pernounced sea-for-all and may be written Cforall or CFA).
-Exception handling provides alternative dynamic inter-function control flow.
+% The highest level overview of Cforall and EHMs. Get this done right away.
+This thesis goes over the design and implementation of the exception handling
+mechanism (EHM) of
+\CFA (pronounced sea-for-all and may be written Cforall or CFA).
+\CFA is a new programming language that extends C and maintains
+backwards-compatibility while introducing modern programming features.
+Adding exception handling to \CFA gives it new ways to handle errors and
+make other large control-flow jumps.
+
+% Now take a step back and explain what exceptions are generally.
+Exception handling provides dynamic inter-function control flow.
 There are two forms of exception handling covered in this thesis:
 termination, which acts as a multi-level return,
 and resumption, which is a dynamic function call.
-Note, termination exception handling is so common it is often assumed to be the only form.
-Lesser know derivations of inter-function control flow are continuation passing in Lisp~\cite{CommonLisp}.
+Termination handling is much more common,
+to the extent that it is often seen as the only form of exception handling.
+This separation is uncommon because termination exception handling is so
+much more common that it is often assumed to be the only form.
+% WHY: Mention other forms of continuation and \cite{CommonLisp} here?
+A language's EHM is the combination of language syntax and run-time
+components that are used to construct, raise and handle exceptions,
+including all control flow.
 
 Termination exception handling allows control to return to any previous
@@ -24,6 +31,6 @@
 \end{center}
 
-Resumption exception handling calls a function, but asks the functions on the
-stack what function that is.
+Resumption exception handling searches the stack for a handler and then calls
+it without adding or removing any other stack frames.
 \todo{Add a diagram showing control flow for resumption.}
 
@@ -35,14 +42,6 @@
 most of the cost only when the error actually occurs.
 
-% Overview of exceptions in Cforall.
-
-\PAB{You need section titles here. Don't take them out.}
-
 \section{Thesis Overview}
-
-This thesis goes over the design and implementation of the exception handling
-mechanism (EHM) of
-\CFA (pernounced sea-for-all and may be written Cforall or CFA).
-%This thesis describes the design and implementation of the \CFA EHM.
+This work describes the design and implementation of the \CFA EHM.
 The \CFA EHM implements all of the common exception features (or an
 equivalent) found in most other EHMs and adds some features of its own.
@@ -77,6 +76,4 @@
 harder to replicate in other programming languages.
 
-\section{Background}
-
 % Talk about other programming languages.
 Some existing programming languages that include EHMs/exception handling
@@ -84,10 +81,4 @@
 exceptions which unwind the stack as part of the
 Exceptions also can replace return codes and return unions.
-In functional languages will also sometimes fold exceptions into monads.
-
-\PAB{You must demonstrate knowledge of background material here.
-It should be at least a full page.}
-
-\section{Contributions}
 
 The contributions of this work are:
@@ -102,9 +93,143 @@
 \end{enumerate}
 
-\todo{I can't figure out a good lead-in to the overview.}
-Covering the existing \CFA features in \autoref{c:existing}.
-Then the new features are introduce in \autoref{c:features}, explaining their
-usage and design.
+\todo{I can't figure out a good lead-in to the roadmap.}
+The next section covers the existing state of exceptions.
+The existing state of \CFA is also covered in \autoref{c:existing}.
+The new features are introduced in \autoref{c:features},
+which explains their usage and design.
 That is followed by the implementation of those features in
 \autoref{c:implement}.
-% Future Work \autoref{c:future}
+The performance results are examined in \autoref{c:performance}.
+Possibilities to extend this project are discussed in \autoref{c:future}.
+
+\section{Background}
+\label{s:background}
+
+Exception handling is not a new concept,
+with papers on the subject dating back to the 1970s.
+
+Exceptions were popularised by \Cpp,
+which added them in its first major wave of non-object-orientated features
+in 1990.
+% https://en.cppreference.com/w/cpp/language/history
+
+Java was the next popular language to use exceptions. It is also the most
+popular language with checked exceptions.
+Checked exceptions are part of the function interface they are raised from.
+This includes functions they propagate through, until a handler for that
+type of exception is found.
+This makes exception information explicit, which can improve clarity and
+safety, but can slow down programming.
+Some of the issues with this, such as dealing with higher-order methods or an
+overly specified throws clause, are technical. However, some of the issues are
+much more human, in that writing/updating all the exception signatures can be
+enough of a burden that people will hack the system to avoid them.
+This includes the ``catch-and-ignore'' pattern, where a catch block is used
+without anything to repair or recover from the exception.
+
+%\subsection
+Resumption exceptions have been much less popular.
+Although resumption has a history as old as termination's, very few
+programming languages have implemented it.
+% http://bitsavers.informatik.uni-stuttgart.de/pdf/xerox/parc/techReports/
+%   CSL-79-3_Mesa_Language_Manual_Version_5.0.pdf
+Mesa is one programming language that did, and experience with that
+language is quoted as being one of the reasons resumptions were not
+included in the \Cpp standard.
+% https://en.wikipedia.org/wiki/Exception_handling
+\todo{A comment about why we did include them when they are so unpopular
+might be appropriate.}
+
+%\subsection
+Functional languages tend to use solutions like the return union, but some
+exception-like constructs still appear.
+
+For instance, Haskell's built-in error mechanism can make the result of any
+expression, including function calls, an error. Any expression that examines an
+error value will in-turn produce an error. This continues until the main
+function produces an error or until it is handled by one of the catch
+functions.
+
+%\subsection
+More recently exceptions seem to be vanishing from newer programming
+languages.
+Rust and Go reduce this feature to panics.
+Panicking is somewhere between a termination exception and a program abort.
+Notably, in Rust a panic can trigger either: a panic may unwind the stack or
+simply kill the process.
+% https://doc.rust-lang.org/std/panic/fn.catch_unwind.html
+Go's panic is much more similar to a termination exception but there is
+only a catch-all function with \code{Go}{recover()}.
+So exceptions still are appearing, just in reduced forms.
+
+%\subsection
+Exception handling's most common use cases are in error handling.
+Here are some other ways to handle errors and comparisons with exceptions.
+\begin{itemize}
+\item\emph{Error Codes}:
+This pattern uses an enumeration (or just a set of fixed values) to indicate
+that an error has occurred and which error it was.
+
+There are some issues if a function wants to return an error code and another
+value. The main issue is that it can be easy to forget checking the error
+code, which can lead to an error being quietly and implicitly ignored.
+Some new languages have tools that raise warnings if the return value is
+discarded to avoid this.
+It also puts more code on the main execution path.
+\item\emph{Special Return with Global Store}:
+A function that encounters an error returns some value indicating that it
+encountered an error but stores which error occurred in a fixed global location.
+
+Perhaps the C standard @errno@ is the most famous example of this,
+where some standard library functions will return some non-value (often a
+NULL pointer) and set @errno@.
+
+This avoids the multiple results issue encountered with straight error codes
+but otherwise has many of the same advantages and disadvantages.
+It does however introduce one other major disadvantage:
+Everything that uses that global location must agree on all possible errors.
+\item\emph{Return Union}:
+Replaces error codes with a tagged union.
+Success is one tag and the errors are another.
+It is also possible to make each possible error its own tag and carry its own
+additional information, but the two branch format is easy to make generic
+so that one type can be used everywhere in error handling code.
+
+This pattern is very popular in functional or semi-functional languages,
+anything with primitive support for tagged unions (or algebraic data types).
+% We need listing Rust/rust to format code snipits from it.
+% Rust's \code{rust}{Result<T, E>}
+
+The main disadvantage is again it puts code on the main execution path.
+This is also the first technique that allows for more information about an
+error, other than one of a fixed set of ids, to be sent.
+They can be missed but some languages can force that they are checked.
+It is also implicitly forced in any languages with checked union access.
+\item\emph{Handler Functions}:
+On error the function that produced the error calls another function to
+handle it.
+The handler function can be provided locally (passed in as an argument,
+either directly or as a field of a structure/object) or globally (a global
+variable).
+
+C++ uses this as its fallback system if exception handling fails,
+with \snake{std::terminate_handler} and, for a time, \snake{std::unexpected_handler}.
+
+Handler functions work a lot like resumption exceptions.
+The difference is they are more expensive to set up but cheaper to use, and
+so are more suited to more frequent errors.
+The exception is global handlers that rarely change, as the time
+in both cases shrinks towards zero.
+\end{itemize}
+
+%\subsection
+Because of their cost exceptions are rarely used for hot paths of execution.
+There is an element of self-fulfilling prophecy here as implementation
+techniques have been designed to make exceptions cheap to set-up at the cost
+of making them expensive to use.
+Still, use of exceptions for other tasks is more common in higher-level
+scripting languages.
+An iconic example is Python's StopIteration exception which is thrown by
+an iterator to indicate that it is exhausted, combined with Python's heavy
+use of the iterator-based for-loop.
+% https://docs.python.org/3/library/exceptions.html#StopIteration
Index: doc/theses/andrew_beach_MMath/performance.tex
===================================================================
--- doc/theses/andrew_beach_MMath/performance.tex	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ doc/theses/andrew_beach_MMath/performance.tex	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,115 @@
+\chapter{Performance}
+\label{c:performance}
+
+\textbf{Just because of the stage of testing there are design notes for
+the tests as well as commentary on them.}
+\todo{Revisit organization of the performance chapter once tests are chosen.}
+% What are good tests for resumption?
+
+Performance has been of secondary importance for most of this project.
+Instead, the focus has been to get the features working. The only performance
+requirement is to ensure the tests for correctness run in a reasonable
+amount of time.
+
+%\section{Termination Comparison}
+\section{Test Set-Up}
+Tests will be run on \CFA, C++ and Java.
+
+C++ is the most comparable language because both it and \CFA use the same
+framework, libunwind.
+In fact, the comparison is almost entirely a quality of implementation
+comparison. \CFA's EHM has had significantly less time to be optimized and
+does not generate its own assembly. It does have a slight advantage in that
+there are some features it does not handle.
+
+Java is another very popular language with similar termination semantics.
+It is implemented in a very different environment, a virtual machine with
+garbage collection.
+It also implements the finally clause on try blocks allowing for a direct
+feature-to-feature comparison.
+
+All tests are run inside a main loop which will perform the test
+repeatedly. This is to prevent start-up or tear-down time from
+affecting the timing results.
+A consequence of this is that tests cannot terminate the program,
+which does limit how tests can be implemented.
+There are catch-alls to keep unhandled
+exceptions from terminating tests.
+
+The exceptions used in these tests will always be an exception based off of
+the base exception. This requirement minimizes performance differences based
+on the object model.
+Catch-alls are done by catching the root exception type (not using \Cpp's
+\code{C++}{catch(...)}).
+
+Tests run in Java were not warmed because exception code paths should not be
+hot.
+
+\section{Tests}
+The following tests were selected to test the performance of different
+components of the exception system.
+They should provide a guide as to where the EHM's costs can be found.
+
+\paragraph{Raise/Handle}
+What is the basic cost to raise and handle an exception?
+
+There are a number of factors that can affect this.
+For \CFA this includes the type of raise,
+
+Main loop, pass through a catch-all, call through some empty helper functions
+to put frames on the stack then raise an exception.
+\todo{Raise/Handle (or a similar test) could also test how much it costs to
+search over things, not sure if that is a useful test.}
+
+\paragraph{Unwinding}
+Isolating the unwinding of the stack as much as possible.
+
+This has the same set-up as the raise/handle test except the intermediate
+stack frames contain either an object declaration with a destructor or a
+try statement with no handlers except for a finally clause.
+
+\paragraph{Enter/Leave}
+What is the cost of entering and leaving a try block, even if no exception
+is thrown?
+
+This test is a simple pattern of entering
+and leaving a try statement.
+
+The only tunables here are which clauses are attached to the try block:
+termination handlers, resumption handlers and finally clauses.
+
+\paragraph{Re-throw and Conditional-Catch}
+How expensive is it to run a non-exception type check for a handler?
+
+In this case different languages approach this problem differently, either
+through a re-throw or a conditional-catch.
+Where \CFA uses its condition other languages will have to unconditionally
+catch the exception then re-throw if the condition is false.
+
+The set up is as follows: main loop, a catch-all exception handler,
+a conditional catch and then the raise.
+
+% We could do a Cforall test without the catch all and a new default handler
+% that does a catch all.
+As a point of comparison one of the raise/handle tests (which one?) has
+the same layout but never catches anything.
+
+The main tunable in this test is how often the conditional-catch matches.
+
+%\section{Cost in Size}
+%Using exceptions also has a cost in the size of the executable.
+%Although it is sometimes ignored
+%
+%There is a size cost to defining a personality function but the later problem
+%is the LSDA which will be generated for every function.
+%
+%(I haven't actually figured out how to compare this, probably using something
+%related to -fexceptions.)
+
+% Some languages I left out:
+% Python: Its a scripting language, different
+% uC++: Not well known and should the same results as C++, except for
+%   resumption which should be the same.
+
+%\section{Resumption Comparison}
+\todo{Can we find a good language to compare resumptions in.}
Index: doc/theses/andrew_beach_MMath/uw-ethesis.tex
===================================================================
--- doc/theses/andrew_beach_MMath/uw-ethesis.tex	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/theses/andrew_beach_MMath/uw-ethesis.tex	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -244,4 +244,5 @@
 \input{features}
 \input{implement}
+\input{performance}
 \input{future}
 
Index: doc/theses/mubeen_zulfiqar_MMath/.gitignore
===================================================================
--- doc/theses/mubeen_zulfiqar_MMath/.gitignore	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/theses/mubeen_zulfiqar_MMath/.gitignore	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1,4 +1,4 @@
 # Intermediate Results:
-out/
+build/
 
 # Final Files:
Index: doc/theses/mubeen_zulfiqar_MMath/allocator.tex
===================================================================
--- doc/theses/mubeen_zulfiqar_MMath/allocator.tex	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/theses/mubeen_zulfiqar_MMath/allocator.tex	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -7,11 +7,11 @@
 \begin{itemize}
 \item
-Objective of @uHeapLmmm@.
+Objective of uHeapLmmm.
 \item
 Design philosophy.
 \item
-Background and previous design of @uHeapLmmm@.
+Background and previous design of uHeapLmmm.
 \item
-Distributed design of @uHeapLmmm@.
+Distributed design of uHeapLmmm.
 
 ----- SHOULD WE GIVE IMPLEMENTATION DETAILS HERE? -----
@@ -24,5 +24,5 @@
 \end{itemize}
 
-The new features added to @uHeapLmmm@ (incl. @malloc_size@ routine)
+The new features added to uHeapLmmm (incl. @malloc_size@ routine)
 \CFA alloc interface with examples.
 \begin{itemize}
@@ -33,9 +33,47 @@
 \end{itemize}
 
------ SHOULD WE GIVE PERFORMANCE AND USABILITY COMPARISON OF DIFFERENT INTERFACES THAT WE TRIED? -----
 
-\PAB{Often Performance is its own chapter. I added one for now.}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% uHeapLmmm Design
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-Performance evaluation using u-benchmark suite.
+\section{Objective of uHeapLmmm}
+UHeapLmmm is a lightweight memory allocator. The objective behind uHeapLmmm is to design a minimal concurrent memory allocator that has new features and also fulfills GNU C Library requirements (FIX ME: cite requirements).
+
+\subsection{Design philosophy}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\section{Background and previous design of uHeapLmmm}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\section{Distributed design of uHeapLmmm}
+
+
+\subsection{Advantages of distributed design}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\section{Added Features}
+
+
+\subsection{Methods}
+Why did we need it?
+The added benefits.
+
+
+\subsection{Alloc Interface}
+Why did we need it?
+The added benefits.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Following is added by Peter
 
 \noindent
Index: doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex
===================================================================
--- doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/theses/mubeen_zulfiqar_MMath/benchmarks.tex	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -34,2 +34,207 @@
 \noindent
 ====================
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Performance Matrices
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\section{Performance Metrics of Memory Allocators}
+
+When it comes to memory allocators, there are no set standards of performance. Performance of a memory allocator depends highly on the usage pattern of the application. A memory allocator that is the best performer for a certain application X might be the worst for some other application which has a completely different memory usage pattern compared to the application X. It is extremely difficult to make one universally best memory allocator which will outperform every other memory allocator for every usage pattern. So, there is a lack of a set of standard benchmarks that are used to evaluate a memory allocator's performance.
+
+If we break down the goals of a memory allocator, there are two basic metrics on which a memory allocator's performance is evaluated.
+\begin{enumerate}
+\item
+Memory Overhead
+\item
+Speed
+\end{enumerate}
+
+\subsection{Memory Overhead}
+Memory overhead is the extra memory that a memory allocator takes from OS which is not requested by the application. Ideally, an allocator should get just enough memory from OS that can fulfill application's request and should return this memory to OS as soon as the application frees it. But, allocators retain more memory compared to what application has asked for which causes memory overhead. Memory overhead can happen for various reasons.
+
+\subsubsection{Fragmentation}
+Fragmentation is one of the major reasons behind memory overhead. Fragmentation happens because of situations that are either necessary for proper functioning of the allocator such as internal memory management and book-keeping or are out of allocator's control such as application's usage pattern.
+
+\paragraph{Internal Fragmentation}
+For internal book-keeping, allocators divide raw memory given by OS into chunks, blocks, or lists that can fulfill application's requested size. Allocators use memory given by OS for creating headers, footers etc. to store information about these chunks, blocks, or lists. This increases usage of memory in addition to the memory requested by application as the allocators need to store their book-keeping information. This extra usage of memory for allocator's own book-keeping is called Internal Fragmentation. Although it causes memory overhead, this overhead is necessary for an allocator's proper functioning.
+
+*** FIX ME: Insert a figure of internal fragmentation with explanation
+
+\paragraph{External Fragmentation}
+External fragmentation is the free bits of memory between or around chunks of memory that are currently in-use of the application. Segmentation in memory due to application's usage pattern causes external fragmentation. The memory which is part of external fragmentation is completely free as it is neither used by allocator's internal book-keeping nor by the application. Ideally, an allocator should return a segment of memory back to the OS as soon as application frees it. But, this is not always the case. Allocators get memory from OS in one of the two ways.
+
+\begin{itemize}
+\item
+MMap: an allocator can ask OS for whole pages in mmap area. Then, the allocator segments the page internally and fulfills application's request.
+\item
+Heap: an allocator can ask OS for memory in heap area using system calls such as sbrk. Heap area grows downwards and shrinks upwards.
+\begin{itemize}
+\item
+If an allocator uses mmap area, it can only return extra memory back to OS if the whole page is free i.e. no chunk on the page is in-use of the application. Even if one chunk on the whole page is currently in-use of the application, the allocator has to retain the whole page.
+\item
+If an allocator uses the heap area, it can only return the contiguous free memory at the end of the heap area that is currently in allocator's possession as heap area shrinks upwards. If there are free bits of memory in-between chunks of memory that are currently in-use of the application, the allocator cannot return these free bits.
+
+*** FIX ME: Insert a figure of above scenrio with explanation
+\item
+Even if the entire heap area is free except one small chunk at the end of heap area that is being used by the application, the allocator cannot return the free heap area back to the OS as it is not a contiguous region at the end of heap area.
+
+*** FIX ME: Insert a figure of above scenrio with explanation
+
+\item
+Such scenarios cause external fragmentation but it is out of the allocator's control and depends on application's usage pattern.
+\end{itemize}
+\end{itemize}
+
+\subsubsection{Internal Memory Management}
+Allocators such as je-malloc (FIX ME: insert reference) pro-actively get some memory from the OS and divide it into chunks of certain sizes that can be used in-future to fulfill application's request. This causes memory overhead as these chunks are made before application's request. There is also the possibility that an application may not even request memory of these sizes during their whole life-time.
+
+*** FIX ME: Insert a figure of above scenrio with explanation
+
+Allocators such as rp-malloc (FIX ME: insert reference) maintain lists or blocks of sized memory segments that are freed by the application for future use. These lists are maintained without any guarantee that application will even request these sizes again.
+
+Such tactics are usually used to gain speed as allocator will not have to get raw memory from OS and manage it at the time of application's request but they do cause memory overhead.
+
+Fragmentation and managed sized chunks of free memory can lead to Heap Blowup as the allocator may not be able to use the fragments or sized free chunks of free memory to fulfill application's requests of other sizes.
+
+\subsection{Speed}
+When it comes to performance evaluation of any piece of software, its runtime is usually the first thing that is evaluated. The same is true for memory allocators but, in case of memory allocators, speed does not only mean the runtime of memory allocator's routines but there are other factors too.
+
+\subsubsection{Runtime Speed}
+Low runtime is the main goal of a memory allocator when it comes to proving its speed. Runtime is the time that it takes for a routine of memory allocator to complete its execution. As mentioned in (FIX ME: reference to routines' list), there are four basic routines that are used in memory allocation. Ideally, each routine of a memory allocator should be fast. Some memory allocator designs use pro-active measures (FIX ME: local reference) to gain speed when allocating some memory to the application. Some memory allocators do memory allocation faster than memory freeing (FIX ME: graph reference) while others show similar speed whether memory is allocated or freed.
+
+\subsubsection{Memory Access Speed}
+Runtime speed is not the only speed metric in memory allocators. The memory that a memory allocator has allocated to the application also needs to be accessible as quickly as possible. The application should be able to read/write allocated memory quickly. The allocation method of a memory allocator may introduce some delays when it comes to memory access speed, which is especially important in concurrent applications. Ideally, a memory allocator should allocate all memory on a cache-line to only one thread and no cache-line should be shared among multiple threads. If a memory allocator allocates memory to multiple threads on the same cache line, then the cache may get invalidated more frequently when two different threads running on two different processors try to read/write the same memory region. On the other hand, if one cache-line is used by only one thread then the cache may get invalidated less frequently. This sharing of one cache-line among multiple threads is called false sharing (FIX ME: cite wasik).
+
+\paragraph{Active False Sharing}
+Active false sharing is the sharing of one cache-line among multiple threads that is caused by memory allocator. It happens when two threads request memory from memory allocator and the allocator allocates memory to both of them on the same cache-line. After that, if the threads are running on different processors that have their own caches and both threads start reading/writing the allocated memory simultaneously, their caches will start getting invalidated every time the other thread writes something to the memory. This will cause the application to slow down as the processor has to reload its cache much more frequently.
+
+*** FIX ME: Insert a figure of above scenrio with explanation
+
+\paragraph{Passive False Sharing}
+Passive false sharing is the kind of false sharing which is caused by the application and not the memory allocator. The memory allocator may preserve passive false sharing in future instead of eradicating it. But, passive false sharing is initiated by the application.
+
+\subparagraph{Program Induced Passive False Sharing}
+Program induced false sharing is completely out of memory allocator's control and is purely caused by the application. Suppose a thread in the application creates multiple objects in the dynamic area and the allocator allocates memory for these objects on the same cache-line, as the objects are created by the same thread. Passive false sharing will occur if this thread passes one of these objects to another thread but it retains the rest of these objects or it passes some/all of the remaining objects to some third thread(s). Now, one cache-line is shared among multiple threads but it is caused by the application and not the allocator. It is out of allocator's control and has a similar performance impact as Active False Sharing (FIX ME: cite local) if these threads, which are sharing the same cache-line, start reading/writing the given objects simultaneously.
+
+*** FIX ME: Insert a figure of above scenrio 1 with explanation
+
+*** FIX ME: Insert a figure of above scenrio 2 with explanation
+
+\subparagraph{Program Induced Allocator Preserved Passive False Sharing}
+Program induced allocator preserved passive false sharing is another interesting case of passive false sharing. Both the application and the allocator are partially responsible for it. It starts the same as Program Induced False Sharing (FIX ME: cite local). First, an application thread creates multiple dynamic objects on the same cache-line and distributes these objects among multiple threads, causing sharing of one cache-line among multiple threads (Program Induced Passive False Sharing). This kind of false sharing occurs when one of these threads, which got the object on the shared cache-line, frees the passed object then re-allocates another object but the allocator returns the same object (on the shared cache-line) that this thread just freed. Although the application caused the false sharing to happen in the first place, to prevent further false sharing the allocator should have returned the new object on some other cache-line which is only used by the allocating thread. When it comes to performance impact, this passive false sharing will slow down the application just like any other kind of false sharing if the threads sharing the cache-line start reading/writing the objects simultaneously.
+
+
+*** FIX ME: Insert a figure of above scenrio with explanation
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Micro Benchmark Suite
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\section{Micro Benchmark Suite}
+The aim of micro benchmark suite is to create a set of programs that can evaluate a memory allocator based on the performance metrics described in (FIX ME: local cite). These programs can be taken as a standard to benchmark an allocator's basic goals. These programs give details of an allocator's memory overhead and speed under a certain allocation pattern. The speed of the allocator is benchmarked in different ways. Similarly, false sharing happening in an allocator is also measured in multiple ways. These benchmarks evaluate the allocator under a certain allocation pattern which is configurable and can be changed using a few knobs to benchmark and observe an allocator's performance under a desired allocation pattern.
+
+Micro Benchmark Suite benchmarks an allocator's performance by allocating dynamic objects and, then, measuring specific metrics. The benchmark suite evaluates an allocator with a certain allocation pattern. Benchmarks have different knobs that can be used to change allocation pattern and evaluate an allocator under desired conditions. These can be set by giving command-line arguments to the benchmark on execution.
+
+Following is the list of available knobs.
+
+*** FIX ME: Add knobs items after finalize
+
+\subsection{Memory Benchmark}
+Memory benchmark measures memory overhead of an allocator. It allocates a number of dynamic objects. Then, by reading /proc/self/maps, gets the total memory that the allocator has requested from the OS. Finally, it calculates the memory overhead by taking the difference between the memory the allocator has requested from the OS and the memory that program has allocated.
+*** FIX ME: Insert a figure of above benchmark with description
+
+\subsubsection{Relevant Knobs}
+*** FIX ME: Insert Relevant Knobs
+
+\subsection{Speed Benchmark}
+Speed benchmark calculates the runtime speed of an allocator's functions (FIX ME: cite allocator routines). It does so by measuring the runtime of allocator routines in two different ways.
+
+\subsubsection{Speed Time}
+The time method does a certain amount of work by calling each routine of the allocator (FIX ME: cite allocator routines) a specific number of times. It calculates the total time it took to perform this workload. Then, it divides the time it took by the workload and calculates the average time taken by the allocator's routine.
+*** FIX ME: Insert a figure of above benchmark with description
+
+\paragraph{Relevant Knobs}
+*** FIX ME: Insert Relevant Knobs
+
+\subsubsection{Speed Workload}
+The workload method uses the opposite approach. It calls the allocator's routines for a specific amount of time and measures how much work was done during that time. Then, similar to the time method, it divides the time by the workload done during that time and calculates the average time taken by the allocator's routine.
+*** FIX ME: Insert a figure of above benchmark with description
+
+\paragraph{Relevant Knobs}
+*** FIX ME: Insert Relevant Knobs
+
+\subsection{Cache Scratch}
+Cache Scratch benchmark measures program induced allocator preserved passive false sharing (FIX ME CITE) in an allocator. It does so in two ways.
+
+\subsubsection{Cache Scratch Time}
+Cache Scratch Time allocates dynamic objects. Then, it benchmarks program induced allocator preserved passive false sharing (FIX ME CITE) in an allocator by measuring the time it takes to read/write these objects.
+*** FIX ME: Insert a figure of above benchmark with description
+
+\paragraph{Relevant Knobs}
+*** FIX ME: Insert Relevant Knobs
+
+\subsubsection{Cache Scratch Layout}
+Cache Scratch Layout also allocates dynamic objects. Then, it benchmarks program induced allocator preserved passive false sharing (FIX ME CITE) by using heap addresses returned by the allocator. It calculates how many objects were allocated to different threads on the same cache line.
+*** FIX ME: Insert a figure of above benchmark with description
+
+\paragraph{Relevant Knobs}
+*** FIX ME: Insert Relevant Knobs
+
+\subsection{Cache Thrash}
+Cache Thrash benchmark measures allocator induced active false sharing (FIX ME CITE) in an allocator. It also does so in two ways.
+
+\subsubsection{Cache Thrash Time}
+Cache Thrash Time allocates dynamic objects. Then, it benchmarks allocator induced false sharing (FIX ME CITE) in an allocator by measuring the time it takes to read/write these objects.
+*** FIX ME: Insert a figure of above benchmark with description
+
+\paragraph{Relevant Knobs}
+*** FIX ME: Insert Relevant Knobs
+
+\subsubsection{Cache Thrash Layout}
+Cache Thrash Layout also allocates dynamic objects. Then, it benchmarks allocator induced false sharing (FIX ME CITE) by using heap addresses returned by the allocator. It calculates how many objects were allocated to different threads on the same cache line.
+*** FIX ME: Insert a figure of above benchmark with description
+
+\paragraph{Relevant Knobs}
+*** FIX ME: Insert Relevant Knobs
+
+\section{Results}
+*** FIX ME: add configuration details of memory allocators
+
+\subsection{Memory Benchmark}
+
+\subsubsection{Relevant Knobs}
+
+\subsection{Speed Benchmark}
+
+\subsubsection{Speed Time}
+
+\paragraph{Relevant Knobs}
+
+\subsubsection{Speed Workload}
+
+\paragraph{Relevant Knobs}
+
+\subsection{Cache Scratch}
+
+\subsubsection{Cache Scratch Time}
+
+\paragraph{Relevant Knobs}
+
+\subsubsection{Cache Scratch Layout}
+
+\paragraph{Relevant Knobs}
+
+\subsection{Cache Thrash}
+
+\subsubsection{Cache Thrash Time}
+
+\paragraph{Relevant Knobs}
+
+\subsubsection{Cache Thrash Layout}
+
+\paragraph{Relevant Knobs}
Index: doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.tex
===================================================================
--- doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.tex	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ doc/theses/mubeen_zulfiqar_MMath/uw-ethesis.tex	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -165,6 +165,7 @@
 % cfa macros used in the document
 \input{common}
+%\usepackageinput{common}
 \CFAStyle						% CFA code-style for all languages
-\lstset{language=CFA,basicstyle=\linespread{0.9}\tt}	% CFA default language
+\lstset{basicstyle=\linespread{0.9}\tt}			% CFA typewriter font
 \newcommand{\PAB}[1]{{\color{red}PAB: #1}}
 
Index: libcfa/configure.ac
===================================================================
--- libcfa/configure.ac	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/configure.ac	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -131,4 +131,10 @@
 #io_uring 5.5 uses enum values
 #io_uring 5.6 and later uses probes
+
+AH_TEMPLATE([CFA_HAVE_LINUX_RSEQ_H],[Defined if rseq support is present when compiling libcfathread.])
+AC_CHECK_HEADERS([linux/rseq.h], [AC_DEFINE(CFA_HAVE_LINUX_RSEQ_H)])
+
+AH_TEMPLATE([CFA_HAVE_LINUX_LIBRSEQ],[Defined if librseq support is present when compiling libcfathread.])
+AC_CHECK_LIB([rseq], [rseq_available], [AC_DEFINE(CFA_HAVE_LINUX_LIBRSEQ)], [])
 
 AH_TEMPLATE([CFA_HAVE_LINUX_IO_URING_H],[Defined if io_uring support is present when compiling libcfathread.])
Index: libcfa/prelude/defines.hfa.in
===================================================================
--- libcfa/prelude/defines.hfa.in	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/prelude/defines.hfa.in	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -171,4 +171,10 @@
 #undef CFA_HAVE_LINUX_IO_URING_H
 
+/* Defined if librseq support is present when compiling libcfathread. */
+#undef CFA_HAVE_LINUX_LIBRSEQ
+
+/* Defined if rseq support is present when compiling libcfathread. */
+#undef CFA_HAVE_LINUX_RSEQ_H
+
 /* Defined if openat2 support is present when compiling libcfathread. */
 #undef CFA_HAVE_OPENAT2
@@ -205,4 +211,7 @@
 #undef HAVE_LINUX_IO_URING_H
 
+/* Define to 1 if you have the <linux/rseq.h> header file. */
+#undef HAVE_LINUX_RSEQ_H
+
 /* Define to 1 if you have the <memory.h> header file. */
 #undef HAVE_MEMORY_H
Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/Makefile.am	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -61,4 +61,5 @@
 	containers/queueLockFree.hfa \
 	containers/stackLockFree.hfa \
+	containers/vector2.hfa \
 	vec/vec.hfa \
 	vec/vec2.hfa \
@@ -69,5 +70,4 @@
 	common.hfa \
 	fstream.hfa \
-	strstream.hfa \
 	heap.hfa \
 	iostream.hfa \
@@ -78,4 +78,5 @@
 	rational.hfa \
 	stdlib.hfa \
+	strstream.hfa \
 	time.hfa \
 	bits/weakso_locks.hfa \
@@ -83,5 +84,6 @@
 	containers/pair.hfa \
 	containers/result.hfa \
-	containers/vector.hfa
+	containers/vector.hfa \
+	device/cpu.hfa
 
 libsrc = ${inst_headers_src} ${inst_headers_src:.hfa=.cfa} \
Index: libcfa/src/bits/signal.hfa
===================================================================
--- libcfa/src/bits/signal.hfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/bits/signal.hfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -20,7 +20,5 @@
 
 #include <errno.h>
-#define __USE_GNU
 #include <signal.h>
-#undef __USE_GNU
 #include <stdlib.h>
 #include <string.h>
Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/coroutine.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -15,4 +15,5 @@
 
 #define __cforall_thread__
+#define _GNU_SOURCE
 
 #include "coroutine.hfa"
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/io.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -15,4 +15,5 @@
 
 #define __cforall_thread__
+#define _GNU_SOURCE
 
 #if defined(__CFA_DEBUG__)
@@ -23,5 +24,4 @@
 
 #if defined(CFA_HAVE_LINUX_IO_URING_H)
-	#define _GNU_SOURCE         /* See feature_test_macros(7) */
 	#include <errno.h>
 	#include <signal.h>
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -15,5 +15,5 @@
 
 #define __cforall_thread__
-#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#define _GNU_SOURCE
 
 #if defined(__CFA_DEBUG__)
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/kernel.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -15,4 +15,6 @@
 
 #define __cforall_thread__
+#define _GNU_SOURCE
+
 // #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
 
@@ -278,5 +280,5 @@
 
 				// Spin a little on I/O, just in case
-					for(5) {
+				for(5) {
 					__maybe_io_drain( this );
 					readyThread = pop_fast( this->cltr );
@@ -285,5 +287,5 @@
 
 				// no luck, try stealing a few times
-					for(5) {
+				for(5) {
 					if( __maybe_io_drain( this ) ) {
 						readyThread = pop_fast( this->cltr );
@@ -422,4 +424,6 @@
 		__cfactx_switch( &proc_cor->context, &thrd_dst->context );
 		// when __cfactx_switch returns we are back in the processor coroutine
+
+
 
 		/* paranoid */ verify( 0x0D15EA5E0D15EA5Ep == thrd_dst->canary );
@@ -522,6 +526,6 @@
 
 	/* paranoid */ verify( ! __preemption_enabled() );
-	/* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) < ((uintptr_t)__get_stack(thrd_src->curr_cor)->base ), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too small.\n", thrd_src );
-	/* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) > ((uintptr_t)__get_stack(thrd_src->curr_cor)->limit), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too large.\n", thrd_src );
+	/* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) < ((uintptr_t)__get_stack(thrd_src->curr_cor)->base ) || thrd_src->corctx_flag, "ERROR : Returning $thread %p has been corrupted.\n StackPointer too small.\n", thrd_src );
+	/* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) > ((uintptr_t)__get_stack(thrd_src->curr_cor)->limit) || thrd_src->corctx_flag, "ERROR : Returning $thread %p has been corrupted.\n StackPointer too large.\n", thrd_src );
 }
 
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/kernel.hfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -66,4 +66,5 @@
 		unsigned id;
 		unsigned target;
+		unsigned last;
 		unsigned long long int cutoff;
 	} rdq;
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -15,11 +15,15 @@
 
 #define __cforall_thread__
+#define _GNU_SOURCE
 
 // C Includes
 #include <errno.h>              // errno
+#include <signal.h>
 #include <string.h>             // strerror
 #include <unistd.h>             // sysconf
+
 extern "C" {
       #include <limits.h>       // PTHREAD_STACK_MIN
+	#include <unistd.h>       // syscall
 	#include <sys/eventfd.h>  // eventfd
       #include <sys/mman.h>     // mprotect
@@ -136,4 +140,16 @@
 };
 
+#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
+	// No data needed (presumably librseq owns its own per-thread rseq area -- TODO confirm)
+#elif defined(CFA_HAVE_LINUX_RSEQ_H)
+	extern "Cforall" {
+		__attribute__((aligned(128))) thread_local volatile struct rseq __cfaabi_rseq @= {
+			.cpu_id : RSEQ_CPU_ID_UNINITIALIZED, // initial state that __kernel_raw_rseq_register checks for before registering
+		};
+	}
+#else
+	// No data needed: without rseq, __kernel_getcpu falls back to sched_getcpu()
+#endif
+
 //-----------------------------------------------------------------------------
 // Struct to steal stack
@@ -468,5 +484,5 @@
 	self_mon_p = &self_mon;
 	link.next = 0p;
-	link.ts   = 0;
+	link.ts   = -1llu;
 	preferred = -1u;
 	last_proc = 0p;
@@ -497,4 +513,5 @@
 	this.rdq.id  = -1u;
 	this.rdq.target = -1u;
+	this.rdq.last = -1u;
 	this.rdq.cutoff = 0ull;
 	do_terminate = false;
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -16,4 +16,8 @@
 #pragma once
 
+#if !defined(__cforall_thread__)
+	#error kernel_private.hfa should only be included in libcfathread source
+#endif
+
 #include "kernel.hfa"
 #include "thread.hfa"
@@ -22,8 +26,19 @@
 #include "stats.hfa"
 
+extern "C" {
+#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
+	#include <rseq/rseq.h>
+#elif defined(CFA_HAVE_LINUX_RSEQ_H)
+	#include <linux/rseq.h>
+#else
+	#ifndef _GNU_SOURCE
+	#error kernel_private requires gnu_source
+	#endif
+	#include <sched.h>
+#endif
+}
+
 //-----------------------------------------------------------------------------
 // Scheduler
-
-
 extern "C" {
 	void disable_interrupts() OPTIONAL_THREAD;
@@ -39,4 +54,30 @@
 
 //-----------------------------------------------------------------------------
+// Hardware
+
+#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
+	// No data needed
+#elif defined(CFA_HAVE_LINUX_RSEQ_H)
+	extern "Cforall" {
+		extern __attribute__((aligned(128))) thread_local volatile struct rseq __cfaabi_rseq;
+	}
+#else
+	// No data needed
+#endif
+
+static inline int __kernel_getcpu() { // number of the CPU the calling kernel thread is currently running on
+	/* paranoid */ verify( ! __preemption_enabled() ); // callers must have preemption disabled
+#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
+	return rseq_current_cpu(); // librseq accessor
+#elif defined(CFA_HAVE_LINUX_RSEQ_H)
+	int r = __cfaabi_rseq.cpu_id; // read the thread-local rseq area directly
+	/* paranoid */ verify( r >= 0 ); // presumably rejects the "uninitialized" sentinel, i.e. an unregistered thread -- TODO confirm
+	return r;
+#else
+	return sched_getcpu(); // no rseq available: ask libc
+#endif
+}
+
+//-----------------------------------------------------------------------------
 // Processor
 void main(processorCtx_t *);
@@ -44,6 +85,4 @@
 void * __create_pthread( pthread_t *, void * (*)(void *), void * );
 void __destroy_pthread( pthread_t pthread, void * stack, void ** retval );
-
-
 
 extern cluster * mainCluster;
Index: libcfa/src/concurrency/locks.cfa
===================================================================
--- libcfa/src/concurrency/locks.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/locks.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -16,4 +16,5 @@
 
 #define __cforall_thread__
+#define _GNU_SOURCE
 
 #include "locks.hfa"
Index: libcfa/src/concurrency/locks.hfa
===================================================================
--- libcfa/src/concurrency/locks.hfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/locks.hfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -24,4 +24,5 @@
 #include "containers/list.hfa"
 
+#include "limits.hfa"
 #include "thread.hfa"
 
@@ -87,4 +88,5 @@
 	bool tryP(BinaryBenaphore & this) {
 		ssize_t c = this.counter;
+		/* paranoid */ verify( c > MIN );
 		return (c >= 1) && __atomic_compare_exchange_n(&this.counter, &c, c-1, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
 	}
@@ -94,4 +96,5 @@
 		ssize_t c = 0;
 		for () {
+			/* paranoid */ verify( this.counter < MAX );
 			if (__atomic_compare_exchange_n(&this.counter, &c, c+1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
 				if (c == 0) return true;
@@ -173,4 +176,6 @@
 	ThreadBenaphore sem;
 };
+
+static inline void ?{}(fast_lock & this) { this.owner = 0p; } // default constructor: a new fast_lock has no owner
 
 static inline bool $try_lock(fast_lock & this, $thread * thrd) {
Index: libcfa/src/concurrency/monitor.cfa
===================================================================
--- libcfa/src/concurrency/monitor.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/monitor.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -15,4 +15,5 @@
 
 #define __cforall_thread__
+#define _GNU_SOURCE
 
 #include "monitor.hfa"
Index: libcfa/src/concurrency/mutex.cfa
===================================================================
--- libcfa/src/concurrency/mutex.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/mutex.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -17,4 +17,5 @@
 
 #define __cforall_thread__
+#define _GNU_SOURCE
 
 #include "mutex.hfa"
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/preemption.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -15,4 +15,6 @@
 
 #define __cforall_thread__
+#define _GNU_SOURCE
+
 // #define __CFA_DEBUG_PRINT_PREEMPTION__
 
Index: libcfa/src/concurrency/ready_queue.cfa
===================================================================
--- libcfa/src/concurrency/ready_queue.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/ready_queue.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -15,4 +15,6 @@
 
 #define __cforall_thread__
+#define _GNU_SOURCE
+
 // #define __CFA_DEBUG_PRINT_READY_QUEUE__
 
@@ -20,13 +22,19 @@
 #define USE_RELAXED_FIFO
 // #define USE_WORK_STEALING
+// #define USE_CPU_WORK_STEALING
 
 #include "bits/defs.hfa"
+#include "device/cpu.hfa"
 #include "kernel_private.hfa"
 
-#define _GNU_SOURCE
 #include "stdlib.hfa"
 #include "math.hfa"
 
+#include <errno.h>
 #include <unistd.h>
+
+extern "C" {
+	#include <sys/syscall.h>  // __NR_xxx
+}
 
 #include "ready_subqueue.hfa"
@@ -46,5 +54,7 @@
 #endif
 
-#if   defined(USE_RELAXED_FIFO)
+#if   defined(USE_CPU_WORK_STEALING)
+	#define READYQ_SHARD_FACTOR 2
+#elif defined(USE_RELAXED_FIFO)
 	#define BIAS 4
 	#define READYQ_SHARD_FACTOR 4
@@ -85,4 +95,23 @@
 }
 
+#if   defined(CFA_HAVE_LINUX_LIBRSEQ) // select what register_proc_id/unregister_proc_id call to (un)register rseq
+	// No forward declaration needed (presumably the rseq_* functions are declared by <rseq/rseq.h> -- TODO confirm)
+	#define __kernel_rseq_register rseq_register_current_thread
+	#define __kernel_rseq_unregister rseq_unregister_current_thread
+#elif defined(CFA_HAVE_LINUX_RSEQ_H)
+	void __kernel_raw_rseq_register  (void); // raw-syscall version, defined at the end of this file
+	void __kernel_raw_rseq_unregister(void); // raw-syscall version, defined at the end of this file
+
+	#define __kernel_rseq_register __kernel_raw_rseq_register
+	#define __kernel_rseq_unregister __kernel_raw_rseq_unregister
+#else
+	// No forward declaration needed
+	// No rseq support: registration and unregistration do nothing
+	static inline void noop(void) {}
+
+	#define __kernel_rseq_register noop
+	#define __kernel_rseq_unregister noop
+#endif
+
 //=======================================================================
 // Cluster wide reader-writer lock
@@ -107,4 +136,6 @@
 // Lock-Free registering/unregistering of threads
 unsigned register_proc_id( void ) with(*__scheduler_lock) {
+	__kernel_rseq_register();
+
 	__cfadbg_print_safe(ready_queue, "Kernel : Registering proc %p for RW-Lock\n", proc);
 	bool * handle = (bool *)&kernelTLS().sched_lock;
@@ -161,4 +192,6 @@
 
 	__cfadbg_print_safe(ready_queue, "Kernel : Unregister proc %p\n", proc);
+
+	__kernel_rseq_unregister();
 }
 
@@ -214,11 +247,25 @@
 //=======================================================================
 void ?{}(__ready_queue_t & this) with (this) {
-	lanes.data  = 0p;
-	lanes.tscs  = 0p;
-	lanes.count = 0;
+	#if defined(USE_CPU_WORK_STEALING)
+		lanes.count = cpu_info.hthrd_count * READYQ_SHARD_FACTOR;
+		lanes.data = alloc( lanes.count );
+		lanes.tscs = alloc( lanes.count );
+
+		for( idx; (size_t)lanes.count ) {
+			(lanes.data[idx]){};
+			lanes.tscs[idx].tv = rdtscl();
+		}
+	#else
+		lanes.data  = 0p;
+		lanes.tscs  = 0p;
+		lanes.count = 0;
+	#endif
 }
 
 void ^?{}(__ready_queue_t & this) with (this) {
-	verify( SEQUENTIAL_SHARD == lanes.count );
+	#if !defined(USE_CPU_WORK_STEALING)
+		verify( SEQUENTIAL_SHARD == lanes.count );
+	#endif
+
 	free(lanes.data);
 	free(lanes.tscs);
@@ -226,4 +273,143 @@
 
 //-----------------------------------------------------------------------
+#if defined(USE_CPU_WORK_STEALING)
+	__attribute__((hot)) void push(struct cluster * cltr, struct $thread * thrd, bool push_local) with (cltr->ready_queue) {
+		__cfadbg_print_safe(ready_queue, "Kernel : Pushing %p on cluster %p\n", thrd, cltr);
+		// Push thrd onto one of the READYQ_SHARD_FACTOR lanes associated with the CPU the caller is running on.
+		processor * const proc = kernelTLS().this_processor;
+		const bool external = !push_local || (!proc) || (cltr != proc->cltr); // not a local push by a processor of this cluster
+
+		const int cpu = __kernel_getcpu();
+		/* paranoid */ verify(cpu >= 0);
+		/* paranoid */ verify(cpu < cpu_info.hthrd_count);
+		/* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count);
+
+		const cpu_map_entry_t & map = cpu_info.llc_map[cpu];
+		/* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count);
+		/* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count);
+		/* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR);
+
+		const int start = map.self * READYQ_SHARD_FACTOR; // index of the first lane of this CPU's group
+		unsigned i;
+		do {
+			unsigned r;
+			if(unlikely(external)) { r = __tls_rand(); } // external pushes pick a lane of the group at random
+			else { r = proc->rdq.its++; } // local pushes rotate through the group using the processor's counter
+			i = start + (r % READYQ_SHARD_FACTOR);
+			// If we can't lock the chosen lane, pick again and retry
+		} while( !__atomic_try_acquire( &lanes.data[i].lock ) );
+
+		// Actually push it
+		push(lanes.data[i], thrd);
+
+		// Unlock and return
+		__atomic_unlock( &lanes.data[i].lock );
+
+		#if !defined(__CFA_NO_STATISTICS__)
+			if(unlikely(external)) __atomic_fetch_add(&cltr->stats->ready.push.extrn.success, 1, __ATOMIC_RELAXED);
+			else __tls_stats()->ready.push.local.success++;
+		#endif
+
+		__cfadbg_print_safe(ready_queue, "Kernel : Pushed %p on cluster %p (idx: %u, mask %llu, first %d)\n", thrd, cltr, i, used.mask[0], lane_first);
+
+	}
+
+	// Pop a thread from the ready queue of the given cluster: first from a "help" target lane if one is pending, then from the current CPU's own lanes
+	__attribute__((hot)) $thread * pop_fast(struct cluster * cltr) with (cltr->ready_queue) {
+		/* paranoid */ verify( lanes.count > 0 );
+		/* paranoid */ verify( kernelTLS().this_processor );
+
+		const int cpu = __kernel_getcpu();
+		/* paranoid */ verify(cpu >= 0);
+		/* paranoid */ verify(cpu < cpu_info.hthrd_count);
+		/* paranoid */ verify(cpu * READYQ_SHARD_FACTOR < lanes.count);
+
+		const cpu_map_entry_t & map = cpu_info.llc_map[cpu];
+		/* paranoid */ verify(map.start * READYQ_SHARD_FACTOR < lanes.count);
+		/* paranoid */ verify(map.self * READYQ_SHARD_FACTOR < lanes.count);
+		/* paranoid */ verifyf((map.start + map.count) * READYQ_SHARD_FACTOR <= lanes.count, "have %zu lanes but map can go up to %u", lanes.count, (map.start + map.count) * READYQ_SHARD_FACTOR);
+
+		processor * const proc = kernelTLS().this_processor;
+		const int start = map.self * READYQ_SHARD_FACTOR; // index of the first lane of this CPU's group
+
+		// Do we already have a help target?
+		if(proc->rdq.target == -1u) {
+			// No target yet: record the oldest timestamp among our own lanes as the cutoff, then pick a target for the next call
+			unsigned long long min = ts(lanes.data[start]);
+			for(i; READYQ_SHARD_FACTOR) {
+				unsigned long long tsc = ts(lanes.data[start + i]);
+				if(tsc < min) min = tsc;
+			}
+			proc->rdq.cutoff = min;
+
+			/* paranoid */ verify(lanes.count < 65536); // The following code assumes max 65536 cores.
+			/* paranoid */ verify(map.count < 65536); // The following code assumes max 65536 cores.
+			uint64_t chaos = __tls_rand(); // one random draw, split into three independent fields below
+			uint64_t high_chaos = (chaos >> 32);
+			uint64_t  mid_chaos = (chaos >> 16) & 0xffff;
+			uint64_t  low_chaos = chaos & 0xffff;
+
+			unsigned me = map.self;
+			unsigned cpu_chaos = map.start + (mid_chaos % map.count); // random entry in [map.start, map.start + map.count)
+			bool global = cpu_chaos == me; // drawing ourselves means: target any lane of the whole queue instead
+
+			if(global) {
+				proc->rdq.target = high_chaos % lanes.count;
+			} else {
+				proc->rdq.target = (cpu_chaos * READYQ_SHARD_FACTOR) + (low_chaos % READYQ_SHARD_FACTOR);
+				/* paranoid */ verify(proc->rdq.target >= (map.start * READYQ_SHARD_FACTOR));
+				/* paranoid */ verify(proc->rdq.target <  ((map.start + map.count) * READYQ_SHARD_FACTOR));
+			}
+
+			/* paranoid */ verify(proc->rdq.target != -1u);
+		}
+		else {
+			const unsigned long long bias = 0; //2_500_000_000;
+			const unsigned long long cutoff = proc->rdq.cutoff > bias ? proc->rdq.cutoff - bias : proc->rdq.cutoff;
+			{
+				unsigned target = proc->rdq.target;
+				proc->rdq.target = -1u; // the target is consumed: a new one is chosen on a later call
+				if(lanes.tscs[target].tv < cutoff && ts(lanes.data[target]) < cutoff) { // only help lanes older than the cutoff
+					$thread * t = try_pop(cltr, target __STATS(, __tls_stats()->ready.pop.help));
+					proc->rdq.last = target; // remember the lane we last tried to help
+					if(t) return t;
+				}
+			}
+
+			unsigned last = proc->rdq.last;
+			if(last != -1u && lanes.tscs[last].tv < cutoff && ts(lanes.data[last]) < cutoff) {
+				$thread * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.help));
+				if(t) return t;
+			}
+			else {
+				proc->rdq.last = -1u;
+			}
+		}
+
+		for(READYQ_SHARD_FACTOR) { // finally try each lane of our own group once, rotating the starting point
+			unsigned i = start + (proc->rdq.itr++ % READYQ_SHARD_FACTOR);
+			if($thread * t = try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.local))) return t;
+		}
+
+		// No thread was obtained from any lane tried: return 0p
+		return 0p;
+	}
+
+	__attribute__((hot)) struct $thread * pop_slow(struct cluster * cltr) with (cltr->ready_queue) { // steal attempt: last helped lane, then one random lane
+		processor * const proc = kernelTLS().this_processor;
+		unsigned last = proc->rdq.last;
+		if(last != -1u) { // retry the lane remembered by pop_fast first
+			struct $thread * t = try_pop(cltr, last __STATS(, __tls_stats()->ready.pop.steal));
+			if(t) return t;
+			proc->rdq.last = -1u; // nothing obtained there: forget it
+		}
+
+		unsigned i = __tls_rand() % lanes.count; // otherwise try a single lane chosen at random over the whole queue
+		return try_pop(cltr, i __STATS(, __tls_stats()->ready.pop.steal));
+	}
+	__attribute__((hot)) struct $thread * pop_search(struct cluster * cltr) { // last-resort pop: simply delegates to search()
+		return search(cltr);
+	}
+#endif
 #if defined(USE_RELAXED_FIFO)
 	//-----------------------------------------------------------------------
@@ -519,9 +705,9 @@
 					if(is_empty(sl)) {
 						assert( sl.anchor.next == 0p );
-						assert( sl.anchor.ts   == 0  );
+						assert( sl.anchor.ts   == -1llu );
 						assert( mock_head(sl)  == sl.prev );
 					} else {
 						assert( sl.anchor.next != 0p );
-						assert( sl.anchor.ts   != 0  );
+						assert( sl.anchor.ts   != -1llu );
 						assert( mock_head(sl)  != sl.prev );
 					}
@@ -573,134 +759,141 @@
 		lanes.tscs = alloc(lanes.count, lanes.tscs`realloc);
 		for(i; lanes.count) {
-			unsigned long long tsc = ts(lanes.data[i]);
-			lanes.tscs[i].tv = tsc != 0 ? tsc : rdtscl();
+			unsigned long long tsc1 = ts(lanes.data[i]);
+			unsigned long long tsc2 = rdtscl();
+			lanes.tscs[i].tv = min(tsc1, tsc2);
 		}
 	#endif
 }
 
-// Grow the ready queue
-void ready_queue_grow(struct cluster * cltr) {
-	size_t ncount;
-	int target = cltr->procs.total;
-
-	/* paranoid */ verify( ready_mutate_islocked() );
-	__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
-
-	// Make sure that everything is consistent
-	/* paranoid */ check( cltr->ready_queue );
-
-	// grow the ready queue
-	with( cltr->ready_queue ) {
-		// Find new count
-		// Make sure we always have atleast 1 list
-		if(target >= 2) {
-			ncount = target * READYQ_SHARD_FACTOR;
-		} else {
-			ncount = SEQUENTIAL_SHARD;
-		}
-
-		// Allocate new array (uses realloc and memcpies the data)
-		lanes.data = alloc( ncount, lanes.data`realloc );
-
-		// Fix the moved data
-		for( idx; (size_t)lanes.count ) {
-			fix(lanes.data[idx]);
-		}
-
-		// Construct new data
-		for( idx; (size_t)lanes.count ~ ncount) {
-			(lanes.data[idx]){};
-		}
-
-		// Update original
-		lanes.count = ncount;
-	}
-
-	fix_times(cltr);
-
-	reassign_cltr_id(cltr);
-
-	// Make sure that everything is consistent
-	/* paranoid */ check( cltr->ready_queue );
-
-	__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n");
-
-	/* paranoid */ verify( ready_mutate_islocked() );
-}
-
-// Shrink the ready queue
-void ready_queue_shrink(struct cluster * cltr) {
-	/* paranoid */ verify( ready_mutate_islocked() );
-	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
-
-	// Make sure that everything is consistent
-	/* paranoid */ check( cltr->ready_queue );
-
-	int target = cltr->procs.total;
-
-	with( cltr->ready_queue ) {
-		// Remember old count
-		size_t ocount = lanes.count;
-
-		// Find new count
-		// Make sure we always have atleast 1 list
-		lanes.count = target >= 2 ? target * READYQ_SHARD_FACTOR: SEQUENTIAL_SHARD;
-		/* paranoid */ verify( ocount >= lanes.count );
-		/* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 );
-
-		// for printing count the number of displaced threads
-		#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
-			__attribute__((unused)) size_t displaced = 0;
-		#endif
-
-		// redistribute old data
-		for( idx; (size_t)lanes.count ~ ocount) {
-			// Lock is not strictly needed but makes checking invariants much easier
-			__attribute__((unused)) bool locked = __atomic_try_acquire(&lanes.data[idx].lock);
-			verify(locked);
-
-			// As long as we can pop from this lane to push the threads somewhere else in the queue
-			while(!is_empty(lanes.data[idx])) {
-				struct $thread * thrd;
-				unsigned long long _;
-				[thrd, _] = pop(lanes.data[idx]);
-
-				push(cltr, thrd, true);
-
-				// for printing count the number of displaced threads
-				#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
-					displaced++;
-				#endif
-			}
-
-			// Unlock the lane
-			__atomic_unlock(&lanes.data[idx].lock);
-
-			// TODO print the queue statistics here
-
-			^(lanes.data[idx]){};
-		}
-
-		__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced);
-
-		// Allocate new array (uses realloc and memcpies the data)
-		lanes.data = alloc( lanes.count, lanes.data`realloc );
-
-		// Fix the moved data
-		for( idx; (size_t)lanes.count ) {
-			fix(lanes.data[idx]);
-		}
-	}
-
-	fix_times(cltr);
-
-	reassign_cltr_id(cltr);
-
-	// Make sure that everything is consistent
-	/* paranoid */ check( cltr->ready_queue );
-
-	__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n");
-	/* paranoid */ verify( ready_mutate_islocked() );
-}
+#if defined(USE_CPU_WORK_STEALING)
+	// The lanes are allocated once by the __ready_queue_t constructor (cpu_info.hthrd_count * READYQ_SHARD_FACTOR), so growing/shrinking does nothing
+	void ready_queue_grow(struct cluster * cltr) {}
+	void ready_queue_shrink(struct cluster * cltr) {}
+#else
+	// Grow the ready queue: resize the lane array to match cltr->procs.total; ready_mutate_islocked() must hold on entry and exit
+	void ready_queue_grow(struct cluster * cltr) {
+		size_t ncount;
+		int target = cltr->procs.total;
+
+		/* paranoid */ verify( ready_mutate_islocked() );
+		__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue\n");
+
+		// Make sure that everything is consistent
+		/* paranoid */ check( cltr->ready_queue );
+
+		// grow the ready queue
+		with( cltr->ready_queue ) {
+			// Find new count
+			// Make sure we always have at least 1 list
+			if(target >= 2) {
+				ncount = target * READYQ_SHARD_FACTOR;
+			} else {
+				ncount = SEQUENTIAL_SHARD;
+			}
+
+			// Allocate new array (uses realloc and memcpies the data)
+			lanes.data = alloc( ncount, lanes.data`realloc );
+
+			// Fix the moved data
+			for( idx; (size_t)lanes.count ) {
+				fix(lanes.data[idx]);
+			}
+
+			// Construct new data
+			for( idx; (size_t)lanes.count ~ ncount) {
+				(lanes.data[idx]){};
+			}
+
+			// Update original
+			lanes.count = ncount;
+		}
+
+		fix_times(cltr);
+
+		reassign_cltr_id(cltr);
+
+		// Make sure that everything is consistent
+		/* paranoid */ check( cltr->ready_queue );
+
+		__cfadbg_print_safe(ready_queue, "Kernel : Growing ready queue done\n");
+
+		/* paranoid */ verify( ready_mutate_islocked() );
+	}
+
+	// Shrink the ready queue: move threads off the lanes being removed, then resize the lane array to match cltr->procs.total
+	void ready_queue_shrink(struct cluster * cltr) {
+		/* paranoid */ verify( ready_mutate_islocked() );
+		__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue\n");
+
+		// Make sure that everything is consistent
+		/* paranoid */ check( cltr->ready_queue );
+
+		int target = cltr->procs.total;
+
+		with( cltr->ready_queue ) {
+			// Remember old count
+			size_t ocount = lanes.count;
+
+			// Find new count
+			// Make sure we always have at least 1 list
+			lanes.count = target >= 2 ? target * READYQ_SHARD_FACTOR: SEQUENTIAL_SHARD;
+			/* paranoid */ verify( ocount >= lanes.count );
+			/* paranoid */ verify( lanes.count == target * READYQ_SHARD_FACTOR || target < 2 );
+
+			// for printing count the number of displaced threads
+			#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
+				__attribute__((unused)) size_t displaced = 0;
+			#endif
+
+			// redistribute old data: lanes.count was already lowered above, so push() only uses the lanes that remain
+			for( idx; (size_t)lanes.count ~ ocount) {
+				// Lock is not strictly needed but makes checking invariants much easier
+				__attribute__((unused)) bool locked = __atomic_try_acquire(&lanes.data[idx].lock);
+				verify(locked);
+
+				// As long as we can pop from this lane to push the threads somewhere else in the queue
+				while(!is_empty(lanes.data[idx])) {
+					struct $thread * thrd;
+					unsigned long long _;
+					[thrd, _] = pop(lanes.data[idx]);
+
+					push(cltr, thrd, true);
+
+					// for printing count the number of displaced threads
+					#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
+						displaced++;
+					#endif
+				}
+
+				// Unlock the lane
+				__atomic_unlock(&lanes.data[idx].lock);
+
+				// TODO print the queue statistics here
+
+				^(lanes.data[idx]){};
+			}
+
+			__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue displaced %zu threads\n", displaced);
+
+			// Allocate new array (uses realloc and memcpies the data)
+			lanes.data = alloc( lanes.count, lanes.data`realloc );
+
+			// Fix the moved data
+			for( idx; (size_t)lanes.count ) {
+				fix(lanes.data[idx]);
+			}
+		}
+
+		fix_times(cltr);
+
+		reassign_cltr_id(cltr);
+
+		// Make sure that everything is consistent
+		/* paranoid */ check( cltr->ready_queue );
+
+		__cfadbg_print_safe(ready_queue, "Kernel : Shrinking ready queue done\n");
+		/* paranoid */ verify( ready_mutate_islocked() );
+	}
+#endif
 
 #if !defined(__CFA_NO_STATISTICS__)
@@ -710,2 +903,59 @@
 	}
 #endif
+
+
+#if   defined(CFA_HAVE_LINUX_LIBRSEQ)
+	// No definition needed
+#elif defined(CFA_HAVE_LINUX_RSEQ_H)
+
+	#if defined( __x86_64 ) || defined( __i386 )
+		#define RSEQ_SIG	0x53053053
+	#elif defined( __ARM_ARCH )
+		#ifdef __ARMEB__
+		#define RSEQ_SIG    0xf3def5e7      /* udf    #24035    ; 0x5de3 (ARMv6+) */
+		#else
+		#define RSEQ_SIG    0xe7f5def3      /* udf    #24035    ; 0x5de3 */
+		#endif
+	#endif
+
+	extern void __disable_interrupts_hard();
+	extern void __enable_interrupts_hard();
+
+	void __kernel_raw_rseq_register  (void) {
+		/* paranoid */ verify( __cfaabi_rseq.cpu_id == RSEQ_CPU_ID_UNINITIALIZED );
+		// Register this thread's __cfaabi_rseq area with the kernel (flags == 0, signature RSEQ_SIG); any failure aborts.
+		// int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), 0, (sigset_t *)0p, _NSIG / 8);
+		int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), 0, RSEQ_SIG);
+		if(ret != 0) {
+			int e = errno; // capture errno before any other call can overwrite it
+			switch(e) {
+			case EINVAL: abort("KERNEL ERROR: rseq register invalid argument");
+			case ENOSYS: abort("KERNEL ERROR: rseq register not supported");
+			case EFAULT: abort("KERNEL ERROR: rseq register with invalid address");
+			case EBUSY : abort("KERNEL ERROR: rseq register already registered");
+			case EPERM : abort("KERNEL ERROR: rseq register sig  argument  on unregistration does not match the signature received on registration");
+			default: abort("KERNEL ERROR: rseq register unexpected return %d", e);
+			}
+		}
+	}
+
+	void __kernel_raw_rseq_unregister(void) {
+		/* paranoid */ verify( (int)__cfaabi_rseq.cpu_id >= 0 ); // cast: cpu_id is unsigned, the check is vacuous without it
+		// Unregister this thread's __cfaabi_rseq area (RSEQ_FLAG_UNREGISTER, same RSEQ_SIG as registration); any failure aborts.
+		// int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, (sigset_t *)0p, _NSIG / 8);
+		int ret = syscall(__NR_rseq, &__cfaabi_rseq, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
+		if(ret != 0) {
+			int e = errno; // capture errno before any other call can overwrite it
+			switch(e) {
+			case EINVAL: abort("KERNEL ERROR: rseq unregister invalid argument");
+			case ENOSYS: abort("KERNEL ERROR: rseq unregister not supported");
+			case EFAULT: abort("KERNEL ERROR: rseq unregister with invalid address");
+			case EBUSY : abort("KERNEL ERROR: rseq unregister already registered");
+			case EPERM : abort("KERNEL ERROR: rseq unregister sig  argument  on unregistration does not match the signature received on registration");
+			default: abort("KERNEL ERROR: rseq unregister unexpected return %d", e);
+			}
+		}
+	}
+#else
+	// No definition needed
+#endif
Index: libcfa/src/concurrency/ready_subqueue.hfa
===================================================================
--- libcfa/src/concurrency/ready_subqueue.hfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/ready_subqueue.hfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -32,5 +32,5 @@
 	this.prev = mock_head(this);
 	this.anchor.next = 0p;
-	this.anchor.ts   = 0;
+	this.anchor.ts   = -1llu;
 	#if !defined(__CFA_NO_STATISTICS__)
 		this.cnt  = 0;
@@ -44,5 +44,5 @@
 	/* paranoid */ verify( &mock_head(this)->link.ts   == &this.anchor.ts   );
 	/* paranoid */ verify( mock_head(this)->link.next == 0p );
-	/* paranoid */ verify( mock_head(this)->link.ts   == 0  );
+	/* paranoid */ verify( mock_head(this)->link.ts   == -1llu  );
 	/* paranoid */ verify( mock_head(this) == this.prev );
 	/* paranoid */ verify( __alignof__(__intrusive_lane_t) == 128 );
@@ -55,5 +55,5 @@
 	// Make sure the list is empty
 	/* paranoid */ verify( this.anchor.next == 0p );
-	/* paranoid */ verify( this.anchor.ts   == 0  );
+	/* paranoid */ verify( this.anchor.ts   == -1llu );
 	/* paranoid */ verify( mock_head(this)  == this.prev );
 }
@@ -64,13 +64,15 @@
 	/* paranoid */ verify( this.lock );
 	/* paranoid */ verify( node->link.next == 0p );
-	/* paranoid */ verify( node->link.ts   == 0  );
+	/* paranoid */ verify( node->link.ts   == -1llu  );
 	/* paranoid */ verify( this.prev->link.next == 0p );
-	/* paranoid */ verify( this.prev->link.ts   == 0  );
+	/* paranoid */ verify( this.prev->link.ts   == -1llu  );
 	if( this.anchor.next == 0p ) {
 		/* paranoid */ verify( this.anchor.next == 0p );
-		/* paranoid */ verify( this.anchor.ts   == 0  );
+		/* paranoid */ verify( this.anchor.ts   == -1llu );
+		/* paranoid */ verify( this.anchor.ts   != 0  );
 		/* paranoid */ verify( this.prev == mock_head( this ) );
 	} else {
 		/* paranoid */ verify( this.anchor.next != 0p );
+		/* paranoid */ verify( this.anchor.ts   != -1llu );
 		/* paranoid */ verify( this.anchor.ts   != 0  );
 		/* paranoid */ verify( this.prev != mock_head( this ) );
@@ -92,4 +94,5 @@
 	/* paranoid */ verify( this.lock );
 	/* paranoid */ verify( this.anchor.next != 0p );
+	/* paranoid */ verify( this.anchor.ts   != -1llu );
 	/* paranoid */ verify( this.anchor.ts   != 0  );
 
@@ -99,7 +102,7 @@
 	this.anchor.next = node->link.next;
 	this.anchor.ts   = node->link.ts;
-	bool is_empty = this.anchor.ts == 0;
+	bool is_empty = this.anchor.next == 0p;
 	node->link.next = 0p;
-	node->link.ts   = 0;
+	node->link.ts   = -1llu;
 	#if !defined(__CFA_NO_STATISTICS__)
 		this.cnt--;
@@ -110,5 +113,7 @@
 
 	/* paranoid */ verify( node->link.next == 0p );
-	/* paranoid */ verify( node->link.ts   == 0  );
+	/* paranoid */ verify( node->link.ts   == -1llu  );
+	/* paranoid */ verify( node->link.ts   != 0  );
+	/* paranoid */ verify( this.anchor.ts  != 0  );
 	return [node, ts];
 }
@@ -116,5 +121,5 @@
 // Check whether or not list is empty
 static inline bool is_empty(__intrusive_lane_t & this) {
-	return this.anchor.ts == 0;
+	return this.anchor.next == 0p;
 }
 
@@ -122,4 +127,5 @@
 static inline unsigned long long ts(__intrusive_lane_t & this) {
 	// Cannot verify here since it may not be locked
+	/* paranoid */ verify(this.anchor.ts != 0);
 	return this.anchor.ts;
 }
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/concurrency/thread.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -15,4 +15,5 @@
 
 #define __cforall_thread__
+#define _GNU_SOURCE
 
 #include "thread.hfa"
@@ -39,5 +40,5 @@
 	curr_cluster = &cl;
 	link.next = 0p;
-	link.ts   = 0;
+	link.ts   = -1llu;
 	preferred = -1u;
 	last_proc = 0p;
Index: libcfa/src/containers/array.hfa
===================================================================
--- libcfa/src/containers/array.hfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/containers/array.hfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1,13 +1,7 @@
 
 
-// a type whose size is n
-#define Z(n) char[n]
-
-// the inverse of Z(-)
-#define z(N) sizeof(N)
-
-forall( T & ) struct tag {};
+forall( __CFA_tysys_id_only_X & ) struct tag {};
 #define ttag(T) ((tag(T)){})
-#define ztag(n) ttag(Z(n))
+#define ztag(n) ttag(n)
 
 
@@ -18,5 +12,5 @@
 forall( [N], S & | sized(S), Timmed &, Tbase & ) {
     struct arpk {
-        S strides[z(N)];
+        S strides[N];
     };
 
@@ -56,14 +50,14 @@
 
     static inline size_t ?`len( arpk(N, S, Timmed, Tbase) & a ) {
-        return z(N);
+        return N;
     }
 
     // workaround #226 (and array relevance thereof demonstrated in mike102/otype-slow-ndims.cfa)
     static inline void ?{}( arpk(N, S, Timmed, Tbase) & this ) {
-        void ?{}( S (&inner)[z(N)] ) {}
+        void ?{}( S (&inner)[N] ) {}
         ?{}(this.strides);
     }
     static inline void ^?{}( arpk(N, S, Timmed, Tbase) & this ) {
-        void ^?{}( S (&inner)[z(N)] ) {}
+        void ^?{}( S (&inner)[N] ) {}
         ^?{}(this.strides);
     }
@@ -143,9 +137,15 @@
 // Base
 forall( [Nq], Sq & | sized(Sq), Tbase & )
-static inline tag(arpk(Nq, Sq, Tbase, Tbase)) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(Tbase) ) {}
+static inline tag(arpk(Nq, Sq, Tbase, Tbase)) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(Tbase) ) {
+    tag(arpk(Nq, Sq, Tbase, Tbase)) ret;
+    return ret;
+}
 
 // Rec
 forall( [Nq], Sq & | sized(Sq), [N], S & | sized(S), recq &, recr &, Tbase & | { tag(recr) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(recq) ); } )
-static inline tag(arpk(N, S, recr, Tbase)) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(arpk(N, S, recq, Tbase)) ) {}
+static inline tag(arpk(N, S, recr, Tbase)) enq_( tag(Tbase), tag(Nq), tag(Sq), tag(arpk(N, S, recq, Tbase)) ) {
+    tag(arpk(N, S, recr, Tbase)) ret;
+    return ret;
+}
 
 // Wrapper
Index: libcfa/src/containers/vector2.hfa
===================================================================
--- libcfa/src/containers/vector2.hfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ libcfa/src/containers/vector2.hfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,355 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// vector -- A growable array, with full-service iterators
+//
+// Author           : Michael Brooks
+// Created On       : Thu Jun 23 22:00:00 2021
+// Last Modified By : Michael Brooks
+// Last Modified On : Thu Jun 23 22:00:00 2021
+// Update Count     : 1
+//
+
+#include <stdlib.hfa>
+#include "list.hfa"
+
+forall( T ) {
+    struct vector;
+    
+    struct vector_transit {
+        vector(T) * col_$;
+        ptrdiff_t idx_$;
+    };
+
+    struct vector_exit {
+        vector(T) * invec_$;
+        T * item_$;
+    };
+
+    struct vector_permit {
+        vector(T) * invec_$;
+        T * item_$;
+        inline dlink(vector_permit(T));
+    };
+    P9_EMBEDDED(vector_permit(T), dlink(vector_permit(T)))
+
+    struct vector { // growable array; elements live in [elems_first_$, elems_end_$) and may wrap around the buffer
+        T * buffer_first_$; // start of the allocated buffer
+        T * buffer_end_$; // one past the last slot of the allocated buffer
+        T * elems_first_$; // slot of the first element; 0p while the vector is empty
+        T * elems_end_$; // wrapped before storing, never == buffer_end_$
+        size_t exit_refcount_$; // number of live vector_exit objects that refer to this vector
+        dlist(vector_permit(T)) live_iters_$; // vector_permit objects attached to this vector; fixed up when elements move
+    };
+}
+
+static inline
+forall( T ) {
+    
+    // vector
+
+    void ?{}( vector( T ) &, size_t capacity );
+    void ^?{}( vector( T ) & );
+
+    void ?{}( vector( T ) & ) = void;
+    void ?{}( vector( T ) &, vector( T ) & ) = void;
+    vector( T ) & ?=?( vector( T ) &, vector( T ) & ) = void;
+
+    // transit
+
+    void ?{}( vector_transit(T) & ) = void;
+    void ?{}( vector_transit(T) &, vector_transit(T) & );
+    void ^?{}( vector_transit(T) & );
+
+    T ?`val( vector_transit(T) & src );
+    void ?=?( vector_transit(T) & dst, T val );
+
+    // exit
+
+    void ?{}( vector_exit(T) & ) = void;
+    void ?{}( vector_exit(T) &, vector(T) * ) = void;
+
+    void ^?{}( vector_exit(T) & );
+    void ?{}( vector_exit(T) &, vector_transit(T) & );
+    void ?{}( vector_exit(T) &, vector_exit(T) & );
+
+    T ?`val( vector_exit(T) & src );
+    void ?=?( vector_exit(T) & dst, T val );
+    T & ?=?( T & dst, vector_exit(T) & src );
+    void ?*=?( T & dst, vector_exit(T) & src );
+
+    bool ?`moveNext( vector_exit(T) & it );
+
+    // permit
+
+    void ?{}( vector_permit(T) & ) = void;
+
+    void ^?{}( vector_permit(T) & );
+    void ?{}( vector_permit(T) &, vector_transit(T) & );
+    void ?{}( vector_permit(T) &, vector_exit(T) & );
+    void ?{}( vector_permit(T) &, vector_permit(T) & ) = void;
+
+    T ?`val( vector_permit(T) & src );
+
+    // api
+
+    vector_transit(T) push_last( vector( T ) & col, T val );
+    vector_transit(T) ?[?]( vector( T ) &, ptrdiff_t );
+    vector_exit(T) ?`origin( vector( T ) & );
+    size_t ?`capacity( vector(T) & );
+    size_t ?`length( vector(T) & );
+
+    void insert_before( vector( T ) & col, ptrdiff_t idx, T val );
+
+}
+
+static inline
+forall( T ) {
+
+    // vector
+
+    void ?{}( vector( T ) & this, size_t capacity ) { // construct an empty vector whose buffer holds `capacity` elements
+        (this.buffer_first_$){ aalloc( capacity ) };
+        (this.buffer_end_$){ this.buffer_first_$ + capacity};
+        (this.elems_first_$){ 0p }; // 0p is the "no elements" marker tested by ?`length and ?`moveNext
+        (this.elems_end_$){ this.buffer_first_$ }; // the first push_last stores at the start of the buffer
+        (this.exit_refcount_$){ 0 };
+        (this.live_iters_$){};
+    }
+
+    void ^?{}( vector( T ) & this ) {
+        assert( this.exit_refcount_$ == 0 );
+        free( this.buffer_first_$ );
+        this.buffer_first_$ = 0p;
+        this.buffer_end_$ = 0p;
+        this.elems_first_$ = 0p;
+        this.elems_end_$ = 0p;
+    }
+
+    // transit 
+
+    void ?{}( vector_transit(T) & this, vector_transit(T) & other ) {
+        // call autogen constructor deleted at end of hfa
+        (this){ other.col_$, other.idx_$ };
+    }
+
+    void ^?{}( vector_transit(T) & ) {}
+
+
+    vector_transit(T) ?[?]( vector( T ) & vec, ptrdiff_t idx ) {
+        // call autogen constructor deleted at end of hfa
+        vector_transit(T) ret = { & vec, idx };
+        return ret;
+    }
+
+    T & findElemMem_$( vector(T) & v, ptrdiff_t idx ) { // storage slot of logical element idx; idx outside [0, length] is wrapped by the length
+        ptrdiff_t len = v`length; // signed on purpose: against a size_t, a negative idx was compared as a huge unsigned value
+        while (idx > len) idx -= len; // idx == len is let through; it maps to the slot just past the last element
+        while (idx < 0  ) idx += len;
+        T * ret = v.elems_first_$ + idx;
+        if (ret < v.buffer_end_$) return *ret; // still inside the buffer: no wraparound
+        ret -= (v.buffer_end_$ - v.buffer_first_$); // ran off the end: continue from the start of the buffer
+        assert( v.buffer_first_$ <= ret && ret < v.elems_end_$ );
+        return *ret;
+    }
+
+    T ?`val( vector_transit(T) & src ) {
+        T ret = findElemMem_$( *src.col_$, src.idx_$ );
+        return ret;
+    }
+
+    void ?=?( vector_transit(T) & src, T val ) {
+        findElemMem_$( *src.col_$, src.idx_$ ) = val;
+    }
+
+    // exit
+
+    void ?{}( vector_exit(T) & this, vector_transit(T) & src ) {
+        ( this.invec_$ ){ src.col_$ };
+        ( this.item_$ ){ & findElemMem_$( *src.col_$, src.idx_$ ) };
+
+        this.invec_$->exit_refcount_$ ++;
+    }
+    void ?{}( vector_exit(T) & this, vector_exit(T) & src ){
+        ( this.invec_$ ){ src.invec_$ };
+        ( this.item_$ ){ src.item_$ };
+
+        this.invec_$->exit_refcount_$ ++;
+    }
+
+    void ^?{}( vector_exit(T) & it ) {
+        it.invec_$->exit_refcount_$ --;
+    }
+
+    T ?`val( vector_exit(T) & src ) {
+        return *src.item_$;
+    }
+
+    void ?*=?( T & dst, vector_exit(T) & src ) {
+        dst = *src.item_$;
+    }
+
+    bool ?`moveNext( vector_exit(T) & it ) { // advance it by one element; returns false once it is (back) at the origin
+        if (it.invec_$->elems_first_$ == 0p) {
+            // vector is empty
+            assert ( it.item_$ == 0p ); // it was at origin
+            return false;
+        }
+        assert( it.invec_$->elems_first_$ < it.invec_$->elems_end_$ && "can't handle wraparound yet" ); // temporary: must implement
+        if( it.item_$ == 0p ) {
+            // moving from origin
+            it.item_$ = it.invec_$->elems_first_$;
+        } else {
+            it.item_$ += 1;
+            if( it.item_$ >= it.invec_$->buffer_end_$ ) // buffer_end_$ is one past the last slot, so reaching it must wrap too
+                it.item_$ = it.invec_$->buffer_first_$;
+        }
+        if ( it.item_$ >= it.invec_$->elems_end_$ ) {
+            // moving to origin
+            it.item_$ = 0p;
+            return false;
+        } else {
+            return true;
+        }
+    }
+
+    // permit
+
+    void ^?{}( vector_permit(T) & this ) {
+        remove(this);
+    }
+
+    void ?{}( vector_permit(T) & this, vector_transit(T) & src ) {
+        ( this.invec_$ ){ src.col_$ };
+        ( this.item_$ ){ & findElemMem_$( *src.col_$, src.idx_$ ) };
+        insert_first( src.col_$->live_iters_$, this );
+    }
+
+    void ?{}( vector_permit(T) & this, vector_exit(T) & src ) {
+        ( this.invec_$ ){ src.invec_$ };
+        ( this.item_$ ){ src.item_$ };
+        insert_first( src.invec_$->live_iters_$, this );
+    }
+
+    T ?`val( vector_permit(T) & src ){
+        return *src.item_$;
+    }
+
+    // vec internals
+
+    void grow( vector( T ) & this ) { // move the elements into a new, larger buffer, stored from its start in logical order
+        size_t newCapacity = this.buffer_end_$ == this.buffer_first_$ ? 1 : 2 * (this.buffer_end_$ - this.buffer_first_$); // doubling a capacity of 0 would never make room
+        T * newItems = aalloc( newCapacity );
+        size_t elemCount = this`length;
+        for ( ptrdiff_t pos = 0; pos < elemCount; pos += 1 ) {
+            newItems[pos] = findElemMem_$(this, pos);
+        }
+        // NOTE(review): rebasing by the buffer start matches the copy above only while elems_first_$ == buffer_first_$ -- confirm before adding removal at the front
+        while ( vector_permit(T) & liveIter = this.live_iters_$`elems; liveIter`moveNext ) {
+            liveIter.item_$ += (newItems - this.buffer_first_$);
+        }
+
+        free( this.buffer_first_$ );
+        this.buffer_first_$ = newItems;
+        this.buffer_end_$ = newItems + newCapacity;
+        if (elemCount > 0) this.elems_first_$ = this.buffer_first_$; else this.elems_first_$ = 0p; // keep the 0p "empty" marker that ?`length relies on
+        this.elems_end_$ = this.buffer_first_$ + elemCount;
+        assert (this.elems_end_$ < this.buffer_end_$);
+    }
+
+    // vec api
+
+    vector_transit(T) push_last( vector( T ) & col, T val ) { // append val; returns a transit designating the new element
+        assert (col.exit_refcount_$ == 0); // not allowed while a vector_exit refers to this vector
+        if (col`length >= col`capacity) {
+            assert (col`length == col`capacity);
+            grow(col);
+        }
+        // call autogen constructor deleted at end of hfa
+        vector_transit(T) ret = { & col, col`length }; // the length before the push is the index of the new element
+        *col.elems_end_$ = val;
+        if (col.elems_first_$ == 0p) col.elems_first_$ = col.elems_end_$; // first element: leave the "empty" state
+        col.elems_end_$ += 1;
+        if (col.elems_end_$ >= col.buffer_end_$) col.elems_end_$ = col.buffer_first_$; // wrap, so elems_end_$ is never stored == buffer_end_$
+        return ret;
+    }
+
+    vector_exit(T) ?`origin( vector( T ) & vec ) {
+
+        // private memberwise constructor, deleted from global namespace at end
+        // autogen constructor would not do the raii
+        void ?{}( vector_exit(T) & this, vector(T) * invec_$, T * item_$ ) {
+            ( this.invec_$ ){ invec_$ };
+            ( this.item_$ ){ item_$ };
+            this.invec_$->exit_refcount_$ ++;
+        }
+
+        vector_exit(T) ret = { &vec, 0p };
+        return ret;
+    }
+
+    bool inRange_$( T * query, T * from, T * to) { // is query inside [from, to), where the range may wrap past the end of the buffer
+        bool belowTo = query < to, fromOrAbove = from <= query;
+        if (from == to) return false; // an empty range contains nothing
+        return from < to ? (fromOrAbove && belowTo) : (belowTo || fromOrAbove); // plain range : wrapped range
+    }
+
+    void insert_before( vector( T ) & col, ptrdiff_t idx, T val ) { // store val in the slot of logical position idx, moving the elements from there on one slot toward the back
+        assert (col.exit_refcount_$ == 0); // not allowed while a vector_exit refers to this vector
+        if (col`length >= col`capacity) {
+            assert (col`length == col`capacity);
+            grow(col);
+        }
+        
+        T & insertTargetR = findElemMem_$( col, idx ); // looked up after grow, so it refers to the current buffer
+        T * insertTarget = & insertTargetR; // doesn't work in one line; must be a bug
+
+        // bubble toward back
+        if ( col.elems_end_$ < insertTarget ) {
+            // two phases of bubbling, to wrap around
+            for (T * tgt = col.elems_end_$; tgt > col.buffer_first_$; tgt--) {
+                *tgt = *(tgt-1);
+            }
+            *col.buffer_first_$ = *(col.buffer_end_$ - 1); // carry the last slot of the buffer around to its first slot
+            for (T * tgt = col.buffer_end_$ - 1; tgt > insertTarget; tgt--) {
+                *tgt = *(tgt-1);
+            }
+        } else {
+            for (T * tgt = col.elems_end_$; tgt > insertTarget; tgt--) {
+                *tgt = *(tgt-1);
+            }
+        }
+
+        col.elems_end_$ += 1;
+        if (col.elems_end_$ == col.buffer_end_$) col.elems_end_$ = col.buffer_first_$;
+
+        *insertTarget = val;
+
+        while ( vector_permit(T) & liveIter = col.live_iters_$`elems; liveIter`moveNext ) { // permits on a moved element follow it
+            if ( inRange_$(liveIter.item_$, insertTarget, col.elems_end_$) ) {
+                liveIter.item_$ += 1;
+                if (liveIter.item_$ >= col.buffer_end_$) liveIter.item_$ = col.buffer_first_$;
+            }
+        }
+    }
+
+    size_t ?`capacity( vector(T) & v ) {
+        return v.buffer_end_$ - v.buffer_first_$;
+    }
+
+    size_t ?`length( vector(T) & v ) { // number of elements currently stored
+        if (v.elems_first_$ == 0p) return 0; // "empty" marker
+        if (v.elems_first_$ < v.elems_end_$ ) return v.elems_end_$ - v.elems_first_$; // one contiguous run
+        return v.buffer_end_$ - v.elems_first_$ + v.elems_end_$ - v.buffer_first_$; // wrapped: run up to the buffer end plus run from the buffer start
+    }
+
+
+} // forall T
+
+forall( T ) {
+    void ?{}( vector_exit(T) &, vector(T) *, T * ) = void;
+    void ?{}( vector_transit(T) & this, vector( T ) * col, ptrdiff_t idx ) = void;
+}
Index: libcfa/src/device/cpu.cfa
===================================================================
--- libcfa/src/device/cpu.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ libcfa/src/device/cpu.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,423 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// cpu.cfa -- read the cpu and cache topology of the machine from sysfs
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Jun 10 16:13:07 2021
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include "device/cpu.hfa"
+
+#include <math.hfa>
+#include <stdlib.hfa>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+extern "C" {
+	#include <dirent.h>
+	#include <sys/types.h>
+	#include <sys/stat.h>
+	#include <fcntl.h>
+}
+
+// search a string for character 'character', looking at no more than the
+// first len chars
+static const char * strnchr(const char * str, int character, size_t len) {
+	return (const char *)memchr(str, character, strnlen(str, len)); // the scan stops at the terminator or after len chars, whichever comes first; 0p if not found
+}
+
+// Check if have string matches the want string
+// ignoring any characters that are longer than the want string
+static bool strmatch(const char * want, char * have) {
+	// only the first strlen(want) characters are compared: whatever follows in 'have' is ignored
+	return 0 == strncmp(want, have, strlen(want));
+}
+
+typedef const char * idx_range_t;
+
+// read the value of a string and evaluate it
+// get the end pointer and make sure it is all evaluated
+static unsigned read_value(idx_range_t map, size_t len, const char ** end) {
+	unsigned long val = strtoul(map, (char**)end, 10); // *end is left on the first char that is not part of the number
+	/* paranoid */ __attribute__((unused)) size_t read = (*end - map); // number of chars strtoul consumed
+	/* paranoid */ verifyf(read <= len, "String '%s' passed with inconsistent length %zu", map, len);
+	/* paranoid */ verifyf(read == len, "String %.*s not entirely a number, %zu chars left", (int)len, map, len - read);
+	return val; // narrowed from unsigned long to unsigned
+}
+
+// Evaluate the width of a comma separated list of idx
+// for example 'A-B,C-D,E,F' has a width of '(B-A+1) + (D-C+1) + 1 + 1'
+// Also has a (non-optional) end ptr like strtoul and friends
+//
+// FIXME : the current implementation only supports 1 comma
+static unsigned read_width(idx_range_t map, size_t len, const char ** end) {
+	// Do we have a comma
+	const char * comma = strnchr(map, ',', len); // first comma within the len chars, if any
+	if(comma != 0p) {
+		// We do! recurse and sum the widths
+		const char * _; // end pointer of the left part, not needed
+		size_t split = comma - map;
+		unsigned lhs = read_width(map, split, &_);
+		unsigned rhs = read_width(comma + 1, len - split - 1, end); // the remainder, without the comma itself
+		return lhs + rhs;
+	}
+
+	// No commas, check for a range
+	const char * dash = strnchr(map, '-', len);
+	if(dash != 0p) {
+		const char * _;
+		size_t split = dash - map;
+		unsigned lhs = read_value(map, split, &_);
+		unsigned rhs = read_value(dash + 1, len - split - 1, end);
+		return rhs - lhs + 1; // both bounds are part of the range
+	}
+
+	// No range, no comma, just a single value
+	// It's width is 1 and we can consume everything
+	/* paranoid */ verifyf( ({strtoul(map, (char**)end, 10); *end == (map + len); }), "Value in range '%.*s' not a number", (int)len, map);
+	*end = map + len; // set unconditionally: the check above only exists when verify is enabled
+	return 1;
+}
+
+// go through a directory calling fn on each file
+static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) {
+	// open the directory
+	DIR *dir = opendir(path);
+	if(dir == 0p) { return ENOTDIR; } // every opendir failure is reported as ENOTDIR, whatever errno says
+
+	// call fn for each
+	struct dirent * ent;
+	while ((ent = readdir(dir)) != 0p) { // every entry readdir returns is passed on, unfiltered
+		fn( ent );
+	}
+
+	// no longer need this
+	closedir(dir);
+	return 0; // 0 when the directory could be opened
+}
+
+// count the number of directories with the specified prefix
+// the directories counted have the form '[prefix]N' where prefix is the parameter
+// and N is a base 10 integer.
+static int count_prefix_dirs(const char * path, const char * prefix) {
+	// read the directory, count the '[prefix]N' directories
+	// and make sure everything is as expected; returns that count
+	int max = -1;
+	int count = 0;
+	void lambda(struct dirent * ent) {
+		// we are looking for prefixN, where N is a number
+		// check that the name starts with the prefix
+		size_t plen = strlen(prefix);
+		if(strncmp(ent->d_name, prefix, plen) != 0) { return; }
+		char * s = ent->d_name + plen;
+
+		// check that the next part is a number, starting with a digit: strtol alone
+		// would read an empty suffix as 0 and accept a leading sign or space
+		char * end;
+		long int val = strtol(s, &end, 10);
+		if(*s < '0' || *s > '9' || *end != '\0') { return; }
+
+		// check that it's a directory
+		if(ent->d_type != DT_DIR) { return; }
+
+		// it's a match!
+		max = max(val, max);
+		count++;
+	}
+	iterate_dir(path, lambda);
+
+	/* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);
+
+	return count;
+}
+
+// Count number of cpus in the system
+static int count_cpus(void) {
+	const char * fpath = "/sys/devices/system/cpu/present";
+	int fd = open(fpath, O_RDONLY); // the flags are the second argument; a mode is only meaningful when creating a file
+	/* paranoid */ verifyf(fd >= 0, "Could not open file %s", fpath);
+
+	char buff[128];
+	ssize_t r = read(fd, buff, 128);
+	/* paranoid */ verifyf(r > 0, "Could not read file %s", fpath);
+	/* paranoid */ verify( buff[r-1] == '\n' );
+	buff[r-1] = '\0'; // replace the trailing newline with the terminator
+
+	/* paranoid */ __attribute__((unused)) int ret =
+	close(fd);
+	/* paranoid */ verifyf(ret == 0, "Could not close file %s", fpath);
+
+	const char * _;
+	int cnt = read_width(buff, r - 1, &_); // the file holds an index list; its width is the number of cpus
+	/* paranoid */ verify(cnt == count_prefix_dirs("/sys/devices/system/cpu", "cpu"));
+	return cnt;
+}
+
+// Count number of cache *indexes* in the system
+// cache indexes are distinct from cache level as Data or Instruction cache
+// can share a level but not an index
+// PITFALL: assumes all cpus have the same indexes as cpu0
+static int count_cache_indexes(void) {
+	return count_prefix_dirs("/sys/devices/system/cpu/cpu0/cache", "index");
+}
+
+
+// read information about a specific cache index/cpu file into the output buffer
+static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) {
+	// Pick the file we want and read it
+	char buf[128];
+	/* paranoid */ __attribute__((unused)) int len =
+	snprintf(buf, 128, "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file);
+	/* paranoid */ verifyf(len > 0 && len < 128, "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx); // len >= 128 means the name was truncated
+
+	int fd = open(buf, O_RDONLY); // the flags are the second argument; a mode is only meaningful when creating a file
+	/* paranoid */ verifyf(fd >= 0, "Could not open file '%s'", buf); // 0 is a valid descriptor, only negative values are errors
+
+	ssize_t r = read(fd, out, out_len); // raw bytes: 'out' is NOT nul-terminated by this function
+	/* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf);
+
+	/* paranoid */ __attribute__((unused)) int ret =
+	close(fd);
+	/* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf);
+	return r; // number of bytes placed in 'out'
+}
+
+// Iterate over the cache indexes of a given cpu
+typedef void (*handle_func_t)(unsigned idx, unsigned char level, idx_range_t range, size_t len);
+static void foreach_cacheidx(unsigned cpu, unsigned idxs, handle_func_t handle) {
+	for(i; idxs) {
+		unsigned idx = idxs - 1 - i; // visit the indexes from the highest down to 0
+		char buf[32]; // reused for the three reads below; the reads do not nul-terminate it
+
+		// Type says what kind of cache this is,
+		// Options are: Unified, Data, Instruction
+		read_cpuidxinfo_into(cpu, idx, "type", buf, 32);
+		if((!strmatch("Unified", buf)) && (!strmatch("Data", buf))) {
+			// We don't care about instruction caches
+			continue;
+		}
+
+		// Level is the cache level: higher means bigger and slower
+		read_cpuidxinfo_into(cpu, idx, "level", buf, 32);
+		char * end;
+		unsigned long level = strtoul(buf, &end, 10); // NOTE(review): relies on a non-digit (presumably the file's newline) following the number -- confirm
+		/* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, this is not supported", cpu);
+
+		// shared_cpu_list is a range of cpus that share this particular cache
+		size_t n = read_cpuidxinfo_into(cpu, idx, "shared_cpu_list", buf, 32); // NOTE(review): a list longer than 32 bytes is cut short; only the verify below notices
+		/* paranoid */ verify( buf[n-1] == '\n' );
+		buf[n-1] = '\0';
+
+		// Simply call the functor
+		handle(idx, level, buf, n - 1); // buf is only valid during the call; level is narrowed to unsigned char
+	}
+}
+
+
+struct raw_cache_instance {
+	idx_range_t range;
+	unsigned width;
+	unsigned char level;
+	// FIXME add at least size and type
+};
+
+static void  ?{}(raw_cache_instance & this) { this.range = 0p;}
+static void ^?{}(raw_cache_instance & this) { free(this.range);}
+
+raw_cache_instance ** build_raw_cache_table(unsigned cpus, unsigned idxs, unsigned cache_levels) // returns raw[cpu][slot], one row of cache_levels entries per cpu
+{
+	raw_cache_instance ** raw = alloc(cpus);
+	for(i; cpus) {
+		raw[i] = alloc(cache_levels); // NOTE(review): a slot is only filled when this cpu reports that level -- confirm alloc leaves the others in a state that is safe to destroy
+		void addcache(unsigned fidx, unsigned char level, idx_range_t range, size_t len) {
+			/* paranoid */ verifyf(level <= cache_levels, "Unexpected cache level %d on cpu %u index %u", (int)level, i, fidx);
+
+			unsigned idx = cache_levels - level; // the highest level lands in slot 0
+			raw_cache_instance & r = raw[i][idx];
+			r.range = strndup(range, len); // private copy: 'range' is the caller's scratch buffer
+			r.level = level;
+			const char * end;
+			r.width = read_width(range, len, &end);
+		}
+		foreach_cacheidx(i, idxs, addcache);
+	}
+
+	return raw;
+}
+
+struct llc_map_t {
+	raw_cache_instance * raw;
+	unsigned count;
+	unsigned start;
+};
+
+// returns an allocated list of all the distinct last level caches
+static [*llc_map_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) {
+	// Allocate at least one element
+	llc_map_t* ranges = alloc();
+	size_t range_cnt = 1;
+
+	// Initialize with element 0
+	ranges->raw = &raw[0][llc_idx]; // entries point into 'raw', they do not copy it
+	ranges->count = 0;
+	ranges->start = -1u; // -1u means "no start assigned yet"
+
+	// Go over all other cpus
+	CPU_LOOP: for(i; 1~cpus) {
+		// Check if the range is already there
+		raw_cache_instance * candidate = &raw[i][llc_idx];
+		for(j; range_cnt) {
+			llc_map_t & exist = ranges[j];
+			// If the range is already there just jump to the next cpu
+			if(0 == strcmp(candidate->range, exist.raw->range)) continue CPU_LOOP; // two caches are the same instance when their range strings are equal
+		}
+
+		// The range wasn't there, added to the list
+		ranges = alloc(range_cnt + 1, ranges`realloc); // grow the list by one entry
+		ranges[range_cnt].raw = candidate;
+		ranges[range_cnt].count = 0;
+		ranges[range_cnt].start = -1u;
+		range_cnt++;
+	}
+
+	// return what we have
+	return [ranges, range_cnt];
+}
+
+struct cpu_pairing_t {
+	unsigned cpu;
+	unsigned id;
+};
+
+int ?<?( cpu_pairing_t lhs, cpu_pairing_t rhs ) {
+	return lhs.id < rhs.id;
+}
+
+static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) { // for each cpu, the index in 'maps' of the cache instance whose range matches
+	cpu_pairing_t * pairings = alloc(cpus);
+
+	CPU_LOOP: for(i; cpus) {
+		pairings[i].cpu = i;
+		idx_range_t want = raw[i][0].range; // NOTE(review): always slot 0, while the caller builds 'maps' from slot 'cache_levels - llc' -- presumably the same slot, confirm
+		MAP_LOOP: for(j; map_cnt) {
+			if(0 != strcmp(want, maps[j].raw->range)) continue MAP_LOOP;
+
+			pairings[i].id = j;
+			continue CPU_LOOP;
+		}
+
+		/* paranoid */ verifyf( false, "Cpu %u map doesn't match", i ); // only reached when no map matched; pairings[i].id is then left unset
+	}
+
+	return pairings;
+}
+
+#include <fstream.hfa>
+
+extern "C" {
+	void __cfaabi_device_startup( void ) { // fill cpu_info: group the cpus by the last level cache they share
+		int cpus = count_cpus();
+		int idxs = count_cache_indexes();
+
+		// Count actual cache levels
+		unsigned cache_levels = 0;
+		unsigned llc = 0;
+		{
+			unsigned char prev = -1u; // 255: larger than any level accepted by foreach_cacheidx
+			void first(unsigned idx, unsigned char level, const char * map, size_t len) {
+				/* paranoid */ verifyf(level < prev, "Index %u of cpu 0 has cache levels out of order: %u then %u", idx, (unsigned)prev, (unsigned)level);
+				llc = max(llc, level);
+				prev = level;
+				cache_levels++; // one per data/unified cache index of cpu 0
+			}
+			foreach_cacheidx(0, idxs, first);
+		}
+
+		// Read in raw data
+		raw_cache_instance ** raw = build_raw_cache_table(cpus, idxs, cache_levels);
+
+		// Find number of distinct cache instances
+		llc_map_t * maps;
+		size_t map_cnt;
+		[maps, map_cnt] =  distinct_llcs(cpus, cache_levels - llc, raw); // same slot formula as build_raw_cache_table uses for level 'llc'
+
+		#if defined(__CFA_WITH_VERIFY__)
+		// Verify that the caches cover the all the cpus
+		{
+			unsigned width1 = 0;
+			unsigned width2 = 0;
+			for(i; map_cnt) {
+				const char * _;
+				width1 += read_width(maps[i].raw->range, strlen(maps[i].raw->range), &_);
+				width2 += maps[i].raw->width;
+			}
+			verify(width1 == cpus);
+			verify(width2 == cpus);
+		}
+		#endif
+
+		// Get mappings from cpu to cache instance
+		cpu_pairing_t * pairings = get_cpu_pairings(cpus, raw, maps, map_cnt);
+
+		// Sort by cache instance
+		qsort(pairings, cpus);
+
+		{
+			unsigned it = 0; // next free slot in the regrouped cpu list
+			for(i; cpus) {
+				unsigned llc_id = pairings[i].id;
+				if(maps[llc_id].start == -1u) { // first cpu seen for this cache instance: reserve 'width' consecutive slots
+					maps[llc_id].start = it;
+					it += maps[llc_id].raw->width;
+					/* paranoid */ verify(maps[llc_id].start < it);
+					/* paranoid */ verify(it != -1u);
+				}
+			}
+			/* paranoid */ verify(it == cpus);
+		}
+
+		// From the mappings build the actual cpu map we want
+		struct cpu_map_entry_t * entries = alloc(cpus);
+		for(i; cpus) { entries[i].count = 0; }
+		for(i; cpus) {
+			/* paranoid */ verify(pairings[i].id < map_cnt);
+			unsigned c = pairings[i].cpu;
+			unsigned llc_id = pairings[i].id;
+			unsigned width = maps[llc_id].raw->width;
+			unsigned start = maps[llc_id].start;
+			unsigned self  = start + (maps[llc_id].count++); // next unused slot inside this cache instance's group
+			entries[c].count = width;
+			entries[c].start = start;
+			entries[c].self  = self;
+		}
+
+		// get rid of the temporary data
+		free(maps);
+		free(pairings);
+
+		for(i; cpus) {
+			for(j; cache_levels) {
+				^(raw[i][j]){}; // frees the range string of each entry
+			}
+			free(raw[i]);
+		}
+		free(raw);
+
+		cpu_info.llc_map = entries; // released by __cfaabi_device_shutdown
+		cpu_info.hthrd_count = cpus;
+	}
+
+	void __cfaabi_device_shutdown( void ) {
+		free(cpu_info.llc_map);
+	}
+}
Index: libcfa/src/device/cpu.hfa
===================================================================
--- libcfa/src/device/cpu.hfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ libcfa/src/device/cpu.hfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,32 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// cpu.hfa -- cpu topology information: map of hardware threads to last level cache groups
+//
+// Author           : Thierry Delisle
+// Created On       : Fri Jun 11 15:22:23 2021
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+#pragma once
+#include <stddef.h>
+
+struct cpu_map_entry_t { // placement of one cpu in the list of cpus regrouped by shared last level cache
+	unsigned self; // slot of this cpu in the regrouped list
+	unsigned start; // first slot of the group of cpus sharing this cpu's last level cache
+	unsigned count; // number of cpus in that group
+};
+
+struct cpu_info_t {
+	 // array of size [hthrd_count]
+	const cpu_map_entry_t * llc_map;
+
+	 // Number of _hardware_ threads present in the system
+	size_t hthrd_count;
+};
+
+cpu_info_t cpu_info; // NOTE(review): defines the object in every file that includes this header -- presumably should be 'extern' here with one definition in cpu.cfa, confirm
Index: libcfa/src/exception.c
===================================================================
--- libcfa/src/exception.c	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/exception.c	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -27,15 +27,4 @@
 #include "stdhdr/assert.h"
 #include "virtual.h"
-
-#if defined( __ARM_ARCH )
-#warning FIX ME: temporary hack to keep ARM build working
-#ifndef _URC_FATAL_PHASE1_ERROR
-#define _URC_FATAL_PHASE1_ERROR 3
-#endif // ! _URC_FATAL_PHASE1_ERROR
-#ifndef _URC_FATAL_PHASE2_ERROR
-#define _URC_FATAL_PHASE2_ERROR 2
-#endif // ! _URC_FATAL_PHASE2_ERROR
-#endif // __ARM_ARCH
-
 #include "lsda.h"
 
@@ -267,6 +256,12 @@
 	// the whole stack.
 
+#if defined( __x86_64 ) || defined( __i386 )
 	// We did not simply reach the end of the stack without finding a handler. This is an error.
 	if ( ret != _URC_END_OF_STACK ) {
+#else // defined( __ARM_ARCH )
+	// The return code from _Unwind_RaiseException seems to be corrupt on ARM at end of stack.
+	// This workaround tries to keep default exception handling working. 
+	if ( ret == _URC_FATAL_PHASE1_ERROR || ret == _URC_FATAL_PHASE2_ERROR ) {
+#endif
 		printf("UNWIND ERROR %d after raise exception\n", ret);
 		abort();
@@ -301,5 +296,5 @@
 }
 
-#if defined( __x86_64 ) || defined( __i386 )
+#if defined( __x86_64 ) || defined( __i386 ) || defined( __ARM_ARCH )
 // This is our personality routine. For every stack frame annotated with
 // ".cfi_personality 0x3,__gcfa_personality_v0" this function will be called twice when unwinding.
@@ -419,6 +414,5 @@
 				    _Unwind_GetCFA(unwind_context) + 24;
 #				elif defined( __ARM_ARCH )
-#				    warning FIX ME: check if anything needed for ARM
-				    42;
+				    _Unwind_GetCFA(unwind_context) + 40;
 #				endif
 				int (*matcher)(exception_t *) = *(int(**)(exception_t *))match_pos;
@@ -537,5 +531,9 @@
 	// HEADER
 	".LFECFA1:\n"
+#if defined( __x86_64 ) || defined( __i386 )
 	"	.globl	__gcfa_personality_v0\n"
+#else // defined( __ARM_ARCH )
+	"	.global	__gcfa_personality_v0\n"
+#endif
 	"	.section	.gcc_except_table,\"a\",@progbits\n"
 	// TABLE HEADER (important field is the BODY length at the end)
@@ -569,5 +567,9 @@
 	// No clue what this does specifically
 	"	.section	.data.rel.local.CFA.ref.__gcfa_personality_v0,\"awG\",@progbits,CFA.ref.__gcfa_personality_v0,comdat\n"
+#if defined( __x86_64 ) || defined( __i386 )
 	"	.align 8\n"
+#else // defined( __ARM_ARCH )
+	"	.align 3\n"
+#endif
 	"	.type CFA.ref.__gcfa_personality_v0, @object\n"
 	"	.size CFA.ref.__gcfa_personality_v0, 8\n"
@@ -575,6 +577,8 @@
 #if defined( __x86_64 )
 	"	.quad __gcfa_personality_v0\n"
-#else // then __i386
+#elif defined( __i386 )
 	"	.long __gcfa_personality_v0\n"
+#else // defined( __ARM_ARCH )
+	"	.xword __gcfa_personality_v0\n"
 #endif
 );
@@ -583,5 +587,9 @@
 	// HEADER
 	".LFECFA1:\n"
+#if defined( __x86_64 ) || defined( __i386 )
 	"	.globl	__gcfa_personality_v0\n"
+#else // defined( __ARM_ARCH )
+	"	.global	__gcfa_personality_v0\n"
+#endif
 	"	.section	.gcc_except_table,\"a\",@progbits\n"
 	// TABLE HEADER (important field is the BODY length at the end)
@@ -612,20 +620,5 @@
 #pragma GCC pop_options
 
-#elif defined( __ARM_ARCH )
-_Unwind_Reason_Code __gcfa_personality_v0(
-		int version,
-		_Unwind_Action actions,
-		unsigned long long exception_class,
-		struct _Unwind_Exception * unwind_exception,
-		struct _Unwind_Context * unwind_context) {
-	return _URC_CONTINUE_UNWIND;
-}
-
-__attribute__((noinline))
-void __cfaehm_try_terminate(void (*try_block)(),
-		void (*catch_block)(int index, exception_t * except),
-		__attribute__((unused)) int (*match_block)(exception_t * except)) {
-}
 #else
 	#error unsupported hardware architecture
-#endif // __x86_64 || __i386
+#endif // __x86_64 || __i386 || __ARM_ARCH
Index: libcfa/src/interpose.cfa
===================================================================
--- libcfa/src/interpose.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/interpose.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -95,5 +95,4 @@
 
 extern "C" {
-	void __cfaabi_interpose_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_CORE ) ));
 	void __cfaabi_interpose_startup( void ) {
 		const char *version = 0p;
Index: libcfa/src/startup.cfa
===================================================================
--- libcfa/src/startup.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ libcfa/src/startup.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -20,6 +20,6 @@
 
 extern "C" {
-    void __cfaabi_appready_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_APPREADY ) ));
-    void __cfaabi_appready_startup( void ) {
+	void __cfaabi_appready_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_APPREADY ) ));
+	void __cfaabi_appready_startup( void ) {
 		tzset();										// initialize time global variables
 		setlocale( LC_NUMERIC, getenv("LANG") );
@@ -28,16 +28,32 @@
 		heapAppStart();
 		#endif // __CFA_DEBUG__
-    } // __cfaabi_appready_startup
+	} // __cfaabi_appready_startup
 
-    void __cfaabi_appready_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_APPREADY ) ));
-    void __cfaabi_appready_shutdown( void ) {
+	void __cfaabi_appready_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_APPREADY ) ));
+	void __cfaabi_appready_shutdown( void ) {
 		#ifdef __CFA_DEBUG__
 		extern void heapAppStop();
 		heapAppStop();
 		#endif // __CFA_DEBUG__
-    } // __cfaabi_appready_shutdown
+	} // __cfaabi_appready_shutdown
 
-    void disable_interrupts() __attribute__(( weak )) {}
-    void enable_interrupts() __attribute__(( weak )) {}
+	void disable_interrupts() __attribute__(( weak )) {}
+	void enable_interrupts() __attribute__(( weak )) {}
+
+
+	extern void __cfaabi_interpose_startup( void );
+	extern void __cfaabi_device_startup   ( void );
+	extern void __cfaabi_device_shutdown  ( void );
+
+	void __cfaabi_core_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_CORE ) ));
+	void __cfaabi_core_startup( void ) {
+		__cfaabi_interpose_startup();
+		__cfaabi_device_startup();
+	} // __cfaabi_core_startup
+
+	void __cfaabi_core_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_CORE ) ));
+	void __cfaabi_core_shutdown( void ) {
+		__cfaabi_device_shutdown();
+	} // __cfaabi_core_shutdown
 } // extern "C"
 
Index: libcfa/src/stdhdr/pthread.h
===================================================================
--- libcfa/src/stdhdr/pthread.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ libcfa/src/stdhdr/pthread.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,24 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+// 
+// pthread.h -- wrap the next <pthread.h> on the include path in extern "C"
+// 
+// Author           : Peter A. Buhr
+// Created On       : Wed Jun 16 13:39:06 2021
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Wed Jun 16 13:39:42 2021
+// Update Count     : 1
+// 
+
+extern "C" {
+#include_next <pthread.h>								// has internal check for multiple expansion
+} // extern "C"
+
+// Local Variables: //
+// tab-width: 4 //
+// mode: c++ //
+// compile-command: "make install" //
+// End: //
Index: src/AST/Convert.cpp
===================================================================
--- src/AST/Convert.cpp	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/AST/Convert.cpp	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -2415,4 +2415,15 @@
 	}
 
+	virtual void visit( const DimensionExpr * old ) override final {
+		// DimensionExpr gets desugared away in Validate.
+		// As long as new-AST passes don't use it, this cheap-cheerful error
+		// detection helps ensure that these occurrences have been compiled
+		// away, as expected.  To move the DimensionExpr boundary downstream
+		// or move the new-AST translation boundary upstream, implement
+		// DimensionExpr in the new AST and implement a conversion.
+		(void) old;
+		assert(false && "DimensionExpr should not be present at new-AST boundary");
+	}
+
 	virtual void visit( const AsmExpr * old ) override final {
 		this->node = visitBaseExpr( old,
Index: src/AST/Decl.cpp
===================================================================
--- src/AST/Decl.cpp	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/AST/Decl.cpp	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -78,5 +78,5 @@
 
 const char * TypeDecl::typeString() const {
-	static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized array length type" };
+	static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized length value" };
 	static_assert( sizeof(kindNames) / sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." );
 	assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." );
Index: src/AST/Decl.hpp
===================================================================
--- src/AST/Decl.hpp	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/AST/Decl.hpp	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -175,5 +175,5 @@
 class TypeDecl final : public NamedTypeDecl {
   public:
-	enum Kind { Dtype, DStype, Otype, Ftype, Ttype, ALtype, NUMBER_OF_KINDS };
+	enum Kind { Dtype, DStype, Otype, Ftype, Ttype, Dimension, NUMBER_OF_KINDS };
 
 	Kind kind;
Index: src/AST/Pass.impl.hpp
===================================================================
--- src/AST/Pass.impl.hpp	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/AST/Pass.impl.hpp	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -479,5 +479,5 @@
 			guard_symtab guard { *this };
 			// implicit add __func__ identifier as specified in the C manual 6.4.2.2
-			static ast::ptr< ast::ObjectDecl > func{ new ast::ObjectDecl{ 
+			static ast::ptr< ast::ObjectDecl > func{ new ast::ObjectDecl{
 				CodeLocation{}, "__func__",
 				new ast::ArrayType{
@@ -522,6 +522,7 @@
 	VISIT({
 		guard_symtab guard { * this };
-		maybe_accept( node, &StructDecl::params  );
-		maybe_accept( node, &StructDecl::members );
+		maybe_accept( node, &StructDecl::params     );
+		maybe_accept( node, &StructDecl::members    );
+		maybe_accept( node, &StructDecl::attributes );
 	})
 
@@ -543,6 +544,7 @@
 	VISIT({
 		guard_symtab guard { * this };
-		maybe_accept( node, &UnionDecl::params  );
-		maybe_accept( node, &UnionDecl::members );
+		maybe_accept( node, &UnionDecl::params     );
+		maybe_accept( node, &UnionDecl::members    );
+		maybe_accept( node, &UnionDecl::attributes );
 	})
 
@@ -562,6 +564,7 @@
 	VISIT(
 		// unlike structs, traits, and unions, enums inject their members into the global scope
-		maybe_accept( node, &EnumDecl::params  );
-		maybe_accept( node, &EnumDecl::members );
+		maybe_accept( node, &EnumDecl::params     );
+		maybe_accept( node, &EnumDecl::members    );
+		maybe_accept( node, &EnumDecl::attributes );
 	)
 
@@ -577,6 +580,7 @@
 	VISIT({
 		guard_symtab guard { *this };
-		maybe_accept( node, &TraitDecl::params  );
-		maybe_accept( node, &TraitDecl::members );
+		maybe_accept( node, &TraitDecl::params     );
+		maybe_accept( node, &TraitDecl::members    );
+		maybe_accept( node, &TraitDecl::attributes );
 	})
 
Index: src/CodeGen/CodeGenerator.cc
===================================================================
--- src/CodeGen/CodeGenerator.cc	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/CodeGen/CodeGenerator.cc	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -589,4 +589,9 @@
 			output << nameExpr->get_name();
 		} // if
+	}
+
+	void CodeGenerator::postvisit( DimensionExpr * dimensionExpr ) {
+		extension( dimensionExpr );
+		output << "/*non-type*/" << dimensionExpr->get_name();
 	}
 
Index: src/CodeGen/CodeGenerator.h
===================================================================
--- src/CodeGen/CodeGenerator.h	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/CodeGen/CodeGenerator.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -92,4 +92,5 @@
 		void postvisit( TupleIndexExpr * tupleExpr );
 		void postvisit( TypeExpr *typeExpr );
+		void postvisit( DimensionExpr *dimensionExpr );
 		void postvisit( AsmExpr * );
 		void postvisit( StmtExpr * );
Index: src/Common/PassVisitor.h
===================================================================
--- src/Common/PassVisitor.h	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/Common/PassVisitor.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -167,4 +167,6 @@
 	virtual void visit( TypeExpr * typeExpr ) override final;
 	virtual void visit( const TypeExpr * typeExpr ) override final;
+	virtual void visit( DimensionExpr * dimensionExpr ) override final;
+	virtual void visit( const DimensionExpr * dimensionExpr ) override final;
 	virtual void visit( AsmExpr * asmExpr ) override final;
 	virtual void visit( const AsmExpr * asmExpr ) override final;
@@ -309,4 +311,5 @@
 	virtual Expression * mutate( CommaExpr * commaExpr ) override final;
 	virtual Expression * mutate( TypeExpr * typeExpr ) override final;
+	virtual Expression * mutate( DimensionExpr * dimensionExpr ) override final;
 	virtual Expression * mutate( AsmExpr * asmExpr ) override final;
 	virtual Expression * mutate( ImplicitCopyCtorExpr * impCpCtorExpr ) override final;
@@ -542,5 +545,5 @@
 class WithIndexer {
 protected:
-	WithIndexer() {}
+	WithIndexer( bool trackIdentifiers = true ) : indexer(trackIdentifiers) {}
 	~WithIndexer() {}
 
Index: src/Common/PassVisitor.impl.h
===================================================================
--- src/Common/PassVisitor.impl.h	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/Common/PassVisitor.impl.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -636,4 +636,5 @@
 		maybeAccept_impl( node->parameters, *this );
 		maybeAccept_impl( node->members   , *this );
+		maybeAccept_impl( node->attributes, *this );
 	}
 
@@ -656,4 +657,5 @@
 		maybeAccept_impl( node->parameters, *this );
 		maybeAccept_impl( node->members   , *this );
+		maybeAccept_impl( node->attributes, *this );
 	}
 
@@ -676,4 +678,5 @@
 		maybeMutate_impl( node->parameters, *this );
 		maybeMutate_impl( node->members   , *this );
+		maybeMutate_impl( node->attributes, *this );
 	}
 
@@ -697,4 +700,5 @@
 		maybeAccept_impl( node->parameters, *this );
 		maybeAccept_impl( node->members   , *this );
+		maybeAccept_impl( node->attributes, *this );
 	}
 
@@ -714,4 +718,5 @@
 		maybeAccept_impl( node->parameters, *this );
 		maybeAccept_impl( node->members   , *this );
+		maybeAccept_impl( node->attributes, *this );
 	}
 
@@ -732,4 +737,5 @@
 		maybeMutate_impl( node->parameters, *this );
 		maybeMutate_impl( node->members   , *this );
+		maybeMutate_impl( node->attributes, *this );
 	}
 
@@ -750,4 +756,5 @@
 	maybeAccept_impl( node->parameters, *this );
 	maybeAccept_impl( node->members   , *this );
+	maybeAccept_impl( node->attributes, *this );
 
 	VISIT_END( node );
@@ -763,4 +770,5 @@
 	maybeAccept_impl( node->parameters, *this );
 	maybeAccept_impl( node->members   , *this );
+	maybeAccept_impl( node->attributes, *this );
 
 	VISIT_END( node );
@@ -776,4 +784,5 @@
 	maybeMutate_impl( node->parameters, *this );
 	maybeMutate_impl( node->members   , *this );
+	maybeMutate_impl( node->attributes, *this );
 
 	MUTATE_END( Declaration, node );
@@ -790,4 +799,5 @@
 		maybeAccept_impl( node->parameters, *this );
 		maybeAccept_impl( node->members   , *this );
+		maybeAccept_impl( node->attributes, *this );
 	}
 
@@ -805,4 +815,5 @@
 		maybeAccept_impl( node->parameters, *this );
 		maybeAccept_impl( node->members   , *this );
+		maybeAccept_impl( node->attributes, *this );
 	}
 
@@ -820,4 +831,5 @@
 		maybeMutate_impl( node->parameters, *this );
 		maybeMutate_impl( node->members   , *this );
+		maybeMutate_impl( node->attributes, *this );
 	}
 
@@ -2507,4 +2519,34 @@
 
 //--------------------------------------------------------------------------
+// DimensionExpr
+template< typename pass_type >
+void PassVisitor< pass_type >::visit( DimensionExpr * node ) {
+	VISIT_START( node );
+
+	indexerScopedAccept( node->result, *this );
+
+	VISIT_END( node );
+}
+
+template< typename pass_type >
+void PassVisitor< pass_type >::visit( const DimensionExpr * node ) {
+	VISIT_START( node );
+
+	indexerScopedAccept( node->result, *this );
+
+	VISIT_END( node );
+}
+
+template< typename pass_type >
+Expression * PassVisitor< pass_type >::mutate( DimensionExpr * node ) {
+	MUTATE_START( node );
+
+	indexerScopedMutate( node->env   , *this );
+	indexerScopedMutate( node->result, *this );
+
+	MUTATE_END( Expression, node );
+}
+
+//--------------------------------------------------------------------------
 // AsmExpr
 template< typename pass_type >
@@ -3145,5 +3187,5 @@
 
 	maybeAccept_impl( node->forall, *this );
-	// xxx - should PointerType visit/mutate dimension?
+	maybeAccept_impl( node->dimension, *this );
 	maybeAccept_impl( node->base, *this );
 
@@ -3156,5 +3198,5 @@
 
 	maybeAccept_impl( node->forall, *this );
-	// xxx - should PointerType visit/mutate dimension?
+	maybeAccept_impl( node->dimension, *this );
 	maybeAccept_impl( node->base, *this );
 
@@ -3167,5 +3209,5 @@
 
 	maybeMutate_impl( node->forall, *this );
-	// xxx - should PointerType visit/mutate dimension?
+	maybeMutate_impl( node->dimension, *this );
 	maybeMutate_impl( node->base, *this );
 
@@ -3856,5 +3898,5 @@
 
 //--------------------------------------------------------------------------
-// Attribute
+// Constant
 template< typename pass_type >
 void PassVisitor< pass_type >::visit( Constant * node ) {
Index: src/InitTweak/InitTweak.cc
===================================================================
--- src/InitTweak/InitTweak.cc	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/InitTweak/InitTweak.cc	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -10,6 +10,6 @@
 // Created On       : Fri May 13 11:26:36 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Dec 13 23:15:52 2019
-// Update Count     : 8
+// Last Modified On : Wed Jun 16 20:57:22 2021
+// Update Count     : 18
 //
 
@@ -1217,12 +1217,22 @@
 	void addDataSectonAttribute( ObjectDecl * objDecl ) {
 		objDecl->attributes.push_back(new Attribute("section", {
-			new ConstantExpr( Constant::from_string(".data#") ),
-		}));
+			new ConstantExpr( Constant::from_string(".data"
+#if defined( __x86_64 ) || defined( __i386 ) // assembler comment to prevent assembler warning message
+					"#"
+#else // defined( __ARM_ARCH )
+					"//"
+#endif
+				))}));
 	}
 
 	void addDataSectionAttribute( ast::ObjectDecl * objDecl ) {
 		objDecl->attributes.push_back(new ast::Attribute("section", {
-			ast::ConstantExpr::from_string(objDecl->location, ".data#"),
-		}));
+			ast::ConstantExpr::from_string(objDecl->location, ".data"
+#if defined( __x86_64 ) || defined( __i386 ) // assembler comment to prevent assembler warning message
+					"#"
+#else // defined( __ARM_ARCH )
+					"//"
+#endif
+				)}));
 	}
 
Index: src/Parser/DeclarationNode.cc
===================================================================
--- src/Parser/DeclarationNode.cc	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/Parser/DeclarationNode.cc	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1076,8 +1076,8 @@
 	if ( variable.tyClass != TypeDecl::NUMBER_OF_KINDS ) {
 		// otype is internally converted to dtype + otype parameters
-		static const TypeDecl::Kind kindMap[] = { TypeDecl::Dtype, TypeDecl::DStype, TypeDecl::Dtype, TypeDecl::Ftype, TypeDecl::Ttype, TypeDecl::Dtype };
+		static const TypeDecl::Kind kindMap[] = { TypeDecl::Dtype, TypeDecl::Dtype, TypeDecl::Dtype, TypeDecl::Ftype, TypeDecl::Ttype, TypeDecl::Dimension };
 		static_assert( sizeof(kindMap) / sizeof(kindMap[0]) == TypeDecl::NUMBER_OF_KINDS, "DeclarationNode::build: kindMap is out of sync." );
 		assertf( variable.tyClass < sizeof(kindMap)/sizeof(kindMap[0]), "Variable's tyClass is out of bounds." );
-		TypeDecl * ret = new TypeDecl( *name, Type::StorageClasses(), nullptr, kindMap[ variable.tyClass ], variable.tyClass == TypeDecl::Otype || variable.tyClass == TypeDecl::ALtype, variable.initializer ? variable.initializer->buildType() : nullptr );
+		TypeDecl * ret = new TypeDecl( *name, Type::StorageClasses(), nullptr, kindMap[ variable.tyClass ], variable.tyClass == TypeDecl::Otype || variable.tyClass == TypeDecl::DStype, variable.initializer ? variable.initializer->buildType() : nullptr );
 		buildList( variable.assertions, ret->get_assertions() );
 		return ret;
Index: src/Parser/ExpressionNode.cc
===================================================================
--- src/Parser/ExpressionNode.cc	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/Parser/ExpressionNode.cc	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -509,4 +509,9 @@
 } // build_varref
 
+DimensionExpr * build_dimensionref( const string * name ) { // wrap name in a new DimensionExpr; name is deleted here
+	DimensionExpr * expr = new DimensionExpr( *name );
+	delete name;
+	return expr;
+} // build_dimensionref
 // TODO: get rid of this and OperKinds and reuse code from OperatorTable
 static const char * OperName[] = {						// must harmonize with OperKinds
Index: src/Parser/ParseNode.h
===================================================================
--- src/Parser/ParseNode.h	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/Parser/ParseNode.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -183,4 +183,5 @@
 
 NameExpr * build_varref( const std::string * name );
+DimensionExpr * build_dimensionref( const std::string * name );
 
 Expression * build_cast( DeclarationNode * decl_node, ExpressionNode * expr_node );
Index: src/Parser/TypedefTable.cc
===================================================================
--- src/Parser/TypedefTable.cc	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/Parser/TypedefTable.cc	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -10,6 +10,6 @@
 // Created On       : Sat May 16 15:20:13 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Mar 15 20:56:47 2021
-// Update Count     : 260
+// Last Modified On : Wed May 19 08:30:14 2021
+// Update Count     : 262
 //
 
@@ -31,4 +31,5 @@
 	switch ( kind ) {
 	  case IDENTIFIER: return "identifier";
+	  case TYPEDIMname: return "typedim";
 	  case TYPEDEFname: return "typedef";
 	  case TYPEGENname: return "typegen";
Index: src/Parser/lex.ll
===================================================================
--- src/Parser/lex.ll	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/Parser/lex.ll	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -10,6 +10,6 @@
  * Created On       : Sat Sep 22 08:58:10 2001
  * Last Modified By : Peter A. Buhr
- * Last Modified On : Thu Apr  1 13:22:31 2021
- * Update Count     : 754
+ * Last Modified On : Sun Jun 20 18:41:09 2021
+ * Update Count     : 759
  */
 
@@ -117,10 +117,11 @@
 hex_constant {hex_prefix}{hex_digits}{integer_suffix_opt}
 
-				// GCC: D (double) and iI (imaginary) suffixes, and DL (long double)
+				// GCC: floating D (double), imaginary iI, and decimal floating DF, DD, DL
 exponent "_"?[eE]"_"?[+-]?{decimal_digits}
 floating_size 16|32|32x|64|64x|80|128|128x
 floating_length ([fFdDlLwWqQ]|[fF]{floating_size})
 floating_suffix ({floating_length}?[iI]?)|([iI]{floating_length})
-floating_suffix_opt ("_"?({floating_suffix}|"DL"))?
+decimal_floating_suffix [dD][fFdDlL]
+floating_suffix_opt ("_"?({floating_suffix}|{decimal_floating_suffix}))?
 decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
 floating_decimal {decimal_digits}"."{exponent}?{floating_suffix_opt}
@@ -234,4 +235,7 @@
 continue		{ KEYWORD_RETURN(CONTINUE); }
 coroutine		{ KEYWORD_RETURN(COROUTINE); }			// CFA
+_Decimal32		{ KEYWORD_RETURN(DECIMAL32); }			// GCC
+_Decimal64		{ KEYWORD_RETURN(DECIMAL64); }			// GCC
+_Decimal128		{ KEYWORD_RETURN(DECIMAL128); }			// GCC
 default			{ KEYWORD_RETURN(DEFAULT); }
 disable			{ KEYWORD_RETURN(DISABLE); }			// CFA
Index: src/Parser/parser.yy
===================================================================
--- src/Parser/parser.yy	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/Parser/parser.yy	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -10,6 +10,6 @@
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Apr 26 18:41:54 2021
-// Update Count     : 4990
+// Last Modified On : Tue Jun 29 09:12:47 2021
+// Update Count     : 5027
 //
 
@@ -26,17 +26,13 @@
 // The root language for this grammar is ANSI99/11 C. All of ANSI99/11 is parsed, except for:
 //
-// 1. designation with '=' (use ':' instead)
-//
-// Most of the syntactic extensions from ANSI90 to ANSI11 C are marked with the comment "C99/C11". This grammar also has
-// two levels of extensions. The first extensions cover most of the GCC C extensions, except for:
-//
-// 1. designation with and without '=' (use ':' instead)
-
-//
-// All of the syntactic extensions for GCC C are marked with the comment "GCC". The second extensions are for Cforall
-// (CFA), which fixes several of C's outstanding problems and extends C with many modern language concepts. All of the
-// syntactic extensions for CFA C are marked with the comment "CFA". As noted above, there is one unreconcileable
-// parsing problem between C99 and CFA with respect to designators; this is discussed in detail before the "designation"
-// grammar rule.
+//   designation with '=' (use ':' instead)
+//
+// This incompatibility is discussed in detail before the "designation" grammar rule.  Most of the syntactic extensions
+// from ANSI90 to ANSI11 C are marked with the comment "C99/C11".
+
+// This grammar also has two levels of extensions. The first extensions cover most of the GCC C extensions. All of the
+// syntactic extensions for GCC C are marked with the comment "GCC". The second extensions are for Cforall (CFA), which
+// fixes several of C's outstanding problems and extends C with many modern language concepts. All of the syntactic
+// extensions for CFA C are marked with the comment "CFA".
 
 %{
@@ -269,4 +265,5 @@
 %token INT128 UINT128 uuFLOAT80 uuFLOAT128				// GCC
 %token uFLOAT16 uFLOAT32 uFLOAT32X uFLOAT64 uFLOAT64X uFLOAT128 // GCC
+%token DECIMAL32 DECIMAL64 DECIMAL128					// GCC
 %token ZERO_T ONE_T										// CFA
 %token SIZEOF TYPEOF VALIST AUTO_TYPE					// GCC
@@ -287,5 +284,5 @@
 
 // names and constants: lexer differentiates between identifier and typedef names
-%token<tok> IDENTIFIER		QUOTED_IDENTIFIER	TYPEDEFname		TYPEGENname
+%token<tok> IDENTIFIER		QUOTED_IDENTIFIER	TYPEDIMname		TYPEDEFname		TYPEGENname
 %token<tok> TIMEOUT			WOR					CATCH			RECOVER			CATCHRESUME		FIXUP		FINALLY		// CFA
 %token<tok> INTEGERconstant	CHARACTERconstant	STRINGliteral
@@ -586,4 +583,8 @@
 	| quasi_keyword
 		{ $$ = new ExpressionNode( build_varref( $1 ) ); }
+	| TYPEDIMname										// CFA, generic length argument
+		// { $$ = new ExpressionNode( new TypeExpr( maybeMoveBuildType( DeclarationNode::newFromTypedef( $1 ) ) ) ); }
+		// { $$ = new ExpressionNode( build_varref( $1 ) ); }
+		{ $$ = new ExpressionNode( build_dimensionref( $1 ) ); }
 	| tuple
 	| '(' comma_expression ')'
@@ -630,7 +631,10 @@
 postfix_expression:
 	primary_expression
-	| postfix_expression '[' assignment_expression ',' comma_expression ']'
-		// { $$ = new ExpressionNode( build_binary_val( OperKinds::Index, $1, new ExpressionNode( build_binary_val( OperKinds::Index, $3, $5 ) ) ) ); }
-		{ SemanticError( yylloc, "New array subscript is currently unimplemented." ); $$ = nullptr; }
+	| postfix_expression '[' assignment_expression ',' tuple_expression_list ']'
+			// Historic, transitional: Disallow commas in subscripts.
+			// Switching to this behaviour may help check if a C compatibility case uses comma-exprs in subscripts.
+		// { SemanticError( yylloc, "New array subscript is currently unimplemented." ); $$ = nullptr; }
+			// Current: Commas in subscripts make tuples.
+		{ $$ = new ExpressionNode( build_binary_val( OperKinds::Index, $1, new ExpressionNode( build_tuple( (ExpressionNode *)($3->set_last( $5 ) ) )) ) ); }
 	| postfix_expression '[' assignment_expression ']'
 		// CFA, comma_expression disallowed in this context because it results in a common user error: subscripting a
@@ -1887,4 +1891,10 @@
 	| uFLOAT128
 		{ $$ = DeclarationNode::newBasicType( DeclarationNode::uFloat128 ); }
+	| DECIMAL32
+		{ SemanticError( yylloc, "_Decimal32 is currently unimplemented." ); $$ = nullptr; }
+	| DECIMAL64
+		{ SemanticError( yylloc, "_Decimal64 is currently unimplemented." ); $$ = nullptr; }
+	| DECIMAL128
+		{ SemanticError( yylloc, "_Decimal128 is currently unimplemented." ); $$ = nullptr; }
 	| COMPLEX											// C99
 		{ $$ = DeclarationNode::newComplexType( DeclarationNode::Complex ); }
@@ -1909,5 +1919,5 @@
 	// empty
 		{ $$ = nullptr; }
-	| vtable;
+	| vtable
 	;
 
@@ -2535,6 +2545,6 @@
 	| '[' identifier_or_type_name ']'
 		{
-			typedefTable.addToScope( *$2, TYPEDEFname, "9" );
-			$$ = DeclarationNode::newTypeParam( TypeDecl::ALtype, $2 );
+			typedefTable.addToScope( *$2, TYPEDIMname, "9" );
+			$$ = DeclarationNode::newTypeParam( TypeDecl::Dimension, $2 );
 		}
 	// | type_specifier identifier_parameter_declarator
@@ -2550,4 +2560,6 @@
 	| '*'
 		{ $$ = TypeDecl::DStype; }						// dtype + sized
+	// | '(' '*' ')'
+	// 	{ $$ = TypeDecl::Ftype; }
 	| ELLIPSIS
 		{ $$ = TypeDecl::Ttype; }
@@ -2590,10 +2602,8 @@
 		{ $$ = new ExpressionNode( new TypeExpr( maybeMoveBuildType( $1 ) ) ); }
 	| assignment_expression
-		{ SemanticError( yylloc, toString("Expression generic parameters are currently unimplemented: ", $1->build()) ); $$ = nullptr; }
 	| type_list ',' type
 		{ $$ = (ExpressionNode *)($1->set_last( new ExpressionNode( new TypeExpr( maybeMoveBuildType( $3 ) ) ) )); }
 	| type_list ',' assignment_expression
-		{ SemanticError( yylloc, toString("Expression generic parameters are currently unimplemented: ", $3->build()) ); $$ = nullptr; }
-		// { $$ = (ExpressionNode *)( $1->set_last( $3 )); }
+		{ $$ = (ExpressionNode *)( $1->set_last( $3 )); }
 	;
 
Index: src/SymTab/Indexer.cc
===================================================================
--- src/SymTab/Indexer.cc	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/SymTab/Indexer.cc	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -74,7 +74,7 @@
 	}
 
-	Indexer::Indexer()
+	Indexer::Indexer( bool trackIdentifiers )
 	: idTable(), typeTable(), structTable(), enumTable(), unionTable(), traitTable(),
-	  prevScope(), scope( 0 ), repScope( 0 ) { ++* stats().count; }
+	  prevScope(), scope( 0 ), repScope( 0 ), trackIdentifiers( trackIdentifiers ) { ++* stats().count; }
 
 	Indexer::~Indexer() {
@@ -110,4 +110,6 @@
 
 	void Indexer::lookupId( const std::string & id, std::list< IdData > &out ) const {
+		assert( trackIdentifiers );
+
 		++* stats().lookup_calls;
 		if ( ! idTable ) return;
@@ -434,4 +436,5 @@
 			const Declaration * deleteStmt ) {
 		++* stats().add_calls;
+		if ( ! trackIdentifiers ) return;
 		const std::string &name = decl->name;
 		if ( name == "" ) return;
Index: src/SymTab/Indexer.h
===================================================================
--- src/SymTab/Indexer.h	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/SymTab/Indexer.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -31,5 +31,5 @@
 	class Indexer : public std::enable_shared_from_this<SymTab::Indexer> {
 	public:
-		explicit Indexer();
+		explicit Indexer( bool trackIdentifiers = true );
 		virtual ~Indexer();
 
@@ -180,4 +180,6 @@
 		/// returns true if there exists a declaration with C linkage and the given name with a different mangled name
 		bool hasIncompatibleCDecl( const std::string & id, const std::string & mangleName ) const;
+
+	    bool trackIdentifiers;
 	};
 } // namespace SymTab
Index: src/SymTab/Validate.cc
===================================================================
--- src/SymTab/Validate.cc	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/SymTab/Validate.cc	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -105,4 +105,5 @@
 
 	struct FixQualifiedTypes final : public WithIndexer {
+		FixQualifiedTypes() : WithIndexer(false) {}
 		Type * postmutate( QualifiedType * );
 	};
@@ -174,4 +175,14 @@
 	};
 
+	/// Does early resolution on the expressions that give enumeration constants their values
+	struct ResolveEnumInitializers final : public WithIndexer, public WithGuards, public WithVisitorRef<ResolveEnumInitializers>, public WithShortCircuiting {
+		ResolveEnumInitializers( const Indexer * indexer );
+		void postvisit( EnumDecl * enumDecl );
+
+	  private:
+		const Indexer * local_indexer;
+
+	};
+
 	/// Replaces array and function types in forall lists by appropriate pointer type and assigns each Object and Function declaration a unique ID.
 	struct ForallPointerDecay_old final {
@@ -260,4 +271,23 @@
 		void previsit( StructInstType * inst );
 		void previsit( UnionInstType * inst );
+	};
+
+	/// desugar declarations and uses of dimension parameters like [N],
+	/// from type-system managed values, to tunneling via ordinary types,
+	/// as char[-] in and sizeof(-) out
+	struct TranslateDimensionGenericParameters : public WithIndexer, public WithGuards {
+		static void translateDimensions( std::list< Declaration * > &translationUnit );
+		TranslateDimensionGenericParameters();
+
+		bool nextVisitedNodeIsChildOfSUIT = false; // SUIT = Struct or Union -Inst Type
+		bool visitingChildOfSUIT = false;
+		void changeState_ChildOfSUIT( bool newVal );
+		void premutate( StructInstType * sit );
+		void premutate( UnionInstType * uit );
+		void premutate( BaseSyntaxNode * node );
+
+		TypeDecl * postmutate( TypeDecl * td );
+		Expression * postmutate( DimensionExpr * de );
+		Expression * postmutate( Expression * e );
 	};
 
@@ -307,4 +337,5 @@
 		PassVisitor<EnumAndPointerDecay_old> epc;
 		PassVisitor<LinkReferenceToTypes_old> lrt( nullptr );
+		PassVisitor<ResolveEnumInitializers> rei( nullptr );
 		PassVisitor<ForallPointerDecay_old> fpd;
 		PassVisitor<CompoundLiteral> compoundliteral;
@@ -326,23 +357,27 @@
 			Stats::Heap::newPass("validate-B");
 			Stats::Time::BlockGuard guard("validate-B");
-			Stats::Time::TimeBlock("Link Reference To Types", [&]() {
-				acceptAll( translationUnit, lrt ); // must happen before autogen, because sized flag needs to propagate to generated functions
-			});
-			Stats::Time::TimeBlock("Fix Qualified Types", [&]() {
-				mutateAll( translationUnit, fixQual ); // must happen after LinkReferenceToTypes_old, because aggregate members are accessed
-			});
-			Stats::Time::TimeBlock("Hoist Structs", [&]() {
-				HoistStruct::hoistStruct( translationUnit ); // must happen after EliminateTypedef, so that aggregate typedefs occur in the correct order
-			});
-			Stats::Time::TimeBlock("Eliminate Typedefs", [&]() {
-				EliminateTypedef::eliminateTypedef( translationUnit ); //
-			});
+			acceptAll( translationUnit, lrt ); // must happen before autogen, because sized flag needs to propagate to generated functions
+			mutateAll( translationUnit, fixQual ); // must happen after LinkReferenceToTypes_old, because aggregate members are accessed
+			HoistStruct::hoistStruct( translationUnit );
+			EliminateTypedef::eliminateTypedef( translationUnit );
 		}
 		{
 			Stats::Heap::newPass("validate-C");
 			Stats::Time::BlockGuard guard("validate-C");
-			acceptAll( translationUnit, genericParams );  // check as early as possible - can't happen before LinkReferenceToTypes_old
-			ReturnChecker::checkFunctionReturns( translationUnit );
-			InitTweak::fixReturnStatements( translationUnit ); // must happen before autogen
+			Stats::Time::TimeBlock("Validate Generic Parameters", [&]() {
+				acceptAll( translationUnit, genericParams );  // check as early as possible - can't happen before LinkReferenceToTypes_old; observed failing when attempted before eliminateTypedef
+			});
+			Stats::Time::TimeBlock("Translate Dimensions", [&]() {
+				TranslateDimensionGenericParameters::translateDimensions( translationUnit );
+			});
+			Stats::Time::TimeBlock("Resolve Enum Initializers", [&]() {
+				acceptAll( translationUnit, rei ); // must happen after translateDimensions because rei needs identifier lookup, which needs name mangling
+			});
+			Stats::Time::TimeBlock("Check Function Returns", [&]() {
+				ReturnChecker::checkFunctionReturns( translationUnit );
+			});
+			Stats::Time::TimeBlock("Fix Return Statements", [&]() {
+				InitTweak::fixReturnStatements( translationUnit ); // must happen before autogen
+			});
 		}
 		{
@@ -644,5 +679,5 @@
 	}
 
-	LinkReferenceToTypes_old::LinkReferenceToTypes_old( const Indexer * other_indexer ) {
+	LinkReferenceToTypes_old::LinkReferenceToTypes_old( const Indexer * other_indexer ) : WithIndexer( false ) {
 		if ( other_indexer ) {
 			local_indexer = other_indexer;
@@ -664,12 +699,4 @@
 	}
 
-	void checkGenericParameters( ReferenceToType * inst ) {
-		for ( Expression * param : inst->parameters ) {
-			if ( ! dynamic_cast< TypeExpr * >( param ) ) {
-				SemanticError( inst, "Expression parameters for generic types are currently unsupported: " );
-			}
-		}
-	}
-
 	void LinkReferenceToTypes_old::postvisit( StructInstType * structInst ) {
 		const StructDecl * st = local_indexer->lookupStruct( structInst->name );
@@ -682,5 +709,4 @@
 			forwardStructs[ structInst->name ].push_back( structInst );
 		} // if
-		checkGenericParameters( structInst );
 	}
 
@@ -695,5 +721,4 @@
 			forwardUnions[ unionInst->name ].push_back( unionInst );
 		} // if
-		checkGenericParameters( unionInst );
 	}
 
@@ -807,13 +832,4 @@
 				forwardEnums.erase( fwds );
 			} // if
-
-			for ( Declaration * member : enumDecl->members ) {
-				ObjectDecl * field = strict_dynamic_cast<ObjectDecl *>( member );
-				if ( field->init ) {
-					// need to resolve enumerator initializers early so that other passes that determine if an expression is constexpr have the appropriate information.
-					SingleInit * init = strict_dynamic_cast<SingleInit *>( field->init );
-					ResolvExpr::findSingleExpression( init->value, new BasicType( Type::Qualifiers(), BasicType::SignedInt ), indexer );
-				}
-			}
 		} // if
 	}
@@ -878,4 +894,25 @@
 				typeInst->set_isFtype( typeDecl->kind == TypeDecl::Ftype );
 			} // if
+		} // if
+	}
+
+	ResolveEnumInitializers::ResolveEnumInitializers( const Indexer * other_indexer ) : WithIndexer( true ) {
+		if ( other_indexer ) {
+			local_indexer = other_indexer;
+		} else {
+			local_indexer = &indexer;
+		} // if
+	}
+
+	void ResolveEnumInitializers::postvisit( EnumDecl * enumDecl ) {
+		if ( enumDecl->body ) {
+			for ( Declaration * member : enumDecl->members ) {
+				ObjectDecl * field = strict_dynamic_cast<ObjectDecl *>( member );
+				if ( field->init ) {
+					// need to resolve enumerator initializers early so that other passes that determine if an expression is constexpr have the appropriate information.
+					SingleInit * init = strict_dynamic_cast<SingleInit *>( field->init );
+					ResolvExpr::findSingleExpression( init->value, new BasicType( Type::Qualifiers(), BasicType::SignedInt ), indexer );
+				}
+			}
 		} // if
 	}
@@ -1152,4 +1189,5 @@
 		GuardScope( typedeclNames );
 		mutateAll( aggr->parameters, * visitor );
+		mutateAll( aggr->attributes, * visitor );
 
 		// unroll mutateAll for aggr->members so that implicit typedefs for nested types are added to the aggregate body.
@@ -1220,4 +1258,22 @@
 			}
 		}
+	}
+
+	// Test for special name on a generic parameter.  Special treatment for the
+	// special name is a bootstrapping hack.  In most cases, the worlds of T's
+	// and of N's don't overlap (normal treatment).  The foundations in
+	// array.hfa use tagging for both types and dimensions.  Tagging treats
+	// its subject parameter even more opaquely than T&, which assumes it is
+	// possible to have a pointer/reference to such an object.  Tagging only
+	// seeks to identify the type-system resident at compile time.  Both N's
+	// and T's can make tags.  The tag definition uses the special name, which
+	// is treated as "an N or a T."  This feature is not intended to be used
+	// outside of the definition and immediate uses of a tag.
+	static inline bool isReservedTysysIdOnlyName( const std::string & name ) {
+		// name's prefix was __CFA_tysys_id_only, before it got wrapped in __..._generic
+		int foundAt = name.find("__CFA_tysys_id_only");
+		if (foundAt == 0) return true;
+		if (foundAt == 2 && name[0] == '_' && name[1] == '_') return true;
+		return false;
 	}
 
@@ -1238,19 +1294,35 @@
 			TypeSubstitution sub;
 			auto paramIter = params->begin();
-			for ( size_t i = 0; paramIter != params->end(); ++paramIter, ++i ) {
-				if ( i < args.size() ) {
-					TypeExpr * expr = strict_dynamic_cast< TypeExpr * >( * std::next( args.begin(), i ) );
-					sub.add( (* paramIter)->get_name(), expr->get_type()->clone() );
-				} else if ( i == args.size() ) {
+			auto argIter = args.begin();
+			for ( ; paramIter != params->end(); ++paramIter, ++argIter ) {
+				if ( argIter != args.end() ) {
+					TypeExpr * expr = dynamic_cast< TypeExpr * >( * argIter );
+					if ( expr ) {
+						sub.add( (* paramIter)->get_name(), expr->get_type()->clone() );
+					}
+				} else {
 					Type * defaultType = (* paramIter)->get_init();
 					if ( defaultType ) {
 						args.push_back( new TypeExpr( defaultType->clone() ) );
 						sub.add( (* paramIter)->get_name(), defaultType->clone() );
+						argIter = std::prev(args.end());
+					} else {
+						SemanticError( inst, "Too few type arguments in generic type " );
 					}
 				}
+				assert( argIter != args.end() );
+				bool typeParamDeclared = (*paramIter)->kind != TypeDecl::Kind::Dimension;
+				bool typeArgGiven;
+				if ( isReservedTysysIdOnlyName( (*paramIter)->name ) ) {
+					// coerce a match when declaration is reserved name, which means "either"
+					typeArgGiven = typeParamDeclared;
+				} else {
+					typeArgGiven = dynamic_cast< TypeExpr * >( * argIter );
+				}
+				if ( ! typeParamDeclared &&   typeArgGiven ) SemanticError( inst, "Type argument given for value parameter: " );
+				if (   typeParamDeclared && ! typeArgGiven ) SemanticError( inst, "Expression argument given for type parameter: " );
 			}
 
 			sub.apply( inst );
-			if ( args.size() < params->size() ) SemanticError( inst, "Too few type arguments in generic type " );
 			if ( args.size() > params->size() ) SemanticError( inst, "Too many type arguments in generic type " );
 		}
@@ -1263,4 +1335,104 @@
 	void ValidateGenericParameters::previsit( UnionInstType * inst ) {
 		validateGeneric( inst );
+	}
+
+	void TranslateDimensionGenericParameters::translateDimensions( std::list< Declaration * > &translationUnit ) {
+		PassVisitor<TranslateDimensionGenericParameters> translator;
+		mutateAll( translationUnit, translator );
+	}
+
+	TranslateDimensionGenericParameters::TranslateDimensionGenericParameters() : WithIndexer( false ) {}
+
+	// Declaration of type variable:           forall( [N] )          ->  forall( N & | sized( N ) )
+	TypeDecl * TranslateDimensionGenericParameters::postmutate( TypeDecl * td ) {
+		if ( td->kind == TypeDecl::Dimension ) {
+			td->kind = TypeDecl::Dtype;
+			if ( ! isReservedTysysIdOnlyName( td->name ) ) {
+				td->sized = true;
+			}
+		}
+		return td;
+	}
+
+	// Situational awareness:
+	// array( float, [[currentExpr]]     )  has  visitingChildOfSUIT == true
+	// array( float, [[currentExpr]] - 1 )  has  visitingChildOfSUIT == false
+	// size_t x =    [[currentExpr]]        has  visitingChildOfSUIT == false
+	void TranslateDimensionGenericParameters::changeState_ChildOfSUIT( bool newVal ) {
+		GuardValue( nextVisitedNodeIsChildOfSUIT );
+		GuardValue( visitingChildOfSUIT );
+		visitingChildOfSUIT = nextVisitedNodeIsChildOfSUIT;
+		nextVisitedNodeIsChildOfSUIT = newVal;
+	}
+	void TranslateDimensionGenericParameters::premutate( StructInstType * sit ) {
+		(void) sit;
+		changeState_ChildOfSUIT(true);
+	}
+	void TranslateDimensionGenericParameters::premutate( UnionInstType * uit ) {
+		(void) uit;
+		changeState_ChildOfSUIT(true);
+	}
+	void TranslateDimensionGenericParameters::premutate( BaseSyntaxNode * node ) {
+		(void) node;
+		changeState_ChildOfSUIT(false);
+	}
+
+	// Passing values as dimension arguments:  array( float,     7 )  -> array( float, char[             7 ] )
+	// Consuming dimension parameters:         size_t x =    N - 1 ;  -> size_t x =          sizeof(N) - 1   ;
+	// Intertwined reality:                    array( float, N     )  -> array( float,              N        )
+	//                                         array( float, N - 1 )  -> array( float, char[ sizeof(N) - 1 ] )
+	// Intertwined case 1 is not just an optimization.
+	// Avoiding char[sizeof(-)] is necessary to enable the call of f to bind the value of N, in:
+	//   forall([N]) void f( array(float, N) & );
+	//   array(float, 7) a;
+	//   f(a);
+
+	Expression * TranslateDimensionGenericParameters::postmutate( DimensionExpr * de ) {
+		// Expression de is an occurrence of N in LHS of above examples.
+		// Look up the name that de references.
+		// If we are in a struct body, then this reference can be to an entry of the struct's forall list.
+		// Whether or not we are in a struct body, this reference can be to an entry of a containing function's forall list.
+		// If we are in a struct body, then the struct's forall declarations are innermost (functions don't occur in structs).
+		// Thus, a potential struct's declaration is highest priority.
+		// A struct's forall declarations are already renamed with _generic_ suffix.  Try that name variant first.
+
+		std::string useName = "__" + de->name + "_generic_";
+		TypeDecl * namedParamDecl = const_cast<TypeDecl *>( strict_dynamic_cast<const TypeDecl *, nullptr >( indexer.lookupType( useName ) ) );
+
+		if ( ! namedParamDecl ) {
+			useName = de->name;
+			namedParamDecl = const_cast<TypeDecl *>( strict_dynamic_cast<const TypeDecl *, nullptr >( indexer.lookupType( useName ) ) );
+		}
+
+		// Expect to find it always.  A misspelled name would have been parsed as an identifier.
+		assert( namedParamDecl && "Type-system-managed value name not found in symbol table" );
+
+		delete de;
+
+		TypeInstType * refToDecl = new TypeInstType( 0, useName, namedParamDecl );
+
+		if ( visitingChildOfSUIT ) {
+			// As in postmutate( Expression * ), topmost expression needs a TypeExpr wrapper
+			// But avoid ArrayType-Sizeof
+			return new TypeExpr( refToDecl );
+		} else {
+			// the N occurrence is being used directly as a runtime value,
+			// if we are in a type instantiation, then the N is within a bigger value computation
+			return new SizeofExpr( refToDecl );
+		}
+	}
+
+	Expression * TranslateDimensionGenericParameters::postmutate( Expression * e ) {
+		if ( visitingChildOfSUIT ) {
+			// e is an expression used as an argument to instantiate a type
+			if (! dynamic_cast< TypeExpr * >( e ) ) {
+				// e is a value expression
+				// but not a DimensionExpr, which has a distinct postmutate
+				Type * typeExprContent = new ArrayType( 0, new BasicType( 0, BasicType::Char ), e, true, false );
+				TypeExpr * result = new TypeExpr( typeExprContent );
+				return result;
+			}
+		}
+		return e;
 	}
 
Index: src/SynTree/Declaration.h
===================================================================
--- src/SynTree/Declaration.h	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/SynTree/Declaration.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -201,5 +201,5 @@
 	typedef NamedTypeDecl Parent;
   public:
-	enum Kind { Dtype, DStype, Otype, Ftype, Ttype, ALtype, NUMBER_OF_KINDS };
+	enum Kind { Dtype, DStype, Otype, Ftype, Ttype, Dimension, NUMBER_OF_KINDS };
 
 	Kind kind;
Index: src/SynTree/Expression.h
===================================================================
--- src/SynTree/Expression.h	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/SynTree/Expression.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -587,4 +587,23 @@
 };
 
+/// DimensionExpr represents a type-system provided value used in an expression ( forall([N]) ... N + 1 )
+class DimensionExpr : public Expression {
+  public:
+	std::string name;
+
+	DimensionExpr( std::string name );
+	DimensionExpr( const DimensionExpr & other );
+	virtual ~DimensionExpr();
+
+	const std::string & get_name() const { return name; }
+	void set_name( std::string newValue ) { name = newValue; }
+
+	virtual DimensionExpr * clone() const override { return new DimensionExpr( * this ); }
+	virtual void accept( Visitor & v ) override { v.visit( this ); }
+	virtual void accept( Visitor & v ) const override { v.visit( this ); }
+	virtual Expression * acceptMutator( Mutator & m ) override { return m.mutate( this ); }
+	virtual void print( std::ostream & os, Indenter indent = {} ) const override;
+};
+
 /// AsmExpr represents a GCC 'asm constraint operand' used in an asm statement: [output] "=f" (result)
 class AsmExpr : public Expression {
Index: src/SynTree/Mutator.h
===================================================================
--- src/SynTree/Mutator.h	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/SynTree/Mutator.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -80,4 +80,5 @@
 	virtual Expression * mutate( CommaExpr * commaExpr ) = 0;
 	virtual Expression * mutate( TypeExpr * typeExpr ) = 0;
+	virtual Expression * mutate( DimensionExpr * dimensionExpr ) = 0;
 	virtual Expression * mutate( AsmExpr * asmExpr ) = 0;
 	virtual Expression * mutate( ImplicitCopyCtorExpr * impCpCtorExpr ) = 0;
Index: src/SynTree/SynTree.h
===================================================================
--- src/SynTree/SynTree.h	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/SynTree/SynTree.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -85,4 +85,5 @@
 class CommaExpr;
 class TypeExpr;
+class DimensionExpr;
 class AsmExpr;
 class ImplicitCopyCtorExpr;
Index: src/SynTree/TypeDecl.cc
===================================================================
--- src/SynTree/TypeDecl.cc	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/SynTree/TypeDecl.cc	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -33,5 +33,5 @@
 
 const char * TypeDecl::typeString() const {
-	static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized array length type" };
+	static const char * kindNames[] = { "sized data type", "sized data type", "sized object type", "sized function type", "sized tuple type", "sized length value" };
 	static_assert( sizeof(kindNames) / sizeof(kindNames[0]) == TypeDecl::NUMBER_OF_KINDS, "typeString: kindNames is out of sync." );
 	assertf( kind < TypeDecl::NUMBER_OF_KINDS, "TypeDecl kind is out of bounds." );
Index: src/SynTree/TypeExpr.cc
===================================================================
--- src/SynTree/TypeExpr.cc	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/SynTree/TypeExpr.cc	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -35,4 +35,18 @@
 }
 
+DimensionExpr::DimensionExpr( std::string name ) : Expression(), name(name) {
+	assertf(name != "0", "Zero is not a valid name");
+	assertf(name != "1", "One is not a valid name");
+}
+
+DimensionExpr::DimensionExpr( const DimensionExpr & other ) : Expression( other ), name( other.name ) {
+}
+
+DimensionExpr::~DimensionExpr() {}
+
+void DimensionExpr::print( std::ostream & os, Indenter indent ) const {
+	os << "Type-Sys Value: " << get_name();
+	Expression::print( os, indent );
+}
 // Local Variables: //
 // tab-width: 4 //
Index: src/SynTree/Visitor.h
===================================================================
--- src/SynTree/Visitor.h	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ src/SynTree/Visitor.h	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -135,4 +135,6 @@
 	virtual void visit( TypeExpr * node ) { visit( const_cast<const TypeExpr *>(node) ); }
 	virtual void visit( const TypeExpr * typeExpr ) = 0;
+	virtual void visit( DimensionExpr * node ) { visit( const_cast<const DimensionExpr *>(node) ); }
+	virtual void visit( const DimensionExpr * typeExpr ) = 0;
 	virtual void visit( AsmExpr * node ) { visit( const_cast<const AsmExpr *>(node) ); }
 	virtual void visit( const AsmExpr * asmExpr ) = 0;
Index: tests/.expect/forall.txt
===================================================================
--- tests/.expect/forall.txt	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/.expect/forall.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1,1 +1,1 @@
-forall.cfa:216:25: warning: Compiled
+forall.cfa:242:25: warning: Compiled
Index: tests/.expect/gmp.arm64.txt
===================================================================
--- tests/.expect/gmp.arm64.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/.expect/gmp.arm64.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,279 @@
+constructors
+50000000000000000000 3 50000000000000000003
+x:50000000000000000000 y:3 z:50000000000000000003
+conversions
+y:97
+y:12345678901234567890123456789
+y:200
+y:-400
+y:24691357802469135780246913578
+y:3
+y:-3
+y:4
+y:3
+y:3 b:3 si:3
+comparison
+1
+0
+0
+1
+0
+1
+arithmetic
+z:100000000000000000006
+z:50000000000000000000
+z:-3
+z:-450000000000000000000
+z:150000000000000000000
+z:150000000000000000000
+z:16666666666666666666
+16666666666666666666, 2 16666666666666666666, 2
+x:16666666666666666666 y:2
+
+12345678901234567890123456789 12345678901234567890123456789 12345678901234567890123456789
+
+Fibonacci Numbers
+0 0
+1 1
+2 1
+3 2
+4 3
+5 5
+6 8
+7 13
+8 21
+9 34
+10 55
+11 89
+12 144
+13 233
+14 377
+15 610
+16 987
+17 1597
+18 2584
+19 4181
+20 6765
+21 10946
+22 17711
+23 28657
+24 46368
+25 75025
+26 121393
+27 196418
+28 317811
+29 514229
+30 832040
+31 1346269
+32 2178309
+33 3524578
+34 5702887
+35 9227465
+36 14930352
+37 24157817
+38 39088169
+39 63245986
+40 102334155
+41 165580141
+42 267914296
+43 433494437
+44 701408733
+45 1134903170
+46 1836311903
+47 2971215073
+48 4807526976
+49 7778742049
+50 12586269025
+51 20365011074
+52 32951280099
+53 53316291173
+54 86267571272
+55 139583862445
+56 225851433717
+57 365435296162
+58 591286729879
+59 956722026041
+60 1548008755920
+61 2504730781961
+62 4052739537881
+63 6557470319842
+64 10610209857723
+65 17167680177565
+66 27777890035288
+67 44945570212853
+68 72723460248141
+69 117669030460994
+70 190392490709135
+71 308061521170129
+72 498454011879264
+73 806515533049393
+74 1304969544928657
+75 2111485077978050
+76 3416454622906707
+77 5527939700884757
+78 8944394323791464
+79 14472334024676221
+80 23416728348467685
+81 37889062373143906
+82 61305790721611591
+83 99194853094755497
+84 160500643816367088
+85 259695496911122585
+86 420196140727489673
+87 679891637638612258
+88 1100087778366101931
+89 1779979416004714189
+90 2880067194370816120
+91 4660046610375530309
+92 7540113804746346429
+93 12200160415121876738
+94 19740274219868223167
+95 31940434634990099905
+96 51680708854858323072
+97 83621143489848422977
+98 135301852344706746049
+99 218922995834555169026
+100 354224848179261915075
+101 573147844013817084101
+102 927372692193078999176
+103 1500520536206896083277
+104 2427893228399975082453
+105 3928413764606871165730
+106 6356306993006846248183
+107 10284720757613717413913
+108 16641027750620563662096
+109 26925748508234281076009
+110 43566776258854844738105
+111 70492524767089125814114
+112 114059301025943970552219
+113 184551825793033096366333
+114 298611126818977066918552
+115 483162952612010163284885
+116 781774079430987230203437
+117 1264937032042997393488322
+118 2046711111473984623691759
+119 3311648143516982017180081
+120 5358359254990966640871840
+121 8670007398507948658051921
+122 14028366653498915298923761
+123 22698374052006863956975682
+124 36726740705505779255899443
+125 59425114757512643212875125
+126 96151855463018422468774568
+127 155576970220531065681649693
+128 251728825683549488150424261
+129 407305795904080553832073954
+130 659034621587630041982498215
+131 1066340417491710595814572169
+132 1725375039079340637797070384
+133 2791715456571051233611642553
+134 4517090495650391871408712937
+135 7308805952221443105020355490
+136 11825896447871834976429068427
+137 19134702400093278081449423917
+138 30960598847965113057878492344
+139 50095301248058391139327916261
+140 81055900096023504197206408605
+141 131151201344081895336534324866
+142 212207101440105399533740733471
+143 343358302784187294870275058337
+144 555565404224292694404015791808
+145 898923707008479989274290850145
+146 1454489111232772683678306641953
+147 2353412818241252672952597492098
+148 3807901929474025356630904134051
+149 6161314747715278029583501626149
+150 9969216677189303386214405760200
+151 16130531424904581415797907386349
+152 26099748102093884802012313146549
+153 42230279526998466217810220532898
+154 68330027629092351019822533679447
+155 110560307156090817237632754212345
+156 178890334785183168257455287891792
+157 289450641941273985495088042104137
+158 468340976726457153752543329995929
+159 757791618667731139247631372100066
+160 1226132595394188293000174702095995
+161 1983924214061919432247806074196061
+162 3210056809456107725247980776292056
+163 5193981023518027157495786850488117
+164 8404037832974134882743767626780173
+165 13598018856492162040239554477268290
+166 22002056689466296922983322104048463
+167 35600075545958458963222876581316753
+168 57602132235424755886206198685365216
+169 93202207781383214849429075266681969
+170 150804340016807970735635273952047185
+171 244006547798191185585064349218729154
+172 394810887814999156320699623170776339
+173 638817435613190341905763972389505493
+174 1033628323428189498226463595560281832
+175 1672445759041379840132227567949787325
+176 2706074082469569338358691163510069157
+177 4378519841510949178490918731459856482
+178 7084593923980518516849609894969925639
+179 11463113765491467695340528626429782121
+180 18547707689471986212190138521399707760
+181 30010821454963453907530667147829489881
+182 48558529144435440119720805669229197641
+183 78569350599398894027251472817058687522
+184 127127879743834334146972278486287885163
+185 205697230343233228174223751303346572685
+186 332825110087067562321196029789634457848
+187 538522340430300790495419781092981030533
+188 871347450517368352816615810882615488381
+189 1409869790947669143312035591975596518914
+190 2281217241465037496128651402858212007295
+191 3691087032412706639440686994833808526209
+192 5972304273877744135569338397692020533504
+193 9663391306290450775010025392525829059713
+194 15635695580168194910579363790217849593217
+195 25299086886458645685589389182743678652930
+196 40934782466626840596168752972961528246147
+197 66233869353085486281758142155705206899077
+198 107168651819712326877926895128666735145224
+199 173402521172797813159685037284371942044301
+200 280571172992510140037611932413038677189525
+
+Factorial Numbers
+0 1
+1 1
+2 2
+3 6
+4 24
+5 120
+6 720
+7 5040
+8 40320
+9 362880
+10 3628800
+11 39916800
+12 479001600
+13 6227020800
+14 87178291200
+15 1307674368000
+16 20922789888000
+17 355687428096000
+18 6402373705728000
+19 121645100408832000
+20 2432902008176640000
+21 51090942171709440000
+22 1124000727777607680000
+23 25852016738884976640000
+24 620448401733239439360000
+25 15511210043330985984000000
+26 403291461126605635584000000
+27 10888869450418352160768000000
+28 304888344611713860501504000000
+29 8841761993739701954543616000000
+30 265252859812191058636308480000000
+31 8222838654177922817725562880000000
+32 263130836933693530167218012160000000
+33 8683317618811886495518194401280000000
+34 295232799039604140847618609643520000000
+35 10333147966386144929666651337523200000000
+36 371993326789901217467999448150835200000000
+37 13763753091226345046315979581580902400000000
+38 523022617466601111760007224100074291200000000
+39 20397882081197443358640281739902897356800000000
+40 815915283247897734345611269596115894272000000000
Index: tests/.expect/typedefRedef-ERR1.txt
===================================================================
--- tests/.expect/typedefRedef-ERR1.txt	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/.expect/typedefRedef-ERR1.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1,3 +1,3 @@
-typedefRedef.cfa:69:25: warning: Compiled
+typedefRedef.cfa:75:25: warning: Compiled
 typedefRedef.cfa:4:1 error: Cannot redefine typedef: Foo
-typedefRedef.cfa:59:1 error: Cannot redefine typedef: ARR
+typedefRedef.cfa:65:1 error: Cannot redefine typedef: ARR
Index: tests/.expect/typedefRedef.txt
===================================================================
--- tests/.expect/typedefRedef.txt	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/.expect/typedefRedef.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -1,1 +1,1 @@
-typedefRedef.cfa:69:25: warning: Compiled
+typedefRedef.cfa:75:25: warning: Compiled
Index: tests/array-container/.expect/array-md-sbscr-cases.arm64.txt
===================================================================
--- tests/array-container/.expect/array-md-sbscr-cases.arm64.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/array-container/.expect/array-md-sbscr-cases.arm64.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,1 @@
+done
Index: tests/array-container/.expect/array-md-sbscr-cases.x86.txt
===================================================================
--- tests/array-container/.expect/array-md-sbscr-cases.x86.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/array-container/.expect/array-md-sbscr-cases.x86.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,1 @@
+done
Index: tests/array-container/.expect/language-dim-mismatch.txt
===================================================================
--- tests/array-container/.expect/language-dim-mismatch.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/array-container/.expect/language-dim-mismatch.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,10 @@
+array-container/language-dim-mismatch.cfa:12:1 error: Type argument given for value parameter: instance of struct SN with body 1
+... with parameters
+  float
+
+array-container/language-dim-mismatch.cfa:13:1 error: Expression argument given for type parameter: instance of struct ST with body 1
+... with parameters
+  constant expression (42 42: signed int)
+  with resolved type:
+    signed int
+
Index: tests/array-container/array-basic.cfa
===================================================================
--- tests/array-container/array-basic.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/array-container/array-basic.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -61,17 +61,17 @@
 forall( [Nw], [Nx], [Ny], [Nz] )
 void fillHelloData( array( float, Nw, Nx, Ny, Nz ) & wxyz ) {
-    for (w; z(Nw))
-    for (x; z(Nx))
-    for (y; z(Ny))
-    for (z; z(Nz))
+    for (w; Nw)
+    for (x; Nx)
+    for (y; Ny)
+    for (z; Nz)
         wxyz[w][x][y][z] = getMagicNumber(w, x, y, z);
 }
 
-forall( [Zn]
+forall( [N]
       , S & | sized(S)
       )
-float total1d_low( arpk(Zn, S, float, float ) & a ) {
+float total1d_low( arpk(N, S, float, float ) & a ) {
     float total = 0.0f;
-    for (i; z(Zn))
+    for (i; N)
         total += a[i];
     return total;
@@ -98,5 +98,5 @@
 
     expect = 0;
-    for (i; z(Nw))
+    for (i; Nw)
         expect += getMagicNumber( i, slice_ix, slice_ix, slice_ix );
     printf("expect Ws             = %f\n", expect);
@@ -105,5 +105,5 @@
     printf("result Ws [][][][] lo = %f\n", result);
 
-    result = total1d_low( wxyz[[all, slice_ix, slice_ix, slice_ix]] );
+    result = total1d_low( wxyz[all, slice_ix, slice_ix, slice_ix] );
     printf("result Ws [,,,]    lo = %f\n", result);
 
@@ -111,5 +111,5 @@
     printf("result Ws [][][][] hi = %f\n", result);
 
-    result = total1d_hi( wxyz[[all, slice_ix, slice_ix, slice_ix]] );
+    result = total1d_hi( wxyz[all, slice_ix, slice_ix, slice_ix] );
     printf("result Ws [,,,]    hi = %f\n", result);
 
@@ -117,5 +117,5 @@
 
     expect = 0;
-    for (i; z(Nx))
+    for (i; Nx)
         expect += getMagicNumber( slice_ix, i, slice_ix, slice_ix );
     printf("expect Xs             = %f\n", expect);
@@ -124,5 +124,5 @@
     printf("result Xs [][][][] lo = %f\n", result);
 
-    result = total1d_low( wxyz[[slice_ix, all, slice_ix, slice_ix]] );
+    result = total1d_low( wxyz[slice_ix, all, slice_ix, slice_ix] );
     printf("result Xs [,,,]    lo = %f\n", result);
 
@@ -130,5 +130,5 @@
     printf("result Xs [][][][] hi = %f\n", result);
 
-    result = total1d_hi( wxyz[[slice_ix, all, slice_ix, slice_ix]] );
+    result = total1d_hi( wxyz[slice_ix, all, slice_ix, slice_ix] );
     printf("result Xs [,,,]    hi = %f\n", result);
 
Index: tests/array-container/array-md-sbscr-cases.cfa
===================================================================
--- tests/array-container/array-md-sbscr-cases.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/array-container/array-md-sbscr-cases.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -20,8 +20,8 @@
 forall( [Nw], [Nx], [Ny], [Nz] )
 void fillHelloData( array( float, Nw, Nx, Ny, Nz ) & wxyz ) {
-    for (w; z(Nw))
-    for (x; z(Nx))
-    for (y; z(Ny))
-    for (z; z(Nz))
+    for (w; Nw)
+    for (x; Nx)
+    for (y; Ny)
+    for (z; Nz)
         wxyz[w][x][y][z] = getMagicNumber(w, x, y, z);
 }
@@ -53,38 +53,38 @@
     // order wxyz, natural split (4-0 or 0-4, no intermediate to declare)
 
-    assert(( wxyz[[iw, ix, iy, iz]] == valExpected ));
+    assert(( wxyz[iw, ix, iy, iz] == valExpected ));
 
     // order wxyz, unnatural split 1-3  (three ways declared)
 
     typeof( wxyz[iw] ) xyz1 = wxyz[iw];
-    assert(( xyz1[[ix, iy, iz]]  == valExpected ));
+    assert(( xyz1[ix, iy, iz]  == valExpected ));
 
     typeof( wxyz[iw] ) xyz2;
     &xyz2 = &wxyz[iw];
-    assert(( xyz2[[ix, iy, iz]] == valExpected ));
-
-    assert(( wxyz[iw][[ix, iy, iz]] == valExpected ));
+    assert(( xyz2[ix, iy, iz] == valExpected ));
+
+    assert(( wxyz[iw][ix, iy, iz] == valExpected ));
 
     // order wxyz, unnatural split 2-2  (three ways declared)
 
-    typeof( wxyz[[iw, ix]] ) yz1 = wxyz[[iw,ix]];
-    assert(( yz1[[iy, iz]]  == valExpected ));
-
-    typeof( wxyz[[iw, ix]] ) yz2;
-    &yz2 = &wxyz[[iw, ix]];
-    assert(( yz2[[iy, iz]]  == valExpected ));
-
-    assert(( wxyz[[iw, ix]][[iy, iz]] == valExpected ));
+    typeof( wxyz[iw, ix] ) yz1 = wxyz[iw,ix];
+    assert(( yz1[iy, iz]  == valExpected ));
+
+    typeof( wxyz[iw, ix] ) yz2;
+    &yz2 = &wxyz[iw, ix];
+    assert(( yz2[iy, iz]  == valExpected ));
+
+    assert(( wxyz[iw, ix][iy, iz] == valExpected ));
 
     // order wxyz, unnatural split 3-1  (three ways declared)
 
-    typeof( wxyz[[iw, ix, iy]] ) z1 = wxyz[[iw, ix, iy]];
+    typeof( wxyz[iw, ix, iy] ) z1 = wxyz[iw, ix, iy];
     assert(( z1[iz]  == valExpected ));
 
-    typeof( wxyz[[iw, ix, iy]] ) z2;
-    &z2 = &wxyz[[iw, ix, iy]];
+    typeof( wxyz[iw, ix, iy] ) z2;
+    &z2 = &wxyz[iw, ix, iy];
     assert(( z2[iz] == valExpected ));
 
-    assert(( wxyz[[iw, ix, iy]][iz] == valExpected ));
+    assert(( wxyz[iw, ix, iy][iz] == valExpected ));
 }
 
@@ -104,25 +104,25 @@
     // order wxyz (no intermediates to declare)
 
-    assert(( wxyz[[iw  , ix  , iy  , iz  ]]       == valExpected ));
-    assert(( wxyz[[iw-1, ix  , iy  , iz  ]]       != valExpected ));
+    assert(( wxyz[iw  , ix  , iy  , iz  ]       == valExpected ));
+    assert(( wxyz[iw-1, ix  , iy  , iz  ]       != valExpected ));
 
     // order xyzw: *xyz, w
 
-    assert(( wxyz[[all , ix  , iy  , iz  ]][iw  ] == valExpected ));
-    assert(( wxyz[[all , ix-1, iy  , iz  ]][iw  ] != valExpected ));
-    assert(( wxyz[[all , ix  , iy  , iz  ]][iw-1] != valExpected ));
+    assert(( wxyz[all , ix  , iy  , iz  ][iw  ] == valExpected ));
+    assert(( wxyz[all , ix-1, iy  , iz  ][iw  ] != valExpected ));
+    assert(( wxyz[all , ix  , iy  , iz  ][iw-1] != valExpected ));
 
     // order wyzx: w*yz, x
 
-    assert(( wxyz[[iw  , all , iy  , iz  ]][ix  ] == valExpected ));
-    assert(( wxyz[[iw  , all , iy-1, iz  ]][ix  ] != valExpected ));
-    assert(( wxyz[[iw  , all , iy  , iz  ]][ix-1] != valExpected ));
+    assert(( wxyz[iw  , all , iy  , iz  ][ix  ] == valExpected ));
+    assert(( wxyz[iw  , all , iy-1, iz  ][ix  ] != valExpected ));
+    assert(( wxyz[iw  , all , iy  , iz  ][ix-1] != valExpected ));
 
     // order wxzy: wx*z, y
   #if 0
     // not working on 32-bit
-    assert(( wxyz[[iw  , ix  , all , iz  ]][iy  ] == valExpected ));
-    assert(( wxyz[[iw  , ix  , all , iz-1]][iy  ] != valExpected ));
-    assert(( wxyz[[iw  , ix  , all , iz  ]][iy-1] != valExpected ));
+    assert(( wxyz[iw  , ix  , all , iz  ][iy  ] == valExpected ));
+    assert(( wxyz[iw  , ix  , all , iz-1][iy  ] != valExpected ));
+    assert(( wxyz[iw  , ix  , all , iz  ][iy-1] != valExpected ));
   #endif
 }
@@ -131,5 +131,5 @@
 // The comments specify a covering set of orders, each in its most natural split.
 // Covering means that each edge on the lattice of dimesnions-provided is used.
-// Natural split means the arity of every -[[-,...]] tuple equals the dimensionality of its "this" operand, then that the fewest "all" subscripts are given.
+// Natural split means the arity of every -[-,...] tuple equals the dimensionality of its "this" operand, then that the fewest "all" subscripts are given.
 // The commented-out test code shows cases that don't work.  We wish all the comment-coverd cases worked.
 forall( [Nw], [Nx], [Ny], [Nz] )
@@ -147,5 +147,5 @@
     // order wxyz (no intermediates to declare)
 
-    assert(( wxyz[[iw, ix, iy, iz]] == valExpected ));
+    assert(( wxyz[iw, ix, iy, iz] == valExpected ));
 
     {
@@ -153,16 +153,16 @@
         assert( wxyz[iw][all][iy][all] [ix][iz] == valExpected );
 
-        typeof( wxyz[[iw, all, iy, all]] ) xz1 = wxyz[[iw, all, iy, all]];
-        assert(( xz1[[ix, iz]]  == valExpected ));
-
-        typeof( wxyz[[iw, all, iy, all]] ) xz2;
-        &xz2 = &wxyz[[iw, all, iy, all]];
-        assert(( xz2[[ix, iz]]  == valExpected ));
-
-        assert(( wxyz[[iw  , all, iy  , all]][[ix  , iz  ]] == valExpected ));
-        assert(( wxyz[[iw-1, all, iy  , all]][[ix  , iz  ]] != valExpected ));
-        assert(( wxyz[[iw  , all, iy-1, all]][[ix  , iz  ]] != valExpected ));
-        assert(( wxyz[[iw  , all, iy  , all]][[ix-1, iz  ]] != valExpected ));
-        assert(( wxyz[[iw  , all, iy  , all]][[ix  , iz-1]] != valExpected ));
+        typeof( wxyz[iw, all, iy, all] ) xz1 = wxyz[iw, all, iy, all];
+        assert(( xz1[ix, iz]  == valExpected ));
+
+        typeof( wxyz[iw, all, iy, all] ) xz2;
+        &xz2 = &wxyz[iw, all, iy, all];
+        assert(( xz2[ix, iz]  == valExpected ));
+
+        assert(( wxyz[iw  , all, iy  , all][ix  , iz  ] == valExpected ));
+        assert(( wxyz[iw-1, all, iy  , all][ix  , iz  ] != valExpected ));
+        assert(( wxyz[iw  , all, iy-1, all][ix  , iz  ] != valExpected ));
+        assert(( wxyz[iw  , all, iy  , all][ix-1, iz  ] != valExpected ));
+        assert(( wxyz[iw  , all, iy  , all][ix  , iz-1] != valExpected ));
     }
     {
@@ -170,16 +170,16 @@
         assert( wxyz[iw][all][all][iz] [ix][iy] == valExpected );
 
-        // typeof( wxyz[[iw, all, all, iz]] ) xy1 = wxyz[[iw, all, all, iz]];
-        // assert(( xy1[[ix, iy]]  == valExpected ));
-
-        // typeof(  wxyz[[iw, all, all, iz]] ) xy2;
-        // &xy2 = &wxyz[[iw, all, all, iz]];
-        // assert(( xy2[[ix, iy]]  == valExpected ));
-
-        // assert(( wxyz[[iw  , all, all, iz  ]][[ix  , iy  ]] == valExpected ));
-        // assert(( wxyz[[iw-1, all, all, iz  ]][[ix  , iy  ]] != valExpected ));
-        // assert(( wxyz[[iw  , all, all, iz-1]][[ix  , iy  ]] != valExpected ));
-        // assert(( wxyz[[iw  , all, all, iz  ]][[ix-1, iy  ]] != valExpected ));
-        // assert(( wxyz[[iw  , all, all, iz  ]][[ix  , iy-1]] != valExpected ));
+        // typeof( wxyz[iw, all, all, iz] ) xy1 = wxyz[iw, all, all, iz];
+        // assert(( xy1[ix, iy]  == valExpected ));
+
+        // typeof(  wxyz[iw, all, all, iz] ) xy2;
+        // &xy2 = &wxyz[iw, all, all, iz];
+        // assert(( xy2[ix, iy]  == valExpected ));
+
+        // assert(( wxyz[iw  , all, all, iz  ][ix  , iy  ] == valExpected ));
+        // assert(( wxyz[iw-1, all, all, iz  ][ix  , iy  ] != valExpected ));
+        // assert(( wxyz[iw  , all, all, iz-1][ix  , iy  ] != valExpected ));
+        // assert(( wxyz[iw  , all, all, iz  ][ix-1, iy  ] != valExpected ));
+        // assert(( wxyz[iw  , all, all, iz  ][ix  , iy-1] != valExpected ));
     }
     {
@@ -187,8 +187,8 @@
         assert( wxyz[all][ix][iy][all] [iw][iz] == valExpected );
 
-        typeof( wxyz[[all, ix, iy, all]] ) wz1 = wxyz[[all, ix, iy, all]];
-        assert(( wz1[[iw, iz]]  == valExpected ));
-
-        assert(( wxyz[[all  , ix, iy  , all]][[iw  , iz  ]] == valExpected ));
+        typeof( wxyz[all, ix, iy, all] ) wz1 = wxyz[all, ix, iy, all];
+        assert(( wz1[iw, iz]  == valExpected ));
+
+        assert(( wxyz[all  , ix, iy  , all][iw  , iz  ] == valExpected ));
     }
     {
@@ -196,5 +196,5 @@
         assert( wxyz[all][ix][all][iz] [iw][iy] == valExpected );
 
-        // assert(( wxyz[[all , ix  , all , iz  ]][[iw  , iy  ]] == valExpected ));
+        // assert(( wxyz[all , ix  , all , iz  ][iw  , iy  ] == valExpected ));
     }
     {
@@ -202,5 +202,5 @@
         assert( wxyz[all][all][iy][iz] [iw][ix] == valExpected );
 
-        // assert(( wxyz[[all , all , iy  , iz  ]][[iw  , ix  ]] == valExpected ));
+        // assert(( wxyz[all , all , iy  , iz  ][iw  , ix  ] == valExpected ));
     }
     {
@@ -208,9 +208,9 @@
         assert( wxyz[all][ix][all][all] [iw][all][iz] [iy] == valExpected );
 
-        typeof( wxyz[all][ix][all][all] ) wyz_workaround = wxyz[[all , ix , all  , all  ]];
-        typeof( wyz_workaround[iw][all][iz] ) y_workaround = wyz_workaround[[iw , all , iz  ]];
+        typeof( wxyz[all][ix][all][all] ) wyz_workaround = wxyz[all , ix , all  , all  ];
+        typeof( wyz_workaround[iw][all][iz] ) y_workaround = wyz_workaround[iw , all , iz  ];
         assert( y_workaround[iy] == valExpected );
 
-        // assert(( wxyz[[all , ix , all  , all  ]][[iw  , all , iz  ]][iy  ] == valExpected ));
+        // assert(( wxyz[all , ix , all  , all  ][iw  , all , iz  ][iy  ] == valExpected ));
     }
     {
@@ -239,33 +239,33 @@
     valExpected = getMagicNumber(2, 3, 4, 5);
     assert(( wxyz [2] [3] [4] [5]  == valExpected ));
-    assert(( wxyz[[2,  3]][4] [5]  == valExpected ));
-    assert(( wxyz [2][[3,  4]][5]  == valExpected ));
-    assert(( wxyz [2] [3][[4,  5]] == valExpected ));
-    assert(( wxyz[[2,  3,  4]][5]  == valExpected ));
-    assert(( wxyz [2][[3,  4,  5]] == valExpected ));
-    assert(( wxyz[[2,  3,  4,  5]] == valExpected ));
-
-    for ( i; z(Nw) ) {
-        assert(( wxyz[[ i, 3, 4, 5 ]] == getMagicNumber(i, 3, 4, 5) ));
-    }
-
-    for ( i; z(Nx) ) {
-        assert(( wxyz[[ 2, i, 4, 5 ]] == getMagicNumber(2, i, 4, 5) ));
-    }
-
-    for ( i; z(Ny) ) {
-        assert(( wxyz[[ 2, 3, i, 5 ]] == getMagicNumber(2, 3, i, 5) ));
-    }
-
-    for ( i; z(Nz) ) {
-        assert(( wxyz[[ 2, 3, 4, i ]] == getMagicNumber(2, 3, 4, i) ));
-    }
-
-    for ( i; z(Nw) ) {
-        assert(( wxyz[[ i, all, 4, 5 ]][3] == getMagicNumber(i, 3, 4, 5) ));
-    }
-
-    for ( i; z(Nw) ) {
-        assert(( wxyz[[ all, 3, 4, 5 ]][i] == getMagicNumber(i, 3, 4, 5) ));
+    assert(( wxyz[2,  3][4] [5]  == valExpected ));
+    assert(( wxyz [2][3,  4][5]  == valExpected ));
+    assert(( wxyz [2] [3][4,  5] == valExpected ));
+    assert(( wxyz[2,  3,  4][5]  == valExpected ));
+    assert(( wxyz [2][3,  4,  5] == valExpected ));
+    assert(( wxyz[2,  3,  4,  5] == valExpected ));
+
+    for ( i; Nw ) {
+        assert(( wxyz[ i, 3, 4, 5 ] == getMagicNumber(i, 3, 4, 5) ));
+    }
+
+    for ( i; Nx ) {
+        assert(( wxyz[ 2, i, 4, 5 ] == getMagicNumber(2, i, 4, 5) ));
+    }
+
+    for ( i; Ny ) {
+        assert(( wxyz[ 2, 3, i, 5 ] == getMagicNumber(2, 3, i, 5) ));
+    }
+
+    for ( i; Nz ) {
+        assert(( wxyz[ 2, 3, 4, i ] == getMagicNumber(2, 3, 4, i) ));
+    }
+
+    for ( i; Nw ) {
+        assert(( wxyz[ i, all, 4, 5 ][3] == getMagicNumber(i, 3, 4, 5) ));
+    }
+
+    for ( i; Nw ) {
+        assert(( wxyz[ all, 3, 4, 5 ][i] == getMagicNumber(i, 3, 4, 5) ));
     }
 }
Index: tests/array-container/language-dim-mismatch.cfa
===================================================================
--- tests/array-container/language-dim-mismatch.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/array-container/language-dim-mismatch.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,15 @@
+forall( [N] )
+struct SN {};
+
+forall( T )
+struct ST {};
+
+int main() {
+
+    SN(42) good1;
+    ST(float) good2;
+
+    SN(float) bad1;  // first  expected error: Type argument given for value parameter
+    ST(42) bad2;     // second expected error: Expression argument given for type parameter
+
+}
Index: tests/collections/.expect/vector-demo.txt
===================================================================
--- tests/collections/.expect/vector-demo.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/collections/.expect/vector-demo.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,21 @@
+raiiTests=================
+Having pushed, length is 1
+by transit, got pos0 = 1.000000
+by exit, got pos0 = 1.000000
+Having pushed, length is 2
+helperE sees 2.000000
+producerE gave 1.000000
+producerE again gave 1.000000
+helperT sees 2.000000
+producerT gave 1.000000
+by permit, got pos0 = 1.000000
+Having pushed, length is 3
+into permit from call, got ofH = 1.000000
+stayValidTests============
+before 0.100000
+after, logical: 0.100000
+after, physical: -0.100000
+loopTests=================
+loop sees 0.000000
+loop sees 0.100000
+loop sees 0.200000
Index: tests/collections/.expect/vector-err-pass-perm-it-byval.txt
===================================================================
--- tests/collections/.expect/vector-err-pass-perm-it-byval.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/collections/.expect/vector-err-pass-perm-it-byval.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,270 @@
+error: Unique best alternative includes deleted identifier in Generated Cast of:
+  Application of
+    Deleted Expression
+      Variable Expression: ?{}: static inline forall
+        instance of type T (not function type)
+        with assertions
+        Variable Expression: ?=?: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+          instance of type T (not function type)
+        ... returning
+          instance of type T (not function type)
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning
+            instance of type T (not function type)
+
+        Variable Expression: ?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+        Variable Expression: ?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+          instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning nothing
+
+        Variable Expression: ^?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+        function
+      ... with parameters
+        reference to instance of struct vector_permit with body
+        ... with parameters
+          instance of type T (not function type)
+
+        reference to instance of struct vector_permit with body
+        ... with parameters
+          instance of type T (not function type)
+
+      ... returning nothing
+
+      ... with resolved type:
+        forall
+          instance of type T (not function type)
+          with assertions
+          Variable Expression: ?=?: pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning
+            instance of type T (not function type)
+
+          ... with resolved type:
+            pointer to function
+            ... with parameters
+              reference to instance of type T (not function type)
+              instance of type T (not function type)
+            ... returning
+              instance of type T (not function type)
+
+          Variable Expression: ?{}: pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+          ... with resolved type:
+            pointer to function
+            ... with parameters
+              reference to instance of type T (not function type)
+            ... returning nothing
+
+          Variable Expression: ?{}: pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning nothing
+
+          ... with resolved type:
+            pointer to function
+            ... with parameters
+              reference to instance of type T (not function type)
+              instance of type T (not function type)
+            ... returning nothing
+
+          Variable Expression: ^?{}: pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+          ... with resolved type:
+            pointer to function
+            ... with parameters
+              reference to instance of type T (not function type)
+            ... returning nothing
+
+          function
+        ... with parameters
+          reference to instance of struct vector_permit with body
+          ... with parameters
+            instance of type T (not function type)
+
+          reference to instance of struct vector_permit with body
+          ... with parameters
+            instance of type T (not function type)
+
+        ... returning nothing
+
+      ... deleted by: ?{}: static inline forall
+        instance of type T (not function type)
+        with assertions
+        Variable Expression: ?=?: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+          instance of type T (not function type)
+        ... returning
+          instance of type T (not function type)
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning
+            instance of type T (not function type)
+
+        Variable Expression: ?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+        Variable Expression: ?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+          instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning nothing
+
+        Variable Expression: ^?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+        function
+      ... with parameters
+        reference to instance of struct vector_permit with body
+        ... with parameters
+          instance of type T (not function type)
+
+        reference to instance of struct vector_permit with body
+        ... with parameters
+          instance of type T (not function type)
+
+      ... returning nothing
+
+    ... to arguments
+    Generated Cast of:
+      Variable Expression: __tmp: instance of struct vector_permit with body
+      ... with parameters
+        float
+
+      ... with resolved type:
+        instance of struct vector_permit with body
+        ... with parameters
+          float
+
+    ... to:
+      reference to instance of struct vector_permit with body
+      ... with parameters
+        float
+
+    ... with resolved type:
+      reference to instance of struct vector_permit with body
+      ... with parameters
+        float
+
+    Generated Cast of:
+      Variable Expression: it: instance of struct vector_permit with body
+      ... with parameters
+        float
+
+      ... with resolved type:
+        instance of struct vector_permit with body
+        ... with parameters
+          float
+
+    ... to:
+      reference to instance of struct vector_permit with body
+      ... with parameters
+        float
+
+    ... with resolved type:
+      reference to instance of struct vector_permit with body
+      ... with parameters
+        float
+
+  with inferred parameters 0:
+    ?=?: function
+    ... with parameters
+      reference to float
+      float
+    ... returning
+      float
+
+    ?{}: function
+    ... with parameters
+      reference to float
+    ... returning nothing
+
+    ?{}: function
+    ... with parameters
+      reference to float
+      float
+    ... returning nothing
+
+    ^?{}: function
+    ... with parameters
+      reference to float
+    ... returning nothing
+
+
+  ... with resolved type:
+    void
+... to: nothing
+... with resolved type:
+  void
Index: tests/collections/.expect/vector-err-retn-perm-it-byval.txt
===================================================================
--- tests/collections/.expect/vector-err-retn-perm-it-byval.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/collections/.expect/vector-err-retn-perm-it-byval.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,314 @@
+collections/vector-demo.cfa:105:1 error: Unique best alternative includes deleted identifier in Generated Cast of:
+  Application of
+    Deleted Expression
+      Variable Expression: ?{}: static inline forall
+        instance of type T (not function type)
+        with assertions
+        Variable Expression: ?=?: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+          instance of type T (not function type)
+        ... returning
+          instance of type T (not function type)
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning
+            instance of type T (not function type)
+
+        Variable Expression: ?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+        Variable Expression: ?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+          instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning nothing
+
+        Variable Expression: ^?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+        function
+      ... with parameters
+        reference to instance of struct vector_permit with body
+        ... with parameters
+          instance of type T (not function type)
+
+        reference to instance of struct vector_permit with body
+        ... with parameters
+          instance of type T (not function type)
+
+      ... returning nothing
+
+      ... with resolved type:
+        forall
+          instance of type T (not function type)
+          with assertions
+          Variable Expression: ?=?: pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning
+            instance of type T (not function type)
+
+          ... with resolved type:
+            pointer to function
+            ... with parameters
+              reference to instance of type T (not function type)
+              instance of type T (not function type)
+            ... returning
+              instance of type T (not function type)
+
+          Variable Expression: ?{}: pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+          ... with resolved type:
+            pointer to function
+            ... with parameters
+              reference to instance of type T (not function type)
+            ... returning nothing
+
+          Variable Expression: ?{}: pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning nothing
+
+          ... with resolved type:
+            pointer to function
+            ... with parameters
+              reference to instance of type T (not function type)
+              instance of type T (not function type)
+            ... returning nothing
+
+          Variable Expression: ^?{}: pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+          ... with resolved type:
+            pointer to function
+            ... with parameters
+              reference to instance of type T (not function type)
+            ... returning nothing
+
+          function
+        ... with parameters
+          reference to instance of struct vector_permit with body
+          ... with parameters
+            instance of type T (not function type)
+
+          reference to instance of struct vector_permit with body
+          ... with parameters
+            instance of type T (not function type)
+
+        ... returning nothing
+
+      ... deleted by: ?{}: static inline forall
+        instance of type T (not function type)
+        with assertions
+        Variable Expression: ?=?: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+          instance of type T (not function type)
+        ... returning
+          instance of type T (not function type)
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning
+            instance of type T (not function type)
+
+        Variable Expression: ?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+        Variable Expression: ?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+          instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+            instance of type T (not function type)
+          ... returning nothing
+
+        Variable Expression: ^?{}: pointer to function
+        ... with parameters
+          reference to instance of type T (not function type)
+        ... returning nothing
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of type T (not function type)
+          ... returning nothing
+
+        function
+      ... with parameters
+        reference to instance of struct vector_permit with body
+        ... with parameters
+          instance of type T (not function type)
+
+        reference to instance of struct vector_permit with body
+        ... with parameters
+          instance of type T (not function type)
+
+      ... returning nothing
+
+    ... to arguments
+    Generated Cast of:
+      Variable Expression: ofG: instance of struct vector_permit with body
+      ... with parameters
+        float
+
+      ... with resolved type:
+        instance of struct vector_permit with body
+        ... with parameters
+          float
+
+    ... to:
+      reference to instance of struct vector_permit with body
+      ... with parameters
+        float
+
+    ... with resolved type:
+      reference to instance of struct vector_permit with body
+      ... with parameters
+        float
+
+    Generated Cast of:
+      Application of
+        Variable Expression: g: function
+        ... with parameters
+          reference to instance of struct vector with body
+          ... with parameters
+            float
+
+        ... returning
+          instance of struct vector_permit with body
+          ... with parameters
+            float
+
+
+        ... with resolved type:
+          pointer to function
+          ... with parameters
+            reference to instance of struct vector with body
+            ... with parameters
+              float
+
+          ... returning
+            instance of struct vector_permit with body
+            ... with parameters
+              float
+
+
+        ... to arguments
+        Generated Cast of:
+          Variable Expression: v: instance of struct vector with body
+          ... with parameters
+            float
+
+          ... with resolved type:
+            instance of struct vector with body
+            ... with parameters
+              float
+
+        ... to:
+          reference to instance of struct vector with body
+          ... with parameters
+            float
+
+        ... with resolved type:
+          reference to instance of struct vector with body
+          ... with parameters
+            float
+
+
+      ... with resolved type:
+        instance of struct vector_permit with body
+        ... with parameters
+          float
+
+    ... to:
+      reference to instance of struct vector_permit with body
+      ... with parameters
+        float
+
+    ... with resolved type:
+      reference to instance of struct vector_permit with body
+      ... with parameters
+        float
+
+  with inferred parameters 0:
+    ?=?: function
+    ... with parameters
+      reference to float
+      float
+    ... returning
+      float
+
+    ?{}: function
+    ... with parameters
+      reference to float
+    ... returning nothing
+
+    ?{}: function
+    ... with parameters
+      reference to float
+      float
+    ... returning nothing
+
+    ^?{}: function
+    ... with parameters
+      reference to float
+    ... returning nothing
+
+
+  ... with resolved type:
+    void
+... to: nothing
+... with resolved type:
+  void
Index: tests/collections/vector-demo.cfa
===================================================================
--- tests/collections/vector-demo.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/collections/vector-demo.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,169 @@
+#include <containers/vector2.hfa>
+
+void raiiTests() {
+    printf("raiiTests=================\n");
+
+    vector( float ) v = { 25 };
+
+    push_last( v, 1 );
+    printf( "Having pushed, length is %ld\n", v`length );
+
+    float y = v[0]`val;
+    printf( "by transit, got pos0 = %f\n", y );
+
+    {
+        vector_exit(float) it = v[0];
+
+        float z = it`val;
+        printf( "by exit, got pos0 = %f\n", z );
+
+        // forbid modification while locked
+      #ifdef TRY_MOD_WHILE_LOCKED_1
+        push_last( v, 1 );   // runtime assertion failure
+        printf( "Having pushed, length is %ld\n", v`length );
+      #endif
+    }
+
+    push_last( v, 2 );
+    printf( "Having pushed, length is %ld\n", v`length );
+
+    // deletion scope for some exits
+    // point is the function decls and calls, though
+    {
+        void helperE(vector_exit(float) it) {
+            float q = it`val;
+            printf( "helperE sees %f\n", q );
+        }
+
+        vector_exit(float) it2 = v[1];
+        helperE(it2);
+
+        // can't call with implied exit (wish I could, low priority)
+      #ifdef TRY_IMPLIED_EXIT_1
+        helperE( v[1] ); // Invalid application of existing declaration(s)
+      #endif
+
+        //------
+
+        vector_exit(float) producerE( vector( float ) & theVec ) {
+          return theVec[0];
+        }
+
+        vector_exit(float) it3 = producerE( v );
+        float z = it3`val;
+        printf( "producerE gave %f\n", z );
+
+        float zzzz = producerE( v )`val;
+        printf( "producerE again gave %f\n", zzzz );
+
+        //------
+
+        void helperT(vector_transit(float) it) {
+            float q = it`val;
+            printf( "helperT sees %f\n", q );
+        }
+
+        helperT( v[1] );
+
+        //------
+
+        vector_transit(float) producerT( vector( float ) & theVec ) {
+            return theVec[0];
+        }
+
+        float zz = producerT( v )`val;
+        printf( "producerT gave %f\n", zz );
+    }
+
+    //=======
+
+    {
+        vector_permit(float) it = v[0];
+
+        float z = it`val;
+        printf( "by permit, got pos0 = %f\n", z );
+
+        // allow modification while permits exist
+        push_last( v, 1 );
+        printf( "Having pushed, length is %ld\n", v`length );
+
+        // forbid passing permit by value
+      #ifdef TRY_PASS_PERMIT_BYVAL_1
+        void f( vector_permit(float) xx ) {
+            printf("can't get here\n");
+        }
+        f( it ); // Unique best alternative includes deleted identifier
+      #endif
+
+        // can declare function that returns permit (wish to forbid)
+        vector_permit(float) g( vector( float ) & theVec ) {
+            return theVec[0]; // want to forbid
+        }
+
+        // forbid calling a function that returns permit by value
+      #ifdef TRY_RETURN_PERMIT_BYVAL_1
+        vector_permit(float) ofG = g( v ); // Unique best alternative includes deleted identifier
+      #endif
+        
+        // allow declaration of permit, populating from exit
+        vector_exit(float) h( vector( float ) & theVec ) {
+            return theVec[0];
+        }
+
+        vector_permit(float) ofH = h( v );
+        float zh = ofH`val;
+        printf( "into permit from call, got ofH = %f\n", zh );
+    }
+}
+
+void stayValidTests() {  // reads one vector_permit before an insert, after it, and after a capacity-growing push
+    printf("stayValidTests============\n");
+    vector( float ) v = { 4 };
+    push_last(v, 0.0f);
+    push_last(v, 0.1f);
+    push_last(v, 0.2f);
+    // v is [0.0, 0.1, 0.2]
+
+    vector_permit(float) iter = v[1];  // iter at 0.1
+
+    float val1 = iter`val;
+    printf("before %f\n", val1);
+
+    insert_before( v, 1, 98.6f );  // v is [0.0, 98.6, 0.1, 0.2]; iter at 0.1
+
+    float val2 = iter`val;
+    printf("after, logical: %f\n", val2);
+
+    // the vector is now filled to the brim: length equals capacity
+    assert( v`capacity == 4 && v`length == 4 );
+
+    push_last(v, -100); // v is [0.0, 98.6, 0.1, 0.2, -100]; iter at 0.1, but RTP it's not looking at the old memory's 0.1
+
+    // the push past capacity made the storage bigger
+    assert( v`capacity >  5 && v`length == 5 );
+
+    v[2] = -0.1;  // v is [0.0, 98.6, -0.1, 0.2, -100]; iter at -0.1, where only the new memory had that change
+
+    float val3 = iter`val;
+    printf("after, physical: %f\n", val3);
+}
+
+void loopTests() {  // walks the vector with a vector_exit iterator, printing on each step
+    printf("loopTests=================\n");
+    vector(float) v = { 4 };
+    push_last(v, 0.0f);
+    push_last(v, 0.1f);
+    push_last(v, 0.2f);
+    float toPrint;  // NOTE(review): never initialized before the '*=' below
+
+    while( vector_exit(float) it = v`origin; it`moveNext ) {  // start at v`origin; it`moveNext is the loop condition
+        toPrint *= it;  // NOTE(review): only sound if ?*=? on a vector_exit assigns the current element instead of multiplying -- confirm in vector2.hfa
+        printf("loop sees %f\n", toPrint);
+    }
+}
+
+int main() {
+    raiiTests();
+    stayValidTests();
+    loopTests();
+}
Index: tests/collections/vector-err-mod-with-excl-it.cfa
===================================================================
--- tests/collections/vector-err-mod-with-excl-it.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/collections/vector-err-mod-with-excl-it.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,8 @@
+/*
+Temporarily: no associated .expect file, not runnable with test.py.
+For human acceptance: Should compile, but crash with an assertion failure when run.
+Detail of this error reporting is work in progress.
+*/
+
+#define TRY_MOD_WHILE_LOCKED_1
+#include "vector-demo.cfa"
Index: tests/collections/vector-err-pass-perm-it-byval.cfa
===================================================================
--- tests/collections/vector-err-pass-perm-it-byval.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/collections/vector-err-pass-perm-it-byval.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,2 @@
+#define TRY_PASS_PERMIT_BYVAL_1
+#include "vector-demo.cfa"
Index: tests/collections/vector-err-retn-perm-it-byval.cfa
===================================================================
--- tests/collections/vector-err-retn-perm-it-byval.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/collections/vector-err-retn-perm-it-byval.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,2 @@
+#define TRY_RETURN_PERMIT_BYVAL_1
+#include "vector-demo.cfa"
Index: tests/concurrent/signal/disjoint.cfa
===================================================================
--- tests/concurrent/signal/disjoint.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/concurrent/signal/disjoint.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -77,5 +77,5 @@
 	wait( cond );
 	if( d.state != SIGNAL ) {
-		sout | "ERROR barging!";
+		abort | "ERROR barging!";
 	}
 
@@ -113,5 +113,5 @@
 	bool running = TEST(globals.data.counter < N) && globals.data.counter > 0;
 	if( globals.data.state != SIGNAL && running ) {
-		sout | "ERROR Eager signal" | globals.data.state;
+		abort | "ERROR Eager signal" | globals.data.state;
 	}
 }
Index: tests/coroutine/fibonacci.cfa
===================================================================
--- tests/coroutine/fibonacci.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/coroutine/fibonacci.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -31,13 +31,8 @@
 }
 
-int next( Fibonacci & fib ) with( fib ) {
-	resume( fib );										// restart last suspend
-	return fn;
-}
-
 int main() {
 	Fibonacci f1, f2;
 	for ( 10 ) {										// print N Fibonacci values
-		sout | next( f1 ) | next( f2 );
+		sout | resume( f1 ).fn | resume( f2 ).fn;
 	} // for
 }
Index: tests/device/.expect/cpu.txt
===================================================================
--- tests/device/.expect/cpu.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/device/.expect/cpu.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,1 @@
+Match!
Index: tests/device/cpu.cfa
===================================================================
--- tests/device/cpu.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/device/cpu.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,198 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// cpu.cfa -- checks that libcfa correctly counts the number of cpus.
+//
+// Author           : Thierry Delisle
+// Created On       : Mon Jun 14 13:59:01 2021
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+
+#include <fstream.hfa>
+#include <device/cpu.hfa>
+#include <stdlib.hfa>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+extern "C" {
+	#include <dirent.h>
+	#include <sys/types.h>
+	#include <sys/stat.h>
+	#include <sys/sysinfo.h>
+	#include <fcntl.h>
+}
+
+// Call fn once for every entry readdir returns for directory 'path' (no filtering); returns 0, or ENOTDIR if opendir fails.
+static int iterate_dir( const char * path, void (*fn)(struct dirent * ent) ) {
+	// open the directory; any opendir failure is reported as ENOTDIR
+	DIR *dir = opendir(path);
+	if(dir == 0p) { return ENOTDIR; }
+
+	// call fn for each entry until readdir returns null
+	struct dirent * ent;
+	while ((ent = readdir(dir)) != 0p) {
+		fn( ent );
+	}
+
+	// release the directory stream
+	closedir(dir);
+	return 0;
+}
+
+// count the number of directories with the specified prefix
+// the directories counted have the form '[prefix]N' where prefix is the parameter
+// and N is a non-negative base-10 integer; returns that count.
+static int count_prefix_dirs(const char * path, const char * prefix) {
+	// scan the directory, tracking how many entries match and the largest N seen,
+	// then check the two agree (matches are expected to be numbered 0..max)
+	int max = -1;
+	int count = 0;
+	void lambda(struct dirent * ent) {
+		// we are looking for prefixN, where N is a number
+		// check that the name starts with the prefix
+		char * s = strstr(ent->d_name, prefix);
+		if(s == 0p) { return; }
+		if(s != ent->d_name) { return; }
+
+		// check that the rest of the name is a number; end == s means no digits followed the prefix
+		s += strlen(prefix);
+		char * end;
+		long int val = strtol(s, &end, 10);
+		if(end == s || *end != '\0' || val < 0) { return; }
+
+		// check that it's a directory
+		if(ent->d_type != DT_DIR) { return; }
+
+		// it's a match!
+		max = max(val, max);
+		count++;
+	}
+	iterate_dir(path, lambda);
+
+	/* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);
+
+	return count;
+}
+
+// Count number of cache *indexes* in the system
+// cache indexes are distinct from cache level as Data or Instruction cache
+// can share a level but not an index
+// PITFALL: assumes all cpus have the same indexes as cpu0
+static int count_cache_indexes(void) {
+	return count_prefix_dirs("/sys/devices/system/cpu/cpu0/cache", "index");
+}
+
+// Read /sys/devices/system/cpu/cpu<cpu>/cache/index<idx>/<file> into out (at most out_len bytes); the last byte read is replaced by '\0' and the resulting string length is returned.
+static size_t read_cpuidxinfo_into(unsigned cpu, unsigned idx, const char * file, char * out, size_t out_len) {
+	// Build the path of the file we want; snprintf signals truncation by returning a length >= the buffer size
+	char buf[128];
+	/* paranoid */ __attribute__((unused)) int len =
+	snprintf(buf, 128, "/sys/devices/system/cpu/cpu%u/cache/index%u/%s", cpu, idx, file);
+	/* paranoid */ verifyf(len > 0 && len < 128, "Could not generate '%s' filename for cpu %u, index %u", file, cpu, idx);
+
+	int fd = open(buf, O_RDONLY);	// flags are open's 2nd argument; no mode is needed without O_CREAT
+	/* paranoid */ verifyf(fd >= 0, "Could not open file '%s'", buf);	// 0 is a valid descriptor, failure is -1
+
+	ssize_t r = read(fd, out, out_len);
+	/* paranoid */ verifyf(r > 0, "Could not read file '%s'", buf);
+
+	/* paranoid */ __attribute__((unused)) int ret =
+	close(fd);
+	/* paranoid */ verifyf(ret == 0, "Could not close file '%s'", buf);
+
+	out[r-1] = '\0';	// overwrite the last byte read -- presumably the trailing newline of the sysfs file
+	return r-1;
+}
+
+unsigned find_idx() {	// returns the cpu0 cache index whose "level" file holds the largest value
+	int idxs = count_cache_indexes();
+
+	unsigned found_level = 0;
+	unsigned found = -1u;	// sentinel: no index selected yet
+	for(i; idxs) {
+		unsigned idx = idxs - 1 - i;	// visit the indexes from the highest down to 0
+		char buf[32];
+
+		// Level is the cache level: higher means bigger and slower
+		read_cpuidxinfo_into(0, idx, "level", buf, 32);
+		char * end;
+		unsigned long level = strtoul(buf, &end, 10);
+		/* paranoid */ verifyf(level <= 250, "Cpu %u has more than 250 levels of cache, that doesn't sound right", 0);
+		/* paranoid */ verify(*end == '\0');
+
+		if(found_level < level) {	// strict '<': among equal levels the first one visited (the highest index) is kept
+			found_level = level;
+			found = idx;
+		}
+	}
+
+	/* paranoid */ verify(found != -1u);
+	return found;
+}
+
+int main() {	// prints "Match!" when the counts agree; every other line printed reports an inconsistency
+	//-----------------------------------------------------------------------
+	int ret1 = get_nprocs();	// processor count according to the OS
+	int ret2 = cpu_info.hthrd_count;	// hardware-thread count according to libcfa
+	if(ret1 != ret2) {
+		sout | "No match! libcfa says" | ret2 | "hardware threads but linux counts" | ret1 | "processors";
+	}
+	else {
+		sout | "Match!";
+	}
+
+	//-----------------------------------------------------------------------
+	// Make sure no one has the same self
+	for(ime; cpu_info.hthrd_count) {
+		unsigned me = cpu_info.llc_map[ime].self;
+		{
+			// 'self' must fall inside the half-open range [start, start + count) of its own map entry
+			unsigned s = cpu_info.llc_map[ime].start;
+			unsigned e = s + cpu_info.llc_map[ime].count;
+			if(me < s || me >= e) {
+				sout | "CPU" | ime | "outside of it's own map: " | s | "<=" | me | "<" | e;
+			}
+		}
+
+
+		for(ithem; cpu_info.hthrd_count) {
+			if(ime == ithem) continue;
+
+			unsigned them = cpu_info.llc_map[ithem].self;
+			if(me == them) {
+				sout | "CPU" | ime | "has conflicting self id with" | ithem | "(" | me | ")";
+			}
+		}
+	}
+
+
+	//-----------------------------------------------------------------------
+	unsigned idx = find_idx();
+	// For all procs check mapping is consistent: two cpus must have equal llc_map <start,count> exactly when their shared_cpu_list files are equal
+	for(cpu_me; cpu_info.hthrd_count) {
+		char buf_me[32];
+		size_t len_me = read_cpuidxinfo_into(cpu_me, idx, "shared_cpu_list", buf_me, 32);
+		for(cpu_them; cpu_info.hthrd_count) {
+			if(cpu_me == cpu_them) continue;
+			char buf_them[32];
+			size_t len_them = read_cpuidxinfo_into(cpu_them, idx, "shared_cpu_list", buf_them, 32);
+
+			bool match_file = len_them == len_me && 0 == strncmp(buf_them, buf_me, len_me);
+			bool match_info = cpu_info.llc_map[cpu_me].start == cpu_info.llc_map[cpu_them].start && cpu_info.llc_map[cpu_me].count == cpu_info.llc_map[cpu_them].count;
+
+			if(match_file != match_info) {
+				sout | "CPU" | cpu_me | "and" | cpu_them | "have inconsitent file and cpu_info";
+				sout | cpu_me | ": <" | cpu_info.llc_map[cpu_me  ].start | "," | cpu_info.llc_map[cpu_me  ].count | "> '" | buf_me   | "'";
+				sout | cpu_them | ": <" | cpu_info.llc_map[cpu_them].start | "," | cpu_info.llc_map[cpu_them].count | "> '" | buf_them | "'";	// label with cpu_them: this line shows its map and buffer
+			}
+		}
+	}
+}
Index: tests/forall.cfa
===================================================================
--- tests/forall.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/forall.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -199,4 +199,30 @@
 }
 
+forall( T ) void check_otype() {
+	T & tr = *0p;
+	T * tp = 0p;
+
+	&tr += 1;
+	tp += 1;
+	T & tx = tp[1];
+
+	T t;
+	T t2 = t;
+}
+
+forall( T * ) void check_dstype() {
+	T & tr = *0p;
+	T * tp = 0p;
+
+	&tr += 1;
+	tp += 1;
+	T & tx = tp[1];
+}
+
+forall( T & ) void check_dtype() {
+	T & tr = *0p;
+	T * tp = 0p;
+}
+
 //otype T1 | { void xxx( T1 ); };
 
Index: tests/generator/fibonacci.cfa
===================================================================
--- tests/generator/fibonacci.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/generator/fibonacci.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -8,9 +8,11 @@
 //
 // Author           : Thierry Delisle
-// Created On       : Mon Mar  1 16:54:23 2020
-// Last Modified By :
-// Last Modified On :
-// Update Count     :
+// Created On       : Mon Mar 1 16:54:23 2020
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Thu Jun 10 21:54:14 2021
+// Update Count     : 3
 //
+
+#include <fstream.hfa>
 
 generator Fib {
@@ -18,5 +20,5 @@
 };
 
-void main(Fib & b) with (b) {
+void main(Fib & fib) with (fib) {
 	[fn1, fn] = [1, 0];
 	for () {
@@ -29,9 +31,8 @@
 	Fib f1, f2;
 	for ( 10 ) {
-		resume( f1 );
-		resume( f2 );
-		printf("%d %d\n", f1.fn, f2.fn);
+		resume( f1 ); resume( f2 );
+		sout | f1.fn | f2.fn;
+		// sout | resume( f1 ).fn | resume( f2 ).fn; // compiler bug
 	}
-
 }
 
Index: tests/generator/fmtLines.cfa
===================================================================
--- tests/generator/fmtLines.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/generator/fmtLines.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -9,7 +9,7 @@
 // Author           : Thierry Delisle
 // Created On       : Thu Mar  5 16:09:08 2020
-// Last Modified By :
-// Last Modified On :
-// Update Count     :
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Thu Jun 10 21:56:22 2021
+// Update Count     : 2
 //
 
Index: tests/generator/suspend_then.cfa
===================================================================
--- tests/generator/suspend_then.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/generator/suspend_then.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -9,7 +9,7 @@
 // Author           : Peter A. Buhr
 // Created On       : Mon Apr 29 12:01:35 2019
-// Last Modified By :
-// Last Modified On :
-// Update Count     :
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Thu Jun 10 21:55:51 2021
+// Update Count     : 1
 //
 
Index: tests/literals.cfa
===================================================================
--- tests/literals.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/literals.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -10,6 +10,6 @@
 // Created On       : Sat Sep  9 16:34:38 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Aug 29 10:57:56 2020
-// Update Count     : 226
+// Last Modified On : Sat Jun 19 15:47:49 2021
+// Update Count     : 237
 //
 
@@ -63,53 +63,105 @@
 	-0X0123456789ABCDEF;  -0X0123456789ABCDEFu;  -0X0123456789ABCDEFl;  -0X0123456789ABCDEFll;  -0X0123456789ABCDEFul;  -0X0123456789ABCDEFlu;  -0X0123456789ABCDEFull;  -0X0123456789ABCDEFllu;
 
+// floating literals
+
+	 0123456789.;   0123456789.f;   0123456789.d;   0123456789.l;   0123456789.F;   0123456789.D;   0123456789.L;
+	+0123456789.;  +0123456789.f;  +0123456789.d;  +0123456789.l;  +0123456789.F;  +0123456789.D;  +0123456789.L;
+	-0123456789.;  -0123456789.f;  -0123456789.d;  -0123456789.l;  -0123456789.F;  -0123456789.D;  -0123456789.L;
+
+	 0123456789.e09;   0123456789.e09f;   0123456789.e09d;   0123456789.e09l;   0123456789.e09F;   0123456789.e09D;   0123456789.e09L;
+	+0123456789.e09;  +0123456789.e09f;  +0123456789.e09d;  +0123456789.e09l;  +0123456789.e09F;  +0123456789.e09D;  +0123456789.e09L;
+	-0123456789.e09;  -0123456789.e09f;  -0123456789.e09d;  -0123456789.e09l;  -0123456789.e09F;  -0123456789.e09D;  -0123456789.e09L;
+					                     
+	 0123456789.e+09;   0123456789.e+09f;   0123456789.e+09d;   0123456789.e+09l;   0123456789.e+09F;   0123456789.e+09D;   0123456789.e+09L;
+	+0123456789.e+09;  +0123456789.e+09f;  +0123456789.e+09d;  +0123456789.e+09l;  +0123456789.e+09F;  +0123456789.e+09D;  +0123456789.e+09L;
+	-0123456789.e+09;  -0123456789.e+09f;  -0123456789.e+09d;  -0123456789.e+09l;  -0123456789.e+09F;  -0123456789.e+09D;  -0123456789.e+09L;
+					                     
+	 0123456789.e-09;   0123456789.e-09f;   0123456789.e-09d;   0123456789.e-09l;   0123456789.e-09F;   0123456789.e-09D;   0123456789.e-09L;
+	+0123456789.e-09;  +0123456789.e-09f;  +0123456789.e-09d;  +0123456789.e-09l;  +0123456789.e-09F;  +0123456789.e-09D;  +0123456789.e-09L;
+	-0123456789.e-09;  -0123456789.e-09f;  -0123456789.e-09d;  -0123456789.e-09l;  -0123456789.e-09F;  -0123456789.e-09D;  -0123456789.e-09L;
+
+	 .0123456789;   .0123456789f;   .0123456789d;   .0123456789l;   .0123456789F;   .0123456789D;   .0123456789L;
+	+.0123456789;  +.0123456789f;  +.0123456789d;  +.0123456789l;  +.0123456789F;  +.0123456789D;  +.0123456789L;
+	-.0123456789;  -.0123456789f;  -.0123456789d;  -.0123456789l;  -.0123456789F;  -.0123456789D;  -.0123456789L;
+
+	 .0123456789e09;   .0123456789e09f;   .0123456789e09d;   .0123456789e09l;   .0123456789e09F;   .0123456789e09D;   .0123456789e09L;
+	+.0123456789e09;  +.0123456789e09f;  +.0123456789e09d;  +.0123456789e09l;  +.0123456789e09F;  +.0123456789e09D;  +.0123456789e09L;
+	-.0123456789e09;  -.0123456789e09f;  -.0123456789e09d;  -.0123456789e09l;  -.0123456789e09F;  -.0123456789e09D;  -.0123456789e09L;
+					                     
+	 .0123456789E+09;   .0123456789E+09f;   .0123456789E+09d;   .0123456789E+09l;   .0123456789E+09F;   .0123456789E+09D;   .0123456789E+09L;
+	+.0123456789E+09;  +.0123456789E+09f;  +.0123456789E+09d;  +.0123456789E+09l;  +.0123456789E+09F;  +.0123456789E+09D;  +.0123456789E+09L;
+	-.0123456789E+09;  -.0123456789E+09f;  -.0123456789E+09d;  -.0123456789E+09l;  -.0123456789E+09F;  -.0123456789E+09D;  -.0123456789E+09L;
+					                     
+	 .0123456789E-09;   .0123456789E-09f;   .0123456789E-09d;   .0123456789E-09l;   .0123456789E-09F;   .0123456789E-09D;   .0123456789E-09L;
+	+.0123456789E-09;  +.0123456789E-09f;  +.0123456789E-09d;  +.0123456789E-09l;  +.0123456789E-09F;  +.0123456789E-09D;  +.0123456789E-09L;
+	-.0123456789E-09;  -.0123456789E-09f;  -.0123456789E-09d;  -.0123456789E-09l;  -.0123456789E-09F;  -.0123456789E-09D;  -.0123456789E-09L;
+
+	 0123456789.0123456789;   0123456789.0123456789f;   0123456789.0123456789d;   0123456789.0123456789l;   0123456789.0123456789F;   0123456789.0123456789D;   0123456789.0123456789L;
+	+0123456789.0123456789;  +0123456789.0123456789f;  +0123456789.0123456789d;  +0123456789.0123456789l;  +0123456789.0123456789F;  +0123456789.0123456789D;  +0123456789.0123456789L;
+	-0123456789.0123456789;  -0123456789.0123456789f;  -0123456789.0123456789d;  -0123456789.0123456789l;  -0123456789.0123456789F;  -0123456789.0123456789D;  -0123456789.0123456789L;
+
+	 0123456789.0123456789E09;   0123456789.0123456789E09f;   0123456789.0123456789E09d;   0123456789.0123456789E09l;   0123456789.0123456789E09F;   0123456789.0123456789E09D;   0123456789.0123456789E09L;
+	+0123456789.0123456789E09;  +0123456789.0123456789E09f;  +0123456789.0123456789E09d;  +0123456789.0123456789E09l;  +0123456789.0123456789E09F;  +0123456789.0123456789E09D;  +0123456789.0123456789E09L;
+	-0123456789.0123456789E09;  -0123456789.0123456789E09f;  -0123456789.0123456789E09d;  -0123456789.0123456789E09l;  -0123456789.0123456789E09F;  -0123456789.0123456789E09D;  -0123456789.0123456789E09L;
+							                                 
+	 0123456789.0123456789E+09;   0123456789.0123456789E+09f;   0123456789.0123456789E+09d;   0123456789.0123456789E+09l;   0123456789.0123456789E+09F;   0123456789.0123456789E+09D;   0123456789.0123456789E+09L;
+	+0123456789.0123456789E+09;  +0123456789.0123456789E+09f;  +0123456789.0123456789E+09d;  +0123456789.0123456789E+09l;  +0123456789.0123456789E+09F;  +0123456789.0123456789E+09D;  +0123456789.0123456789E+09L;
+	-0123456789.0123456789E+09;  -0123456789.0123456789E+09f;  -0123456789.0123456789E+09d;  -0123456789.0123456789E+09l;  -0123456789.0123456789E+09F;  -0123456789.0123456789E+09D;  -0123456789.0123456789E+09L;
+							                                 
+	 0123456789.0123456789E-09;   0123456789.0123456789E-09f;   0123456789.0123456789E-09d;   0123456789.0123456789E-09l;   0123456789.0123456789E-09F;   0123456789.0123456789E-09D;   0123456789.0123456789E-09L;
+	+0123456789.0123456789E-09;  +0123456789.0123456789E-09f;  +0123456789.0123456789E-09d;  +0123456789.0123456789E-09l;  +0123456789.0123456789E-09F;  +0123456789.0123456789E-09D;  +0123456789.0123456789E-09L;
+	-0123456789.0123456789E-09;  -0123456789.0123456789E-09f;  -0123456789.0123456789E-09d;  -0123456789.0123456789E-09l;  -0123456789.0123456789E-09F;  -0123456789.0123456789E-09D;  -0123456789.0123456789E-09L;
+
 // decimal floating literals
 
-	 0123456789.;   0123456789.f;   0123456789.l;   0123456789.F;   0123456789.L;   0123456789.DL;
-	+0123456789.;  +0123456789.f;  +0123456789.l;  +0123456789.F;  +0123456789.L;  +0123456789.DL;
-	-0123456789.;  -0123456789.f;  -0123456789.l;  -0123456789.F;  -0123456789.L;  -0123456789.DL;
-
-	 0123456789.e09;   0123456789.e09f;   0123456789.e09l;   0123456789.e09F;   0123456789.e09L;   0123456789.e09DL;
-	+0123456789.e09;  +0123456789.e09f;  +0123456789.e09l;  +0123456789.e09F;  +0123456789.e09L;  +0123456789.e09DL;
-	-0123456789.e09;  -0123456789.e09f;  -0123456789.e09l;  -0123456789.e09F;  -0123456789.e09L;  -0123456789.e09DL;
-
-	 0123456789.e+09;   0123456789.e+09f;   0123456789.e+09l;   0123456789.e+09F;   0123456789.e+09L;   0123456789.e+09DL;
-	+0123456789.e+09;  +0123456789.e+09f;  +0123456789.e+09l;  +0123456789.e+09F;  +0123456789.e+09L;  +0123456789.e+09DL;
-	-0123456789.e+09;  -0123456789.e+09f;  -0123456789.e+09l;  -0123456789.e+09F;  -0123456789.e+09L;  -0123456789.e+09DL;
-
-	 0123456789.e-09;   0123456789.e-09f;   0123456789.e-09l;   0123456789.e-09F;   0123456789.e-09L;   0123456789.e-09DL;
-	+0123456789.e-09;  +0123456789.e-09f;  +0123456789.e-09l;  +0123456789.e-09F;  +0123456789.e-09L;  +0123456789.e-09DL;
-	-0123456789.e-09;  -0123456789.e-09f;  -0123456789.e-09l;  -0123456789.e-09F;  -0123456789.e-09L;  -0123456789.e-09DL;
-
-	 .0123456789;   .0123456789f;   .0123456789l;   .0123456789F;   .0123456789L;   .0123456789DL;
-	+.0123456789;  +.0123456789f;  +.0123456789l;  +.0123456789F;  +.0123456789L;  +.0123456789DL;
-	-.0123456789;  -.0123456789f;  -.0123456789l;  -.0123456789F;  -.0123456789L;  -.0123456789DL;
-
-	 .0123456789e09;   .0123456789e09f;   .0123456789e09l;   .0123456789e09F;   .0123456789e09L;   .0123456789e09DL;
-	+.0123456789e09;  +.0123456789e09f;  +.0123456789e09l;  +.0123456789e09F;  +.0123456789e09L;  +.0123456789e09DL;
-	-.0123456789e09;  -.0123456789e09f;  -.0123456789e09l;  -.0123456789e09F;  -.0123456789e09L;  -.0123456789e09DL;
-
-	 .0123456789E+09;   .0123456789E+09f;   .0123456789E+09l;   .0123456789E+09F;   .0123456789E+09L;   .0123456789E+09DL;
-	+.0123456789E+09;  +.0123456789E+09f;  +.0123456789E+09l;  +.0123456789E+09F;  +.0123456789E+09L;  +.0123456789E+09DL;
-	-.0123456789E+09;  -.0123456789E+09f;  -.0123456789E+09l;  -.0123456789E+09F;  -.0123456789E+09L;  -.0123456789E+09DL;
-
-	 .0123456789E-09;   .0123456789E-09f;   .0123456789E-09l;   .0123456789E-09F;   .0123456789E-09L;   .0123456789E-09DL;
-	-.0123456789E-09;  -.0123456789E-09f;  -.0123456789E-09l;  -.0123456789E-09F;  -.0123456789E-09L;  -.0123456789E-09DL;
-	-.0123456789E-09;  -.0123456789E-09f;  -.0123456789E-09l;  -.0123456789E-09F;  -.0123456789E-09L;  -.0123456789E-09DL;
-
-	 0123456789.0123456789;   0123456789.0123456789f;   0123456789.0123456789l;   0123456789.0123456789F;   0123456789.0123456789L;   0123456789.0123456789DL;
-	+0123456789.0123456789;  +0123456789.0123456789f;  +0123456789.0123456789l;  +0123456789.0123456789F;  +0123456789.0123456789L;  +0123456789.0123456789DL;
-	-0123456789.0123456789;  -0123456789.0123456789f;  -0123456789.0123456789l;  -0123456789.0123456789F;  -0123456789.0123456789L;  -0123456789.0123456789DL;
-
-	 0123456789.0123456789E09;   0123456789.0123456789E09f;   0123456789.0123456789E09l;   0123456789.0123456789E09F;   0123456789.0123456789E09L;   0123456789.0123456789E09DL;
-	+0123456789.0123456789E09;  +0123456789.0123456789E09f;  +0123456789.0123456789E09l;  +0123456789.0123456789E09F;  +0123456789.0123456789E09L;  +0123456789.0123456789E09DL;
-	-0123456789.0123456789E09;  -0123456789.0123456789E09f;  -0123456789.0123456789E09l;  -0123456789.0123456789E09F;  -0123456789.0123456789E09L;  -0123456789.0123456789E09DL;
-
-	 0123456789.0123456789E+09;   0123456789.0123456789E+09f;   0123456789.0123456789E+09l;   0123456789.0123456789E+09F;   0123456789.0123456789E+09L;   0123456789.0123456789E+09DL;
-	+0123456789.0123456789E+09;  +0123456789.0123456789E+09f;  +0123456789.0123456789E+09l;  +0123456789.0123456789E+09F;  +0123456789.0123456789E+09L;  +0123456789.0123456789E+09DL;
-	-0123456789.0123456789E+09;  -0123456789.0123456789E+09f;  -0123456789.0123456789E+09l;  -0123456789.0123456789E+09F;  -0123456789.0123456789E+09L;  -0123456789.0123456789E+09DL;
-
-	 0123456789.0123456789E-09;   0123456789.0123456789E-09f;   0123456789.0123456789E-09l;   0123456789.0123456789E-09F;   0123456789.0123456789E-09L;   0123456789.0123456789E-09DL;
-	+0123456789.0123456789E-09;  +0123456789.0123456789E-09f;  +0123456789.0123456789E-09l;  +0123456789.0123456789E-09F;  +0123456789.0123456789E-09L;  +0123456789.0123456789E-09DL;
-	-0123456789.0123456789E-09;  -0123456789.0123456789E-09f;  -0123456789.0123456789E-09l;  -0123456789.0123456789E-09F;  -0123456789.0123456789E-09L;  -0123456789.0123456789E-09DL;
+#if ! defined( __aarch64__ )							// unsupported on ARM after gcc-9
+	 0123456789.df;   0123456789.dd;   0123456789.dl;   0123456789.DF;   0123456789.DD;   0123456789.DL;
+	+0123456789.df;  +0123456789.dd;  +0123456789.dl;  +0123456789.DF;  +0123456789.DD;  +0123456789.DL;
+	-0123456789.df;  -0123456789.dd;  -0123456789.dl;  -0123456789.DF;  -0123456789.DD;  -0123456789.DL;
+
+	 0123456789.e09df;   0123456789.e09dd;   0123456789.e09dl;   0123456789.e09DF;   0123456789.e09DD;   0123456789.e09DL;
+	+0123456789.e09df;  +0123456789.e09dd;  +0123456789.e09dl;  +0123456789.e09DF;  +0123456789.e09DD;  +0123456789.e09DL;
+	-0123456789.e09df;  -0123456789.e09dd;  -0123456789.e09dl;  -0123456789.e09DF;  -0123456789.e09DD;  -0123456789.e09DL;
+						                     
+	 0123456789.e+09df;   0123456789.e+09dd;  0123456789.e+09dl;   0123456789.e+09DF;   0123456789.e+09DD;   0123456789.e+09DL;
+	+0123456789.e+09df;  +0123456789.e+09dd; +0123456789.e+09dl;  +0123456789.e+09DF;  +0123456789.e+09DD;  +0123456789.e+09DL;
+	-0123456789.e+09df;  -0123456789.e+09dd; -0123456789.e+09dl;  -0123456789.e+09DF;  -0123456789.e+09DD;  -0123456789.e+09DL;
+						                     
+	 0123456789.e-09df;   0123456789.e-09dd;  0123456789.e-09dl;   0123456789.e-09DF;   0123456789.e-09DD;   0123456789.e-09DL;
+	+0123456789.e-09df;  +0123456789.e-09dd; +0123456789.e-09dl;  +0123456789.e-09DF;  +0123456789.e-09DD;  +0123456789.e-09DL;
+	-0123456789.e-09df;  -0123456789.e-09dd; -0123456789.e-09dl;  -0123456789.e-09DF;  -0123456789.e-09DD;  -0123456789.e-09DL;
+
+	 .0123456789df;   .0123456789dd;   .0123456789dl;   .0123456789DF;   .0123456789DD;   .0123456789DL;
+	+.0123456789df;  +.0123456789dd;  +.0123456789dl;  +.0123456789DF;  +.0123456789DD;  +.0123456789DL;
+	-.0123456789df;  -.0123456789dd;  -.0123456789dl;  -.0123456789DF;  -.0123456789DD;  -.0123456789DL;
+
+	 .0123456789e09df;   .0123456789e09dd;   .0123456789e09dl;   .0123456789e09DF;   .0123456789e09DD;   .0123456789e09DL;
+	+.0123456789e09df;  +.0123456789e09dd;  +.0123456789e09dl;  +.0123456789e09DF;  +.0123456789e09DD;  +.0123456789e09DL;
+	-.0123456789e09df;  -.0123456789e09dd;  -.0123456789e09dl;  -.0123456789e09DF;  -.0123456789e09DD;  -.0123456789e09DL;
+					                        
+	 .0123456789E+09df;   .0123456789E+09dd;   .0123456789E+09dl;   .0123456789E+09DF;   .0123456789E+09DD;   .0123456789E+09DL;
+	+.0123456789E+09df;  +.0123456789E+09dd;  +.0123456789E+09dl;  +.0123456789E+09DF;  +.0123456789E+09DD;  +.0123456789E+09DL;
+	-.0123456789E+09df;  -.0123456789E+09dd;  -.0123456789E+09dl;  -.0123456789E+09DF;  -.0123456789E+09DD;  -.0123456789E+09DL;
+					                        
+	 .0123456789E-09df;   .0123456789E-09dd;   .0123456789E-09dl;   .0123456789E-09DF;   .0123456789E-09DD;   .0123456789E-09DL;
+	+.0123456789E-09df;  +.0123456789E-09dd;  +.0123456789E-09dl;  +.0123456789E-09DF;  +.0123456789E-09DD;  +.0123456789E-09DL;
+	-.0123456789E-09df;  -.0123456789E-09dd;  -.0123456789E-09dl;  -.0123456789E-09DF;  -.0123456789E-09DD;  -.0123456789E-09DL;
+
+	 0123456789.0123456789df;   0123456789.0123456789dd;   0123456789.0123456789dl;   0123456789.0123456789DF;   0123456789.0123456789DD;   0123456789.0123456789DL;
+	+0123456789.0123456789df;  +0123456789.0123456789dd;  +0123456789.0123456789dl;  +0123456789.0123456789DF;  +0123456789.0123456789DD;  +0123456789.0123456789DL;
+	-0123456789.0123456789df;  -0123456789.0123456789dd;  -0123456789.0123456789dl;  -0123456789.0123456789DF;  -0123456789.0123456789DD;  -0123456789.0123456789DL;
+
+	 0123456789.0123456789E09df;   0123456789.0123456789E09dd;   0123456789.0123456789E09dl;   0123456789.0123456789E09DF;   0123456789.0123456789E09DD;   0123456789.0123456789E09DL;
+	+0123456789.0123456789E09df;  +0123456789.0123456789E09dd;  +0123456789.0123456789E09dl;  +0123456789.0123456789E09DF;  +0123456789.0123456789E09DD;  +0123456789.0123456789E09DL;
+	-0123456789.0123456789E09df;  -0123456789.0123456789E09dd;  -0123456789.0123456789E09dl;  -0123456789.0123456789E09DF;  -0123456789.0123456789E09DD;  -0123456789.0123456789E09DL;
+								                                
+	 0123456789.0123456789E+09df;   0123456789.0123456789E+09dd;   0123456789.0123456789E+09dl;   0123456789.0123456789E+09DF;   0123456789.0123456789E+09DD;   0123456789.0123456789E+09DL;
+	+0123456789.0123456789E+09df;  +0123456789.0123456789E+09dd;  +0123456789.0123456789E+09dl;  +0123456789.0123456789E+09DF;  +0123456789.0123456789E+09DD;  +0123456789.0123456789E+09DL;
+	-0123456789.0123456789E+09df;  -0123456789.0123456789E+09dd;  -0123456789.0123456789E+09dl;  -0123456789.0123456789E+09DF;  -0123456789.0123456789E+09DD;  -0123456789.0123456789E+09DL;
+								                                
+	 0123456789.0123456789E-09df;   0123456789.0123456789E-09dd;   0123456789.0123456789E-09dl;   0123456789.0123456789E-09DF;   0123456789.0123456789E-09DD;   0123456789.0123456789E-09DL;
+	+0123456789.0123456789E-09df;  +0123456789.0123456789E-09dd;  +0123456789.0123456789E-09dl;  +0123456789.0123456789E-09DF;  +0123456789.0123456789E-09DD;  +0123456789.0123456789E-09DL;
+	-0123456789.0123456789E-09df;  -0123456789.0123456789E-09dd;  -0123456789.0123456789E-09dl;  -0123456789.0123456789E-09DF;  -0123456789.0123456789E-09DD;  -0123456789.0123456789E-09DL;
+#endif // ! __aarch64__
 
 // hexadecimal floating literals, must have exponent
Index: tests/math.cfa
===================================================================
--- tests/math.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/math.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -10,6 +10,6 @@
 // Created On       : Fri Apr 22 14:59:21 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Apr 13 21:04:48 2021
-// Update Count     : 123
+// Last Modified On : Fri Jun 18 17:02:44 2021
+// Update Count     : 124
 //
 
@@ -40,9 +40,9 @@
 
 	sout | "exp:" | exp( 1.0F ) | exp( 1.0D ) | exp( 1.0L ) | nonl;
-	sout | exp( 1.0F+1.0FI ) | exp( 1.0D+1.0DI ) | exp( 1.0DL+1.0LI );
+	sout | exp( 1.0F+1.0FI ) | exp( 1.0D+1.0DI ) | exp( 1.0L+1.0LI );
 	sout | "exp2:" | exp2( 1.0F ) | exp2( 1.0D ) | exp2( 1.0L );
 	sout | "expm1:" | expm1( 1.0F ) | expm1( 1.0D ) | expm1( 1.0L );
 	sout | "pow:" | pow( 1.0F, 1.0F ) | pow( 1.0D, 1.0D ) | pow( 1.0L, 1.0L ) | nonl;
-	sout | pow( 1.0F+1.0FI, 1.0F+1.0FI ) | pow( 1.0D+1.0DI, 1.0D+1.0DI ) | pow( 1.5DL+1.5LI, 1.5DL+1.5LI );
+	sout | pow( 1.0F+1.0FI, 1.0F+1.0FI ) | pow( 1.0D+1.0DI, 1.0D+1.0DI ) | pow( 1.5L+1.5LI, 1.5L+1.5LI );
 
 	int b = 4;
@@ -68,5 +68,5 @@
 
 	sout | "log:" | log( 1.0F ) | log( 1.0D ) | log( 1.0L ) | nonl;
-	sout | log( 1.0F+1.0FI ) | log( 1.0D+1.0DI ) | log( 1.0DL+1.0LI );
+	sout | log( 1.0F+1.0FI ) | log( 1.0D+1.0DI ) | log( 1.0L+1.0LI );
 	sout | "log2:" | log2( 1024 ) | log2( 2 \ 17u ) | log2( 2 \ 23u );
 	sout | "log2:" | log2( 1024l ) | log2( 2l \ 17u ) | log2( 2l \ 23u );
@@ -82,5 +82,5 @@
 
 	sout | "sqrt:" | sqrt( 1.0F ) | sqrt( 1.0D ) | sqrt( 1.0L ) | nonl;
-	sout | sqrt( 1.0F+1.0FI ) | sqrt( 1.0D+1.0DI ) | sqrt( 1.0DL+1.0LI );
+	sout | sqrt( 1.0F+1.0FI ) | sqrt( 1.0D+1.0DI ) | sqrt( 1.0L+1.0LI );
 	sout | "cbrt:" | cbrt( 27.0F ) | cbrt( 27.0D ) | cbrt( 27.0L );
 	sout | "hypot:" | hypot( 1.0F, -1.0F ) | hypot( 1.0D, -1.0D ) | hypot( 1.0L, -1.0L );
@@ -89,15 +89,15 @@
 
 	sout | "sin:" | sin( 1.0F ) | sin( 1.0D ) | sin( 1.0L ) | nonl;
-	sout | sin( 1.0F+1.0FI ) | sin( 1.0D+1.0DI ) | sin( 1.0DL+1.0LI );
+	sout | sin( 1.0F+1.0FI ) | sin( 1.0D+1.0DI ) | sin( 1.0L+1.0LI );
 	sout | "cos:" | cos( 1.0F ) | cos( 1.0D ) | cos( 1.0L ) | nonl;
-	sout | cos( 1.0F+1.0FI ) | cos( 1.0D+1.0DI ) | cos( 1.0DL+1.0LI );
+	sout | cos( 1.0F+1.0FI ) | cos( 1.0D+1.0DI ) | cos( 1.0L+1.0LI );
 	sout | "tan:" | tan( 1.0F ) | tan( 1.0D ) | tan( 1.0L ) | nonl;
-	sout | tan( 1.0F+1.0FI ) | tan( 1.0D+1.0DI ) | tan( 1.0DL+1.0LI );
+	sout | tan( 1.0F+1.0FI ) | tan( 1.0D+1.0DI ) | tan( 1.0L+1.0LI );
 	sout | "asin:" | asin( 1.0F ) | asin( 1.0D ) | asin( 1.0L ) | nonl;
-	sout | asin( 1.0F+1.0FI ) | asin( 1.0D+1.0DI ) | asin( 1.0DL+1.0LI );
+	sout | asin( 1.0F+1.0FI ) | asin( 1.0D+1.0DI ) | asin( 1.0L+1.0LI );
 	sout | "acos:" | acos( 1.0F ) | acos( 1.0D ) | acos( 1.0L ) | nonl;
-	sout | acos( 1.0F+1.0FI ) | acos( 1.0D+1.0DI ) | acos( 1.0DL+1.0LI );
+	sout | acos( 1.0F+1.0FI ) | acos( 1.0D+1.0DI ) | acos( 1.0L+1.0LI );
 	sout | "atan:" | atan( 1.0F ) | atan( 1.0D ) | atan( 1.0L ) | nonl;
-	sout | atan( 1.0F+1.0FI ) | atan( 1.0D+1.0DI ) | atan( 1.0DL+1.0LI );
+	sout | atan( 1.0F+1.0FI ) | atan( 1.0D+1.0DI ) | atan( 1.0L+1.0LI );
 	sout | "atan2:" | atan2( 1.0F, 1.0F ) | atan2( 1.0D, 1.0D ) | atan2( 1.0L, 1.0L ) | nonl;
 	sout | "atan:" | atan( 1.0F, 1.0F ) | atan( 1.0D, 1.0D ) | atan( 1.0L, 1.0L );
@@ -106,15 +106,15 @@
 
 	sout | "sinh:" | sinh( 1.0F ) | sinh( 1.0D ) | sinh( 1.0L ) | nonl;
-	sout | sinh( 1.0F+1.0FI ) | sinh( 1.0D+1.0DI ) | sinh( 1.0DL+1.0LI );
+	sout | sinh( 1.0F+1.0FI ) | sinh( 1.0D+1.0DI ) | sinh( 1.0L+1.0LI );
 	sout | "cosh:" | cosh( 1.0F ) | cosh( 1.0D ) | cosh( 1.0L ) | nonl;
-	sout | cosh( 1.0F+1.0FI ) | cosh( 1.0D+1.0DI ) | cosh( 1.0DL+1.0LI );
+	sout | cosh( 1.0F+1.0FI ) | cosh( 1.0D+1.0DI ) | cosh( 1.0L+1.0LI );
 	sout | "tanh:" | tanh( 1.0F ) | tanh( 1.0D ) | tanh( 1.0L ) | nonl;
-	sout | tanh( 1.0F+1.0FI ) | tanh( 1.0D+1.0DI ) | tanh( 1.0DL+1.0LI );
+	sout | tanh( 1.0F+1.0FI ) | tanh( 1.0D+1.0DI ) | tanh( 1.0L+1.0LI );
 	sout | "acosh:" | acosh( 1.0F ) | acosh( 1.0D ) | acosh( 1.0L ) | nonl;
-	sout | acosh( 1.0F+1.0FI ) | acosh( 1.0D+1.0DI ) | acosh( 1.0DL+1.0LI );
+	sout | acosh( 1.0F+1.0FI ) | acosh( 1.0D+1.0DI ) | acosh( 1.0L+1.0LI );
 	sout | "asinh:" | asinh( 1.0F ) | asinh( 1.0D ) | asinh( 1.0L ) | nonl;
-	sout | asinh( 1.0F+1.0FI ) | asinh( 1.0D+1.0DI ) | asinh( 1.0DL+1.0LI );
+	sout | asinh( 1.0F+1.0FI ) | asinh( 1.0D+1.0DI ) | asinh( 1.0L+1.0LI );
 	sout | "atanh:" | atanh( 1.0F ) | atanh( 1.0D ) | atanh( 1.0L ) | nonl;
-	sout | atanh( 1.0F+1.0FI ) | atanh( 1.0D+1.0DI ) | atanh( 1.0DL+1.0LI );
+	sout | atanh( 1.0F+1.0FI ) | atanh( 1.0D+1.0DI ) | atanh( 1.0L+1.0LI );
 
 	//---------------------- Error / Gamma ----------------------
Index: tests/pybin/tools.py
===================================================================
--- tests/pybin/tools.py	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/pybin/tools.py	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -376,5 +376,8 @@
 		return 1, "ERR No core dump"
 
-	return sh('gdb', '-n', path, core, '-batch', '-x', cmd, output_file=subprocess.PIPE)
+	try:
+		return sh('gdb', '-n', path, core, '-batch', '-x', cmd, output_file=subprocess.PIPE)
+	except:
+		return 1, "ERR Could not read core with gdb"
 
 def core_archive(dst, name, exe):
Index: tests/test.py
===================================================================
--- tests/test.py	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/test.py	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -13,5 +13,4 @@
 
 import os
-import psutil
 import signal
 
Index: tests/typedefRedef.cfa
===================================================================
--- tests/typedefRedef.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/typedefRedef.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -45,4 +45,10 @@
 typedef int X2;
 
+X2 value  __attribute__((aligned(4 * sizeof(X2))));
+
+__attribute__((aligned(4 * sizeof(X2)))) struct rseq_cs {
+	int foo;
+};
+
 // xxx - this doesn't work yet due to parsing problems with generic types
 // #ifdef __CFA__
Index: tests/unified_locking/fast.cfa
===================================================================
--- tests/unified_locking/fast.cfa	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ tests/unified_locking/fast.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -22,5 +22,5 @@
 uint32_t cs() {
 	$thread * me = active_thread();
-	uint32_t value = (uint32_t)me;
+	uint32_t value;
 	lock(mo.l);
 	{
@@ -28,4 +28,5 @@
 		mo.id = me;
 		yield(random(5));
+		value = ((uint32_t)random()) ^ ((uint32_t)me);
 		if(mo.id != me) sout | "Intruder!";
 		mo.sum = tsum + value;
Index: tests/vector_math/.expect/vec2_ldouble.arm64.txt
===================================================================
--- tests/vector_math/.expect/vec2_ldouble.arm64.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/vector_math/.expect/vec2_ldouble.arm64.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,2 @@
+ctor(x,y):<1.18973149535723176508575932662801e+4932,3.36210314311209350626267781732175e-4932>
+dot(v2,v3):324.67070973663903806599369057642
Index: tests/vector_math/.expect/vec2_ldouble.txt
===================================================================
--- tests/vector_math/.expect/vec2_ldouble.txt	(revision 5a46e09dae381c4d10eda5d14c8e5293ab0dcbb9)
+++ 	(revision )
@@ -1,2 +1,0 @@
-ctor(x,y):<1.18973149535723177e+4932,3.36210314311209351e-4932>
-dot(v2,v3):324.670709736639038
Index: tests/vector_math/.expect/vec2_ldouble.x64.txt
===================================================================
--- tests/vector_math/.expect/vec2_ldouble.x64.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/vector_math/.expect/vec2_ldouble.x64.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,2 @@
+ctor(x,y):<1.18973149535723177e+4932,3.36210314311209351e-4932>
+dot(v2,v3):324.670709736639038
Index: tests/vector_math/.expect/vec2_ldouble.x86.txt
===================================================================
--- tests/vector_math/.expect/vec2_ldouble.x86.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/vector_math/.expect/vec2_ldouble.x86.txt	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,2 @@
+ctor(x,y):<1.18973149535723177e+4932,3.36210314311209351e-4932>
+dot(v2,v3):324.670709736639038
Index: tests/zombies/vector-perf/iteration-perf.cfa
===================================================================
--- tests/zombies/vector-perf/iteration-perf.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/zombies/vector-perf/iteration-perf.cfa	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,32 @@
+#include "vector2.hfa"
+
+#include <time.h>
+
+enum { NumElements = 10000, NumReps = 50000 };
+
+// A layer of indirection to improve performance (naturally!)
+// Works around trac #248.
+// This test keeps a vector of notfloat, instead of a vector of float.  The optimizer removes this added indirection.
+// Furthermore, by passing notfloat's static-inline constructors, defined in this compile unit, to vector, the vector's element's constructors become eligible for inlining.
+// Skipping this optimization costs about a 25% slowdown.
+struct notfloat{ inline float; };
+
+// Timing harness: NumReps full traversals of a NumElements-long vector(notfloat); prints last sum and clock() seconds.
+int main() {
+    vector(notfloat) x = { 4 };
+    for (i; NumElements) {
+        push_last(x, (notfloat){0.1f * i});
+    }
+    float total = 0.0f;
+    clock_t start = clock();
+    for (rep; NumReps) {
+        total = 0;
+        while( vector_exit(notfloat) it = x`origin; it`moveNext ) {
+            total += it`val;
+        }
+    }
+    clock_t end = clock();
+    printf("last total was %f\n", total);
+    double elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
+    printf("iterating duration was %f\n", elapsed);
+}
Index: tests/zombies/vector-perf/iteration-perf.cpp
===================================================================
--- tests/zombies/vector-perf/iteration-perf.cpp	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
+++ tests/zombies/vector-perf/iteration-perf.cpp	(revision 660665fdcc1db2b22595e18697e5ba335dda9b19)
@@ -0,0 +1,25 @@
+#include <vector>
+#include <iostream>
+#include <time.h>
+
+enum { NumElements = 10000, NumReps = 50000 };
+
+// Timing harness: NumReps full iterator passes over a NumElements-long std::vector<float>; prints last sum and clock() seconds.
+int main() {
+    std::vector<float> x;
+    for (int i = 0; i < NumElements; i++) {
+        x.push_back(0.1f * i);
+    }
+    float total = 0.0f;
+    const clock_t start = clock();
+    for (int rep = 0; rep < NumReps; rep++) {
+        total = 0;
+        for( std::vector<float>::iterator it = x.begin(); it < x.end(); it ++ ) {
+            total += *it;
+        }
+    }
+    const clock_t end = clock();
+    std::cout << "last total was " << total << std::endl;
+    double elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
+    std::cout << "iterating duration was " << elapsed << std::endl;
+}