Index: Jenkinsfile
===================================================================
--- Jenkinsfile	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ Jenkinsfile	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -102,4 +102,9 @@
 
 		echo GitLogMessage()
+
+		// This is a complete hack but it solves problems with automake thinking it needs to regenerate makefiles
+		// We fudged automake/missing to handle that, but automake still bakes prints inside the makefiles
+		// and these cause more problems.
+		sh 'find . -name Makefile.in -exec touch {} +'
 	}
 }
Index: automake/missing
===================================================================
--- automake/missing	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ automake/missing	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -1,5 +1,1 @@
-#! /bin/sh
-# Tdelisle : having the Makefiles.in automatically regenerated causes problems
-#            when using multiple versions of automake, even if only on end user machines
-#            therefore I am disabling that feature by commenting this script
-exit 0
+/usr/share/automake-1.15/missing
Index: benchmark/Makefile.in
===================================================================
--- benchmark/Makefile.in	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ benchmark/Makefile.in	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -352,6 +352,5 @@
 LTCFACOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=compile $(CFACC) $(DEFS) \
-	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(CFAFLAGS) \
-	$(AM_CFLAGS) $(CFLAGS)
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(AM_CFLAGS) $(CFAFLAGS) $(CFLAGS)
 
 AM_V_CFA = $(am__v_CFA_@AM_V@)
Index: driver/cc1.cc
===================================================================
--- driver/cc1.cc	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ driver/cc1.cc	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -335,4 +335,12 @@
 	#endif // __DEBUG_H__
 
+	enum {
+		Color_Auto   = 0,
+		Color_Always = 1,
+		Color_Never  = 2,
+	} color_arg = Color_Auto;
+
+	const char * color_names[3] = { "--colors=auto", "--colors=always", "--colors=never" };
+
 	// process all the arguments
 
@@ -341,4 +349,13 @@
 		if ( prefix( arg, "-" ) ) {
 			// strip inappropriate flags
+
+			if ( prefix( arg, "-fdiagnostics-color=" ) ) {
+				string choice = arg.substr(20);
+				     if(choice == "always") color_arg = Color_Always;
+				else if(choice == "never" ) color_arg = Color_Never;
+				else if(choice == "auto"  ) color_arg = Color_Auto;
+			} else if ( arg == "-fno-diagnostics-color" ) {
+				color_arg = Color_Auto;
+			}
 
 			if ( arg == "-quiet" || arg == "-version" || arg == "-fpreprocessed" ||
@@ -440,4 +457,7 @@
 			cargs[ncargs++] = cfa_cpp_out.c_str();
 		} // if
+
+		cargs[ncargs++] = color_names[color_arg];
+
 		cargs[ncargs] = nullptr;						// terminate argument list
 
Index: driver/cfa.cc
===================================================================
--- driver/cfa.cc	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ driver/cfa.cc	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -401,4 +401,6 @@
 		args[nargs++] = "-Xlinker";
 		args[nargs++] = "--undefined=__cfaabi_appready_startup";
+		args[nargs++] = "-z";
+		args[nargs++] = "execstack";
 
 		// include the cfa library in case it is needed
@@ -409,5 +411,5 @@
 		args[nargs++] = "-Wl,--pop-state";
 		args[nargs++] = "-lcfa";
-		args[nargs++] = "-lpthread";
+		args[nargs++] = "-pthread";
 		args[nargs++] = "-ldl";
 		args[nargs++] = "-lrt";
Index: libcfa/configure
===================================================================
--- libcfa/configure	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/configure	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -3000,5 +3000,5 @@
 case $CONFIGURATION in
 	"debug"   )
-		CONFIG_CFLAGS="-Og -g"
+		CONFIG_CFLAGS="-O0 -g"
 		CONFIG_CFAFLAGS="-debug"
 		CONFIG_BUILDLIB="yes"
Index: libcfa/configure.ac
===================================================================
--- libcfa/configure.ac	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/configure.ac	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -68,5 +68,5 @@
 case $CONFIGURATION in
 	"debug"   )
-		CONFIG_CFLAGS="-Og -g"
+		CONFIG_CFLAGS="-O0 -g"
 		CONFIG_CFAFLAGS="-debug"
 		CONFIG_BUILDLIB="yes"
Index: libcfa/prelude/builtins.c
===================================================================
--- libcfa/prelude/builtins.c	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/prelude/builtins.c	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Fri Jul 21 16:21:03 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Jun 25 18:06:52 2019
-// Update Count     : 97
+// Last Modified On : Thu Nov 21 16:31:39 2019
+// Update Count     : 101
 //
 
@@ -69,6 +69,5 @@
 
 // universal typed pointer constant
-// Compiler issue: there is a problem with anonymous types that do not have a size.
-static inline forall( dtype DT | sized(DT) ) DT * intptr( uintptr_t addr ) { return (DT *)addr; }
+static inline forall( dtype DT ) DT * intptr( uintptr_t addr ) { return (DT *)addr; }
 
 // exponentiation operator implementation
Index: libcfa/prelude/sync-builtins.cf
===================================================================
--- libcfa/prelude/sync-builtins.cf	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/prelude/sync-builtins.cf	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -1,402 +1,258 @@
 char __sync_fetch_and_add(volatile char *, char,...);
-char __sync_fetch_and_add_1(volatile char *, char,...);
 signed char __sync_fetch_and_add(volatile signed char *, signed char,...);
-signed char __sync_fetch_and_add_1(volatile signed char *, signed char,...);
 unsigned char __sync_fetch_and_add(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_fetch_and_add_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_fetch_and_add(volatile signed short *, signed short,...);
-signed short __sync_fetch_and_add_2(volatile signed short *, signed short,...);
 unsigned short __sync_fetch_and_add(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_fetch_and_add_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_fetch_and_add(volatile signed int *, signed int,...);
-signed int __sync_fetch_and_add_4(volatile signed int *, signed int,...);
 unsigned int __sync_fetch_and_add(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_fetch_and_add_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_fetch_and_add(volatile signed long int *, signed long int,...);
+unsigned long int __sync_fetch_and_add(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_fetch_and_add(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_fetch_and_add_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_fetch_and_add(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_fetch_and_add_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_fetch_and_add(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_fetch_and_add_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_fetch_and_add(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_fetch_and_add_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 char __sync_fetch_and_sub(volatile char *, char,...);
-char __sync_fetch_and_sub_1(volatile char *, char,...);
 signed char __sync_fetch_and_sub(volatile signed char *, signed char,...);
-signed char __sync_fetch_and_sub_1(volatile signed char *, signed char,...);
 unsigned char __sync_fetch_and_sub(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_fetch_and_sub_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_fetch_and_sub(volatile signed short *, signed short,...);
-signed short __sync_fetch_and_sub_2(volatile signed short *, signed short,...);
 unsigned short __sync_fetch_and_sub(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_fetch_and_sub_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_fetch_and_sub(volatile signed int *, signed int,...);
-signed int __sync_fetch_and_sub_4(volatile signed int *, signed int,...);
 unsigned int __sync_fetch_and_sub(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_fetch_and_sub_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_fetch_and_sub(volatile signed long int *, signed long int,...);
+unsigned long int __sync_fetch_and_sub(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_fetch_and_sub(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_fetch_and_sub_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_fetch_and_sub(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_fetch_and_sub_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_fetch_and_sub(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_fetch_and_sub_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_fetch_and_sub(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_fetch_and_sub_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 char __sync_fetch_and_or(volatile char *, char,...);
-char __sync_fetch_and_or_1(volatile char *, char,...);
 signed char __sync_fetch_and_or(volatile signed char *, signed char,...);
-signed char __sync_fetch_and_or_1(volatile signed char *, signed char,...);
 unsigned char __sync_fetch_and_or(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_fetch_and_or_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_fetch_and_or(volatile signed short *, signed short,...);
-signed short __sync_fetch_and_or_2(volatile signed short *, signed short,...);
 unsigned short __sync_fetch_and_or(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_fetch_and_or_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_fetch_and_or(volatile signed int *, signed int,...);
-signed int __sync_fetch_and_or_4(volatile signed int *, signed int,...);
 unsigned int __sync_fetch_and_or(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_fetch_and_or_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_fetch_and_or(volatile signed long int *, signed long int,...);
+unsigned long int __sync_fetch_and_or(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_fetch_and_or(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_fetch_and_or_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_fetch_and_or(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_fetch_and_or_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_fetch_and_or(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_fetch_and_or_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_fetch_and_or(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_fetch_and_or_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 char __sync_fetch_and_and(volatile char *, char,...);
-char __sync_fetch_and_and_1(volatile char *, char,...);
 signed char __sync_fetch_and_and(volatile signed char *, signed char,...);
-signed char __sync_fetch_and_and_1(volatile signed char *, signed char,...);
 unsigned char __sync_fetch_and_and(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_fetch_and_and_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_fetch_and_and(volatile signed short *, signed short,...);
-signed short __sync_fetch_and_and_2(volatile signed short *, signed short,...);
 unsigned short __sync_fetch_and_and(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_fetch_and_and_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_fetch_and_and(volatile signed int *, signed int,...);
-signed int __sync_fetch_and_and_4(volatile signed int *, signed int,...);
 unsigned int __sync_fetch_and_and(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_fetch_and_and_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_fetch_and_and(volatile signed long int *, signed long int,...);
+unsigned long int __sync_fetch_and_and(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_fetch_and_and(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_fetch_and_and_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_fetch_and_and(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_fetch_and_and_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_fetch_and_and(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_fetch_and_and_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_fetch_and_and(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_fetch_and_and_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 char __sync_fetch_and_xor(volatile char *, char,...);
-char __sync_fetch_and_xor_1(volatile char *, char,...);
 signed char __sync_fetch_and_xor(volatile signed char *, signed char,...);
-signed char __sync_fetch_and_xor_1(volatile signed char *, signed char,...);
 unsigned char __sync_fetch_and_xor(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_fetch_and_xor_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_fetch_and_xor(volatile signed short *, signed short,...);
-signed short __sync_fetch_and_xor_2(volatile signed short *, signed short,...);
 unsigned short __sync_fetch_and_xor(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_fetch_and_xor_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_fetch_and_xor(volatile signed int *, signed int,...);
-signed int __sync_fetch_and_xor_4(volatile signed int *, signed int,...);
 unsigned int __sync_fetch_and_xor(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_fetch_and_xor_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_fetch_and_xor(volatile signed long int *, signed long int,...);
+unsigned long int __sync_fetch_and_xor(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_fetch_and_xor(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_fetch_and_xor_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_fetch_and_xor(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_fetch_and_xor_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_fetch_and_xor(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_fetch_and_xor_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_fetch_and_xor(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_fetch_and_xor_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 char __sync_fetch_and_nand(volatile char *, char,...);
-char __sync_fetch_and_nand_1(volatile char *, char,...);
 signed char __sync_fetch_and_nand(volatile signed char *, signed char,...);
-signed char __sync_fetch_and_nand_1(volatile signed char *, signed char,...);
 unsigned char __sync_fetch_and_nand(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_fetch_and_nand_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_fetch_and_nand(volatile signed short *, signed short,...);
-signed short __sync_fetch_and_nand_2(volatile signed short *, signed short,...);
 unsigned short __sync_fetch_and_nand(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_fetch_and_nand_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_fetch_and_nand(volatile signed int *, signed int,...);
-signed int __sync_fetch_and_nand_4(volatile signed int *, signed int,...);
 unsigned int __sync_fetch_and_nand(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_fetch_and_nand_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_fetch_and_nand(volatile signed long int *, signed long int,...);
+unsigned long int __sync_fetch_and_nand(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_fetch_and_nand(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_fetch_and_nand_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_fetch_and_nand(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_fetch_and_nand_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_fetch_and_nand(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_fetch_and_nand_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_fetch_and_nand(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_fetch_and_nand_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 char __sync_add_and_fetch(volatile char *, char,...);
-char __sync_add_and_fetch_1(volatile char *, char,...);
 signed char __sync_add_and_fetch(volatile signed char *, signed char,...);
-signed char __sync_add_and_fetch_1(volatile signed char *, signed char,...);
 unsigned char __sync_add_and_fetch(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_add_and_fetch_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_add_and_fetch(volatile signed short *, signed short,...);
-signed short __sync_add_and_fetch_2(volatile signed short *, signed short,...);
 unsigned short __sync_add_and_fetch(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_add_and_fetch_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_add_and_fetch(volatile signed int *, signed int,...);
-signed int __sync_add_and_fetch_4(volatile signed int *, signed int,...);
 signed int __sync_add_and_fetch(volatile signed int *, signed int,...);
-signed int __sync_add_and_fetch_4(volatile signed int *, signed int,...);
+signed long int __sync_add_and_fetch(volatile signed long int *, signed long int,...);
+unsigned long int __sync_add_and_fetch(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_add_and_fetch(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_add_and_fetch_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_add_and_fetch(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_add_and_fetch_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_add_and_fetch(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_add_and_fetch_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_add_and_fetch(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_add_and_fetch_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 char __sync_sub_and_fetch(volatile char *, char,...);
-char __sync_sub_and_fetch_1(volatile char *, char,...);
 signed char __sync_sub_and_fetch(volatile signed char *, signed char,...);
-signed char __sync_sub_and_fetch_1(volatile signed char *, signed char,...);
 unsigned char __sync_sub_and_fetch(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_sub_and_fetch_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_sub_and_fetch(volatile signed short *, signed short,...);
-signed short __sync_sub_and_fetch_2(volatile signed short *, signed short,...);
 unsigned short __sync_sub_and_fetch(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_sub_and_fetch_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_sub_and_fetch(volatile signed int *, signed int,...);
-signed int __sync_sub_and_fetch_4(volatile signed int *, signed int,...);
 unsigned int __sync_sub_and_fetch(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_sub_and_fetch_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_sub_and_fetch(volatile signed long int *, signed long int,...);
+unsigned long int __sync_sub_and_fetch(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_sub_and_fetch(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_sub_and_fetch_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_sub_and_fetch(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_sub_and_fetch_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_sub_and_fetch(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_sub_and_fetch_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_sub_and_fetch(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_sub_and_fetch_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 char __sync_or_and_fetch(volatile char *, char,...);
-char __sync_or_and_fetch_1(volatile char *, char,...);
 signed char __sync_or_and_fetch(volatile signed char *, signed char,...);
-signed char __sync_or_and_fetch_1(volatile signed char *, signed char,...);
 unsigned char __sync_or_and_fetch(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_or_and_fetch_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_or_and_fetch(volatile signed short *, signed short,...);
-signed short __sync_or_and_fetch_2(volatile signed short *, signed short,...);
 unsigned short __sync_or_and_fetch(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_or_and_fetch_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_or_and_fetch(volatile signed int *, signed int,...);
-signed int __sync_or_and_fetch_4(volatile signed int *, signed int,...);
 unsigned int __sync_or_and_fetch(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_or_and_fetch_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_or_and_fetch(volatile signed long int *, signed long int,...);
+unsigned long int __sync_or_and_fetch(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_or_and_fetch(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_or_and_fetch_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_or_and_fetch(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_or_and_fetch_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_or_and_fetch(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_or_and_fetch_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_or_and_fetch(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_or_and_fetch_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 char __sync_and_and_fetch(volatile char *, char,...);
-char __sync_and_and_fetch_1(volatile char *, char,...);
 signed char __sync_and_and_fetch(volatile signed char *, signed char,...);
-signed char __sync_and_and_fetch_1(volatile signed char *, signed char,...);
 unsigned char __sync_and_and_fetch(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_and_and_fetch_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_and_and_fetch(volatile signed short *, signed short,...);
-signed short __sync_and_and_fetch_2(volatile signed short *, signed short,...);
 unsigned short __sync_and_and_fetch(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_and_and_fetch_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_and_and_fetch(volatile signed int *, signed int,...);
-signed int __sync_and_and_fetch_4(volatile signed int *, signed int,...);
 unsigned int __sync_and_and_fetch(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_and_and_fetch_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_and_and_fetch(volatile signed long int *, signed long int,...);
+unsigned long int __sync_and_and_fetch(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_and_and_fetch(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_and_and_fetch_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_and_and_fetch(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_and_and_fetch_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_and_and_fetch(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_and_and_fetch_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_and_and_fetch(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_and_and_fetch_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 char __sync_xor_and_fetch(volatile char *, char,...);
-char __sync_xor_and_fetch_1(volatile char *, char,...);
 signed char __sync_xor_and_fetch(volatile signed char *, signed char,...);
-signed char __sync_xor_and_fetch_1(volatile signed char *, signed char,...);
 unsigned char __sync_xor_and_fetch(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_xor_and_fetch_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_xor_and_fetch(volatile signed short *, signed short,...);
-signed short __sync_xor_and_fetch_2(volatile signed short *, signed short,...);
 unsigned short __sync_xor_and_fetch(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_xor_and_fetch_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_xor_and_fetch(volatile signed int *, signed int,...);
-signed int __sync_xor_and_fetch_4(volatile signed int *, signed int,...);
 unsigned int __sync_xor_and_fetch(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_xor_and_fetch_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_xor_and_fetch(volatile signed long int *, signed long int,...);
+unsigned long int __sync_xor_and_fetch(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_xor_and_fetch(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_xor_and_fetch_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_xor_and_fetch(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_xor_and_fetch_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_xor_and_fetch(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_xor_and_fetch_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_xor_and_fetch(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_xor_and_fetch_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 char __sync_nand_and_fetch(volatile char *, char,...);
-char __sync_nand_and_fetch_1(volatile char *, char,...);
 signed char __sync_nand_and_fetch(volatile signed char *, signed char,...);
-signed char __sync_nand_and_fetch_1(volatile signed char *, signed char,...);
 unsigned char __sync_nand_and_fetch(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_nand_and_fetch_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_nand_and_fetch(volatile signed short *, signed short,...);
-signed short __sync_nand_and_fetch_2(volatile signed short *, signed short,...);
 unsigned short __sync_nand_and_fetch(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_nand_and_fetch_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_nand_and_fetch(volatile signed int *, signed int,...);
-signed int __sync_nand_and_fetch_4(volatile signed int *, signed int,...);
 unsigned int __sync_nand_and_fetch(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_nand_and_fetch_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_nand_and_fetch(volatile signed long int *, signed long int,...);
+unsigned long int __sync_nand_and_fetch(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_nand_and_fetch(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_nand_and_fetch_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_nand_and_fetch(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_nand_and_fetch_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_nand_and_fetch(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_nand_and_fetch_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_nand_and_fetch(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_nand_and_fetch_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 _Bool __sync_bool_compare_and_swap(volatile char *, char, char,...);
-_Bool __sync_bool_compare_and_swap_1(volatile char *, char, char,...);
 _Bool __sync_bool_compare_and_swap(volatile signed char *, signed char, signed char,...);
-_Bool __sync_bool_compare_and_swap_1(volatile signed char *, signed char, signed char,...);
 _Bool __sync_bool_compare_and_swap(volatile unsigned char *, unsigned char, unsigned char,...);
-_Bool __sync_bool_compare_and_swap_1(volatile unsigned char *, unsigned char, unsigned char,...);
 _Bool __sync_bool_compare_and_swap(volatile short *, signed short, signed short,...);
-_Bool __sync_bool_compare_and_swap_2(volatile short *, signed short, signed short,...);
 _Bool __sync_bool_compare_and_swap(volatile short *, unsigned short, unsigned short,...);
-_Bool __sync_bool_compare_and_swap_2(volatile short *, unsigned short, unsigned short,...);
 _Bool __sync_bool_compare_and_swap(volatile signed int *, signed int, signed int,...);
-_Bool __sync_bool_compare_and_swap_4(volatile signed int *, signed int, signed int,...);
 _Bool __sync_bool_compare_and_swap(volatile unsigned int *, unsigned int, unsigned int,...);
-_Bool __sync_bool_compare_and_swap_4(volatile unsigned int *, unsigned int, unsigned int,...);
+_Bool __sync_bool_compare_and_swap(volatile signed long int *, signed long int, signed long int,...);
+_Bool __sync_bool_compare_and_swap(volatile unsigned long int *, unsigned long int, unsigned long int,...);
 _Bool __sync_bool_compare_and_swap(volatile signed long long int *, signed long long int, signed long long int,...);
-_Bool __sync_bool_compare_and_swap_8(volatile signed long long int *, signed long long int, signed long long int,...);
 _Bool __sync_bool_compare_and_swap(volatile unsigned long long int *, unsigned long long int, unsigned long long int,...);
-_Bool __sync_bool_compare_and_swap_8(volatile unsigned long long int *, unsigned long long int, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 _Bool __sync_bool_compare_and_swap(volatile signed __int128 *, signed __int128, signed __int128,...);
-_Bool __sync_bool_compare_and_swap_16(volatile signed __int128 *, signed __int128, signed __int128,...);
 _Bool __sync_bool_compare_and_swap(volatile unsigned __int128 *, unsigned __int128, unsigned __int128,...);
-_Bool __sync_bool_compare_and_swap_16(volatile unsigned __int128 *, unsigned __int128, unsigned __int128,...);
 #endif
 forall(dtype T) _Bool __sync_bool_compare_and_swap(T * volatile *, T *, T*, ...);
 
 char __sync_val_compare_and_swap(volatile char *, char, char,...);
-char __sync_val_compare_and_swap_1(volatile char *, char, char,...);
 signed char __sync_val_compare_and_swap(volatile signed char *, signed char, signed char,...);
-signed char __sync_val_compare_and_swap_1(volatile signed char *, signed char, signed char,...);
 unsigned char __sync_val_compare_and_swap(volatile unsigned char *, unsigned char, unsigned char,...);
-unsigned char __sync_val_compare_and_swap_1(volatile unsigned char *, unsigned char, unsigned char,...);
 signed short __sync_val_compare_and_swap(volatile signed short *, signed short, signed short,...);
-signed short __sync_val_compare_and_swap_2(volatile signed short *, signed short, signed short,...);
 unsigned short __sync_val_compare_and_swap(volatile unsigned short *, unsigned short, unsigned short,...);
-unsigned short __sync_val_compare_and_swap_2(volatile unsigned short *, unsigned short, unsigned short,...);
 signed int __sync_val_compare_and_swap(volatile signed int *, signed int, signed int,...);
-signed int __sync_val_compare_and_swap_4(volatile signed int *, signed int, signed int,...);
 unsigned int __sync_val_compare_and_swap(volatile unsigned int *, unsigned int, unsigned int,...);
-unsigned int __sync_val_compare_and_swap_4(volatile unsigned int *, unsigned int, unsigned int,...);
+signed long int __sync_val_compare_and_swap(volatile signed long int *, signed long int, signed long int,...);
+unsigned long int __sync_val_compare_and_swap(volatile unsigned long int *, unsigned long int, unsigned long int,...);
 signed long long int __sync_val_compare_and_swap(volatile signed long long int *, signed long long int, signed long long int,...);
-signed long long int __sync_val_compare_and_swap_8(volatile signed long long int *, signed long long int, signed long long int,...);
 unsigned long long int __sync_val_compare_and_swap(volatile unsigned long long int *, unsigned long long int, unsigned long long int,...);
-unsigned long long int __sync_val_compare_and_swap_8(volatile unsigned long long int *, unsigned long long int, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_val_compare_and_swap(volatile signed __int128 *, signed __int128, signed __int128,...);
-signed __int128 __sync_val_compare_and_swap_16(volatile signed __int128 *, signed __int128, signed __int128,...);
 unsigned __int128 __sync_val_compare_and_swap(volatile unsigned __int128 *, unsigned __int128, unsigned __int128,...);
-unsigned __int128 __sync_val_compare_and_swap_16(volatile unsigned __int128 *, unsigned __int128, unsigned __int128,...);
 #endif
 forall(dtype T) T * __sync_val_compare_and_swap(T * volatile *, T *, T*,...);
 
 char __sync_lock_test_and_set(volatile char *, char,...);
-char __sync_lock_test_and_set_1(volatile char *, char,...);
 signed char __sync_lock_test_and_set(volatile signed char *, signed char,...);
-signed char __sync_lock_test_and_set_1(volatile signed char *, signed char,...);
 unsigned char __sync_lock_test_and_set(volatile unsigned char *, unsigned char,...);
-unsigned char __sync_lock_test_and_set_1(volatile unsigned char *, unsigned char,...);
 signed short __sync_lock_test_and_set(volatile signed short *, signed short,...);
-signed short __sync_lock_test_and_set_2(volatile signed short *, signed short,...);
 unsigned short __sync_lock_test_and_set(volatile unsigned short *, unsigned short,...);
-unsigned short __sync_lock_test_and_set_2(volatile unsigned short *, unsigned short,...);
 signed int __sync_lock_test_and_set(volatile signed int *, signed int,...);
-signed int __sync_lock_test_and_set_4(volatile signed int *, signed int,...);
 unsigned int __sync_lock_test_and_set(volatile unsigned int *, unsigned int,...);
-unsigned int __sync_lock_test_and_set_4(volatile unsigned int *, unsigned int,...);
+signed long int __sync_lock_test_and_set(volatile signed long int *, signed long int,...);
+unsigned long int __sync_lock_test_and_set(volatile unsigned long int *, unsigned long int,...);
 signed long long int __sync_lock_test_and_set(volatile signed long long int *, signed long long int,...);
-signed long long int __sync_lock_test_and_set_8(volatile signed long long int *, signed long long int,...);
 unsigned long long int __sync_lock_test_and_set(volatile unsigned long long int *, unsigned long long int,...);
-unsigned long long int __sync_lock_test_and_set_8(volatile unsigned long long int *, unsigned long long int,...);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __sync_lock_test_and_set(volatile signed __int128 *, signed __int128,...);
-signed __int128 __sync_lock_test_and_set_16(volatile signed __int128 *, signed __int128,...);
 unsigned __int128 __sync_lock_test_and_set(volatile unsigned __int128 *, unsigned __int128,...);
-unsigned __int128 __sync_lock_test_and_set_16(volatile unsigned __int128 *, unsigned __int128,...);
 #endif
 
 void __sync_lock_release(volatile char *,...);
-void __sync_lock_release_1(volatile char *,...);
 void __sync_lock_release(volatile signed char *,...);
-void __sync_lock_release_1(volatile signed char *,...);
 void __sync_lock_release(volatile unsigned char *,...);
-void __sync_lock_release_1(volatile unsigned char *,...);
 void __sync_lock_release(volatile signed short *,...);
-void __sync_lock_release_2(volatile signed short *,...);
 void __sync_lock_release(volatile unsigned short *,...);
-void __sync_lock_release_2(volatile unsigned short *,...);
 void __sync_lock_release(volatile signed int *,...);
-void __sync_lock_release_4(volatile signed int *,...);
 void __sync_lock_release(volatile unsigned int *,...);
-void __sync_lock_release_4(volatile unsigned int *,...);
+void __sync_lock_release(volatile signed long int *,...);
+void __sync_lock_release(volatile unsigned long int *,...);
 void __sync_lock_release(volatile signed long long int *,...);
-void __sync_lock_release_8(volatile signed long long int *,...);
 void __sync_lock_release(volatile unsigned long long int *,...);
-void __sync_lock_release_8(volatile unsigned long long int *,...);
 #if defined(__SIZEOF_INT128__)
 void __sync_lock_release(volatile signed __int128 *,...);
-void __sync_lock_release_16(volatile signed __int128 *,...);
 void __sync_lock_release(volatile unsigned __int128 *,...);
-void __sync_lock_release_16(volatile unsigned __int128 *,...);
 #endif
 
@@ -414,4 +270,6 @@
 _Bool __atomic_test_and_set(volatile signed int *, int);
 _Bool __atomic_test_and_set(volatile unsigned int *, int);
+_Bool __atomic_test_and_set(volatile signed long int *, int);
+_Bool __atomic_test_and_set(volatile unsigned long int *, int);
 _Bool __atomic_test_and_set(volatile signed long long int *, int);
 _Bool __atomic_test_and_set(volatile unsigned long long int *, int);
@@ -429,4 +287,6 @@
 void __atomic_clear(volatile signed int *, int);
 void __atomic_clear(volatile unsigned int *, int);
+void __atomic_clear(volatile signed long int *, int);
+void __atomic_clear(volatile unsigned long int *, int);
 void __atomic_clear(volatile signed long long int *, int);
 void __atomic_clear(volatile unsigned long long int *, int);
@@ -436,37 +296,32 @@
 #endif
 
+_Bool __atomic_exchange_n(volatile _Bool *, _Bool, int);
+void __atomic_exchange(volatile _Bool *, volatile _Bool *, volatile _Bool *, int);
 char __atomic_exchange_n(volatile char *, char, int);
-char __atomic_exchange_1(volatile char *, char, int);
 void __atomic_exchange(volatile char *, volatile char *, volatile char *, int);
 signed char __atomic_exchange_n(volatile signed char *, signed char, int);
-signed char __atomic_exchange_1(volatile signed char *, signed char, int);
 void __atomic_exchange(volatile signed char *, volatile signed char *, volatile signed char *, int);
 unsigned char __atomic_exchange_n(volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_exchange_1(volatile unsigned char *, unsigned char, int);
 void __atomic_exchange(volatile unsigned char *, volatile unsigned char *, volatile unsigned char *, int);
 signed short __atomic_exchange_n(volatile signed short *, signed short, int);
-signed short __atomic_exchange_2(volatile signed short *, signed short, int);
 void __atomic_exchange(volatile signed short *, volatile signed short *, volatile signed short *, int);
 unsigned short __atomic_exchange_n(volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_exchange_2(volatile unsigned short *, unsigned short, int);
 void __atomic_exchange(volatile unsigned short *, volatile unsigned short *, volatile unsigned short *, int);
 signed int __atomic_exchange_n(volatile signed int *, signed int, int);
-signed int __atomic_exchange_4(volatile signed int *, signed int, int);
 void __atomic_exchange(volatile signed int *, volatile signed int *, volatile signed int *, int);
 unsigned int __atomic_exchange_n(volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_exchange_4(volatile unsigned int *, unsigned int, int);
 void __atomic_exchange(volatile unsigned int *, volatile unsigned int *, volatile unsigned int *, int);
+signed long int __atomic_exchange_n(volatile signed long int *, signed long int, int);
+void __atomic_exchange(volatile signed long int *, volatile signed long int *, volatile signed long int *, int);
+unsigned long int __atomic_exchange_n(volatile unsigned long int *, unsigned long int, int);
+void __atomic_exchange(volatile unsigned long int *, volatile unsigned long int *, volatile unsigned long int *, int);
 signed long long int __atomic_exchange_n(volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_exchange_8(volatile signed long long int *, signed long long int, int);
 void __atomic_exchange(volatile signed long long int *, volatile signed long long int *, volatile signed long long int *, int);
 unsigned long long int __atomic_exchange_n(volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_exchange_8(volatile unsigned long long int *, unsigned long long int, int);
 void __atomic_exchange(volatile unsigned long long int *, volatile unsigned long long int *, volatile unsigned long long int *, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_exchange_n(volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_exchange_16(volatile signed __int128 *, signed __int128, int);
 void __atomic_exchange(volatile signed __int128 *, volatile signed __int128 *, volatile signed __int128 *, int);
 unsigned __int128 __atomic_exchange_n(volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_exchange_16(volatile unsigned __int128 *, unsigned __int128, int);
 void __atomic_exchange(volatile unsigned __int128 *, volatile unsigned __int128 *, volatile unsigned __int128 *, int);
 #endif
@@ -477,36 +332,29 @@
 void __atomic_load(const volatile _Bool *, volatile _Bool *, int);
 char __atomic_load_n(const volatile char *, int);
-char __atomic_load_1(const volatile char *, int);
 void __atomic_load(const volatile char *, volatile char *, int);
 signed char __atomic_load_n(const volatile signed char *, int);
-signed char __atomic_load_1(const volatile signed char *, int);
 void __atomic_load(const volatile signed char *, volatile signed char *, int);
 unsigned char __atomic_load_n(const volatile unsigned char *, int);
-unsigned char __atomic_load_1(const volatile unsigned char *, int);
 void __atomic_load(const volatile unsigned char *, volatile unsigned char *, int);
 signed short __atomic_load_n(const volatile signed short *, int);
-signed short __atomic_load_2(const volatile signed short *, int);
 void __atomic_load(const volatile signed short *, volatile signed short *, int);
 unsigned short __atomic_load_n(const volatile unsigned short *, int);
-unsigned short __atomic_load_2(const volatile unsigned short *, int);
 void __atomic_load(const volatile unsigned short *, volatile unsigned short *, int);
 signed int __atomic_load_n(const volatile signed int *, int);
-signed int __atomic_load_4(const volatile signed int *, int);
 void __atomic_load(const volatile signed int *, volatile signed int *, int);
 unsigned int __atomic_load_n(const volatile unsigned int *, int);
-unsigned int __atomic_load_4(const volatile unsigned int *, int);
 void __atomic_load(const volatile unsigned int *, volatile unsigned int *, int);
+signed long int __atomic_load_n(const volatile signed long int *, int);
+void __atomic_load(const volatile signed long int *, volatile signed long int *, int);
+unsigned long int __atomic_load_n(const volatile unsigned long int *, int);
+void __atomic_load(const volatile unsigned long int *, volatile unsigned long int *, int);
 signed long long int __atomic_load_n(const volatile signed long long int *, int);
-signed long long int __atomic_load_8(const volatile signed long long int *, int);
 void __atomic_load(const volatile signed long long int *, volatile signed long long int *, int);
 unsigned long long int __atomic_load_n(const volatile unsigned long long int *, int);
-unsigned long long int __atomic_load_8(const volatile unsigned long long int *, int);
 void __atomic_load(const volatile unsigned long long int *, volatile unsigned long long int *, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_load_n(const volatile signed __int128 *, int);
-signed __int128 __atomic_load_16(const volatile signed __int128 *, int);
 void __atomic_load(const volatile signed __int128 *, volatile signed __int128 *, int);
 unsigned __int128 __atomic_load_n(const volatile unsigned __int128 *, int);
-unsigned __int128 __atomic_load_16(const volatile unsigned __int128 *, int);
 void __atomic_load(const volatile unsigned __int128 *, volatile unsigned __int128 *, int);
 #endif
@@ -515,36 +363,29 @@
 
 _Bool __atomic_compare_exchange_n(volatile char *, char *, char, _Bool, int, int);
-_Bool __atomic_compare_exchange_1(volatile char *, char *, char, _Bool, int, int);
 _Bool __atomic_compare_exchange  (volatile char *, char *, char *, _Bool, int, int);
 _Bool __atomic_compare_exchange_n(volatile signed char *, signed char *, signed char, _Bool, int, int);
-_Bool __atomic_compare_exchange_1(volatile signed char *, signed char *, signed char, _Bool, int, int);
 _Bool __atomic_compare_exchange  (volatile signed char *, signed char *, signed char *, _Bool, int, int);
 _Bool __atomic_compare_exchange_n(volatile unsigned char *, unsigned char *, unsigned char, _Bool, int, int);
-_Bool __atomic_compare_exchange_1(volatile unsigned char *, unsigned char *, unsigned char, _Bool, int, int);
 _Bool __atomic_compare_exchange  (volatile unsigned char *, unsigned char *, unsigned char *, _Bool, int, int);
 _Bool __atomic_compare_exchange_n(volatile signed short *, signed short *, signed short, _Bool, int, int);
-_Bool __atomic_compare_exchange_2(volatile signed short *, signed short *, signed short, _Bool, int, int);
 _Bool __atomic_compare_exchange  (volatile signed short *, signed short *, signed short *, _Bool, int, int);
 _Bool __atomic_compare_exchange_n(volatile unsigned short *, unsigned short *, unsigned short, _Bool, int, int);
-_Bool __atomic_compare_exchange_2(volatile unsigned short *, unsigned short *, unsigned short, _Bool, int, int);
 _Bool __atomic_compare_exchange  (volatile unsigned short *, unsigned short *, unsigned short *, _Bool, int, int);
 _Bool __atomic_compare_exchange_n(volatile signed int *, signed int *, signed int, _Bool, int, int);
-_Bool __atomic_compare_exchange_4(volatile signed int *, signed int *, signed int, _Bool, int, int);
 _Bool __atomic_compare_exchange  (volatile signed int *, signed int *, signed int *, _Bool, int, int);
 _Bool __atomic_compare_exchange_n(volatile unsigned int *, unsigned int *, unsigned int, _Bool, int, int);
-_Bool __atomic_compare_exchange_4(volatile unsigned int *, unsigned int *, unsigned int, _Bool, int, int);
 _Bool __atomic_compare_exchange  (volatile unsigned int *, unsigned int *, unsigned int *, _Bool, int, int);
+_Bool __atomic_compare_exchange_n(volatile signed long int *, signed long int *, signed long int, _Bool, int, int);
+_Bool __atomic_compare_exchange  (volatile signed long int *, signed long int *, signed long int *, _Bool, int, int);
+_Bool __atomic_compare_exchange_n(volatile unsigned long int *, unsigned long int *, unsigned long int, _Bool, int, int);
+_Bool __atomic_compare_exchange  (volatile unsigned long int *, unsigned long int *, unsigned long int *, _Bool, int, int);
 _Bool __atomic_compare_exchange_n(volatile signed long long int *, signed long long int *, signed long long int, _Bool, int, int);
-_Bool __atomic_compare_exchange_8(volatile signed long long int *, signed long long int *, signed long long int, _Bool, int, int);
 _Bool __atomic_compare_exchange  (volatile signed long long int *, signed long long int *, signed long long int *, _Bool, int, int);
 _Bool __atomic_compare_exchange_n(volatile unsigned long long int *, unsigned long long int *, unsigned long long int, _Bool, int, int);
-_Bool __atomic_compare_exchange_8(volatile unsigned long long int *, unsigned long long int *, unsigned long long int, _Bool, int, int);
 _Bool __atomic_compare_exchange  (volatile unsigned long long int *, unsigned long long int *, unsigned long long int *, _Bool, int, int);
 #if defined(__SIZEOF_INT128__)
 _Bool __atomic_compare_exchange_n (volatile signed __int128 *, signed __int128 *, signed __int128, _Bool, int, int);
-_Bool __atomic_compare_exchange_16(volatile signed __int128 *, signed __int128 *, signed __int128, _Bool, int, int);
 _Bool __atomic_compare_exchange   (volatile signed __int128 *, signed __int128 *, signed __int128 *, _Bool, int, int);
 _Bool __atomic_compare_exchange_n (volatile unsigned __int128 *, unsigned __int128 *, unsigned __int128, _Bool, int, int);
-_Bool __atomic_compare_exchange_16(volatile unsigned __int128 *, unsigned __int128 *, unsigned __int128, _Bool, int, int);
 _Bool __atomic_compare_exchange   (volatile unsigned __int128 *, unsigned __int128 *, unsigned __int128 *, _Bool, int, int);
 #endif
@@ -555,36 +396,29 @@
 void __atomic_store(volatile _Bool *, _Bool *, int);
 void __atomic_store_n(volatile char *, char, int);
-void __atomic_store_1(volatile char *, char, int);
 void __atomic_store(volatile char *, char *, int);
 void __atomic_store_n(volatile signed char *, signed char, int);
-void __atomic_store_1(volatile signed char *, signed char, int);
 void __atomic_store(volatile signed char *, signed char *, int);
 void __atomic_store_n(volatile unsigned char *, unsigned char, int);
-void __atomic_store_1(volatile unsigned char *, unsigned char, int);
 void __atomic_store(volatile unsigned char *, unsigned char *, int);
 void __atomic_store_n(volatile signed short *, signed short, int);
-void __atomic_store_2(volatile signed short *, signed short, int);
 void __atomic_store(volatile signed short *, signed short *, int);
 void __atomic_store_n(volatile unsigned short *, unsigned short, int);
-void __atomic_store_2(volatile unsigned short *, unsigned short, int);
 void __atomic_store(volatile unsigned short *, unsigned short *, int);
 void __atomic_store_n(volatile signed int *, signed int, int);
-void __atomic_store_4(volatile signed int *, signed int, int);
 void __atomic_store(volatile signed int *, signed int *, int);
 void __atomic_store_n(volatile unsigned int *, unsigned int, int);
-void __atomic_store_4(volatile unsigned int *, unsigned int, int);
 void __atomic_store(volatile unsigned int *, unsigned int *, int);
+void __atomic_store_n(volatile signed long int *, signed long int, int);
+void __atomic_store(volatile signed long int *, signed long int *, int);
+void __atomic_store_n(volatile unsigned long int *, unsigned long int, int);
+void __atomic_store(volatile unsigned long int *, unsigned long int *, int);
 void __atomic_store_n(volatile signed long long int *, signed long long int, int);
-void __atomic_store_8(volatile signed long long int *, signed long long int, int);
 void __atomic_store(volatile signed long long int *, signed long long int *, int);
 void __atomic_store_n(volatile unsigned long long int *, unsigned long long int, int);
-void __atomic_store_8(volatile unsigned long long int *, unsigned long long int, int);
 void __atomic_store(volatile unsigned long long int *, unsigned long long int *, int);
 #if defined(__SIZEOF_INT128__)
 void __atomic_store_n(volatile signed __int128 *, signed __int128, int);
-void __atomic_store_16(volatile signed __int128 *, signed __int128, int);
 void __atomic_store(volatile signed __int128 *, signed __int128 *, int);
 void __atomic_store_n(volatile unsigned __int128 *, unsigned __int128, int);
-void __atomic_store_16(volatile unsigned __int128 *, unsigned __int128, int);
 void __atomic_store(volatile unsigned __int128 *, unsigned __int128 *, int);
 #endif
@@ -593,301 +427,193 @@
 
 char __atomic_add_fetch  (volatile char *, char, int);
-char __atomic_add_fetch_1(volatile char *, char, int);
 signed char __atomic_add_fetch  (volatile signed char *, signed char, int);
-signed char __atomic_add_fetch_1(volatile signed char *, signed char, int);
 unsigned char __atomic_add_fetch  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_add_fetch_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_add_fetch  (volatile signed short *, signed short, int);
-signed short __atomic_add_fetch_2(volatile signed short *, signed short, int);
 unsigned short __atomic_add_fetch  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_add_fetch_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_add_fetch  (volatile signed int *, signed int, int);
-signed int __atomic_add_fetch_4(volatile signed int *, signed int, int);
 unsigned int __atomic_add_fetch  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_add_fetch_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_add_fetch  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_add_fetch  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_add_fetch  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_add_fetch_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_add_fetch  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_add_fetch_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_add_fetch   (volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_add_fetch_16(volatile signed __int128 *, signed __int128, int);
 unsigned __int128 __atomic_add_fetch   (volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_add_fetch_16(volatile unsigned __int128 *, unsigned __int128, int);
 #endif
 
 char __atomic_sub_fetch  (volatile char *, char, int);
-char __atomic_sub_fetch_1(volatile char *, char, int);
 signed char __atomic_sub_fetch  (volatile signed char *, signed char, int);
-signed char __atomic_sub_fetch_1(volatile signed char *, signed char, int);
 unsigned char __atomic_sub_fetch  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_sub_fetch_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_sub_fetch  (volatile signed short *, signed short, int);
-signed short __atomic_sub_fetch_2(volatile signed short *, signed short, int);
 unsigned short __atomic_sub_fetch  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_sub_fetch_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_sub_fetch  (volatile signed int *, signed int, int);
-signed int __atomic_sub_fetch_4(volatile signed int *, signed int, int);
 unsigned int __atomic_sub_fetch  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_sub_fetch_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_sub_fetch  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_sub_fetch  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_sub_fetch  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_sub_fetch_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_sub_fetch  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_sub_fetch_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_sub_fetch   (volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_sub_fetch_16(volatile signed __int128 *, signed __int128, int);
 unsigned __int128 __atomic_sub_fetch   (volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_sub_fetch_16(volatile unsigned __int128 *, unsigned __int128, int);
 #endif
 
 char __atomic_and_fetch  (volatile char *, char, int);
-char __atomic_and_fetch_1(volatile char *, char, int);
 signed char __atomic_and_fetch  (volatile signed char *, signed char, int);
-signed char __atomic_and_fetch_1(volatile signed char *, signed char, int);
 unsigned char __atomic_and_fetch  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_and_fetch_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_and_fetch  (volatile signed short *, signed short, int);
-signed short __atomic_and_fetch_2(volatile signed short *, signed short, int);
 unsigned short __atomic_and_fetch  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_and_fetch_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_and_fetch  (volatile signed int *, signed int, int);
-signed int __atomic_and_fetch_4(volatile signed int *, signed int, int);
 unsigned int __atomic_and_fetch  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_and_fetch_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_and_fetch  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_and_fetch  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_and_fetch  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_and_fetch_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_and_fetch  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_and_fetch_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_and_fetch   (volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_and_fetch_16(volatile signed __int128 *, signed __int128, int);
 unsigned __int128 __atomic_and_fetch   (volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_and_fetch_16(volatile unsigned __int128 *, unsigned __int128, int);
 #endif
 
 char __atomic_nand_fetch  (volatile char *, char, int);
-char __atomic_nand_fetch_1(volatile char *, char, int);
 signed char __atomic_nand_fetch  (volatile signed char *, signed char, int);
-signed char __atomic_nand_fetch_1(volatile signed char *, signed char, int);
 unsigned char __atomic_nand_fetch  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_nand_fetch_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_nand_fetch  (volatile signed short *, signed short, int);
-signed short __atomic_nand_fetch_2(volatile signed short *, signed short, int);
 unsigned short __atomic_nand_fetch  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_nand_fetch_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_nand_fetch  (volatile signed int *, signed int, int);
-signed int __atomic_nand_fetch_4(volatile signed int *, signed int, int);
 unsigned int __atomic_nand_fetch  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_nand_fetch_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_nand_fetch  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_nand_fetch  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_nand_fetch  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_nand_fetch_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_nand_fetch  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_nand_fetch_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_nand_fetch   (volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_nand_fetch_16(volatile signed __int128 *, signed __int128, int);
 unsigned __int128 __atomic_nand_fetch   (volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_nand_fetch_16(volatile unsigned __int128 *, unsigned __int128, int);
 #endif
 
 char __atomic_xor_fetch  (volatile char *, char, int);
-char __atomic_xor_fetch_1(volatile char *, char, int);
 signed char __atomic_xor_fetch  (volatile signed char *, signed char, int);
-signed char __atomic_xor_fetch_1(volatile signed char *, signed char, int);
 unsigned char __atomic_xor_fetch  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_xor_fetch_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_xor_fetch  (volatile signed short *, signed short, int);
-signed short __atomic_xor_fetch_2(volatile signed short *, signed short, int);
 unsigned short __atomic_xor_fetch  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_xor_fetch_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_xor_fetch  (volatile signed int *, signed int, int);
-signed int __atomic_xor_fetch_4(volatile signed int *, signed int, int);
 unsigned int __atomic_xor_fetch  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_xor_fetch_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_xor_fetch  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_xor_fetch  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_xor_fetch  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_xor_fetch_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_xor_fetch  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_xor_fetch_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_xor_fetch   (volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_xor_fetch_16(volatile signed __int128 *, signed __int128, int);
 unsigned __int128 __atomic_xor_fetch   (volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_xor_fetch_16(volatile unsigned __int128 *, unsigned __int128, int);
 #endif
 
 char __atomic_or_fetch  (volatile char *, char, int);
-char __atomic_or_fetch_1(volatile char *, char, int);
 signed char __atomic_or_fetch  (volatile signed char *, signed char, int);
-signed char __atomic_or_fetch_1(volatile signed char *, signed char, int);
 unsigned char __atomic_or_fetch  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_or_fetch_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_or_fetch  (volatile signed short *, signed short, int);
-signed short __atomic_or_fetch_2(volatile signed short *, signed short, int);
 unsigned short __atomic_or_fetch  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_or_fetch_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_or_fetch  (volatile signed int *, signed int, int);
-signed int __atomic_or_fetch_4(volatile signed int *, signed int, int);
 unsigned int __atomic_or_fetch  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_or_fetch_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_or_fetch  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_or_fetch  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_or_fetch  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_or_fetch_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_or_fetch  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_or_fetch_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_or_fetch   (volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_or_fetch_16(volatile signed __int128 *, signed __int128, int);
 unsigned __int128 __atomic_or_fetch   (volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_or_fetch_16(volatile unsigned __int128 *, unsigned __int128, int);
 #endif
 
 char __atomic_fetch_add  (volatile char *, char, int);
-char __atomic_fetch_add_1(volatile char *, char, int);
 signed char __atomic_fetch_add  (volatile signed char *, signed char, int);
-signed char __atomic_fetch_add_1(volatile signed char *, signed char, int);
 unsigned char __atomic_fetch_add  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_fetch_add_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_fetch_add  (volatile signed short *, signed short, int);
-signed short __atomic_fetch_add_2(volatile signed short *, signed short, int);
 unsigned short __atomic_fetch_add  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_fetch_add_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_fetch_add  (volatile signed int *, signed int, int);
-signed int __atomic_fetch_add_4(volatile signed int *, signed int, int);
 unsigned int __atomic_fetch_add  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_fetch_add_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_fetch_add  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_fetch_add  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_fetch_add  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_fetch_add_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_fetch_add  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_fetch_add_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_fetch_add   (volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_fetch_add_16(volatile signed __int128 *, signed __int128, int);
 unsigned __int128 __atomic_fetch_add   (volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_fetch_add_16(volatile unsigned __int128 *, unsigned __int128, int);
 #endif
 
 char __atomic_fetch_sub  (volatile char *, char, int);
-char __atomic_fetch_sub_1(volatile char *, char, int);
 signed char __atomic_fetch_sub  (volatile signed char *, signed char, int);
-signed char __atomic_fetch_sub_1(volatile signed char *, signed char, int);
 unsigned char __atomic_fetch_sub  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_fetch_sub_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_fetch_sub  (volatile signed short *, signed short, int);
-signed short __atomic_fetch_sub_2(volatile signed short *, signed short, int);
 unsigned short __atomic_fetch_sub  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_fetch_sub_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_fetch_sub  (volatile signed int *, signed int, int);
-signed int __atomic_fetch_sub_4(volatile signed int *, signed int, int);
 unsigned int __atomic_fetch_sub  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_fetch_sub_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_fetch_sub  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_fetch_sub  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_fetch_sub  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_fetch_sub_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_fetch_sub  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_fetch_sub_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_fetch_sub   (volatile signed  __int128 *, signed __int128, int);
-signed __int128 __atomic_fetch_sub_16(volatile signed  __int128 *, signed __int128, int);
 unsigned __int128 __atomic_fetch_sub   (volatile unsigned  __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_fetch_sub_16(volatile unsigned  __int128 *, unsigned __int128, int);
 #endif
 
 char __atomic_fetch_and  (volatile char *, char, int);
-char __atomic_fetch_and_1(volatile char *, char, int);
 signed char __atomic_fetch_and  (volatile signed char *, signed char, int);
-signed char __atomic_fetch_and_1(volatile signed char *, signed char, int);
 unsigned char __atomic_fetch_and  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_fetch_and_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_fetch_and  (volatile signed short *, signed short, int);
-signed short __atomic_fetch_and_2(volatile signed short *, signed short, int);
 unsigned short __atomic_fetch_and  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_fetch_and_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_fetch_and  (volatile signed int *, signed int, int);
-signed int __atomic_fetch_and_4(volatile signed int *, signed int, int);
 unsigned int __atomic_fetch_and  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_fetch_and_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_fetch_and  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_fetch_and  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_fetch_and  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_fetch_and_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_fetch_and  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_fetch_and_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_fetch_and   (volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_fetch_and_16(volatile signed __int128 *, signed __int128, int);
 unsigned __int128 __atomic_fetch_and   (volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_fetch_and_16(volatile unsigned __int128 *, unsigned __int128, int);
 #endif
 
 char __atomic_fetch_nand  (volatile char *, char, int);
-char __atomic_fetch_nand_1(volatile char *, char, int);
 signed char __atomic_fetch_nand  (volatile signed char *, signed char, int);
-signed char __atomic_fetch_nand_1(volatile signed char *, signed char, int);
 unsigned char __atomic_fetch_nand  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_fetch_nand_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_fetch_nand  (volatile signed short *, signed short, int);
-signed short __atomic_fetch_nand_2(volatile signed short *, signed short, int);
 unsigned short __atomic_fetch_nand  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_fetch_nand_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_fetch_nand  (volatile signed int *, signed int, int);
-signed int __atomic_fetch_nand_4(volatile signed int *, signed int, int);
 unsigned int __atomic_fetch_nand  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_fetch_nand_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_fetch_nand  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_fetch_nand  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_fetch_nand  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_fetch_nand_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_fetch_nand  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_fetch_nand_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_fetch_nand   (volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_fetch_nand_16(volatile signed __int128 *, signed __int128, int);
 unsigned __int128 __atomic_fetch_nand   (volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_fetch_nand_16(volatile unsigned __int128 *, unsigned __int128, int);
 #endif
 
 char __atomic_fetch_xor  (volatile char *, char, int);
-char __atomic_fetch_xor_1(volatile char *, char, int);
 signed char __atomic_fetch_xor  (volatile signed char *, signed char, int);
-signed char __atomic_fetch_xor_1(volatile signed char *, signed char, int);
 unsigned char __atomic_fetch_xor  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_fetch_xor_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_fetch_xor  (volatile signed short *, signed short, int);
-signed short __atomic_fetch_xor_2(volatile signed short *, signed short, int);
 unsigned short __atomic_fetch_xor  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_fetch_xor_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_fetch_xor  (volatile signed int *, signed int, int);
-signed int __atomic_fetch_xor_4(volatile signed int *, signed int, int);
 unsigned int __atomic_fetch_xor  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_fetch_xor_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_fetch_xor  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_fetch_xor  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_fetch_xor  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_fetch_xor_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_fetch_xor  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_fetch_xor_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_fetch_xor   (volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_fetch_xor_16(volatile signed __int128 *, signed __int128, int);
 unsigned __int128 __atomic_fetch_xor   (volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_fetch_xor_16(volatile unsigned __int128 *, unsigned __int128, int);
 #endif
 
 char __atomic_fetch_or  (volatile char *, char, int);
-char __atomic_fetch_or_1(volatile char *, char, int);
 signed char __atomic_fetch_or  (volatile signed char *, signed char, int);
-signed char __atomic_fetch_or_1(volatile signed char *, signed char, int);
 unsigned char __atomic_fetch_or  (volatile unsigned char *, unsigned char, int);
-unsigned char __atomic_fetch_or_1(volatile unsigned char *, unsigned char, int);
 signed short __atomic_fetch_or  (volatile signed short *, signed short, int);
-signed short __atomic_fetch_or_2(volatile signed short *, signed short, int);
 unsigned short __atomic_fetch_or  (volatile unsigned short *, unsigned short, int);
-unsigned short __atomic_fetch_or_2(volatile unsigned short *, unsigned short, int);
 signed int __atomic_fetch_or  (volatile signed int *, signed int, int);
-signed int __atomic_fetch_or_4(volatile signed int *, signed int, int);
 unsigned int __atomic_fetch_or  (volatile unsigned int *, unsigned int, int);
-unsigned int __atomic_fetch_or_4(volatile unsigned int *, unsigned int, int);
+signed long int __atomic_fetch_or  (volatile signed long int *, signed long int, int);
+unsigned long int __atomic_fetch_or  (volatile unsigned long int *, unsigned long int, int);
 signed long long int __atomic_fetch_or  (volatile signed long long int *, signed long long int, int);
-signed long long int __atomic_fetch_or_8(volatile signed long long int *, signed long long int, int);
 unsigned long long int __atomic_fetch_or  (volatile unsigned long long int *, unsigned long long int, int);
-unsigned long long int __atomic_fetch_or_8(volatile unsigned long long int *, unsigned long long int, int);
 #if defined(__SIZEOF_INT128__)
 signed __int128 __atomic_fetch_or   (volatile signed __int128 *, signed __int128, int);
-signed __int128 __atomic_fetch_or_16(volatile signed __int128 *, signed __int128, int);
 unsigned __int128 __atomic_fetch_or   (volatile unsigned __int128 *, unsigned __int128, int);
-unsigned __int128 __atomic_fetch_or_16(volatile unsigned __int128 *, unsigned __int128, int);
 #endif
 
Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/Makefile.am	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -33,5 +33,5 @@
 # The built sources must not depend on the installed headers
 AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
-AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC @ARCH_FLAGS@ @CONFIG_CFLAGS@
+AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
 AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@
 CFACC = @CFACC@
Index: libcfa/src/Makefile.in
===================================================================
--- libcfa/src/Makefile.in	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/Makefile.in	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -416,6 +416,5 @@
 LTCFACOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=compile $(CFACC) $(DEFS) \
-	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(CFAFLAGS) \
-	$(AM_CFLAGS) $(CFLAGS)
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(AM_CFLAGS) $(CFAFLAGS) $(CFLAGS)
 
 AM_V_CFA = $(am__v_CFA_@AM_V@)
@@ -445,5 +444,5 @@
 # The built sources must not depend on the installed headers
 AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
-AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC @ARCH_FLAGS@ @CONFIG_CFLAGS@
+AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
 AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@
 @BUILDLIB_FALSE@headers_nosrc = 
Index: libcfa/src/assert.cfa
===================================================================
--- libcfa/src/assert.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/assert.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Mon Nov 28 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Jul 20 15:10:26 2017
-// Update Count     : 2
+// Last Modified On : Thu Nov 21 17:09:26 2019
+// Update Count     : 5
 //
 
@@ -17,4 +17,5 @@
 #include <stdarg.h>								// varargs
 #include <stdio.h>								// fprintf
+#include <unistd.h>								// STDERR_FILENO
 #include "bits/debug.hfa"
 
@@ -26,5 +27,5 @@
 	// called by macro assert in assert.h
 	void __assert_fail( const char *assertion, const char *file, unsigned int line, const char *function ) {
-		__cfaabi_dbg_bits_print_safe( CFA_ASSERT_FMT ".\n", assertion, __progname, function, line, file );
+		__cfaabi_bits_print_safe( STDERR_FILENO, CFA_ASSERT_FMT ".\n", assertion, __progname, function, line, file );
 		abort();
 	}
@@ -32,14 +33,14 @@
 	// called by macro assertf
 	void __assert_fail_f( const char *assertion, const char *file, unsigned int line, const char *function, const char *fmt, ... ) {
-		__cfaabi_dbg_bits_acquire();
-		__cfaabi_dbg_bits_print_nolock( CFA_ASSERT_FMT ": ", assertion, __progname, function, line, file );
+		__cfaabi_bits_acquire();
+		__cfaabi_bits_print_nolock( STDERR_FILENO, CFA_ASSERT_FMT ": ", assertion, __progname, function, line, file );
 
 		va_list args;
 		va_start( args, fmt );
-		__cfaabi_dbg_bits_print_vararg( fmt, args );
+		__cfaabi_bits_print_vararg( STDERR_FILENO, fmt, args );
 		va_end( args );
 
-		__cfaabi_dbg_bits_print_nolock( "\n" );
-		__cfaabi_dbg_bits_release();
+		__cfaabi_bits_print_nolock( STDERR_FILENO, "\n" );
+		__cfaabi_bits_release();
 		abort();
 	}
Index: libcfa/src/bits/align.hfa
===================================================================
--- libcfa/src/bits/align.hfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/bits/align.hfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Mon Nov 28 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Jul 21 23:05:35 2017
-// Update Count     : 2
+// Last Modified On : Sat Nov 16 18:58:22 2019
+// Update Count     : 3
 //
 // This  library is free  software; you  can redistribute  it and/or  modify it
@@ -33,5 +33,7 @@
 
 // Minimum size used to align memory boundaries for memory allocations.
-#define libAlign() (sizeof(double))
+//#define libAlign() (sizeof(double))
+// gcc-7 uses xmms instructions, which require 16 byte alignment.
+#define libAlign() (16)
 
 // Check for power of 2
Index: libcfa/src/bits/debug.cfa
===================================================================
--- libcfa/src/bits/debug.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/bits/debug.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Thu Mar 30 12:30:01 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Jul 14 22:17:35 2019
-// Update Count     : 4
+// Last Modified On : Thu Nov 21 17:16:30 2019
+// Update Count     : 10
 //
 
@@ -28,5 +28,5 @@
 extern "C" {
 
-	void __cfaabi_dbg_bits_write( const char *in_buffer, int len ) {
+	void __cfaabi_bits_write( int fd, const char *in_buffer, int len ) {
 		// ensure all data is written
 		for ( int count = 0, retcode; count < len; count += retcode ) {
@@ -34,5 +34,5 @@
 
 			for ( ;; ) {
-				retcode = write( STDERR_FILENO, in_buffer, len - count );
+				retcode = write( fd, in_buffer, len - count );
 
 				// not a timer interrupt ?
@@ -44,21 +44,21 @@
 	}
 
-	void __cfaabi_dbg_bits_acquire() __attribute__((__weak__)) {}
-	void __cfaabi_dbg_bits_release() __attribute__((__weak__)) {}
+	void __cfaabi_bits_acquire() __attribute__((__weak__)) {}
+	void __cfaabi_bits_release() __attribute__((__weak__)) {}
 
-	void __cfaabi_dbg_bits_print_safe  ( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) )) {
+	void __cfaabi_bits_print_safe  ( int fd, const char fmt[], ... ) __attribute__(( format(printf, 2, 3) )) {
 		va_list args;
 
 		va_start( args, fmt );
-		__cfaabi_dbg_bits_acquire();
+		__cfaabi_bits_acquire();
 
 		int len = vsnprintf( buffer, buffer_size, fmt, args );
-		__cfaabi_dbg_bits_write( buffer, len );
+		__cfaabi_bits_write( fd, buffer, len );
 
-		__cfaabi_dbg_bits_release();
+		__cfaabi_bits_release();
 		va_end( args );
 	}
 
-	void __cfaabi_dbg_bits_print_nolock( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) )) {
+	void __cfaabi_bits_print_nolock( int fd, const char fmt[], ... ) __attribute__(( format(printf, 2, 3) )) {
 		va_list args;
 
@@ -66,15 +66,15 @@
 
 		int len = vsnprintf( buffer, buffer_size, fmt, args );
-		__cfaabi_dbg_bits_write( buffer, len );
+		__cfaabi_bits_write( fd, buffer, len );
 
 		va_end( args );
 	}
 
-	void __cfaabi_dbg_bits_print_vararg( const char fmt[], va_list args ) {
+	void __cfaabi_bits_print_vararg( int fd, const char fmt[], va_list args ) {
 		int len = vsnprintf( buffer, buffer_size, fmt, args );
-		__cfaabi_dbg_bits_write( buffer, len );
+		__cfaabi_bits_write( fd, buffer, len );
 	}
 
-	void __cfaabi_dbg_bits_print_buffer( char in_buffer[], int in_buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 3, 4) )) {
+	void __cfaabi_bits_print_buffer( int fd, char in_buffer[], int in_buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 4, 5) )) {
 		va_list args;
 
@@ -82,5 +82,5 @@
 
 		int len = vsnprintf( in_buffer, in_buffer_size, fmt, args );
-		__cfaabi_dbg_bits_write( in_buffer, len );
+		__cfaabi_bits_write( fd, in_buffer, len );
 
 		va_end( args );
Index: libcfa/src/bits/debug.hfa
===================================================================
--- libcfa/src/bits/debug.hfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/bits/debug.hfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Mon Nov 28 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Feb  8 12:35:19 2018
-// Update Count     : 2
+// Last Modified On : Thu Nov 21 17:06:58 2019
+// Update Count     : 8
 //
 
@@ -38,11 +38,11 @@
 	#include <stdio.h>
 
-      extern void __cfaabi_dbg_bits_write( const char *buffer, int len );
-      extern void __cfaabi_dbg_bits_acquire();
-      extern void __cfaabi_dbg_bits_release();
-      extern void __cfaabi_dbg_bits_print_safe  ( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) ));
-      extern void __cfaabi_dbg_bits_print_nolock( const char fmt[], ... ) __attribute__(( format(printf, 1, 2) ));
-      extern void __cfaabi_dbg_bits_print_vararg( const char fmt[], va_list arg );
-      extern void __cfaabi_dbg_bits_print_buffer( char buffer[], int buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 3, 4) ));
+	extern void __cfaabi_bits_write( int fd, const char *buffer, int len );
+	extern void __cfaabi_bits_acquire();
+	extern void __cfaabi_bits_release();
+	extern void __cfaabi_bits_print_safe  ( int fd, const char fmt[], ... ) __attribute__(( format(printf, 2, 3) ));
+	extern void __cfaabi_bits_print_nolock( int fd, const char fmt[], ... ) __attribute__(( format(printf, 2, 3) ));
+	extern void __cfaabi_bits_print_vararg( int fd, const char fmt[], va_list arg );
+	extern void __cfaabi_bits_print_buffer( int fd, char buffer[], int buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 4, 5) ));
 #ifdef __cforall
 }
@@ -50,12 +50,12 @@
 
 #ifdef __CFA_DEBUG_PRINT__
-	#define __cfaabi_dbg_write( buffer, len )         __cfaabi_dbg_bits_write( buffer, len )
-	#define __cfaabi_dbg_acquire()                    __cfaabi_dbg_bits_acquire()
-	#define __cfaabi_dbg_release()                    __cfaabi_dbg_bits_release()
-	#define __cfaabi_dbg_print_safe(...)              __cfaabi_dbg_bits_print_safe   (__VA_ARGS__)
-	#define __cfaabi_dbg_print_nolock(...)            __cfaabi_dbg_bits_print_nolock (__VA_ARGS__)
-	#define __cfaabi_dbg_print_buffer(...)            __cfaabi_dbg_bits_print_buffer (__VA_ARGS__)
-	#define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_bits_write( __dbg_text, __dbg_len );
-	#define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_bits_write( __dbg_text, __dbg_len );
+	#define __cfaabi_dbg_write( buffer, len )         __cfaabi_bits_write( STDERR_FILENO, buffer, len )
+	#define __cfaabi_dbg_acquire()                    __cfaabi_bits_acquire()
+	#define __cfaabi_dbg_release()                    __cfaabi_bits_release()
+	#define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe   (__VA_ARGS__)
+	#define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock (__VA_ARGS__)
+	#define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer (__VA_ARGS__)
+	#define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( __dbg_text, __dbg_len );
+	#define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( __dbg_text, __dbg_len );
 #else
 	#define __cfaabi_dbg_write(...)               ((void)0)
Index: libcfa/src/bits/defs.hfa
===================================================================
--- libcfa/src/bits/defs.hfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/bits/defs.hfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -47,2 +47,8 @@
 #define OPTIONAL_THREAD __attribute__((weak))
 #endif
+
+static inline long long rdtscl(void) {
+    unsigned int lo, hi;
+    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+    return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
+}
Index: libcfa/src/concurrency/alarm.cfa
===================================================================
--- libcfa/src/concurrency/alarm.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/alarm.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Fri Jun 2 11:31:25 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri May 25 06:25:47 2018
-// Update Count     : 67
+// Last Modified On : Tue Dec  3 22:47:24 2019
+// Update Count     : 68
 //
 
@@ -40,5 +40,5 @@
 void __kernel_set_timer( Duration alarm ) {
 	verifyf(alarm >= 1`us || alarm == 0, "Setting timer to < 1us (%jins)", alarm.tv);
-	setitimer( ITIMER_REAL, &(itimerval){ alarm }, NULL );
+	setitimer( ITIMER_REAL, &(itimerval){ alarm }, 0p );
 }
 
@@ -113,5 +113,5 @@
 			this->tail = &this->head;
 		}
-		head->next = NULL;
+		head->next = 0p;
 	}
 	verify( validate( this ) );
@@ -127,5 +127,5 @@
 		this->tail = it;
 	}
-	n->next = NULL;
+	n->next = 0p;
 
 	verify( validate( this ) );
Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/coroutine.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Mon Nov 28 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Mar 30 17:20:57 2018
-// Update Count     : 9
+// Last Modified On : Thu Dec  5 14:37:29 2019
+// Update Count     : 15
 //
 
@@ -90,11 +90,11 @@
 
 void ?{}( coroutine_desc & this, const char * name, void * storage, size_t storageSize ) with( this ) {
-	(this.context){NULL, NULL};
+	(this.context){0p, 0p};
 	(this.stack){storage, storageSize};
 	this.name = name;
 	state = Start;
-	starter = NULL;
-	last = NULL;
-	cancellation = NULL;
+	starter = 0p;
+	last = 0p;
+	cancellation = 0p;
 }
 
@@ -131,5 +131,5 @@
 
 [void *, size_t] __stack_alloc( size_t storageSize ) {
-	static const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
+	const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
 	assert(__page_size != 0l);
 	size_t size = libCeiling( storageSize, 16 ) + stack_data_size;
@@ -157,5 +157,5 @@
 
 void __stack_prepare( __stack_info_t * this, size_t create_size ) {
-	static const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
+	const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
 	bool userStack;
 	void * storage;
Index: libcfa/src/concurrency/coroutine.hfa
===================================================================
--- libcfa/src/concurrency/coroutine.hfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/coroutine.hfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Mon Nov 28 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Jun 21 17:49:39 2019
-// Update Count     : 9
+// Last Modified On : Tue Dec  3 22:47:58 2019
+// Update Count     : 10
 //
 
@@ -38,9 +38,9 @@
 void ^?{}( coroutine_desc & this );
 
-static inline void ?{}( coroutine_desc & this)                                       { this{ "Anonymous Coroutine", NULL, 0 }; }
-static inline void ?{}( coroutine_desc & this, size_t stackSize)                     { this{ "Anonymous Coroutine", NULL, stackSize }; }
+static inline void ?{}( coroutine_desc & this)                                       { this{ "Anonymous Coroutine", 0p, 0 }; }
+static inline void ?{}( coroutine_desc & this, size_t stackSize)                     { this{ "Anonymous Coroutine", 0p, stackSize }; }
 static inline void ?{}( coroutine_desc & this, void * storage, size_t storageSize )  { this{ "Anonymous Coroutine", storage, storageSize }; }
-static inline void ?{}( coroutine_desc & this, const char * name)                    { this{ name, NULL, 0 }; }
-static inline void ?{}( coroutine_desc & this, const char * name, size_t stackSize ) { this{ name, NULL, stackSize }; }
+static inline void ?{}( coroutine_desc & this, const char * name)                    { this{ name, 0p, 0 }; }
+static inline void ?{}( coroutine_desc & this, const char * name, size_t stackSize ) { this{ name, 0p, stackSize }; }
 
 //-----------------------------------------------------------------------------
@@ -89,5 +89,5 @@
 	src->state = Active;
 
-	if( unlikely(src->cancellation != NULL) ) {
+	if( unlikely(src->cancellation != 0p) ) {
 		_CtxCoroutine_Unwind(src->cancellation, src);
 	}
@@ -128,5 +128,5 @@
 	coroutine_desc * dst = get_coroutine(cor);
 
-	if( unlikely(dst->context.SP == NULL) ) {
+	if( unlikely(dst->context.SP == 0p) ) {
 		__stack_prepare(&dst->stack, 65000);
 		CtxStart(&cor, CtxInvokeCoroutine);
Index: libcfa/src/concurrency/invoke.h
===================================================================
--- libcfa/src/concurrency/invoke.h	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/invoke.h	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Jun 22 18:19:13 2019
-// Update Count     : 40
+// Last Modified On : Thu Dec  5 16:26:03 2019
+// Update Count     : 44
 //
 
@@ -46,5 +46,5 @@
 	#ifdef __cforall
 	extern "Cforall" {
-		extern thread_local struct KernelThreadData {
+		extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
 			struct thread_desc    * volatile this_thread;
 			struct processor      * volatile this_processor;
@@ -55,4 +55,6 @@
 				volatile bool in_progress;
 			} preemption_state;
+
+			uint32_t rand_seed;
 		} kernelTLS __attribute__ ((tls_model ( "initial-exec" )));
 	}
@@ -205,5 +207,5 @@
 
 		static inline void ?{}(__monitor_group_t & this) {
-			(this.data){NULL};
+			(this.data){0p};
 			(this.size){0};
 			(this.func){NULL};
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/kernel.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Jun 20 17:21:23 2019
-// Update Count     : 25
+// Last Modified On : Thu Dec  5 16:25:52 2019
+// Update Count     : 52
 //
 
@@ -26,4 +26,6 @@
 #include <signal.h>
 #include <unistd.h>
+#include <limits.h>										// PTHREAD_STACK_MIN
+#include <sys/mman.h>									// mprotect
 }
 
@@ -40,5 +42,5 @@
 //-----------------------------------------------------------------------------
 // Some assembly required
-#if   defined( __i386 )
+#if defined( __i386 )
 	#define CtxGet( ctx )        \
 		__asm__ volatile (     \
@@ -123,5 +125,5 @@
 
 extern "C" {
-struct { __dllist_t(cluster) list; __spinlock_t lock; } __cfa_dbg_global_clusters;
+	struct { __dllist_t(cluster) list; __spinlock_t lock; } __cfa_dbg_global_clusters;
 }
 
@@ -131,7 +133,8 @@
 // Global state
 thread_local struct KernelThreadData kernelTLS __attribute__ ((tls_model ( "initial-exec" ))) = {
+	NULL,												// cannot use 0p
 	NULL,
-	NULL,
-	{ 1, false, false }
+	{ 1, false, false },
+	6u //this should be seeded better but due to a bug calling rdtsc doesn't work
 };
 
@@ -139,8 +142,8 @@
 // Struct to steal stack
 struct current_stack_info_t {
-	__stack_t * storage;		// pointer to stack object
-	void *base;				// base of stack
-	void *limit;			// stack grows towards stack limit
-	void *context;			// address of cfa_context_t
+	__stack_t * storage;								// pointer to stack object
+	void * base;										// base of stack
+	void * limit;										// stack grows towards stack limit
+	void * context;										// address of cfa_context_t
 };
 
@@ -171,7 +174,7 @@
 	name = "Main Thread";
 	state = Start;
-	starter = NULL;
-	last = NULL;
-	cancellation = NULL;
+	starter = 0p;
+	last = 0p;
+	cancellation = 0p;
 }
 
@@ -184,8 +187,8 @@
 	self_mon.recursion = 1;
 	self_mon_p = &self_mon;
-	next = NULL;
-
-	node.next = NULL;
-	node.prev = NULL;
+	next = 0p;
+
+	node.next = 0p;
+	node.prev = 0p;
 	doregister(curr_cluster, this);
 
@@ -211,5 +214,5 @@
 	terminated{ 0 };
 	do_terminate = false;
-	preemption_alarm = NULL;
+	preemption_alarm = 0p;
 	pending_preemption = false;
 	runner.proc = &this;
@@ -231,5 +234,6 @@
 	}
 
-	pthread_join( kernel_thread, NULL );
+	pthread_join( kernel_thread, 0p );
+	free( this.stack );
 }
 
@@ -260,4 +264,8 @@
 //Main of the processor contexts
 void main(processorCtx_t & runner) {
+	// Because of a bug, we could not initialize the seed during construction
+	// Do it here
+	kernelTLS.rand_seed ^= rdtscl();
+
 	processor * this = runner.proc;
 	verify(this);
@@ -273,11 +281,9 @@
 		__cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
 
-		thread_desc * readyThread = NULL;
-		for( unsigned int spin_count = 0; ! __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST); spin_count++ )
-		{
+		thread_desc * readyThread = 0p;
+		for( unsigned int spin_count = 0; ! __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST); spin_count++ ) {
 			readyThread = nextThread( this->cltr );
 
-			if(readyThread)
-			{
+			if(readyThread) {
 				verify( ! kernelTLS.preemption_state.enabled );
 
@@ -290,7 +296,5 @@
 
 				spin_count = 0;
-			}
-			else
-			{
+			} else {
 				// spin(this, &spin_count);
 				halt(this);
@@ -405,5 +409,5 @@
 	processor * proc = (processor *) arg;
 	kernelTLS.this_processor = proc;
-	kernelTLS.this_thread    = NULL;
+	kernelTLS.this_thread    = 0p;
 	kernelTLS.preemption_state.[enabled, disable_count] = [false, 1];
 	// SKULLDUGGERY: We want to create a context for the processor coroutine
@@ -418,5 +422,5 @@
 
 	//Set global state
-	kernelTLS.this_thread    = NULL;
+	kernelTLS.this_thread = 0p;
 
 	//We now have a proper context from which to schedule threads
@@ -434,5 +438,39 @@
 	__cfaabi_dbg_print_safe("Kernel : core %p main ended (%p)\n", proc, &proc->runner);
 
-	return NULL;
+	return 0p;
+}
+
+static void Abort( int ret, const char * func ) {
+	if ( ret ) {										// pthread routines return errno values
+		abort( "%s : internal error, error(%d) %s.", func, ret, strerror( ret ) );
+	} // if
+} // Abort
+
+void * create_pthread( pthread_t * pthread, void * (*start)(void *), void * arg ) {
+	pthread_attr_t attr;
+
+	Abort( pthread_attr_init( &attr ), "pthread_attr_init" ); // initialize attribute
+
+	size_t stacksize;
+	// default stack size, normally defined by shell limit
+	Abort( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
+	assert( stacksize >= PTHREAD_STACK_MIN );
+
+	void * stack;
+	__cfaabi_dbg_debug_do(
+		stack = memalign( __page_size, stacksize + __page_size );
+		// pthread has no mechanism to create the guard page in user supplied stack.
+		if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
+			abort( "mprotect : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
+		} // if
+	);
+	__cfaabi_dbg_no_debug_do(
+		stack = malloc( stacksize );
+	);
+
+	Abort( pthread_attr_setstack( &attr, stack, stacksize ), "pthread_attr_setstack" );
+
+	Abort( pthread_create( pthread, &attr, start, arg ), "pthread_create" );
+	return stack;
 }
 
@@ -440,5 +478,5 @@
 	__cfaabi_dbg_print_safe("Kernel : Starting core %p\n", this);
 
-	pthread_create( &this->kernel_thread, NULL, CtxInvokeProcessor, (void*)this );
+	this->stack = create_pthread( &this->kernel_thread, CtxInvokeProcessor, (void *)this );
 
 	__cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
@@ -497,5 +535,5 @@
 	verify( ! kernelTLS.preemption_state.enabled );
 
-	verifyf( thrd->next == NULL, "Expected null got %p", thrd->next );
+	verifyf( thrd->next == 0p, "Expected null got %p", thrd->next );
 
 	with( *thrd->curr_cluster ) {
@@ -676,5 +714,5 @@
 	void ?{}(processorCtx_t & this, processor * proc) {
 		(this.__cor){ "Processor" };
-		this.__cor.starter = NULL;
+		this.__cor.starter = 0p;
 		this.proc = proc;
 	}
@@ -685,5 +723,5 @@
 		terminated{ 0 };
 		do_terminate = false;
-		preemption_alarm = NULL;
+		preemption_alarm = 0p;
 		pending_preemption = false;
 		kernel_thread = pthread_self();
@@ -819,17 +857,17 @@
 	if(thrd) {
 		int len = snprintf( abort_text, abort_text_size, "Error occurred while executing thread %.256s (%p)", thrd->self_cor.name, thrd );
-		__cfaabi_dbg_bits_write( abort_text, len );
+		__cfaabi_bits_write( STDERR_FILENO, abort_text, len );
 
 		if ( &thrd->self_cor != thrd->curr_cor ) {
 			len = snprintf( abort_text, abort_text_size, " in coroutine %.256s (%p).\n", thrd->curr_cor->name, thrd->curr_cor );
-			__cfaabi_dbg_bits_write( abort_text, len );
+			__cfaabi_bits_write( STDERR_FILENO, abort_text, len );
 		}
 		else {
-			__cfaabi_dbg_bits_write( ".\n", 2 );
+			__cfaabi_bits_write( STDERR_FILENO, ".\n", 2 );
 		}
 	}
 	else {
 		int len = snprintf( abort_text, abort_text_size, "Error occurred outside of any thread.\n" );
-		__cfaabi_dbg_bits_write( abort_text, len );
+		__cfaabi_bits_write( STDERR_FILENO, abort_text, len );
 	}
 }
@@ -842,9 +880,9 @@
 
 extern "C" {
-	void __cfaabi_dbg_bits_acquire() {
+	void __cfaabi_bits_acquire() {
 		lock( kernel_debug_lock __cfaabi_dbg_ctx2 );
 	}
 
-	void __cfaabi_dbg_bits_release() {
+	void __cfaabi_bits_release() {
 		unlock( kernel_debug_lock );
 	}
@@ -879,5 +917,5 @@
 
 void V(semaphore & this) with( this ) {
-	thread_desc * thrd = NULL;
+	thread_desc * thrd = 0p;
 	lock( lock __cfaabi_dbg_ctx2 );
 	count += 1;
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/kernel.hfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Jun 22 11:39:17 2019
-// Update Count     : 16
+// Last Modified On : Wed Dec  4 07:54:51 2019
+// Update Count     : 18
 //
 
@@ -20,4 +20,5 @@
 #include "invoke.h"
 #include "time_t.hfa"
+#include "coroutine.hfa"
 
 extern "C" {
@@ -88,6 +89,6 @@
 static inline void ?{}(FinishAction & this) {
 	this.action_code = No_Action;
-	this.thrd = NULL;
-	this.lock = NULL;
+	this.thrd = 0p;
+	this.lock = 0p;
 }
 static inline void ^?{}(FinishAction &) {}
@@ -134,4 +135,7 @@
 	semaphore terminated;
 
+	// pthread Stack
+	void * stack;
+
 	// Link lists fields
 	struct __dbg_node_proc {
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Mon Feb 13 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Mar 29 14:06:40 2018
-// Update Count     : 3
+// Last Modified On : Sat Nov 30 19:25:02 2019
+// Update Count     : 8
 //
 
@@ -57,4 +57,6 @@
 void main(processorCtx_t *);
 
+void * create_pthread( pthread_t *, void * (*)(void *), void * );
+
 static inline void wake_fast(processor * this) {
 	__cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
@@ -101,4 +103,11 @@
 #define KERNEL_STORAGE(T,X) static char storage_##X[sizeof(T)]
 
+static inline uint32_t tls_rand() {
+	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
+	kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
+	kernelTLS.rand_seed ^= kernelTLS.rand_seed << 7;
+	return kernelTLS.rand_seed;
+}
+
 
 void doregister( struct cluster & cltr );
Index: libcfa/src/concurrency/monitor.cfa
===================================================================
--- libcfa/src/concurrency/monitor.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/monitor.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Thd Feb 23 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Mar 30 14:30:26 2018
-// Update Count     : 9
+// Last Modified On : Wed Dec  4 07:55:14 2019
+// Update Count     : 10
 //
 
@@ -363,5 +363,5 @@
 	this.waiting_thread = waiting_thread;
 	this.count = count;
-	this.next = NULL;
+	this.next = 0p;
 	this.user_info = user_info;
 }
@@ -369,7 +369,7 @@
 void ?{}(__condition_criterion_t & this ) with( this ) {
 	ready  = false;
-	target = NULL;
-	owner  = NULL;
-	next   = NULL;
+	target = 0p;
+	owner  = 0p;
+	next   = 0p;
 }
 
@@ -378,5 +378,5 @@
 	this.target = target;
 	this.owner  = &owner;
-	this.next   = NULL;
+	this.next   = 0p;
 }
 
@@ -387,5 +387,5 @@
 
 	// Check that everything is as expected
-	assertf( this.monitors != NULL, "Waiting with no monitors (%p)", this.monitors );
+	assertf( this.monitors != 0p, "Waiting with no monitors (%p)", this.monitors );
 	verifyf( this.monitor_count != 0, "Waiting with 0 monitors (%"PRIiFAST16")", this.monitor_count );
 	verifyf( this.monitor_count < 32u, "Excessive monitor count (%"PRIiFAST16")", this.monitor_count );
@@ -449,5 +449,5 @@
 
 	// Lock all monitors
-	lock_all( this.monitors, NULL, count );
+	lock_all( this.monitors, 0p, count );
 
 	//Pop the head of the waiting queue
@@ -471,5 +471,5 @@
 
 	//Check that everything is as expected
-	verifyf( this.monitors != NULL, "Waiting with no monitors (%p)", this.monitors );
+	verifyf( this.monitors != 0p, "Waiting with no monitors (%p)", this.monitors );
 	verifyf( this.monitor_count != 0, "Waiting with 0 monitors (%"PRIiFAST16")", this.monitor_count );
 
@@ -674,6 +674,6 @@
 
 static inline void reset_mask( monitor_desc * this ) {
-	this->mask.accepted = NULL;
-	this->mask.data = NULL;
+	this->mask.accepted = 0p;
+	this->mask.data = 0p;
 	this->mask.size = 0;
 }
@@ -816,6 +816,6 @@
 	}
 
-	__cfaabi_dbg_print_safe( "Kernel :  Runing %i (%p)\n", ready2run, ready2run ? node->waiting_thread : NULL );
-	return ready2run ? node->waiting_thread : NULL;
+	__cfaabi_dbg_print_safe( "Kernel :  Running %i (%p)\n", ready2run, ready2run ? node->waiting_thread : 0p );
+	return ready2run ? node->waiting_thread : 0p;
 }
 
@@ -824,5 +824,5 @@
 	if( !this.monitors ) {
 		// __cfaabi_dbg_print_safe( "Branding\n" );
-		assertf( thrd->monitors.data != NULL, "No current monitor to brand condition %p", thrd->monitors.data );
+		assertf( thrd->monitors.data != 0p, "No current monitor to brand condition %p", thrd->monitors.data );
 		this.monitor_count = thrd->monitors.size;
 
Index: libcfa/src/concurrency/monitor.hfa
===================================================================
--- libcfa/src/concurrency/monitor.hfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/monitor.hfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Thd Feb 23 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Oct  7 18:06:45 2017
-// Update Count     : 10
+// Last Modified On : Wed Dec  4 07:55:32 2019
+// Update Count     : 11
 //
 
@@ -31,11 +31,13 @@
 	entry_queue{};
 	signal_stack{};
-	owner         = NULL;
+	owner         = 0p;
 	recursion     = 0;
-	mask.accepted = NULL;
-	mask.data     = NULL;
+	mask.accepted = 0p;
+	mask.data     = 0p;
 	mask.size     = 0;
-	dtor_node     = NULL;
+	dtor_node     = 0p;
 }
+
+static inline void ^?{}(monitor_desc & ) {}
 
 struct monitor_guard_t {
@@ -120,5 +122,5 @@
 
 static inline void ?{}( condition & this ) {
-	this.monitors = NULL;
+	this.monitors = 0p;
 	this.monitor_count = 0;
 }
Index: libcfa/src/concurrency/mutex.cfa
===================================================================
--- libcfa/src/concurrency/mutex.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/mutex.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -11,7 +11,7 @@
 // Author           : Thierry Delisle
 // Created On       : Fri May 25 01:37:11 2018
-// Last Modified By : Thierry Delisle
-// Last Modified On : Fri May 25 01:37:51 2018
-// Update Count     : 0
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Wed Dec  4 09:16:39 2019
+// Update Count     : 1
 //
 
@@ -73,5 +73,5 @@
 	this.lock{};
 	this.blocked_threads{};
-	this.owner = NULL;
+	this.owner = 0p;
 	this.recursion_count = 0;
 }
@@ -83,5 +83,5 @@
 void lock(recursive_mutex_lock & this) with(this) {
 	lock( lock __cfaabi_dbg_ctx2 );
-	if( owner == NULL ) {
+	if( owner == 0p ) {
 		owner = kernelTLS.this_thread;
 		recursion_count = 1;
@@ -101,5 +101,5 @@
 	bool ret = false;
 	lock( lock __cfaabi_dbg_ctx2 );
-	if( owner == NULL ) {
+	if( owner == 0p ) {
 		owner = kernelTLS.this_thread;
 		recursion_count = 1;
Index: libcfa/src/concurrency/mutex.hfa
===================================================================
--- libcfa/src/concurrency/mutex.hfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/mutex.hfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -11,7 +11,7 @@
 // Author           : Thierry Delisle
 // Created On       : Fri May 25 01:24:09 2018
-// Last Modified By : Thierry Delisle
-// Last Modified On : Fri May 25 01:24:12 2018
-// Update Count     : 0
+// Last Modified By : Peter A. Buhr
+// Last Modified On : Wed Dec  4 09:16:53 2019
+// Update Count     : 1
 //
 
@@ -110,5 +110,5 @@
 
 	static inline void ?{}(lock_scope(L) & this) {
-		this.locks = NULL;
+		this.locks = 0p;
 		this.count = 0;
 	}
Index: libcfa/src/concurrency/preemption.cfa
===================================================================
--- libcfa/src/concurrency/preemption.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/preemption.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Mon Jun 5 14:20:42 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Jun  5 17:35:49 2018
-// Update Count     : 37
+// Last Modified On : Thu Dec  5 16:34:05 2019
+// Update Count     : 43
 //
 
@@ -24,4 +24,5 @@
 #include <string.h>
 #include <unistd.h>
+#include <limits.h>										// PTHREAD_STACK_MIN
 }
 
@@ -64,4 +65,5 @@
 event_kernel_t * event_kernel;                        // kernel public handle to even kernel
 static pthread_t alarm_thread;                        // pthread handle to alarm thread
+static void * alarm_stack;							  // pthread stack for alarm thread
 
 static void ?{}(event_kernel_t & this) with( this ) {
@@ -81,14 +83,14 @@
 // Get next expired node
 static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) {
-	if( !alarms->head ) return NULL;                          // If no alarms return null
-	if( alarms->head->alarm >= currtime ) return NULL;        // If alarms head not expired return null
-	return pop(alarms);                                       // Otherwise just pop head
+	if( !alarms->head ) return 0p;						// If no alarms return null
+	if( alarms->head->alarm >= currtime ) return 0p;	// If alarms head not expired return null
+	return pop(alarms);									// Otherwise just pop head
 }
 
 // Tick one frame of the Discrete Event Simulation for alarms
 static void tick_preemption() {
-	alarm_node_t * node = NULL;                     // Used in the while loop but cannot be declared in the while condition
-	alarm_list_t * alarms = &event_kernel->alarms;  // Local copy for ease of reading
-	Time currtime = __kernel_get_time();			// Check current time once so we everything "happens at once"
+	alarm_node_t * node = 0p;							// Used in the while loop but cannot be declared in the while condition
+	alarm_list_t * alarms = &event_kernel->alarms;		// Local copy for ease of reading
+	Time currtime = __kernel_get_time();				// Check current time once so everything "happens at once"
 
 	//Loop throught every thing expired
@@ -243,5 +245,5 @@
 	sigaddset( &mask, sig );
 
-	if ( pthread_sigmask( SIG_UNBLOCK, &mask, NULL ) == -1 ) {
+	if ( pthread_sigmask( SIG_UNBLOCK, &mask, 0p ) == -1 ) {
 	    abort( "internal error, pthread_sigmask" );
 	}
@@ -254,5 +256,5 @@
 	sigaddset( &mask, sig );
 
-	if ( pthread_sigmask( SIG_BLOCK, &mask, NULL ) == -1 ) {
+	if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
 	    abort( "internal error, pthread_sigmask" );
 	}
@@ -301,9 +303,9 @@
 
 	// Setup proper signal handlers
-	__cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART );         // CtxSwitch handler
+	__cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // CtxSwitch handler
 
 	signal_block( SIGALRM );
 
-	pthread_create( &alarm_thread, NULL, alarm_loop, NULL );
+	alarm_stack = create_pthread( &alarm_thread, alarm_loop, 0p );
 }
 
@@ -316,5 +318,5 @@
 	sigset_t mask;
 	sigfillset( &mask );
-	sigprocmask( SIG_BLOCK, &mask, NULL );
+	sigprocmask( SIG_BLOCK, &mask, 0p );
 
 	// Notify the alarm thread of the shutdown
@@ -323,5 +325,7 @@
 
 	// Wait for the preemption thread to finish
-	pthread_join( alarm_thread, NULL );
+
+	pthread_join( alarm_thread, 0p );
+	free( alarm_stack );
 
 	// Preemption is now fully stopped
@@ -380,5 +384,5 @@
 	static_assert( sizeof( sigset_t ) == sizeof( cxt->uc_sigmask ), "Expected cxt->uc_sigmask to be of sigset_t" );
 	#endif
-	if ( pthread_sigmask( SIG_SETMASK, (sigset_t *)&(cxt->uc_sigmask), NULL ) == -1 ) {
+	if ( pthread_sigmask( SIG_SETMASK, (sigset_t *)&(cxt->uc_sigmask), 0p ) == -1 ) {
 		abort( "internal error, sigprocmask" );
 	}
@@ -399,5 +403,5 @@
 	sigset_t mask;
 	sigfillset(&mask);
-	if ( pthread_sigmask( SIG_BLOCK, &mask, NULL ) == -1 ) {
+	if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
 	    abort( "internal error, pthread_sigmask" );
 	}
@@ -420,5 +424,5 @@
 					{__cfaabi_dbg_print_buffer_decl( " KERNEL: Spurious wakeup %d.\n", err );}
 					continue;
-       			case EINVAL :
+				case EINVAL :
 				 	abort( "Timeout was invalid." );
 				default:
@@ -453,5 +457,5 @@
 EXIT:
 	__cfaabi_dbg_print_safe( "Kernel : Preemption thread stopping\n" );
-	return NULL;
+	return 0p;
 }
 
@@ -466,5 +470,5 @@
 	sigset_t oldset;
 	int ret;
-	ret = pthread_sigmask(0, NULL, &oldset);
+	ret = pthread_sigmask(0, 0p, &oldset);
 	if(ret != 0) { abort("ERROR sigprocmask returned %d", ret); }
 
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/thread.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Mar 30 17:19:52 2018
-// Update Count     : 8
+// Last Modified On : Wed Dec  4 09:17:49 2019
+// Update Count     : 9
 //
 
@@ -33,5 +33,5 @@
 // Thread ctors and dtors
 void ?{}(thread_desc & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
-	context{ NULL, NULL };
+	context{ 0p, 0p };
 	self_cor{ name, storage, storageSize };
 	state = Start;
@@ -41,8 +41,8 @@
 	self_mon_p = &self_mon;
 	curr_cluster = &cl;
-	next = NULL;
+	next = 0p;
 
-	node.next = NULL;
-	node.prev = NULL;
+	node.next = 0p;
+	node.prev = 0p;
 	doregister(curr_cluster, this);
 
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/concurrency/thread.hfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Jun 21 17:51:33 2019
-// Update Count     : 5
+// Last Modified On : Wed Dec  4 09:18:14 2019
+// Update Count     : 6
 //
 
@@ -61,13 +61,13 @@
 void ^?{}(thread_desc & this);
 
-static inline void ?{}(thread_desc & this)                                                                  { this{ "Anonymous Thread", *mainCluster, NULL, 65000 }; }
-static inline void ?{}(thread_desc & this, size_t stackSize )                                               { this{ "Anonymous Thread", *mainCluster, NULL, stackSize }; }
+static inline void ?{}(thread_desc & this)                                                                  { this{ "Anonymous Thread", *mainCluster, 0p, 65000 }; }
+static inline void ?{}(thread_desc & this, size_t stackSize )                                               { this{ "Anonymous Thread", *mainCluster, 0p, stackSize }; }
 static inline void ?{}(thread_desc & this, void * storage, size_t storageSize )                             { this{ "Anonymous Thread", *mainCluster, storage, storageSize }; }
-static inline void ?{}(thread_desc & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, NULL, 65000 }; }
-static inline void ?{}(thread_desc & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, NULL, stackSize }; }
+static inline void ?{}(thread_desc & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, 0p, 65000 }; }
+static inline void ?{}(thread_desc & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, 0p, stackSize }; }
 static inline void ?{}(thread_desc & this, struct cluster & cl, void * storage, size_t storageSize )        { this{ "Anonymous Thread", cl, storage, storageSize }; }
-static inline void ?{}(thread_desc & this, const char * const name)                                         { this{ name, *mainCluster, NULL, 65000 }; }
-static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl )                   { this{ name, cl, NULL, 65000 }; }
-static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, NULL, stackSize }; }
+static inline void ?{}(thread_desc & this, const char * const name)                                         { this{ name, *mainCluster, 0p, 65000 }; }
+static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl )                   { this{ name, cl, 0p, 65000 }; }
+static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, 0p, stackSize }; }
 
 //-----------------------------------------------------------------------------
Index: libcfa/src/fstream.cfa
===================================================================
--- libcfa/src/fstream.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/fstream.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Wed May 27 17:56:53 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Sep 10 22:19:56 2019
-// Update Count     : 354
+// Last Modified On : Fri Nov 29 06:56:46 2019
+// Update Count     : 355
 //
 
@@ -66,4 +66,8 @@
 } // ?{}
 
+void ^?{}( ofstream & os ) {
+	close( os );
+} // ^?{}
+
 void sepOn( ofstream & os ) { os.sepOnOff = ! getNL( os ); }
 void sepOff( ofstream & os ) { os.sepOnOff = false; }
@@ -195,4 +199,8 @@
 } // ?{}
 
+void ^?{}( ifstream & is ) {
+	close( is );
+} // ^?{}
+
 void nlOn( ifstream & os ) { os.nlOnOff = true; }
 void nlOff( ifstream & os ) { os.nlOnOff = false; }
Index: libcfa/src/fstream.hfa
===================================================================
--- libcfa/src/fstream.hfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/fstream.hfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Wed May 27 17:56:53 2015
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Jul 15 18:10:23 2019
-// Update Count     : 167
+// Last Modified On : Fri Nov 29 06:56:02 2019
+// Update Count     : 168
 //
 
@@ -72,4 +72,5 @@
 void ?{}( ofstream & os, const char * name, const char * mode );
 void ?{}( ofstream & os, const char * name );
+void ^?{}( ofstream & os );
 
 extern ofstream & sout, & stdout, & serr, & stderr;		// aliases
@@ -101,4 +102,5 @@
 void ?{}( ifstream & is, const char * name, const char * mode );
 void ?{}( ifstream & is, const char * name );
+void ^?{}( ifstream & is );
 
 extern ifstream & sin, & stdin;							// aliases
Index: libcfa/src/heap.cfa
===================================================================
--- libcfa/src/heap.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/heap.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Tue Dec 19 21:58:35 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Oct 18 07:42:09 2019
-// Update Count     : 556
+// Last Modified On : Wed Dec  4 21:42:46 2019
+// Update Count     : 646
 //
 
@@ -18,4 +18,5 @@
 #include <stdio.h>										// snprintf, fileno
 #include <errno.h>										// errno
+#include <string.h>										// memset, memcpy
 extern "C" {
 #include <sys/mman.h>									// mmap, munmap
@@ -27,13 +28,12 @@
 #include "bits/locks.hfa"								// __spinlock_t
 #include "startup.hfa"									// STARTUP_PRIORITY_MEMORY
-#include "stdlib.hfa"									// bsearchl
+//#include "stdlib.hfa"									// bsearchl
 #include "malloc.h"
 
+#define MIN(x, y) (y > x ? x : y)
 
 static bool traceHeap = false;
 
-inline bool traceHeap() {
-	return traceHeap;
-} // traceHeap
+inline bool traceHeap() { return traceHeap; }
 
 bool traceHeapOn() {
@@ -49,46 +49,34 @@
 } // traceHeapOff
 
-
-static bool checkFree = false;
-
-inline bool checkFree() {
-	return checkFree;
-} // checkFree
-
-bool checkFreeOn() {
-	bool temp = checkFree;
-	checkFree = true;
+bool traceHeapTerm() { return false; }
+
+
+static bool prtFree = false;
+
+inline bool prtFree() {
+	return prtFree;
+} // prtFree
+
+bool prtFreeOn() {
+	bool temp = prtFree;
+	prtFree = true;
 	return temp;
-} // checkFreeOn
-
-bool checkFreeOff() {
-	bool temp = checkFree;
-	checkFree = false;
+} // prtFreeOn
+
+bool prtFreeOff() {
+	bool temp = prtFree;
+	prtFree = false;
 	return temp;
-} // checkFreeOff
-
-
-// static bool traceHeapTerm = false;
-
-// inline bool traceHeapTerm() {
-// 	return traceHeapTerm;
-// } // traceHeapTerm
-
-// bool traceHeapTermOn() {
-// 	bool temp = traceHeapTerm;
-// 	traceHeapTerm = true;
-// 	return temp;
-// } // traceHeapTermOn
-
-// bool traceHeapTermOff() {
-// 	bool temp = traceHeapTerm;
-// 	traceHeapTerm = false;
-// 	return temp;
-// } // traceHeapTermOff
+} // prtFreeOff
 
 
 enum {
+	// Define the default extension heap amount in units of bytes. When the uC++ supplied heap reaches the brk address,
+	// the brk address is extended by the extension amount.
+	__CFA_DEFAULT_HEAP_EXPANSION__ = (1 * 1024 * 1024),
+
+	// Define the mmap crossover point during allocation. Allocations less than this amount are allocated from buckets;
+	// values greater than or equal to this value are mmap from the operating system.
 	__CFA_DEFAULT_MMAP_START__ = (512 * 1024 + 1),
-	__CFA_DEFAULT_HEAP_EXPANSION__ = (1 * 1024 * 1024),
 };
 
@@ -105,14 +93,14 @@
 static unsigned int allocFree;							// running total of allocations minus frees
 
-static void checkUnfreed() {
+static void prtUnfreed() {
 	if ( allocFree != 0 ) {
 		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
-		// char helpText[512];
-		// int len = snprintf( helpText, sizeof(helpText), "CFA warning (UNIX pid:%ld) : program terminating with %u(0x%x) bytes of storage allocated but not freed.\n"
-		// 					"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
-		// 					(long int)getpid(), allocFree, allocFree ); // always print the UNIX pid
-		// __cfaabi_dbg_bits_write( helpText, len );
-	} // if
-} // checkUnfreed
+		char helpText[512];
+		int len = snprintf( helpText, sizeof(helpText), "CFA warning (UNIX pid:%ld) : program terminating with %u(0x%x) bytes of storage allocated but not freed.\n"
+							"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
+							(long int)getpid(), allocFree, allocFree ); // always print the UNIX pid
+		__cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
+	} // if
+} // prtUnfreed
 
 extern "C" {
@@ -123,8 +111,9 @@
 	void heapAppStop() {								// called by __cfaabi_appready_startdown
 		fclose( stdin ); fclose( stdout );
-		checkUnfreed();
+		prtUnfreed();
 	} // heapAppStop
 } // extern "C"
 #endif // __CFA_DEBUG__
+
 
 // statically allocated variables => zero filled.
@@ -134,7 +123,4 @@
 static unsigned int maxBucketsUsed;						// maximum number of buckets in use
 
-
-// #comment TD : This defined is significantly different from the __ALIGN__ define from locks.hfa
-#define ALIGN 16
 
 #define SPINLOCK 0
@@ -147,5 +133,5 @@
 // Recursive definitions: HeapManager needs size of bucket array and bucket area needs sizeof HeapManager storage.
 // Break recusion by hardcoding number of buckets and statically checking number is correct after bucket array defined.
-enum { NoBucketSizes = 93 };							// number of buckets sizes
+enum { NoBucketSizes = 91 };							// number of buckets sizes
 
 struct HeapManager {
@@ -194,9 +180,9 @@
 			} kind; // Kind
 		} header; // Header
-		char pad[ALIGN - sizeof( Header )];
+		char pad[libAlign() - sizeof( Header )];
 		char data[0];									// storage
 	}; // Storage
 
-	static_assert( ALIGN >= sizeof( Storage ), "ALIGN < sizeof( Storage )" );
+	static_assert( libAlign() >= sizeof( Storage ), "libAlign() < sizeof( Storage )" );
 
 	struct FreeHeader {
@@ -228,19 +214,24 @@
 #define __STATISTICS__
 
+// Bucket size must be multiple of 16.
 // Powers of 2 are common allocation sizes, so make powers of 2 generate the minimum required size.
 static const unsigned int bucketSizes[] @= {			// different bucket sizes
-	16, 32, 48, 64,
-	64 + sizeof(HeapManager.Storage), 96, 112, 128, 128 + sizeof(HeapManager.Storage), 160, 192, 224,
-	256 + sizeof(HeapManager.Storage), 320, 384, 448, 512 + sizeof(HeapManager.Storage), 640, 768, 896,
-	1_024 + sizeof(HeapManager.Storage), 1_536, 2_048 + sizeof(HeapManager.Storage), 2_560, 3_072, 3_584, 4_096 + sizeof(HeapManager.Storage), 6_144,
-	8_192 + sizeof(HeapManager.Storage), 9_216, 10_240, 11_264, 12_288, 13_312, 14_336, 15_360,
-	16_384 + sizeof(HeapManager.Storage), 18_432, 20_480, 22_528, 24_576, 26_624, 28_672, 30_720,
-	32_768 + sizeof(HeapManager.Storage), 36_864, 40_960, 45_056, 49_152, 53_248, 57_344, 61_440,
-	65_536 + sizeof(HeapManager.Storage), 73_728, 81_920, 90_112, 98_304, 106_496, 114_688, 122_880,
-	131_072 + sizeof(HeapManager.Storage), 147_456, 163_840, 180_224, 196_608, 212_992, 229_376, 245_760,
-	262_144 + sizeof(HeapManager.Storage), 294_912, 327_680, 360_448, 393_216, 425_984, 458_752, 491_520,
-	524_288 + sizeof(HeapManager.Storage), 655_360, 786_432, 917_504, 1_048_576 + sizeof(HeapManager.Storage), 1_179_648, 1_310_720, 1_441_792,
-	1_572_864, 1_703_936, 1_835_008, 1_966_080, 2_097_152 + sizeof(HeapManager.Storage), 2_621_440, 3_145_728, 3_670_016,
-	4_194_304 + sizeof(HeapManager.Storage)
+	16, 32, 48, 64 + sizeof(HeapManager.Storage), // 4
+	96, 112, 128 + sizeof(HeapManager.Storage), // 3
+	160, 192, 224, 256 + sizeof(HeapManager.Storage), // 4
+	320, 384, 448, 512 + sizeof(HeapManager.Storage), // 4
+	640, 768, 896, 1_024 + sizeof(HeapManager.Storage), // 4
+	1_536, 2_048 + sizeof(HeapManager.Storage), // 2
+	2_560, 3_072, 3_584, 4_096 + sizeof(HeapManager.Storage), // 4
+	6_144, 8_192 + sizeof(HeapManager.Storage), // 2
+	9_216, 10_240, 11_264, 12_288, 13_312, 14_336, 15_360, 16_384 + sizeof(HeapManager.Storage), // 8
+	18_432, 20_480, 22_528, 24_576, 26_624, 28_672, 30_720, 32_768 + sizeof(HeapManager.Storage), // 8
+	36_864, 40_960, 45_056, 49_152, 53_248, 57_344, 61_440, 65_536 + sizeof(HeapManager.Storage), // 8
+	73_728, 81_920, 90_112, 98_304, 106_496, 114_688, 122_880, 131_072 + sizeof(HeapManager.Storage), // 8
+	147_456, 163_840, 180_224, 196_608, 212_992, 229_376, 245_760, 262_144 + sizeof(HeapManager.Storage), // 8
+	294_912, 327_680, 360_448, 393_216, 425_984, 458_752, 491_520, 524_288 + sizeof(HeapManager.Storage), // 8
+	655_360, 786_432, 917_504, 1_048_576 + sizeof(HeapManager.Storage), // 4
+	1_179_648, 1_310_720, 1_441_792, 1_572_864, 1_703_936, 1_835_008, 1_966_080, 2_097_152 + sizeof(HeapManager.Storage), // 8
+	2_621_440, 3_145_728, 3_670_016, 4_194_304 + sizeof(HeapManager.Storage), // 4
 };
 
@@ -251,7 +242,6 @@
 static unsigned char lookup[LookupSizes];				// O(1) lookup for small sizes
 #endif // FASTLOOKUP
+
 static int mmapFd = -1;									// fake or actual fd for anonymous file
-
-
 #ifdef __CFA_DEBUG__
 static bool heapBoot = 0;								// detect recursion during boot
@@ -259,76 +249,8 @@
 static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
 
-// #comment TD : The return type of this function should be commented
-static inline bool setMmapStart( size_t value ) {
-  if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
-	mmapStart = value;									// set global
-
-	// find the closest bucket size less than or equal to the mmapStart size
-	maxBucketsUsed = bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
-	assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
-	assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
-	return false;
-} // setMmapStart
-
-
-static void ?{}( HeapManager & manager ) with ( manager ) {
-	pageSize = sysconf( _SC_PAGESIZE );
-
-	for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
-		freeLists[i].blockSize = bucketSizes[i];
-	} // for
-
-	#ifdef FASTLOOKUP
-	unsigned int idx = 0;
-	for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
-		if ( i > bucketSizes[idx] ) idx += 1;
-		lookup[i] = idx;
-	} // for
-	#endif // FASTLOOKUP
-
-	if ( setMmapStart( default_mmap_start() ) ) {
-		abort( "HeapManager : internal error, mmap start initialization failure." );
-	} // if
-	heapExpand = default_heap_expansion();
-
-	char * End = (char *)sbrk( 0 );
-	sbrk( (char *)libCeiling( (long unsigned int)End, libAlign() ) - End ); // move start of heap to multiple of alignment
-	heapBegin = heapEnd = sbrk( 0 );					// get new start point
-} // HeapManager
-
-
-static void ^?{}( HeapManager & ) {
-	#ifdef __STATISTICS__
-	// if ( traceHeapTerm() ) {
-	// 	printStats();
-	// 	if ( checkfree() ) checkFree( heapManager, true );
-	// } // if
-	#endif // __STATISTICS__
-} // ~HeapManager
-
-
-static void memory_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_MEMORY ) ));
-void memory_startup( void ) {
-	#ifdef __CFA_DEBUG__
-	if ( unlikely( heapBoot ) ) {						// check for recursion during system boot
-		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
-		abort( "boot() : internal error, recursively invoked during system boot." );
-	} // if
-	heapBoot = true;
-	#endif // __CFA_DEBUG__
-
-	//assert( heapManager.heapBegin != 0 );
-	//heapManager{};
-	if ( heapManager.heapBegin == 0 ) heapManager{};
-} // memory_startup
-
-static void memory_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_MEMORY ) ));
-void memory_shutdown( void ) {
-	^heapManager{};
-} // memory_shutdown
-
 
 #ifdef __STATISTICS__
-static unsigned long long int mmap_storage;				// heap statistics counters
+// Heap statistics counters.
+static unsigned long long int mmap_storage;
 static unsigned int mmap_calls;
 static unsigned long long int munmap_storage;
@@ -348,12 +270,11 @@
 static unsigned long long int realloc_storage;
 static unsigned int realloc_calls;
-
-static int statfd;										// statistics file descriptor (changed by malloc_stats_fd)
-
+// Statistics file descriptor (changed by malloc_stats_fd).
+static int statfd = STDERR_FILENO;						// default stderr
 
 // Use "write" because streams may be shutdown when calls are made.
 static void printStats() {
 	char helpText[512];
-	__cfaabi_dbg_bits_print_buffer( helpText, sizeof(helpText),
+	__cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
 									"\nHeap statistics:\n"
 									"  malloc: calls %u / storage %llu\n"
@@ -405,7 +326,9 @@
 						sbrk_calls, sbrk_storage
 		);
-	return write( fileno( stream ), helpText, len );	// -1 => error
+	__cfaabi_bits_write( fileno( stream ), helpText, len );	// ensures all bytes written or exit
+	return len;
 } // printStatsXML
 #endif // __STATISTICS__
+
 
 // #comment TD : Is this the samething as Out-of-Memory?
@@ -418,6 +341,6 @@
 
 static inline void checkAlign( size_t alignment ) {
-	if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) {
-		abort( "Alignment %zu for memory allocation is less than sizeof(void *) and/or not a power of 2.", alignment );
+	if ( alignment < libAlign() || ! libPow2( alignment ) ) {
+		abort( "Alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, libAlign() );
 	} // if
 } // checkAlign
@@ -431,94 +354,5 @@
 
 
-static inline void checkHeader( bool check, const char * name, void * addr ) {
-	if ( unlikely( check ) ) {							// bad address ?
-		abort( "Attempt to %s storage %p with address outside the heap.\n"
-			   "Possible cause is duplicate free on same block or overwriting of memory.",
-			   name, addr );
-	} // if
-} // checkHeader
-
-// #comment TD : function should be commented and/or have a more evocative name
-//               this isn't either a check or a constructor which is what I would expect this function to be
-static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & size, size_t & alignment ) {
-	if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
-		size_t offset = header->kind.fake.offset;
-		alignment = header->kind.fake.alignment & -2;	// remove flag from value
-		#ifdef __CFA_DEBUG__
-		checkAlign( alignment );						// check alignment
-		#endif // __CFA_DEBUG__
-		header = (HeapManager.Storage.Header *)((char *)header - offset);
-	} // if
-} // fakeHeader
-
-// #comment TD : Why is this a define
-#define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
-
-static inline bool headers( const char * name, void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem, size_t & size, size_t & alignment ) with ( heapManager ) {
-	header = headerAddr( addr );
-
-	if ( unlikely( heapEnd < addr ) ) {					// mmapped ?
-		fakeHeader( header, size, alignment );
-		size = header->kind.real.blockSize & -3;		// mmap size
-		return true;
-	} // if
-
-	#ifdef __CFA_DEBUG__
-	checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
-	#endif // __CFA_DEBUG__
-
-	// #comment TD : This code looks weird...
-	//               It's called as the first statement of both branches of the last if, with the same parameters in all cases
-
-	// header may be safe to dereference
-	fakeHeader( header, size, alignment );
-	#ifdef __CFA_DEBUG__
-	checkHeader( header < (HeapManager.Storage.Header *)heapBegin || (HeapManager.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
-	#endif // __CFA_DEBUG__
-
-	freeElem = (HeapManager.FreeHeader *)((size_t)header->kind.real.home & -3);
-	#ifdef __CFA_DEBUG__
-	if ( freeElem < &freeLists[0] || &freeLists[NoBucketSizes] <= freeElem ) {
-		abort( "Attempt to %s storage %p with corrupted header.\n"
-			   "Possible cause is duplicate free on same block or overwriting of header information.",
-			   name, addr );
-	} // if
-	#endif // __CFA_DEBUG__
-	size = freeElem->blockSize;
-	return false;
-} // headers
-
-
-static inline void * extend( size_t size ) with ( heapManager ) {
-	lock( extlock __cfaabi_dbg_ctx2 );
-	ptrdiff_t rem = heapRemaining - size;
-	if ( rem < 0 ) {
-		// If the size requested is bigger than the current remaining storage, increase the size of the heap.
-
-		size_t increase = libCeiling( size > heapExpand ? size : heapExpand, libAlign() );
-		if ( sbrk( increase ) == (void *)-1 ) {
-			unlock( extlock );
-			errno = ENOMEM;
-			return 0;
-		} // if
-		#ifdef __STATISTICS__
-		sbrk_calls += 1;
-		sbrk_storage += increase;
-		#endif // __STATISTICS__
-		#ifdef __CFA_DEBUG__
-		// Set new memory to garbage so subsequent uninitialized usages might fail.
-		memset( (char *)heapEnd + heapRemaining, '\377', increase );
-		#endif // __CFA_DEBUG__
-		rem = heapRemaining + increase - size;
-	} // if
-
-	HeapManager.Storage * block = (HeapManager.Storage *)heapEnd;
-	heapRemaining = rem;
-	heapEnd = (char *)heapEnd + size;
-	unlock( extlock );
-	return block;
-} // extend
-
-
+// thunk problem
 size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
 	size_t l = 0, m, h = dim;
@@ -535,4 +369,117 @@
 
 
+static inline bool setMmapStart( size_t value ) {		// true => mmapped, false => sbrk
+  if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
+	mmapStart = value;									// set global
+
+	// find the closest bucket size less than or equal to the mmapStart size
+	maxBucketsUsed = Bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
+	assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
+	assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
+	return false;
+} // setMmapStart
+
+
+static inline void checkHeader( bool check, const char * name, void * addr ) {
+	if ( unlikely( check ) ) {							// bad address ?
+		abort( "Attempt to %s storage %p with address outside the heap.\n"
+			   "Possible cause is duplicate free on same block or overwriting of memory.",
+			   name, addr );
+	} // if
+} // checkHeader
+
+
+static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & alignment ) {
+	if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
+		size_t offset = header->kind.fake.offset;
+		alignment = header->kind.fake.alignment & -2;	// remove flag from value
+		#ifdef __CFA_DEBUG__
+		checkAlign( alignment );						// check alignment
+		#endif // __CFA_DEBUG__
+		header = (HeapManager.Storage.Header *)((char *)header - offset);
+	} // if
+} // fakeHeader
+
+
+// <-------+----------------------------------------------------> bsize (bucket size)
+// |header |addr
+//==================================================================================
+//                                | alignment
+// <-----------------<------------+-----------------------------> bsize (bucket size)
+//                   |fake-header | addr
+#define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
+
+// <-------<<--------------------- dsize ---------------------->> bsize (bucket size)
+// |header |addr
+//==================================================================================
+//                                | alignment
+// <------------------------------<<---------- dsize --------->>> bsize (bucket size)
+//                   |fake-header |addr
+#define dataStorage( bsize, addr, header ) (bsize - ( (char *)addr - (char *)header ))
+
+
+static inline bool headers( const char * name __attribute__(( unused )), void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem, size_t & size, size_t & alignment ) with ( heapManager ) {
+	header = headerAddr( addr );
+
+	if ( unlikely( heapEnd < addr ) ) {					// mmapped ?
+		fakeHeader( header, alignment );
+		size = header->kind.real.blockSize & -3;		// mmap size
+		return true;
+	} // if
+
+	#ifdef __CFA_DEBUG__
+	checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
+	#endif // __CFA_DEBUG__
+
+	// header may be safe to dereference
+	fakeHeader( header, alignment );
+	#ifdef __CFA_DEBUG__
+	checkHeader( header < (HeapManager.Storage.Header *)heapBegin || (HeapManager.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
+	#endif // __CFA_DEBUG__
+
+	freeElem = (HeapManager.FreeHeader *)((size_t)header->kind.real.home & -3);
+	#ifdef __CFA_DEBUG__
+	if ( freeElem < &freeLists[0] || &freeLists[NoBucketSizes] <= freeElem ) {
+		abort( "Attempt to %s storage %p with corrupted header.\n"
+			   "Possible cause is duplicate free on same block or overwriting of header information.",
+			   name, addr );
+	} // if
+	#endif // __CFA_DEBUG__
+	size = freeElem->blockSize;
+	return false;
+} // headers
+
+
+static inline void * extend( size_t size ) with ( heapManager ) {
+	lock( extlock __cfaabi_dbg_ctx2 );
+	ptrdiff_t rem = heapRemaining - size;
+	if ( rem < 0 ) {
+		// If the size requested is bigger than the current remaining storage, increase the size of the heap.
+
+		size_t increase = libCeiling( size > heapExpand ? size : heapExpand, libAlign() );
+		if ( sbrk( increase ) == (void *)-1 ) {
+			unlock( extlock );
+			errno = ENOMEM;
+			return 0p;
+		} // if
+		#ifdef __STATISTICS__
+		sbrk_calls += 1;
+		sbrk_storage += increase;
+		#endif // __STATISTICS__
+		#ifdef __CFA_DEBUG__
+		// Set new memory to garbage so subsequent uninitialized usages might fail.
+		memset( (char *)heapEnd + heapRemaining, '\377', increase );
+		#endif // __CFA_DEBUG__
+		rem = heapRemaining + increase - size;
+	} // if
+
+	HeapManager.Storage * block = (HeapManager.Storage *)heapEnd;
+	heapRemaining = rem;
+	heapEnd = (char *)heapEnd + size;
+	unlock( extlock );
+	return block;
+} // extend
+
+
 static inline void * doMalloc( size_t size ) with ( heapManager ) {
 	HeapManager.Storage * block;						// pointer to new block of storage
@@ -541,5 +488,5 @@
 	// along with the block and is a multiple of the alignment size.
 
-  if ( unlikely( size > ~0ul - sizeof(HeapManager.Storage) ) ) return 0;
+  if ( unlikely( size > ~0ul - sizeof(HeapManager.Storage) ) ) return 0p;
 	size_t tsize = size + sizeof(HeapManager.Storage);
 	if ( likely( tsize < mmapStart ) ) {				// small size => sbrk
@@ -574,5 +521,5 @@
 		block = freeElem->freeList.pop();
 		#endif // SPINLOCK
-		if ( unlikely( block == 0 ) ) {					// no free block ?
+		if ( unlikely( block == 0p ) ) {				// no free block ?
 			#if defined( SPINLOCK )
 			unlock( freeElem->lock );
@@ -583,15 +530,15 @@
 
 			block = (HeapManager.Storage *)extend( tsize );	// mutual exclusion on call
-  if ( unlikely( block == 0 ) ) return 0;
-			#if defined( SPINLOCK )
+  if ( unlikely( block == 0p ) ) return 0p;
+		#if defined( SPINLOCK )
 		} else {
 			freeElem->freeList = block->header.kind.real.next;
 			unlock( freeElem->lock );
-			#endif // SPINLOCK
+		#endif // SPINLOCK
 		} // if
 
 		block->header.kind.real.home = freeElem;		// pointer back to free list of apropriate size
 	} else {											// large size => mmap
-  if ( unlikely( size > ~0ul - pageSize ) ) return 0;
+  if ( unlikely( size > ~0ul - pageSize ) ) return 0p;
 		tsize = libCeiling( tsize, pageSize );			// must be multiple of page size
 		#ifdef __STATISTICS__
@@ -611,19 +558,19 @@
 	} // if
 
-	void * area = &(block->data);						// adjust off header to user bytes
+	void * addr = &(block->data);						// adjust off header to user bytes
 
 	#ifdef __CFA_DEBUG__
-	assert( ((uintptr_t)area & (libAlign() - 1)) == 0 ); // minimum alignment ?
+	assert( ((uintptr_t)addr & (libAlign() - 1)) == 0 ); // minimum alignment ?
 	__atomic_add_fetch( &allocFree, tsize, __ATOMIC_SEQ_CST );
 	if ( traceHeap() ) {
 		enum { BufferSize = 64 };
 		char helpText[BufferSize];
-		int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", area, size, tsize );
-		// int len = snprintf( helpText, BufferSize, "Malloc %p %zu\n", area, size );
-		__cfaabi_dbg_bits_write( helpText, len );
+		int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", addr, size, tsize );
+		// int len = snprintf( helpText, BufferSize, "Malloc %p %zu\n", addr, size );
+		__cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
 	} // if
 	#endif // __CFA_DEBUG__
 
-	return area;
+	return addr;
 } // doMalloc
 
@@ -631,5 +578,5 @@
 static inline void doFree( void * addr ) with ( heapManager ) {
 	#ifdef __CFA_DEBUG__
-	if ( unlikely( heapManager.heapBegin == 0 ) ) {
+	if ( unlikely( heapManager.heapBegin == 0p ) ) {
 		abort( "doFree( %p ) : internal error, called before heap is initialized.", addr );
 	} // if
@@ -677,5 +624,5 @@
 		char helpText[BufferSize];
 		int len = snprintf( helpText, sizeof(helpText), "Free( %p ) size:%zu\n", addr, size );
-		__cfaabi_dbg_bits_write( helpText, len );
+		__cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
 	} // if
 	#endif // __CFA_DEBUG__
@@ -683,9 +630,9 @@
 
 
-size_t checkFree( HeapManager & manager ) with ( manager ) {
+size_t prtFree( HeapManager & manager ) with ( manager ) {
 	size_t total = 0;
 	#ifdef __STATISTICS__
-	__cfaabi_dbg_bits_acquire();
-	__cfaabi_dbg_bits_print_nolock( "\nBin lists (bin size : free blocks on list)\n" );
+	__cfaabi_bits_acquire();
+	__cfaabi_bits_print_nolock( STDERR_FILENO, "\nBin lists (bin size : free blocks on list)\n" );
 	#endif // __STATISTICS__
 	for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {
@@ -696,7 +643,7 @@
 
 		#if defined( SPINLOCK )
-		for ( HeapManager.Storage * p = freeLists[i].freeList; p != 0; p = p->header.kind.real.next ) {
+		for ( HeapManager.Storage * p = freeLists[i].freeList; p != 0p; p = p->header.kind.real.next ) {
 		#else
-		for ( HeapManager.Storage * p = freeLists[i].freeList.top(); p != 0; p = p->header.kind.real.next.top ) {
+		for ( HeapManager.Storage * p = freeLists[i].freeList.top(); p != 0p; p = p->header.kind.real.next.top ) {
 		#endif // SPINLOCK
 			total += size;
@@ -707,23 +654,103 @@
 
 		#ifdef __STATISTICS__
-		__cfaabi_dbg_bits_print_nolock( "%7zu, %-7u  ", size, N );
-		if ( (i + 1) % 8 == 0 ) __cfaabi_dbg_bits_print_nolock( "\n" );
+		__cfaabi_bits_print_nolock( STDERR_FILENO, "%7zu, %-7u  ", size, N );
+		if ( (i + 1) % 8 == 0 ) __cfaabi_bits_print_nolock( STDERR_FILENO, "\n" );
 		#endif // __STATISTICS__
 	} // for
 	#ifdef __STATISTICS__
-	__cfaabi_dbg_bits_print_nolock( "\ntotal free blocks:%zu\n", total );
-	__cfaabi_dbg_bits_release();
+	__cfaabi_bits_print_nolock( STDERR_FILENO, "\ntotal free blocks:%zu\n", total );
+	__cfaabi_bits_release();
 	#endif // __STATISTICS__
 	return (char *)heapEnd - (char *)heapBegin - total;
-} // checkFree
+} // prtFree
+
+
+static void ?{}( HeapManager & manager ) with ( manager ) {
+	pageSize = sysconf( _SC_PAGESIZE );
+
+	for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
+		freeLists[i].blockSize = bucketSizes[i];
+	} // for
+
+	#ifdef FASTLOOKUP
+	unsigned int idx = 0;
+	for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
+		if ( i > bucketSizes[idx] ) idx += 1;
+		lookup[i] = idx;
+	} // for
+	#endif // FASTLOOKUP
+
+	if ( setMmapStart( default_mmap_start() ) ) {
+		abort( "HeapManager : internal error, mmap start initialization failure." );
+	} // if
+	heapExpand = default_heap_expansion();
+
+	char * end = (char *)sbrk( 0 );
+	sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment
+	heapBegin = heapEnd = sbrk( 0 );					// get new start point
+} // HeapManager
+
+
+static void ^?{}( HeapManager & ) {
+	#ifdef __STATISTICS__
+	if ( traceHeapTerm() ) {
+		printStats();
+		// if ( prtfree() ) prtFree( heapManager, true );
+	} // if
+	#endif // __STATISTICS__
+} // ~HeapManager
+
+
+static void memory_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_MEMORY ) ));
+void memory_startup( void ) {
+	#ifdef __CFA_DEBUG__
+	if ( unlikely( heapBoot ) ) {						// check for recursion during system boot
+		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
+		abort( "boot() : internal error, recursively invoked during system boot." );
+	} // if
+	heapBoot = true;
+	#endif // __CFA_DEBUG__
+
+	//assert( heapManager.heapBegin != 0 );
+	//heapManager{};
+	if ( heapManager.heapBegin == 0p ) heapManager{};
+} // memory_startup
+
+static void memory_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_MEMORY ) ));
+void memory_shutdown( void ) {
+	^heapManager{};
+} // memory_shutdown
 
 
 static inline void * mallocNoStats( size_t size ) {		// necessary for malloc statistics
 	//assert( heapManager.heapBegin != 0 );
-	if ( unlikely( heapManager.heapBegin == 0 ) ) heapManager{}; // called before memory_startup ?
-	void * area = doMalloc( size );
-	if ( unlikely( area == 0 ) ) errno = ENOMEM;		// POSIX
-	return area;
+	if ( unlikely( heapManager.heapBegin == 0p ) ) heapManager{}; // called before memory_startup ?
+	void * addr = doMalloc( size );
+	if ( unlikely( addr == 0p ) ) errno = ENOMEM;		// POSIX
+	return addr;
 } // mallocNoStats
+
+
+static inline void * callocNoStats( size_t noOfElems, size_t elemSize ) {
+	size_t size = noOfElems * elemSize;
+	char * addr = (char *)mallocNoStats( size );
+  if ( unlikely( addr == 0p ) ) return 0p;
+
+	HeapManager.Storage.Header * header;
+	HeapManager.FreeHeader * freeElem;
+	size_t bsize, alignment;
+	bool mapped __attribute__(( unused )) = headers( "calloc", addr, header, freeElem, bsize, alignment );
+	#ifndef __CFA_DEBUG__
+	// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
+	if ( ! mapped )
+	#endif // __CFA_DEBUG__
+		// Zero entire data space even when > than size => realloc without a new allocation and zero fill works.
+		// <-------00000000000000000000000000000000000000000000000000000> bsize (bucket size)
+		// `-header`-addr                      `-size
+		memset( addr, '\0', bsize - sizeof(HeapManager.Storage) ); // set to zeros
+
+	header->kind.real.blockSize |= 2;					// mark as zero filled
+	return addr;
+} // callocNoStats
 
 
@@ -745,13 +772,12 @@
 	// subtract libAlign() because it is already the minimum alignment
 	// add sizeof(Storage) for fake header
-	// #comment TD : this is the only place that calls doMalloc without calling mallocNoStats, why ?
-	char * area = (char *)doMalloc( size + alignment - libAlign() + sizeof(HeapManager.Storage) );
-  if ( unlikely( area == 0 ) ) return area;
+	char * addr = (char *)mallocNoStats( size + alignment - libAlign() + sizeof(HeapManager.Storage) );
+  if ( unlikely( addr == 0p ) ) return addr;
 
 	// address in the block of the "next" alignment address
-	char * user = (char *)libCeiling( (uintptr_t)(area + sizeof(HeapManager.Storage)), alignment );
+	char * user = (char *)libCeiling( (uintptr_t)(addr + sizeof(HeapManager.Storage)), alignment );
 
 	// address of header from malloc
-	HeapManager.Storage.Header * realHeader = headerAddr( area );
+	HeapManager.Storage.Header * realHeader = headerAddr( addr );
 	// address of fake header * before* the alignment location
 	HeapManager.Storage.Header * fakeHeader = headerAddr( user );
@@ -763,4 +789,23 @@
 	return user;
 } // memalignNoStats
+
+
+static inline void * cmemalignNoStats( size_t alignment, size_t noOfElems, size_t elemSize ) {
+	size_t size = noOfElems * elemSize;
+	char * addr = (char *)memalignNoStats( alignment, size );
+  if ( unlikely( addr == 0p ) ) return 0p;
+	HeapManager.Storage.Header * header;
+	HeapManager.FreeHeader * freeElem;
+	size_t bsize;
+	bool mapped __attribute__(( unused )) = headers( "cmemalign", addr, header, freeElem, bsize, alignment );
+	#ifndef __CFA_DEBUG__
+	// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
+	if ( ! mapped )
+	#endif // __CFA_DEBUG__
+		memset( addr, '\0', dataStorage( bsize, addr, header ) ); // set to zeros
+	header->kind.real.blockSize |= 2;				// mark as zero filled
+
+	return addr;
+} // cmemalignNoStats
 
 
@@ -776,5 +821,5 @@
 extern "C" {
 	// The malloc() function allocates size bytes and returns a pointer to the allocated memory. The memory is not
-	// initialized. If size is 0, then malloc() returns either NULL, or a unique pointer value that can later be
+	// initialized. If size is 0, then malloc() returns either 0p, or a unique pointer value that can later be
 	// successfully passed to free().
 	void * malloc( size_t size ) {
@@ -788,78 +833,43 @@
 
 	// The calloc() function allocates memory for an array of nmemb elements of size bytes each and returns a pointer to
-	// the allocated memory. The memory is set to zero. If nmemb or size is 0, then calloc() returns either NULL, or a
+	// the allocated memory. The memory is set to zero. If nmemb or size is 0, then calloc() returns either 0p, or a
 	// unique pointer value that can later be successfully passed to free().
 	void * calloc( size_t noOfElems, size_t elemSize ) {
-		size_t size = noOfElems * elemSize;
 		#ifdef __STATISTICS__
 		__atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &calloc_storage, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-		char * area = (char *)mallocNoStats( size );
-	  if ( unlikely( area == 0 ) ) return 0;
+		__atomic_add_fetch( &calloc_storage, noOfElems * elemSize, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		return callocNoStats( noOfElems, elemSize );
+	} // calloc
+
+	// The realloc() function changes the size of the memory block pointed to by ptr to size bytes. The contents will be
+	// unchanged in the range from the start of the region up to the minimum of the old and new sizes. If the new size
+	// is larger than the old size, the added memory will not be initialized.  If ptr is 0p, then the call is
+	// equivalent to malloc(size), for all values of size; if size is equal to zero, and ptr is not 0p, then the call
+	// is equivalent to free(ptr). Unless ptr is 0p, it must have been returned by an earlier call to malloc(),
+	// calloc() or realloc(). If the area pointed to was moved, a free(ptr) is done.
+	void * realloc( void * oaddr, size_t size ) {
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
+	  if ( unlikely( size == 0 ) ) { free( oaddr ); return mallocNoStats( size ); } // special cases
+	  if ( unlikely( oaddr == 0p ) ) return mallocNoStats( size );
 
 		HeapManager.Storage.Header * header;
 		HeapManager.FreeHeader * freeElem;
-		size_t asize, alignment;
-		bool mapped __attribute__(( unused )) = headers( "calloc", area, header, freeElem, asize, alignment );
-		#ifndef __CFA_DEBUG__
-		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
-		if ( ! mapped )
-		#endif // __CFA_DEBUG__
-			memset( area, '\0', asize - sizeof(HeapManager.Storage) ); // set to zeros
-
-		header->kind.real.blockSize |= 2;				// mark as zero filled
-		return area;
-	} // calloc
-
-	// #comment TD : Document this function
-	void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {
-		size_t size = noOfElems * elemSize;
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-		char * area = (char *)memalignNoStats( alignment, size );
-	  if ( unlikely( area == 0 ) ) return 0;
-		HeapManager.Storage.Header * header;
-		HeapManager.FreeHeader * freeElem;
-		size_t asize;
-		bool mapped __attribute__(( unused )) = headers( "cmemalign", area, header, freeElem, asize, alignment );
-		#ifndef __CFA_DEBUG__
-		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
-		if ( ! mapped )
-			#endif // __CFA_DEBUG__
-			memset( area, '\0', asize - ( (char *)area - (char *)header ) ); // set to zeros
-		header->kind.real.blockSize |= 2;				// mark as zero filled
-
-		return area;
-	} // cmemalign
-
-	// The realloc() function changes the size of the memory block pointed to by ptr to size bytes. The contents will be
-	// unchanged in the range from the start of the region up to the minimum of the old and new sizes. If the new size
-	// is larger than the old size, the added memory will not be initialized.  If ptr is NULL, then the call is
-	// equivalent to malloc(size), for all values of size; if size is equal to zero, and ptr is not NULL, then the call
-	// is equivalent to free(ptr). Unless ptr is NULL, it must have been returned by an earlier call to malloc(),
-	// calloc() or realloc(). If the area pointed to was moved, a free(ptr) is done.
-	void * realloc( void * addr, size_t size ) {
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-	  if ( unlikely( addr == 0 ) ) return mallocNoStats( size ); // special cases
-	  if ( unlikely( size == 0 ) ) { free( addr ); return 0; }
-
-		HeapManager.Storage.Header * header;
-		HeapManager.FreeHeader * freeElem;
-		size_t asize, alignment = 0;
-		headers( "realloc", addr, header, freeElem, asize, alignment );
-
-		size_t usize = asize - ( (char *)addr - (char *)header ); // compute the amount of user storage in the block
-		if ( usize >= size ) {							// already sufficient storage
+		size_t bsize, oalign = 0;
+		headers( "realloc", oaddr, header, freeElem, bsize, oalign );
+
+		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+	  if ( size <= odsize && odsize <= size * 2 ) {	// allow up to 50% wasted storage in smaller size
+			// Do not know size of original allocation => cannot do 0 fill for any additional space because do not know
+			// where to start filling, i.e., do not overwrite existing values in space.
+			//
 			// This case does not result in a new profiler entry because the previous one still exists and it must match with
 			// the free for this memory.  Hence, this realloc does not appear in the profiler output.
-			return addr;
+			return oaddr;
 		} // if
 
@@ -868,24 +878,28 @@
 		#endif // __STATISTICS__
 
-		void * area;
-		if ( unlikely( alignment != 0 ) ) {				// previous request memalign?
-			area = memalign( alignment, size );			// create new aligned area
+		// change size and copy old content to new storage
+
+		void * naddr;
+		if ( unlikely( oalign != 0 ) ) {				// previous request memalign?
+			if ( unlikely( header->kind.real.blockSize & 2 ) ) { // previous request zero fill
+				naddr = cmemalignNoStats( oalign, 1, size ); // create new aligned area
+			} else {
+				naddr = memalignNoStats( oalign, size ); // create new aligned area
+			} // if
 		} else {
-			area = mallocNoStats( size );				// create new area
+			if ( unlikely( header->kind.real.blockSize & 2 ) ) { // previous request zero fill
+				naddr = callocNoStats( 1, size );		// create new area
+			} else {
+				naddr = mallocNoStats( size );			// create new area
+			} // if
 		} // if
-	  if ( unlikely( area == 0 ) ) return 0;
-		if ( unlikely( header->kind.real.blockSize & 2 ) ) { // previous request zero fill (calloc/cmemalign) ?
-			assert( (header->kind.real.blockSize & 1) == 0 );
-			bool mapped __attribute__(( unused )) = headers( "realloc", area, header, freeElem, asize, alignment );
-			#ifndef __CFA_DEBUG__
-			// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
-			if ( ! mapped )
-			#endif // __CFA_DEBUG__
-				memset( (char *)area + usize, '\0', asize - ( (char *)area - (char *)header ) - usize ); // zero-fill back part
-			header->kind.real.blockSize |= 2;			// mark new request as zero fill
-		} // if
-		memcpy( area, addr, usize );					// copy bytes
-		free( addr );
-		return area;
+	  if ( unlikely( naddr == 0p ) ) return 0p;
+
+		headers( "realloc", naddr, header, freeElem, bsize, oalign );
+		size_t ndsize = dataStorage( bsize, naddr, header ); // data storage available in bucket
+		// To preserve prior fill, the entire bucket must be copied versus the size.
+		memcpy( naddr, oaddr, MIN( odsize, ndsize ) );	// copy bytes
+		free( oaddr );
+		return naddr;
 	} // realloc
 
@@ -898,8 +912,17 @@
 		#endif // __STATISTICS__
 
-		void * area = memalignNoStats( alignment, size );
-
-		return area;
+		return memalignNoStats( alignment, size );
 	} // memalign
+
+
+	// The cmemalign() function is the same as calloc() with memory alignment.
+	void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {
+		#ifdef __STATISTICS__
+		__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &cmemalign_storage, noOfElems * elemSize, __ATOMIC_SEQ_CST );
+		#endif // __STATISTICS__
+
+		return cmemalignNoStats( alignment, noOfElems, elemSize );
+	} // cmemalign
 
 	// The function aligned_alloc() is the same as memalign(), except for the added restriction that size should be a
@@ -912,10 +935,10 @@
 	// The function posix_memalign() allocates size bytes and places the address of the allocated memory in *memptr. The
 	// address of the allocated memory will be a multiple of alignment, which must be a power of two and a multiple of
-	// sizeof(void *). If size is 0, then posix_memalign() returns either NULL, or a unique pointer value that can later
+	// sizeof(void *). If size is 0, then posix_memalign() returns either 0p, or a unique pointer value that can later
 	// be successfully passed to free(3).
 	int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
 	  if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) return EINVAL; // check alignment
 		* memptr = memalign( alignment, size );
-	  if ( unlikely( * memptr == 0 ) ) return ENOMEM;
+	  if ( unlikely( * memptr == 0p ) ) return ENOMEM;
 		return 0;
 	} // posix_memalign
@@ -930,5 +953,5 @@
 	// The free() function frees the memory space pointed to by ptr, which must have been returned by a previous call to
 	// malloc(), calloc() or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behavior
-	// occurs. If ptr is NULL, no operation is performed.
+	// occurs. If ptr is 0p, no operation is performed.
 	void free( void * addr ) {
 		#ifdef __STATISTICS__
@@ -936,15 +959,12 @@
 		#endif // __STATISTICS__
 
-		// #comment TD : To decrease nesting I would but the special case in the
-		//               else instead, plus it reads more naturally to have the
-		//               short / normal case instead
-		if ( unlikely( addr == 0 ) ) {					// special case
-			#ifdef __CFA_DEBUG__
-			if ( traceHeap() ) {
-				#define nullmsg "Free( 0x0 ) size:0\n"
-				// Do not debug print free( 0 ), as it can cause recursive entry from sprintf.
-				__cfaabi_dbg_bits_write( nullmsg, sizeof(nullmsg) - 1 );
-			} // if
-			#endif // __CFA_DEBUG__
+	  if ( unlikely( addr == 0p ) ) {					// special case
+			// #ifdef __CFA_DEBUG__
+			// if ( traceHeap() ) {
+			// 	#define nullmsg "Free( 0x0 ) size:0\n"
+			// 	// Do not debug print free( 0p ), as it can cause recursive entry from sprintf.
+			// 	__cfaabi_dbg_write( nullmsg, sizeof(nullmsg) - 1 );
+			// } // if
+			// #endif // __CFA_DEBUG__
 			return;
 		} // exit
@@ -953,44 +973,8 @@
 	} // free
 
-	// The mallopt() function adjusts parameters that control the behavior of the memory-allocation functions (see
-	// malloc(3)). The param argument specifies the parameter to be modified, and value specifies the new value for that
-	// parameter.
-	int mallopt( int option, int value ) {
-		choose( option ) {
-		  case M_TOP_PAD:
-			if ( setHeapExpand( value ) ) fallthru default;
-		  case M_MMAP_THRESHOLD:
-			if ( setMmapStart( value ) ) fallthru default;
-		  default:
-			// #comment TD : 1 for unsopported feels wrong
-			return 1;									// success, or unsupported
-		} // switch
-		return 0;										// error
-	} // mallopt
-
-	// The malloc_trim() function attempts to release free memory at the top of the heap (by calling sbrk(2) with a
-	// suitable argument).
-	int malloc_trim( size_t ) {
-		return 0;										// => impossible to release memory
-	} // malloc_trim
-
-	// The malloc_usable_size() function returns the number of usable bytes in the block pointed to by ptr, a pointer to
-	// a block of memory allocated by malloc(3) or a related function.
-	size_t malloc_usable_size( void * addr ) {
-	  if ( unlikely( addr == 0 ) ) return 0;			// null allocation has 0 size
-
-		HeapManager.Storage.Header * header;
-		HeapManager.FreeHeader * freeElem;
-		size_t size, alignment;
-
-		headers( "malloc_usable_size", addr, header, freeElem, size, alignment );
-		size_t usize = size - ( (char *)addr - (char *)header ); // compute the amount of user storage in the block
-		return usize;
-	} // malloc_usable_size
-
-
-    // The malloc_alignment() function returns the alignment of the allocation.
+
+	// The malloc_alignment() function returns the alignment of the allocation.
 	size_t malloc_alignment( void * addr ) {
-	  if ( unlikely( addr == 0 ) ) return libAlign();	// minimum alignment
+	  if ( unlikely( addr == 0p ) ) return libAlign();	// minimum alignment
 		HeapManager.Storage.Header * header = headerAddr( addr );
 		if ( (header->kind.fake.alignment & 1) == 1 ) {	// fake header ?
@@ -1002,7 +986,7 @@
 
 
-    // The malloc_zero_fill() function returns true if the allocation is zero filled, i.e., initially allocated by calloc().
+	// The malloc_zero_fill() function returns true if the allocation is zero filled, i.e., initially allocated by calloc().
 	bool malloc_zero_fill( void * addr ) {
-	  if ( unlikely( addr == 0 ) ) return false;		// null allocation is not zero fill
+	  if ( unlikely( addr == 0p ) ) return false;		// null allocation is not zero fill
 		HeapManager.Storage.Header * header = headerAddr( addr );
 		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
@@ -1013,15 +997,28 @@
 
 
-    // The malloc_stats() function prints (on default standard error) statistics about memory allocated by malloc(3) and
-    // related functions.
+	// The malloc_usable_size() function returns the number of usable bytes in the block pointed to by ptr, a pointer to
+	// a block of memory allocated by malloc(3) or a related function.
+	size_t malloc_usable_size( void * addr ) {
+	  if ( unlikely( addr == 0p ) ) return 0;			// null allocation has 0 size
+		HeapManager.Storage.Header * header;
+		HeapManager.FreeHeader * freeElem;
+		size_t bsize, alignment;
+
+		headers( "malloc_usable_size", addr, header, freeElem, bsize, alignment );
+		return dataStorage( bsize, addr, header );	// data storage in bucket
+	} // malloc_usable_size
+
+
+	// The malloc_stats() function prints (on default standard error) statistics about memory allocated by malloc(3) and
+	// related functions.
 	void malloc_stats( void ) {
 		#ifdef __STATISTICS__
 		printStats();
-		if ( checkFree() ) checkFree( heapManager );
+		if ( prtFree() ) prtFree( heapManager );
 		#endif // __STATISTICS__
 	} // malloc_stats
 
 	// The malloc_stats_fd() function changes the file descripter where malloc_stats() writes the statistics.
-	int malloc_stats_fd( int fd ) {
+	int malloc_stats_fd( int fd __attribute__(( unused )) ) {
 		#ifdef __STATISTICS__
 		int temp = statfd;
@@ -1033,8 +1030,30 @@
 	} // malloc_stats_fd
 
+
+	// The mallopt() function adjusts parameters that control the behavior of the memory-allocation functions (see
+	// malloc(3)). The param argument specifies the parameter to be modified, and value specifies the new value for that
+	// parameter.
+	int mallopt( int option, int value ) {
+		choose( option ) {
+		  case M_TOP_PAD:
+			if ( setHeapExpand( value ) ) return 1;
+		  case M_MMAP_THRESHOLD:
+			if ( setMmapStart( value ) ) return 1;
+		} // switch
+		return 0;										// error, unsupported
+	} // mallopt
+
+	// The malloc_trim() function attempts to release free memory at the top of the heap (by calling sbrk(2) with a
+	// suitable argument).
+	int malloc_trim( size_t ) {
+		return 0;										// => impossible to release memory
+	} // malloc_trim
+
+
 	// The malloc_info() function exports an XML string that describes the current state of the memory-allocation
 	// implementation in the caller.  The string is printed on the file stream stream.  The exported string includes
 	// information about all arenas (see malloc(3)).
 	int malloc_info( int options, FILE * stream ) {
+		if ( options != 0 ) { errno = EINVAL; return -1; }
 		return printStatsXML( stream );
 	} // malloc_info
@@ -1046,5 +1065,5 @@
 	// structure is returned as the function result.  (It is the caller's responsibility to free(3) this memory.)
 	void * malloc_get_state( void ) {
-		return 0;										// unsupported
+		return 0p;										// unsupported
 	} // malloc_get_state
 
@@ -1058,4 +1077,53 @@
 
 
+// Must have CFA linkage to overload with C linkage realloc.
+void * realloc( void * oaddr, size_t nalign, size_t size ) {
+	#ifdef __STATISTICS__
+	__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
+	#endif // __STATISTICS__
+
+	// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
+  if ( unlikely( size == 0 ) ) { free( oaddr ); return mallocNoStats( size ); } // special cases
+  if ( unlikely( oaddr == 0p ) ) return mallocNoStats( size );
+
+	if ( unlikely( nalign == 0 ) ) nalign = libAlign();	// reset alignment to minimum
+	#ifdef __CFA_DEBUG__
+	else
+		checkAlign( nalign );							// check alignment
+	#endif // __CFA_DEBUG__
+
+	HeapManager.Storage.Header * header;
+	HeapManager.FreeHeader * freeElem;
+	size_t bsize, oalign = 0;
+	headers( "realloc", oaddr, header, freeElem, bsize, oalign );
+	size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+
+  if ( oalign != 0 && (uintptr_t)oaddr % nalign == 0 ) { // has alignment and just happens to work out
+		headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
+		return realloc( oaddr, size );
+	} // if
+
+	#ifdef __STATISTICS__
+	__atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
+	#endif // __STATISTICS__
+
+	// change size and copy old content to new storage
+
+	void * naddr;
+	if ( unlikely( header->kind.real.blockSize & 2 ) ) { // previous request zero fill
+		naddr = cmemalignNoStats( nalign, 1, size );	// create new aligned area
+	} else {
+		naddr = memalignNoStats( nalign, size );		// create new aligned area
+	} // if
+
+	headers( "realloc", naddr, header, freeElem, bsize, oalign );
+	size_t ndsize = dataStorage( bsize, naddr, header ); // data storage available in bucket
+	// To preserve prior fill, the entire bucket must be copied versus the size.
+	memcpy( naddr, oaddr, MIN( odsize, ndsize ) );		// copy bytes
+	free( oaddr );
+	return naddr;
+} // realloc
+
+
 // Local Variables: //
 // tab-width: 4 //
Index: libcfa/src/interpose.cfa
===================================================================
--- libcfa/src/interpose.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/interpose.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Wed Mar 29 16:10:31 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Jul 14 22:57:16 2019
-// Update Count     : 116
+// Last Modified On : Sat Nov 30 07:09:42 2019
+// Update Count     : 119
 //
 
@@ -163,5 +163,5 @@
 	abort_lastframe = kernel_abort_lastframe();
 	len = snprintf( abort_text, abort_text_size, "Cforall Runtime error (UNIX pid:%ld) ", (long int)getpid() ); // use UNIX pid (versus getPid)
-	__cfaabi_dbg_bits_write( abort_text, len );
+	__cfaabi_dbg_write( abort_text, len );
 
 	if ( fmt ) {
@@ -171,8 +171,8 @@
 		len = vsnprintf( abort_text, abort_text_size, fmt, args );
 		va_end( args );
-		__cfaabi_dbg_bits_write( abort_text, len );
+		__cfaabi_dbg_write( abort_text, len );
 
 		if ( fmt[strlen( fmt ) - 1] != '\n' ) {		// add optional newline if missing at the end of the format text
-			__cfaabi_dbg_bits_write( "\n", 1 );
+			__cfaabi_dbg_write( "\n", 1 );
 		}
 	}
@@ -194,11 +194,11 @@
 	// find executable name
 	*index( messages[0], '(' ) = '\0';
-	__cfaabi_dbg_bits_print_nolock( "Stack back trace for: %s\n", messages[0]);
-
-	for ( int i = Start; i < size - abort_lastframe && messages != NULL; i += 1 ) {
-		char * name = NULL, * offset_begin = NULL, * offset_end = NULL;
+	__cfaabi_bits_print_nolock( STDERR_FILENO, "Stack back trace for: %s\n", messages[0]);
+
+	for ( int i = Start; i < size - abort_lastframe && messages != 0p; i += 1 ) {
+		char * name = 0p, * offset_begin = 0p, * offset_end = 0p;
 
 		for ( char * p = messages[i]; *p; ++p ) {
-			//__cfaabi_dbg_bits_print_nolock( "X %s\n", p);
+			//__cfaabi_bits_print_nolock( "X %s\n", p);
 			// find parantheses and +offset
 			if ( *p == '(' ) {
@@ -220,7 +220,7 @@
 			*offset_end++ = '\0';
 
-			__cfaabi_dbg_bits_print_nolock( "(%i) %s : %s + %s %s\n", frameNo, messages[i], name, offset_begin, offset_end);
+			__cfaabi_bits_print_nolock( STDERR_FILENO, "(%i) %s : %s + %s %s\n", frameNo, messages[i], name, offset_begin, offset_end);
 		} else {										// otherwise, print the whole line
-			__cfaabi_dbg_bits_print_nolock( "(%i) %s\n", frameNo, messages[i] );
+			__cfaabi_bits_print_nolock( STDERR_FILENO, "(%i) %s\n", frameNo, messages[i] );
 		}
 	}
Index: libcfa/src/startup.cfa
===================================================================
--- libcfa/src/startup.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/startup.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,15 +10,15 @@
 // Created On       : Tue Jul 24 16:21:57 2018
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Jul 25 16:42:01 2018
-// Update Count     : 11
+// Last Modified On : Sat Nov 30 07:07:56 2019
+// Update Count     : 13
 //
 
 #include "startup.hfa"
-#include <unistd.h>
-
+#include <time.h>										// tzset
 
 extern "C" {
     static void __cfaabi_appready_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_APPREADY ) ));
     void __cfaabi_appready_startup( void ) {
+		tzset();										// initialize time global variables
 		#ifdef __CFA_DEBUG__
 		extern void heapAppStart();
Index: libcfa/src/stdlib.cfa
===================================================================
--- libcfa/src/stdlib.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/stdlib.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Thu Jan 28 17:10:29 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Oct 22 08:57:52 2019
-// Update Count     : 478
+// Last Modified On : Wed Nov 20 17:22:47 2019
+// Update Count     : 485
 //
 
@@ -30,50 +30,19 @@
 	T * alloc_set( T ptr[], size_t dim, char fill ) {	// realloc array with fill
 		size_t olen = malloc_usable_size( ptr );		// current allocation
-		char * nptr = (char *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
+		void * nptr = (void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
 		size_t nlen = malloc_usable_size( nptr );		// new allocation
 		if ( nlen > olen ) {							// larger ?
-			memset( nptr + olen, (int)fill, nlen - olen ); // initialize added storage
+			memset( (char *)nptr + olen, (int)fill, nlen - olen ); // initialize added storage
 		} // if
 		return (T *)nptr;
 	} // alloc_set
 
-	T * alloc_align( T ptr[], size_t align ) {			// aligned realloc array
-		char * nptr;
-		size_t alignment = malloc_alignment( ptr );
-		if ( align != alignment && (uintptr_t)ptr % align != 0 ) {
-			size_t olen = malloc_usable_size( ptr );	// current allocation
-			nptr = (char *)memalign( align, olen );
-			size_t nlen = malloc_usable_size( nptr );	// new allocation
-			size_t lnth = olen < nlen ? olen : nlen;	// min
-			memcpy( nptr, ptr, lnth );					// initialize storage
-			free( ptr );
-		} else {
-			nptr = (char *)ptr;
-		} // if
-		return (T *)nptr;
-	} // alloc_align
-
-	T * alloc_align( T ptr[], size_t align, size_t dim ) { // aligned realloc array
-		char * nptr;
-		size_t alignment = malloc_alignment( ptr );
-		if ( align != alignment ) {
-			size_t olen = malloc_usable_size( ptr );	// current allocation
-			nptr = (char *)memalign( align, dim * sizeof(T) );
-			size_t nlen = malloc_usable_size( nptr );	// new allocation
-			size_t lnth = olen < nlen ? olen : nlen;	// min
-			memcpy( nptr, ptr, lnth );					// initialize storage
-			free( ptr );
-		} else {
-			nptr = (char *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
-		} // if
-		return (T *)nptr;
-	} // alloc_align
-
 	T * alloc_align_set( T ptr[], size_t align, char fill ) { // aligned realloc with fill
 		size_t olen = malloc_usable_size( ptr );		// current allocation
-		char * nptr = alloc_align( ptr, align );
+		void * nptr = (void *)realloc( (void *)ptr, align, sizeof(T) ); // CFA realloc
+		// char * nptr = alloc_align( ptr, align );
 		size_t nlen = malloc_usable_size( nptr );		// new allocation
 		if ( nlen > olen ) {							// larger ?
-			memset( nptr + olen, (int)fill, nlen - olen ); // initialize added storage
+			memset( (char *)nptr + olen, (int)fill, nlen - olen ); // initialize added storage
 		} // if
 		return (T *)nptr;
Index: libcfa/src/stdlib.hfa
===================================================================
--- libcfa/src/stdlib.hfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ libcfa/src/stdlib.hfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Thu Jan 28 17:12:35 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Oct 20 22:57:33 2019
-// Update Count     : 390
+// Last Modified On : Fri Nov 29 23:08:02 2019
+// Update Count     : 400
 //
 
@@ -28,4 +28,6 @@
 } // extern "C"
 
+void * realloc( void * oaddr, size_t nalign, size_t size ); // CFA heap
+
 //---------------------------------------
 
@@ -50,6 +52,5 @@
 	} // calloc
 
-	T * realloc( T * ptr, size_t size ) {
-		if ( unlikely( ptr == 0 ) ) return malloc();
+	T * realloc( T * ptr, size_t size ) {				// CFA realloc, eliminate return-type cast
 		return (T *)(void *)realloc( (void *)ptr, size ); // C realloc
 	} // realloc
@@ -59,4 +60,8 @@
 	} // memalign
 
+	T * cmemalign( size_t align, size_t dim  ) {
+		return (T *)cmemalign( align, dim, sizeof(T) );	// CFA cmemalign
+	} // cmemalign
+
 	T * aligned_alloc( size_t align ) {
 		return (T *)aligned_alloc( align, sizeof(T) );	// C aligned_alloc
@@ -79,5 +84,5 @@
 
 	T * alloc( T ptr[], size_t dim ) {					// realloc
-		return realloc( ptr, dim * sizeof(T) );
+		return (T *)(void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
 	} // alloc
 
@@ -118,4 +123,12 @@
 	} // alloc_align
 
+	T * alloc_align( T ptr[], size_t align ) {			// aligned realloc array
+		return (T *)(void *)realloc( (void *)ptr, align, sizeof(T) ); // CFA realloc
+	} // alloc_align
+
+	T * alloc_align( T ptr[], size_t align, size_t dim ) { // aligned realloc array
+		return (T *)(void *)realloc( (void *)ptr, align, dim * sizeof(T) ); // CFA realloc
+	} // alloc_align
+
 	T * alloc_align_set( size_t align, char fill ) {
 		return (T *)memset( (T *)alloc_align( align ), (int)fill, sizeof(T) ); // initialize with fill value
@@ -142,6 +155,4 @@
 
 forall( dtype T | sized(T) ) {
-	T * alloc_align( T ptr[], size_t align );			// realign
-	T * alloc_align( T ptr[], size_t align, size_t dim ); // aligned realloc array
 	T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill ); // aligned realloc array with fill
 } // distribution
@@ -199,18 +210,18 @@
 
 static inline {
-	int ato( const char * sptr ) { return (int)strtol( sptr, 0, 10 ); }
-	unsigned int ato( const char * sptr ) { return (unsigned int)strtoul( sptr, 0, 10 ); }
-	long int ato( const char * sptr ) { return strtol( sptr, 0, 10 ); }
-	unsigned long int ato( const char * sptr ) { return strtoul( sptr, 0, 10 ); }
-	long long int ato( const char * sptr ) { return strtoll( sptr, 0, 10 ); }
-	unsigned long long int ato( const char * sptr ) { return strtoull( sptr, 0, 10 ); }
-
-	float ato( const char * sptr ) { return strtof( sptr, 0 ); }
-	double ato( const char * sptr ) { return strtod( sptr, 0 ); }
-	long double ato( const char * sptr ) { return strtold( sptr, 0 ); }
-
-	float _Complex ato( const char * sptr ) { return strto( sptr, NULL ); }
-	double _Complex ato( const char * sptr ) { return strto( sptr, NULL ); }
-	long double _Complex ato( const char * sptr ) { return strto( sptr, NULL ); }
+	int ato( const char * sptr ) { return (int)strtol( sptr, 0p, 10 ); }
+	unsigned int ato( const char * sptr ) { return (unsigned int)strtoul( sptr, 0p, 10 ); }
+	long int ato( const char * sptr ) { return strtol( sptr, 0p, 10 ); }
+	unsigned long int ato( const char * sptr ) { return strtoul( sptr, 0p, 10 ); }
+	long long int ato( const char * sptr ) { return strtoll( sptr, 0p, 10 ); }
+	unsigned long long int ato( const char * sptr ) { return strtoull( sptr, 0p, 10 ); }
+
+	float ato( const char * sptr ) { return strtof( sptr, 0p ); }
+	double ato( const char * sptr ) { return strtod( sptr, 0p ); }
+	long double ato( const char * sptr ) { return strtold( sptr, 0p ); }
+
+	float _Complex ato( const char * sptr ) { return strto( sptr, 0p ); }
+	double _Complex ato( const char * sptr ) { return strto( sptr, 0p ); }
+	long double _Complex ato( const char * sptr ) { return strto( sptr, 0p ); }
 } // distribution
 
Index: longrun_tests/Makefile.in
===================================================================
--- longrun_tests/Makefile.in	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ longrun_tests/Makefile.in	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -486,6 +486,5 @@
 LTCFACOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=compile $(CFACC) $(DEFS) \
-	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(CFAFLAGS) \
-	$(AM_CFLAGS) $(CFLAGS)
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(AM_CFLAGS) $(CFAFLAGS) $(CFLAGS)
 
 AM_V_CFA = $(am__v_CFA_@AM_V@)
Index: src/AST/Convert.cpp
===================================================================
--- src/AST/Convert.cpp	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/AST/Convert.cpp	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -887,5 +887,5 @@
 		auto expr = visitBaseExpr( node,
 			new AsmExpr(
-				get<Expression>().accept1(node->inout),
+				new std::string(node->inout),
 				get<Expression>().accept1(node->constraint),
 				get<Expression>().accept1(node->operand)
@@ -2258,5 +2258,5 @@
 			new ast::AsmExpr(
 				old->location,
-				GET_ACCEPT_1(inout, Expr),
+				old->inout,
 				GET_ACCEPT_1(constraint, Expr),
 				GET_ACCEPT_1(operand, Expr)
Index: src/AST/Expr.hpp
===================================================================
--- src/AST/Expr.hpp	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/AST/Expr.hpp	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -556,9 +556,9 @@
 class AsmExpr final : public Expr {
 public:
-	ptr<Expr> inout;
+	std::string inout;
 	ptr<Expr> constraint;
 	ptr<Expr> operand;
 
-	AsmExpr( const CodeLocation & loc, const Expr * io, const Expr * con, const Expr * op )
+	AsmExpr( const CodeLocation & loc, const std::string & io, const Expr * con, const Expr * op )
 	: Expr( loc ), inout( io ), constraint( con ), operand( op ) {}
 
Index: src/AST/Pass.impl.hpp
===================================================================
--- src/AST/Pass.impl.hpp	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/AST/Pass.impl.hpp	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -1300,5 +1300,4 @@
 			maybe_accept( node, &AsmExpr::result );
 		}
-		maybe_accept( node, &AsmExpr::inout      );
 		maybe_accept( node, &AsmExpr::constraint );
 		maybe_accept( node, &AsmExpr::operand    );
Index: src/AST/Print.cpp
===================================================================
--- src/AST/Print.cpp	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/AST/Print.cpp	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -1011,5 +1011,5 @@
 		os << "Asm Expression:" << endl;
 		++indent;
-		if ( node->inout ) node->inout->accept( *this );
+		if ( !node->inout.empty() ) os << "[" << node->inout << "] ";
 		if ( node->constraint ) node->constraint->accept( *this );
 		if ( node->operand ) node->operand->accept( *this );
Index: src/CodeGen/CodeGenerator.cc
===================================================================
--- src/CodeGen/CodeGenerator.cc	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/CodeGen/CodeGenerator.cc	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -786,12 +786,12 @@
 
 	void CodeGenerator::postvisit( AsmExpr * asmExpr ) {
-		if ( asmExpr->get_inout() ) {
+		if ( !asmExpr->inout.empty() ) {
 			output << "[ ";
-			asmExpr->get_inout()->accept( *visitor );
+			output << asmExpr->inout;
 			output << " ] ";
 		} // if
-		asmExpr->get_constraint()->accept( *visitor );
+		asmExpr->constraint->accept( *visitor );
 		output << " ( ";
-		asmExpr->get_operand()->accept( *visitor );
+		asmExpr->operand->accept( *visitor );
 		output << " )";
 	}
Index: src/Common/PassVisitor.impl.h
===================================================================
--- src/Common/PassVisitor.impl.h	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/Common/PassVisitor.impl.h	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -2452,5 +2452,4 @@
 
 	indexerScopedAccept( node->result    , *this );
-	maybeAccept_impl   ( node->inout     , *this );
 	maybeAccept_impl   ( node->constraint, *this );
 	maybeAccept_impl   ( node->operand   , *this );
@@ -2464,5 +2463,4 @@
 
 	indexerScopedAccept( node->result    , *this );
-	maybeAccept_impl   ( node->inout     , *this );
 	maybeAccept_impl   ( node->constraint, *this );
 	maybeAccept_impl   ( node->operand   , *this );
@@ -2477,5 +2475,4 @@
 	indexerScopedMutate( node->env       , *this );
 	indexerScopedMutate( node->result    , *this );
-	maybeMutate_impl   ( node->inout     , *this );
 	maybeMutate_impl   ( node->constraint, *this );
 	maybeMutate_impl   ( node->operand   , *this );
Index: src/Common/SemanticError.cc
===================================================================
--- src/Common/SemanticError.cc	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/Common/SemanticError.cc	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -149,21 +149,27 @@
 // Helpers
 namespace ErrorHelpers {
+	Colors colors = Colors::Auto;
+
+	static inline bool with_colors() {
+		return colors == Colors::Auto ? isatty( STDERR_FILENO ) : bool(colors);
+	}
+
 	const std::string & error_str() {
-		static std::string str = isatty( STDERR_FILENO ) ? "\e[31merror:\e[39m " : "error: ";
+		static std::string str = with_colors() ? "\e[31merror:\e[39m " : "error: ";
 		return str;
 	}
 
 	const std::string & warning_str() {
-		static std::string str = isatty( STDERR_FILENO ) ? "\e[95mwarning:\e[39m " : "warning: ";
+		static std::string str = with_colors() ? "\e[95mwarning:\e[39m " : "warning: ";
 		return str;
 	}
 
 	const std::string & bold_ttycode() {
-		static std::string str = isatty( STDERR_FILENO ) ? "\e[1m" : "";
+		static std::string str = with_colors() ? "\e[1m" : "";
 		return str;
 	}
 
 	const std::string & reset_font_ttycode() {
-		static std::string str = isatty( STDERR_FILENO ) ? "\e[0m" : "";
+		static std::string str = with_colors() ? "\e[0m" : "";
 		return str;
 	}
Index: src/Common/SemanticError.h
===================================================================
--- src/Common/SemanticError.h	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/Common/SemanticError.h	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -97,4 +97,12 @@
 // Helpers
 namespace ErrorHelpers {
+	enum class Colors {
+		Never = false,
+		Always = true,
+		Auto,
+	};
+
+	extern Colors colors;
+
 	const std::string & error_str();
 	const std::string & warning_str();
Index: src/Concurrency/Keywords.cc
===================================================================
--- src/Concurrency/Keywords.cc	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/Concurrency/Keywords.cc	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -59,4 +59,5 @@
 
 		Declaration * postmutate( StructDecl * decl );
+		DeclarationWithType * postmutate( FunctionDecl * decl );
 
 		void handle( StructDecl * );
@@ -77,5 +78,6 @@
 		KeywordCastExpr::Target cast_target;
 
-		StructDecl* type_decl = nullptr;
+		StructDecl   * type_decl = nullptr;
+		FunctionDecl * dtor_decl = nullptr;
 	};
 
@@ -97,5 +99,5 @@
 			"__thrd",
 			"get_thread",
-			"thread keyword requires threads to be in scope, add #include <thread.hfa>",
+			"thread keyword requires threads to be in scope, add #include <thread.hfa>\n",
 			true,
 			KeywordCastExpr::Thread
@@ -129,5 +131,5 @@
 			"__cor",
 			"get_coroutine",
-			"coroutine keyword requires coroutines to be in scope, add #include <coroutine.hfa>",
+			"coroutine keyword requires coroutines to be in scope, add #include <coroutine.hfa>\n",
 			true,
 			KeywordCastExpr::Coroutine
@@ -161,5 +163,5 @@
 			"__mon",
 			"get_monitor",
-			"monitor keyword requires monitors to be in scope, add #include <monitor.hfa>",
+			"monitor keyword requires monitors to be in scope, add #include <monitor.hfa>\n",
 			false,
 			KeywordCastExpr::Monitor
@@ -284,19 +286,31 @@
 	}
 
+	DeclarationWithType * ConcurrentSueKeyword::postmutate( FunctionDecl * decl ) {
+		if( !type_decl ) return decl;
+		if( !CodeGen::isDestructor( decl->name ) ) return decl;
+
+		auto params = decl->type->parameters;
+		if( params.size() != 1 ) return decl;
+
+		auto type = dynamic_cast<ReferenceType*>( params.front()->get_type() );
+		if( !type ) return decl;
+
+		auto stype = dynamic_cast<StructInstType*>( type->base );
+		if( !stype ) return decl;
+		if( stype->baseStruct != type_decl ) return decl;
+
+		if( !dtor_decl ) dtor_decl = decl;
+		return decl;
+	}
+
 	Expression * ConcurrentSueKeyword::postmutate( KeywordCastExpr * cast ) {
 		if ( cast_target == cast->target ) {
 			// convert (thread &)t to (thread_desc &)*get_thread(t), etc.
 			if( !type_decl ) SemanticError( cast, context_error );
-			Expression * arg = cast->arg;
-			cast->arg = nullptr;
-			delete cast;
-			return new CastExpr(
-				UntypedExpr::createDeref(
-					new UntypedExpr( new NameExpr( getter_name ), { arg } )
-				),
-				new ReferenceType(
-					noQualifiers,
-					new StructInstType( noQualifiers, type_decl ) )
-				);
+			if( !dtor_decl ) SemanticError( cast, context_error );
+			assert( cast->result == nullptr );
+			cast->set_result( new ReferenceType( noQualifiers, new StructInstType( noQualifiers, type_decl ) ) );
+			cast->concrete_target.field  = field_name;
+			cast->concrete_target.getter = getter_name;
 		}
 		return cast;
@@ -308,4 +322,5 @@
 
 		if( !type_decl ) SemanticError( decl, context_error );
+		if( !dtor_decl ) SemanticError( decl, context_error );
 
 		FunctionDecl * func = forwardDeclare( decl );
Index: src/ControlStruct/MLEMutator.cc
===================================================================
--- src/ControlStruct/MLEMutator.cc	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/ControlStruct/MLEMutator.cc	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -231,12 +231,10 @@
 
 	Statement *MLEMutator::mutateLoop( Statement *bodyLoop, Entry &e ) {
+		// only generate these when needed
+		if( !e.isContUsed() && !e.isBreakUsed() ) return bodyLoop;
+
 		// ensure loop body is a block
-		CompoundStmt *newBody;
-		if ( ! (newBody = dynamic_cast<CompoundStmt *>( bodyLoop )) ) {
-			newBody = new CompoundStmt();
-			newBody->get_kids().push_back( bodyLoop );
-		} // if
-
-		// only generate these when needed
+		CompoundStmt * newBody = new CompoundStmt();
+		newBody->get_kids().push_back( bodyLoop );
 
 		if ( e.isContUsed() ) {
Index: src/GenPoly/Lvalue.cc
===================================================================
--- src/GenPoly/Lvalue.cc	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/GenPoly/Lvalue.cc	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -60,5 +60,5 @@
 		}
 
-		struct ReferenceConversions final : public WithStmtsToAdd {
+		struct ReferenceConversions final : public WithStmtsToAdd, public WithGuards {
 			Expression * postmutate( CastExpr * castExpr );
 			Expression * postmutate( AddressExpr * addrExpr );
@@ -71,4 +71,14 @@
 
 		struct FixIntrinsicResult final : public WithGuards {
+			enum {
+				NoSkip,
+				Skip,
+				SkipInProgress
+			} skip = NoSkip;
+
+			void premutate( AsmExpr * ) { GuardValue( skip ); skip = Skip; }
+			void premutate( ApplicationExpr * ) { GuardValue( skip ); skip = (skip == Skip) ? SkipInProgress : NoSkip; }
+
+
 			Expression * postmutate( ApplicationExpr * appExpr );
 			void premutate( FunctionDecl * funcDecl );
@@ -162,5 +172,5 @@
 
 		Expression * FixIntrinsicResult::postmutate( ApplicationExpr * appExpr ) {
-			if ( isIntrinsicReference( appExpr ) ) {
+			if ( skip != SkipInProgress && isIntrinsicReference( appExpr ) ) {
 				// eliminate reference types from intrinsic applications - now they return lvalues
 				ReferenceType * result = strict_dynamic_cast< ReferenceType * >( appExpr->result );
Index: src/Parser/parser.yy
===================================================================
--- src/Parser/parser.yy	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/Parser/parser.yy	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Aug  4 21:48:23 2019
-// Update Count     : 4364
+// Last Modified On : Sat Dec  7 10:43:44 2019
+// Update Count     : 4394
 //
 
@@ -211,4 +211,15 @@
 } // forCtrl
 
+KeywordCastExpr::Target Aggregate2Target( DeclarationNode::Aggregate aggr ) {
+	KeywordCastExpr::Target target;
+	switch ( aggr ) {
+	  case DeclarationNode::Coroutine: target = KeywordCastExpr::Coroutine; break;
+	  case DeclarationNode::Monitor: target = KeywordCastExpr::Monitor; break;
+	  case DeclarationNode::Thread: target = KeywordCastExpr::Thread; break;
+	  default: abort();
+	} // switch
+	return target;
+} // Aggregate2Target
+
 
 bool forall = false, yyy = false;						// aggregate have one or more forall qualifiers ?
@@ -365,5 +376,5 @@
 %type<decl> abstract_parameter_declaration
 
-%type<aggKey> aggregate_key
+%type<aggKey> aggregate_key aggregate_data aggregate_control
 %type<decl> aggregate_type aggregate_type_nobody
 
@@ -650,4 +661,6 @@
 	| postfix_expression '.' '[' field_name_list ']'	// CFA, tuple field selector
 		{ $$ = new ExpressionNode( build_fieldSel( $1, build_tuple( $4 ) ) ); }
+	| postfix_expression '.' aggregate_control
+		{ $$ = new ExpressionNode( build_keyword_cast( Aggregate2Target( $3 ), $1 ) ); }
 	| postfix_expression ARROW identifier
 		{ $$ = new ExpressionNode( build_pfieldSel( $1, build_varref( $3 ) ) ); }
@@ -793,13 +806,6 @@
 	| '(' type_no_function ')' cast_expression
 		{ $$ = new ExpressionNode( build_cast( $2, $4 ) ); }
-		// keyword cast cannot be grouped because of reduction in aggregate_key
-	| '(' GENERATOR '&' ')' cast_expression				// CFA
-		{ $$ = new ExpressionNode( build_keyword_cast( KeywordCastExpr::Coroutine, $5 ) ); }
-	| '(' COROUTINE '&' ')' cast_expression				// CFA
-		{ $$ = new ExpressionNode( build_keyword_cast( KeywordCastExpr::Coroutine, $5 ) ); }
-	| '(' THREAD '&' ')' cast_expression				// CFA
-		{ $$ = new ExpressionNode( build_keyword_cast( KeywordCastExpr::Thread, $5 ) ); }
-	| '(' MONITOR '&' ')' cast_expression				// CFA
-		{ $$ = new ExpressionNode( build_keyword_cast( KeywordCastExpr::Monitor, $5 ) ); }
+	| '(' aggregate_control '&' ')' cast_expression		// CFA
+		{ $$ = new ExpressionNode( build_keyword_cast( Aggregate2Target( $2 ), $5 ) ); }
 		// VIRTUAL cannot be opt because of look ahead issues
 	| '(' VIRTUAL ')' cast_expression					// CFA
@@ -1423,7 +1429,7 @@
 asm_operand:											// GCC
 	string_literal '(' constant_expression ')'
-		{ $$ = new ExpressionNode( new AsmExpr( maybeMoveBuild< Expression >( (ExpressionNode *)nullptr ), $1, maybeMoveBuild< Expression >( $3 ) ) ); }
-	| '[' constant_expression ']' string_literal '(' constant_expression ')'
-		{ $$ = new ExpressionNode( new AsmExpr( maybeMoveBuild< Expression >( $2 ), $4, maybeMoveBuild< Expression >( $6 ) ) ); }
+		{ $$ = new ExpressionNode( new AsmExpr( nullptr, $1, maybeMoveBuild< Expression >( $3 ) ) ); }
+	| '[' IDENTIFIER ']' string_literal '(' constant_expression ')'
+		{ $$ = new ExpressionNode( new AsmExpr( $2, $4, maybeMoveBuild< Expression >( $6 ) ) ); }
 	;
 
@@ -2059,11 +2065,19 @@
 
 aggregate_key:
+	aggregate_data
+	| aggregate_control
+	;
+
+aggregate_data:
 	STRUCT
 		{ yyy = true; $$ = DeclarationNode::Struct; }
 	| UNION
 		{ yyy = true; $$ = DeclarationNode::Union; }
-	| EXCEPTION
+	| EXCEPTION											// CFA
 		{ yyy = true; $$ = DeclarationNode::Exception; }
-	| GENERATOR
+	;
+
+aggregate_control:										// CFA
+	GENERATOR
 		{ yyy = true; $$ = DeclarationNode::Coroutine; }
 	| COROUTINE
@@ -2096,4 +2110,6 @@
 			distInl( $3 );
 		}
+	| INLINE aggregate_control ';'						// CFA
+	   	{ SemanticError( yylloc, "INLINE aggregate control currently unimplemented." ); $$ = nullptr; }
 	| typedef_declaration ';'							// CFA
 	| cfa_field_declaring_list ';'						// CFA, new style field declaration
Index: src/ResolvExpr/AlternativeFinder.cc
===================================================================
--- src/ResolvExpr/AlternativeFinder.cc	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/ResolvExpr/AlternativeFinder.cc	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -69,4 +69,5 @@
 		void postvisit( CastExpr * castExpr );
 		void postvisit( VirtualCastExpr * castExpr );
+		void postvisit( KeywordCastExpr * castExpr );
 		void postvisit( UntypedMemberExpr * memberExpr );
 		void postvisit( MemberExpr * memberExpr );
@@ -1255,4 +1256,61 @@
 	}
 
+	void AlternativeFinder::Finder::postvisit( KeywordCastExpr * castExpr ) {
+		assertf( castExpr->get_result(), "Cast target should have been set in Validate." );
+		auto ref = dynamic_cast<ReferenceType*>(castExpr->get_result());
+		assert(ref);
+		auto inst = dynamic_cast<StructInstType*>(ref->base);
+		assert(inst);
+		auto target = inst->baseStruct;
+
+		AlternativeFinder finder( indexer, env );
+
+		auto pick_alternatives = [target, this](AltList & found, bool expect_ref) {
+			for(auto & alt : found) {
+				Type * expr = alt.expr->get_result();
+				if(expect_ref) {
+					auto res = dynamic_cast<ReferenceType*>(expr);
+					if(!res) { continue; }
+					expr = res->base;
+				}
+
+				if(auto insttype = dynamic_cast<TypeInstType*>(expr)) {
+					auto td = alt.env.lookup(insttype->name);
+					if(!td) { continue; }
+					expr = td->type;
+				}
+
+				if(auto base = dynamic_cast<StructInstType*>(expr)) {
+					if(base->baseStruct == target) {
+						alternatives.push_back(
+							std::move(alt)
+						);
+					}
+				}
+			}
+		};
+
+		try {
+			// Attempt 1 : turn (thread&)X into (thread_desc&)X.__thrd
+			// Clone is purely for memory management
+			std::unique_ptr<Expression> tech1 { new UntypedMemberExpr(new NameExpr(castExpr->concrete_target.field), castExpr->arg->clone()) };
+
+			// don't prune here, since it's guaranteed all alternatives will have the same type
+			finder.findWithoutPrune( tech1.get() );
+			pick_alternatives(finder.alternatives, false);
+
+			return;
+		} catch(SemanticErrorException & ) {}
+
+		// Fallback : turn (thread&)X into (thread_desc&)get_thread(X)
+		std::unique_ptr<Expression> fallback { UntypedExpr::createDeref( new UntypedExpr(new NameExpr(castExpr->concrete_target.getter), { castExpr->arg->clone() })) };
+		// don't prune here, since it's guaranteed all alternatives will have the same type
+		finder.findWithoutPrune( fallback.get() );
+
+		pick_alternatives(finder.alternatives, true);
+
+		// Whatever happens here, we have no more fallbacks
+	}
+
 	namespace {
 		/// Gets name from untyped member expression (member must be NameExpr)
Index: src/ResolvExpr/Resolver.cc
===================================================================
--- src/ResolvExpr/Resolver.cc	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/ResolvExpr/Resolver.cc	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -485,7 +485,4 @@
 		visit_children = false;
 		findVoidExpression( asmExpr->operand, indexer );
-		if ( asmExpr->get_inout() ) {
-			findVoidExpression( asmExpr->inout, indexer );
-		} // if
 	}
 
@@ -1365,9 +1362,4 @@
 		asmExpr = ast::mutate_field(
 			asmExpr, &ast::AsmExpr::operand, findVoidExpression( asmExpr->operand, symtab ) );
-
-		if ( asmExpr->inout ) {
-			asmExpr = ast::mutate_field(
-				asmExpr, &ast::AsmExpr::inout, findVoidExpression( asmExpr->inout, symtab ) );
-		}
 
 		return asmExpr;
Index: src/SynTree/Expression.cc
===================================================================
--- src/SynTree/Expression.cc	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/SynTree/Expression.cc	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -527,10 +527,10 @@
 }
 
-AsmExpr::AsmExpr( const AsmExpr & other ) : Expression( other ), inout( maybeClone( other.inout ) ), constraint( maybeClone( other.constraint ) ), operand( maybeClone( other.operand ) ) {}
+AsmExpr::AsmExpr( const AsmExpr & other ) : Expression( other ), inout( other.inout ), constraint( maybeClone( other.constraint ) ), operand( maybeClone( other.operand ) ) {}
 
 
 void AsmExpr::print( std::ostream & os, Indenter indent ) const {
 	os << "Asm Expression: " << std::endl;
-	if ( inout ) inout->print( os, indent+1 );
+	if ( !inout.empty() ) os <<  "[" << inout << "] ";
 	if ( constraint ) constraint->print( os, indent+1 );
 	if ( operand ) operand->print( os, indent+1 );
Index: src/SynTree/Expression.h
===================================================================
--- src/SynTree/Expression.h	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/SynTree/Expression.h	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -231,5 +231,11 @@
 	enum Target {
 		Coroutine, Thread, Monitor, NUMBER_OF_TARGETS
-	} target;
+	};
+	struct Concrete {
+		std::string field;
+		std::string getter;
+	};
+	Target target;
+	Concrete concrete_target;
 
 	KeywordCastExpr( Expression * arg, Target target );
@@ -575,20 +581,11 @@
 class AsmExpr : public Expression {
   public:
-	Expression * inout;
+	std::string inout;
 	Expression * constraint;
 	Expression * operand;
 
-	AsmExpr( Expression * inout, Expression * constraint, Expression * operand ) : inout( inout ), constraint( constraint ), operand( operand ) {}
+	AsmExpr( const std::string * _inout, Expression * constraint, Expression * operand ) : inout( _inout ? *_inout : "" ), constraint( constraint ), operand( operand ) { delete _inout; }
 	AsmExpr( const AsmExpr & other );
-	virtual ~AsmExpr() { delete inout; delete constraint; delete operand; };
-
-	Expression * get_inout() const { return inout; }
-	void set_inout( Expression * newValue ) { inout = newValue; }
-
-	Expression * get_constraint() const { return constraint; }
-	void set_constraint( Expression * newValue ) { constraint = newValue; }
-
-	Expression * get_operand() const { return operand; }
-	void set_operand( Expression * newValue ) { operand = newValue; }
+	virtual ~AsmExpr() { delete constraint; delete operand; };
 
 	virtual AsmExpr * clone() const override { return new AsmExpr( * this ); }
Index: src/cfa.make
===================================================================
--- src/cfa.make	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/cfa.make	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -4,6 +4,5 @@
 LTCFACOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=compile $(CFACC) $(DEFS) \
-	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(CFAFLAGS) \
-	$(AM_CFLAGS) $(CFLAGS)
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(AM_CFLAGS) $(CFAFLAGS) $(CFLAGS)
 
 AM_V_CFA = $(am__v_CFA_@AM_V@)
Index: src/main.cc
===================================================================
--- src/main.cc	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ src/main.cc	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -407,8 +407,10 @@
 
 
-static const char optstring[] = ":hlLmNnpP:S:twW:D:";
+static const char optstring[] = ":c:ghlLmNnpP:S:twW:D:";
 
 enum { PreludeDir = 128 };
 static struct option long_opts[] = {
+	{ "colors", required_argument, nullptr, 'c' },
+	{ "gdb", no_argument, nullptr, 'g' },
 	{ "help", no_argument, nullptr, 'h' },
 	{ "libcfa", no_argument, nullptr, 'l' },
@@ -422,5 +424,4 @@
 	{ "statistics", required_argument, nullptr, 'S' },
 	{ "tree", no_argument, nullptr, 't' },
-	{ "gdb", no_argument, nullptr, 'g' },
 	{ "", no_argument, nullptr, 0 },					// -w
 	{ "", no_argument, nullptr, 0 },					// -W
@@ -430,19 +431,20 @@
 
 static const char * description[] = {
-	"print help message",								// -h
-	"generate libcfa.c",								// -l
-	"generate line marks",								// -L
-	"do not replace main",								// -m
-	"do not generate line marks",						// -N
-	"do not read prelude",								// -n
+	"diagnostic color: never, always, or auto.",          // -c
+	"wait for gdb to attach",                             // -g
+	"print help message",                                 // -h
+	"generate libcfa.c",                                  // -l
+	"generate line marks",                                // -L
+	"do not replace main",                                // -m
+	"do not generate line marks",                         // -N
+	"do not read prelude",                                // -n
 	"generate prototypes for prelude functions",		// -p
-	"print",											// -P
+	"print",                                              // -P
 	"<directory> prelude directory for debug/nodebug",	// no flag
 	"<option-list> enable profiling information:\n          counters,heap,time,all,none", // -S
-	"building cfa standard lib",									// -t
-	"wait for gdb to attach",									// -g
-	"",													// -w
-	"",													// -W
-	"",													// -D
+	"building cfa standard lib",                          // -t
+	"",                                                   // -w
+	"",                                                   // -W
+	"",                                                   // -D
 }; // description
 
@@ -512,4 +514,13 @@
 	while ( (c = getopt_long( argc, argv, optstring, long_opts, nullptr )) != -1 ) {
 		switch ( c ) {
+		  case 'c':										// diagnostic colors
+			if ( strcmp( optarg, "always" ) == 0 ) {
+				ErrorHelpers::colors = ErrorHelpers::Colors::Always;
+			} else if ( strcmp( optarg, "never" ) == 0 ) {
+				ErrorHelpers::colors = ErrorHelpers::Colors::Never;
+			} else if ( strcmp( optarg, "auto" ) == 0 ) {
+				ErrorHelpers::colors = ErrorHelpers::Colors::Auto;
+			} // if
+			break;
 		  case 'h':										// help message
 			usage( argv );								// no return
Index: tests/.expect/alloc.txt
===================================================================
--- tests/.expect/alloc.txt	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/.expect/alloc.txt	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -30,10 +30,10 @@
 CFA resize array alloc
 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 
-CFA resize array alloc, fill
+CFA resize array alloc
 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 
-CFA resize array alloc, fill
+CFA resize array alloc
 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 
 CFA resize array alloc, fill
-0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 
+0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 
 
 C   memalign 42 42.5
Index: tests/.expect/gccExtensions.x64.txt
===================================================================
--- tests/.expect/gccExtensions.x64.txt	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/.expect/gccExtensions.x64.txt	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -12,5 +12,5 @@
     asm volatile ( "mov %1, %0\n\t" "add $1, %0" : "=" "r" ( _X3dsti_2 ) :  :  );
     asm volatile ( "mov %1, %0\n\t" "add $1, %0" : "=r" ( _X3dsti_2 ) : "r" ( _X3srci_2 ) :  );
-    asm ( "mov %1, %0\n\t" "add $1, %0" : "=r" ( _X3dsti_2 ), "=r" ( _X3srci_2 ) : [ _X3srci_2 ] "r" ( _X3dsti_2 ) : "r0" );
+    asm ( "mov %1, %0\n\t" "add $1, %0" : "=r" ( _X3dsti_2 ), "=r" ( _X3srci_2 ) : [ src ] "r" ( _X3dsti_2 ) : "r0" );
     L2: L1: asm goto ( "frob %%r5, %1; jc %l[L1]; mov (%2), %%r5" :  : "r" ( _X3srci_2 ), "r" ( (&_X3dsti_2) ) : "r5", "memory" : L1, L2 );
     double _Complex _X2c1Cd_2;
Index: tests/.expect/gccExtensions.x86.txt
===================================================================
--- tests/.expect/gccExtensions.x86.txt	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/.expect/gccExtensions.x86.txt	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -12,5 +12,5 @@
     asm volatile ( "mov %1, %0\n\t" "add $1, %0" : "=" "r" ( _X3dsti_2 ) :  :  );
     asm volatile ( "mov %1, %0\n\t" "add $1, %0" : "=r" ( _X3dsti_2 ) : "r" ( _X3srci_2 ) :  );
-    asm ( "mov %1, %0\n\t" "add $1, %0" : "=r" ( _X3dsti_2 ), "=r" ( _X3srci_2 ) : [ _X3srci_2 ] "r" ( _X3dsti_2 ) : "r0" );
+    asm ( "mov %1, %0\n\t" "add $1, %0" : "=r" ( _X3dsti_2 ), "=r" ( _X3srci_2 ) : [ src ] "r" ( _X3dsti_2 ) : "r0" );
     L2: L1: asm goto ( "frob %%r5, %1; jc %l[L1]; mov (%2), %%r5" :  : "r" ( _X3srci_2 ), "r" ( (&_X3dsti_2) ) : "r5", "memory" : L1, L2 );
     double _Complex _X2c1Cd_2;
Index: tests/.expect/references.txt
===================================================================
--- tests/.expect/references.txt	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/.expect/references.txt	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -36,4 +36,5 @@
 3
 3 9 { 1., 7. }, [1, 2, 3]
+4
 Destructing a Y
 Destructing a Y
Index: tests/Makefile.am
===================================================================
--- tests/Makefile.am	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/Makefile.am	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -46,5 +46,5 @@
 
 # adjust CC to current flags
-CC = $(if $(DISTCC_CFA_PATH),distcc $(DISTCC_CFA_PATH),$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
+CC = $(if $(DISTCC_CFA_PATH),distcc $(DISTCC_CFA_PATH) ${ARCH_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
 CFACC = $(CC)
 
@@ -53,5 +53,5 @@
 
 # adjusted CC but without the actual distcc call
-CFACCLOCAL = $(if $(DISTCC_CFA_PATH),$(DISTCC_CFA_PATH),$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
+CFACCLOCAL = $(if $(DISTCC_CFA_PATH),$(DISTCC_CFA_PATH) ${ARCH_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
 
 PRETTY_PATH=mkdir -p $(dir $(abspath ${@})) && cd ${srcdir} &&
Index: tests/Makefile.in
===================================================================
--- tests/Makefile.in	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/Makefile.in	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -214,5 +214,5 @@
 
 # adjust CC to current flags
-CC = $(if $(DISTCC_CFA_PATH),distcc $(DISTCC_CFA_PATH),$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
+CC = $(if $(DISTCC_CFA_PATH),distcc $(DISTCC_CFA_PATH) ${ARCH_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
 CCAS = @CCAS@
 CCASDEPMODE = @CCASDEPMODE@
@@ -358,6 +358,5 @@
 LTCFACOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=compile $(CFACC) $(DEFS) \
-	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(CFAFLAGS) \
-	$(AM_CFLAGS) $(CFLAGS)
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(AM_CFLAGS) $(CFAFLAGS) $(CFLAGS)
 
 AM_V_CFA = $(am__v_CFA_@AM_V@)
@@ -405,5 +404,5 @@
 
 # adjusted CC but without the actual distcc call
-CFACCLOCAL = $(if $(DISTCC_CFA_PATH),$(DISTCC_CFA_PATH),$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
+CFACCLOCAL = $(if $(DISTCC_CFA_PATH),$(DISTCC_CFA_PATH) ${ARCH_FLAGS},$(TARGET_CFA) ${DEBUG_FLAGS} ${ARCH_FLAGS})
 PRETTY_PATH = mkdir -p $(dir $(abspath ${@})) && cd ${srcdir} &&
 avl_test_SOURCES = avltree/avl_test.cfa avltree/avl0.cfa avltree/avl1.cfa avltree/avl2.cfa avltree/avl3.cfa avltree/avl4.cfa avltree/avl-private.cfa
Index: tests/alloc.cfa
===================================================================
--- tests/alloc.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/alloc.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Wed Feb  3 07:56:22 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Oct 20 21:45:21 2019
-// Update Count     : 391
+// Last Modified On : Fri Nov 22 15:34:19 2019
+// Update Count     : 404
 //
 
@@ -126,5 +126,5 @@
 
 	p = alloc( p, 2 * dim );                            // CFA resize array alloc
-	for ( i; dim ~ 2 * dim ) { p[i] = 0x1010101; }
+	for ( i; dim ~ 2 * dim ) { p[i] = 0x1010101; }		// fill upper part
 	printf( "CFA resize array alloc\n" );
 	for ( i; 2 * dim ) { printf( "%#x ", p[i] ); }
@@ -139,5 +139,5 @@
 
 	p = alloc_set( p, 3 * dim, fill );					// CFA resize array alloc, fill
-	printf( "CFA resize array alloc, fill\n" );
+	printf( "CFA resize array alloc\n" );
 	for ( i; 3 * dim ) { printf( "%#x ", p[i] ); }
 	printf( "\n" );
@@ -145,5 +145,5 @@
 
 	p = alloc_set( p, dim, fill );						// CFA resize array alloc, fill
-	printf( "CFA resize array alloc, fill\n" );
+	printf( "CFA resize array alloc\n" );
 	for ( i; dim ) { printf( "%#x ", p[i] ); }
 	printf( "\n" );
Index: tests/builtins/sync.cfa
===================================================================
--- tests/builtins/sync.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/builtins/sync.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -4,222 +4,160 @@
 void foo() {
 	volatile _Bool * vpB = 0; _Bool vB = 0;
-	volatile char * vp1 = 0; char * rp1 = 0; char v1 = 0;
-	volatile short * vp2 = 0; short * rp2 = 0; short v2 = 0;
-	volatile int * vp4 = 0; int * rp4 = 0; int v4 = 0;
-	volatile long long int * vp8 = 0; long long int * rp8 = 0; long long int v8 = 0;
-	#if defined(__SIZEOF_INT128__)
-	volatile __int128 * vp16 = 0; __int128 * rp16 = 0; __int128 v16 = 0;
+	volatile char * vpc = 0; char * rpc = 0; char vc = 0;
+	volatile short * vps = 0; short * rps = 0; short vs = 0;
+	volatile int * vpi = 0; int * rpi = 0; int vi = 0;
+	volatile long int * vpl = 0; long int * rpl = 0; long int vl = 0;
+	volatile long long int * vpll = 0; long long int * rpll = 0; long long int vll = 0;
+	#if defined(__SIZEOF_INT128__)
+	volatile __int128 * vplll = 0; __int128 * rplll = 0; __int128 vlll = 0;
 	#endif
 	struct type * volatile * vpp = 0; struct type ** rpp = 0; struct type * vp = 0;
 
-	{ char ret; ret = __sync_fetch_and_add(vp1, v1); }
-	{ char ret; ret = __sync_fetch_and_add_1(vp1, v1); }
-	{ short ret; ret = __sync_fetch_and_add(vp2, v2); }
-	{ short ret; ret = __sync_fetch_and_add_2(vp2, v2); }
-	{ int ret; ret = __sync_fetch_and_add(vp4, v4); }
-	{ int ret; ret = __sync_fetch_and_add_4(vp4, v4); }
-	{ long long int ret; ret = __sync_fetch_and_add(vp8, v8); }
-	{ long long int ret; ret = __sync_fetch_and_add_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_fetch_and_add(vp16, v16); }
-	{ __int128 ret; ret = __sync_fetch_and_add_16(vp16, v16); }
-	#endif
-
-	{ char ret; ret = __sync_fetch_and_sub(vp1, v1); }
-	{ char ret; ret = __sync_fetch_and_sub_1(vp1, v1); }
-	{ short ret; ret = __sync_fetch_and_sub(vp2, v2); }
-	{ short ret; ret = __sync_fetch_and_sub_2(vp2, v2); }
-	{ int ret; ret = __sync_fetch_and_sub(vp4, v4); }
-	{ int ret; ret = __sync_fetch_and_sub_4(vp4, v4); }
-	{ long long int ret; ret = __sync_fetch_and_sub(vp8, v8); }
-	{ long long int ret; ret = __sync_fetch_and_sub_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_fetch_and_sub(vp16, v16); }
-	{ __int128 ret; ret = __sync_fetch_and_sub_16(vp16, v16); }
-	#endif
-
-	{ char ret; ret = __sync_fetch_and_or(vp1, v1); }
-	{ char ret; ret = __sync_fetch_and_or_1(vp1, v1); }
-	{ short ret; ret = __sync_fetch_and_or(vp2, v2); }
-	{ short ret; ret = __sync_fetch_and_or_2(vp2, v2); }
-	{ int ret; ret = __sync_fetch_and_or(vp4, v4); }
-	{ int ret; ret = __sync_fetch_and_or_4(vp4, v4); }
-	{ long long int ret; ret = __sync_fetch_and_or(vp8, v8); }
-	{ long long int ret; ret = __sync_fetch_and_or_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_fetch_and_or(vp16, v16); }
-	{ __int128 ret; ret = __sync_fetch_and_or_16(vp16, v16); }
-	#endif
-
-	{ char ret; ret = __sync_fetch_and_and(vp1, v1); }
-	{ char ret; ret = __sync_fetch_and_and_1(vp1, v1); }
-	{ short ret; ret = __sync_fetch_and_and(vp2, v2); }
-	{ short ret; ret = __sync_fetch_and_and_2(vp2, v2); }
-	{ int ret; ret = __sync_fetch_and_and(vp4, v4); }
-	{ int ret; ret = __sync_fetch_and_and_4(vp4, v4); }
-	{ long long int ret; ret = __sync_fetch_and_and(vp8, v8); }
-	{ long long int ret; ret = __sync_fetch_and_and_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_fetch_and_and(vp16, v16); }
-	{ __int128 ret; ret = __sync_fetch_and_and_16(vp16, v16); }
-	#endif
-
-	{ char ret; ret = __sync_fetch_and_xor(vp1, v1); }
-	{ char ret; ret = __sync_fetch_and_xor_1(vp1, v1); }
-	{ short ret; ret = __sync_fetch_and_xor(vp2, v2); }
-	{ short ret; ret = __sync_fetch_and_xor_2(vp2, v2); }
-	{ int ret; ret = __sync_fetch_and_xor(vp4, v4); }
-	{ int ret; ret = __sync_fetch_and_xor_4(vp4, v4); }
-	{ long long int ret; ret = __sync_fetch_and_xor(vp8, v8); }
-	{ long long int ret; ret = __sync_fetch_and_xor_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_fetch_and_xor(vp16, v16); }
-	{ __int128 ret; ret = __sync_fetch_and_xor_16(vp16, v16); }
-	#endif
-
-	{ char ret; ret = __sync_fetch_and_nand(vp1, v1); }
-	{ char ret; ret = __sync_fetch_and_nand_1(vp1, v1); }
-	{ short ret; ret = __sync_fetch_and_nand(vp2, v2); }
-	{ short ret; ret = __sync_fetch_and_nand_2(vp2, v2); }
-	{ int ret; ret = __sync_fetch_and_nand(vp4, v4); }
-	{ int ret; ret = __sync_fetch_and_nand_4(vp4, v4); }
-	{ long long int ret; ret = __sync_fetch_and_nand(vp8, v8); }
-	{ long long int ret; ret = __sync_fetch_and_nand_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_fetch_and_nand(vp16, v16); }
-	{ __int128 ret; ret = __sync_fetch_and_nand_16(vp16, v16); }
-	#endif
-
-	{ char ret; ret = __sync_add_and_fetch(vp1, v1); }
-	{ char ret; ret = __sync_add_and_fetch_1(vp1, v1); }
-	{ short ret; ret = __sync_add_and_fetch(vp2, v2); }
-	{ short ret; ret = __sync_add_and_fetch_2(vp2, v2); }
-	{ int ret; ret = __sync_add_and_fetch(vp4, v4); }
-	{ int ret; ret = __sync_add_and_fetch_4(vp4, v4); }
-	{ long long int ret; ret = __sync_add_and_fetch(vp8, v8); }
-	{ long long int ret; ret = __sync_add_and_fetch_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_add_and_fetch(vp16, v16); }
-	{ __int128 ret; ret = __sync_add_and_fetch_16(vp16, v16); }
-	#endif
-
-	{ char ret; ret = __sync_sub_and_fetch(vp1, v1); }
-	{ char ret; ret = __sync_sub_and_fetch_1(vp1, v1); }
-	{ short ret; ret = __sync_sub_and_fetch(vp2, v2); }
-	{ short ret; ret = __sync_sub_and_fetch_2(vp2, v2); }
-	{ int ret; ret = __sync_sub_and_fetch(vp4, v4); }
-	{ int ret; ret = __sync_sub_and_fetch_4(vp4, v4); }
-	{ long long int ret; ret = __sync_sub_and_fetch(vp8, v8); }
-	{ long long int ret; ret = __sync_sub_and_fetch_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_sub_and_fetch(vp16, v16); }
-	{ __int128 ret; ret = __sync_sub_and_fetch_16(vp16, v16); }
-	#endif
-
-	{ char ret; ret = __sync_or_and_fetch(vp1, v1); }
-	{ char ret; ret = __sync_or_and_fetch_1(vp1, v1); }
-	{ short ret; ret = __sync_or_and_fetch(vp2, v2); }
-	{ short ret; ret = __sync_or_and_fetch_2(vp2, v2); }
-	{ int ret; ret = __sync_or_and_fetch(vp4, v4); }
-	{ int ret; ret = __sync_or_and_fetch_4(vp4, v4); }
-	{ long long int ret; ret = __sync_or_and_fetch(vp8, v8); }
-	{ long long int ret; ret = __sync_or_and_fetch_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_or_and_fetch(vp16, v16); }
-	{ __int128 ret; ret = __sync_or_and_fetch_16(vp16, v16); }
-	#endif
-
-	{ char ret; ret = __sync_and_and_fetch(vp1, v1); }
-	{ char ret; ret = __sync_and_and_fetch_1(vp1, v1); }
-	{ short ret; ret = __sync_and_and_fetch(vp2, v2); }
-	{ short ret; ret = __sync_and_and_fetch_2(vp2, v2); }
-	{ int ret; ret = __sync_and_and_fetch(vp4, v4); }
-	{ int ret; ret = __sync_and_and_fetch_4(vp4, v4); }
-	{ long long int ret; ret = __sync_and_and_fetch(vp8, v8); }
-	{ long long int ret; ret = __sync_and_and_fetch_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_and_and_fetch(vp16, v16); }
-	{ __int128 ret; ret = __sync_and_and_fetch_16(vp16, v16); }
-	#endif
-
-	{ char ret; ret = __sync_xor_and_fetch(vp1, v1); }
-	{ char ret; ret = __sync_xor_and_fetch_1(vp1, v1); }
-	{ short ret; ret = __sync_xor_and_fetch(vp2, v2); }
-	{ short ret; ret = __sync_xor_and_fetch_2(vp2, v2); }
-	{ int ret; ret = __sync_xor_and_fetch(vp4, v4); }
-	{ int ret; ret = __sync_xor_and_fetch_4(vp4, v4); }
-	{ long long int ret; ret = __sync_xor_and_fetch(vp8, v8); }
-	{ long long int ret; ret = __sync_xor_and_fetch_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_xor_and_fetch(vp16, v16); }
-	{ __int128 ret; ret = __sync_xor_and_fetch_16(vp16, v16); }
-	#endif
-
-	{ char ret; ret = __sync_nand_and_fetch(vp1, v1); }
-	{ char ret; ret = __sync_nand_and_fetch_1(vp1, v1); }
-	{ short ret; ret = __sync_nand_and_fetch(vp2, v2); }
-	{ short ret; ret = __sync_nand_and_fetch_2(vp2, v2); }
-	{ int ret; ret = __sync_nand_and_fetch(vp4, v4); }
-	{ int ret; ret = __sync_nand_and_fetch_4(vp4, v4); }
-	{ long long int ret; ret = __sync_nand_and_fetch(vp8, v8); }
-	{ long long int ret; ret = __sync_nand_and_fetch_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_nand_and_fetch(vp16, v16); }
-	{ __int128 ret; ret = __sync_nand_and_fetch_16(vp16, v16); }
-	#endif
-
-	{ _Bool ret; ret = __sync_bool_compare_and_swap(vp1, v1, v1); }
-	{ _Bool ret; ret = __sync_bool_compare_and_swap_1(vp1, v1, v1); }
-	{ _Bool ret; ret = __sync_bool_compare_and_swap(vp2, v2, v2); }
-	{ _Bool ret; ret = __sync_bool_compare_and_swap_2(vp2, v2, v2); }
-	{ _Bool ret; ret = __sync_bool_compare_and_swap(vp4, v4, v4); }
-	{ _Bool ret; ret = __sync_bool_compare_and_swap_4(vp4, v4, v4); }
-	{ _Bool ret; ret = __sync_bool_compare_and_swap(vp8, v8, v8); }
-	{ _Bool ret; ret = __sync_bool_compare_and_swap_8(vp8, v8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ _Bool ret; ret = __sync_bool_compare_and_swap(vp16, v16, v16); }
-	{ _Bool ret; ret = __sync_bool_compare_and_swap_16(vp16, v16,v16); }
+	{ char ret; ret = __sync_fetch_and_add(vpc, vc); }
+	{ short ret; ret = __sync_fetch_and_add(vps, vs); }
+	{ int ret; ret = __sync_fetch_and_add(vpi, vi); }
+	{ long int ret; ret = __sync_fetch_and_add(vpl, vl); }
+	{ long long int ret; ret = __sync_fetch_and_add(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_fetch_and_add(vplll, vlll); }
+	#endif
+
+	{ char ret; ret = __sync_fetch_and_sub(vpc, vc); }
+	{ short ret; ret = __sync_fetch_and_sub(vps, vs); }
+	{ int ret; ret = __sync_fetch_and_sub(vpi, vi); }
+	{ long int ret; ret = __sync_fetch_and_sub(vpl, vl); }
+	{ long long int ret; ret = __sync_fetch_and_sub(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_fetch_and_sub(vplll, vlll); }
+	#endif
+
+	{ char ret; ret = __sync_fetch_and_or(vpc, vc); }
+	{ short ret; ret = __sync_fetch_and_or(vps, vs); }
+	{ int ret; ret = __sync_fetch_and_or(vpi, vi); }
+	{ long int ret; ret = __sync_fetch_and_or(vpl, vl); }
+	{ long long int ret; ret = __sync_fetch_and_or(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_fetch_and_or(vplll, vlll); }
+	#endif
+
+	{ char ret; ret = __sync_fetch_and_and(vpc, vc); }
+	{ short ret; ret = __sync_fetch_and_and(vps, vs); }
+	{ int ret; ret = __sync_fetch_and_and(vpi, vi); }
+	{ long int ret; ret = __sync_fetch_and_and(vpl, vl); }
+	{ long long int ret; ret = __sync_fetch_and_and(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_fetch_and_and(vplll, vlll); }
+	#endif
+
+	{ char ret; ret = __sync_fetch_and_xor(vpc, vc); }
+	{ short ret; ret = __sync_fetch_and_xor(vps, vs); }
+	{ int ret; ret = __sync_fetch_and_xor(vpi, vi); }
+	{ long int ret; ret = __sync_fetch_and_xor(vpl, vl); }
+	{ long long int ret; ret = __sync_fetch_and_xor(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_fetch_and_xor(vplll, vlll); }
+	#endif
+
+	{ char ret; ret = __sync_fetch_and_nand(vpc, vc); }
+	{ short ret; ret = __sync_fetch_and_nand(vps, vs); }
+	{ int ret; ret = __sync_fetch_and_nand(vpi, vi); }
+	{ long int ret; ret = __sync_fetch_and_nand(vpl, vl); }
+	{ long long int ret; ret = __sync_fetch_and_nand(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_fetch_and_nand(vplll, vlll); }
+	{ __int128 ret; ret = __sync_fetch_and_nand_16(vplll, vlll); }
+	#endif
+
+	{ char ret; ret = __sync_add_and_fetch(vpc, vc); }
+	{ short ret; ret = __sync_add_and_fetch(vps, vs); }
+	{ int ret; ret = __sync_add_and_fetch(vpi, vi); }
+	{ long int ret; ret = __sync_add_and_fetch(vpl, vl); }
+	{ long long int ret; ret = __sync_add_and_fetch(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_add_and_fetch(vplll, vlll); }
+	#endif
+
+	{ char ret; ret = __sync_sub_and_fetch(vpc, vc); }
+	{ short ret; ret = __sync_sub_and_fetch(vps, vs); }
+	{ int ret; ret = __sync_sub_and_fetch(vpi, vi); }
+	{ long int ret; ret = __sync_sub_and_fetch(vpl, vl); }
+	{ long long int ret; ret = __sync_sub_and_fetch(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_sub_and_fetch(vplll, vlll); }
+	#endif
+
+	{ char ret; ret = __sync_or_and_fetch(vpc, vc); }
+	{ short ret; ret = __sync_or_and_fetch(vps, vs); }
+	{ int ret; ret = __sync_or_and_fetch(vpi, vi); }
+	{ long int ret; ret = __sync_or_and_fetch(vpl, vl); }
+	{ long long int ret; ret = __sync_or_and_fetch(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_or_and_fetch(vplll, vlll); }
+	#endif
+
+	{ char ret; ret = __sync_and_and_fetch(vpc, vc); }
+	{ short ret; ret = __sync_and_and_fetch(vps, vs); }
+	{ int ret; ret = __sync_and_and_fetch(vpi, vi); }
+	{ long int ret; ret = __sync_and_and_fetch(vpl, vl); }
+	{ long long int ret; ret = __sync_and_and_fetch(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_and_and_fetch(vplll, vlll); }
+	#endif
+
+	{ char ret; ret = __sync_xor_and_fetch(vpc, vc); }
+	{ short ret; ret = __sync_xor_and_fetch(vps, vs); }
+	{ int ret; ret = __sync_xor_and_fetch(vpi, vi); }
+	{ long int ret; ret = __sync_xor_and_fetch(vpl, vl); }
+	{ long long int ret; ret = __sync_xor_and_fetch(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_xor_and_fetch(vplll, vlll); }
+	#endif
+
+	{ char ret; ret = __sync_nand_and_fetch(vpc, vc); }
+	{ short ret; ret = __sync_nand_and_fetch(vps, vs); }
+	{ int ret; ret = __sync_nand_and_fetch(vpi, vi); }
+	{ long int ret; ret = __sync_nand_and_fetch(vpl, vl); }
+	{ long long int ret; ret = __sync_nand_and_fetch(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_nand_and_fetch(vplll, vlll); }
+	#endif
+
+	{ _Bool ret; ret = __sync_bool_compare_and_swap(vpc, vc, vc); }
+	{ _Bool ret; ret = __sync_bool_compare_and_swap(vps, vs, vs); }
+	{ _Bool ret; ret = __sync_bool_compare_and_swap(vpi, vi, vi); }
+	{ _Bool ret; ret = __sync_bool_compare_and_swap(vpl, vl, vl); }
+	{ _Bool ret; ret = __sync_bool_compare_and_swap(vpll, vll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ _Bool ret; ret = __sync_bool_compare_and_swap(vplll, vlll, vlll); }
 	#endif
 	{ _Bool ret; ret = __sync_bool_compare_and_swap(vpp, vp, vp); }
 
-	{ char ret; ret = __sync_val_compare_and_swap(vp1, v1, v1); }
-	{ char ret; ret = __sync_val_compare_and_swap_1(vp1, v1, v1); }
-	{ short ret; ret = __sync_val_compare_and_swap(vp2, v2, v2); }
-	{ short ret; ret = __sync_val_compare_and_swap_2(vp2, v2, v2); }
-	{ int ret; ret = __sync_val_compare_and_swap(vp4, v4, v4); }
-	{ int ret; ret = __sync_val_compare_and_swap_4(vp4, v4, v4); }
-	{ long long int ret; ret = __sync_val_compare_and_swap(vp8, v8, v8); }
-	{ long long int ret; ret = __sync_val_compare_and_swap_8(vp8, v8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_val_compare_and_swap(vp16, v16, v16); }
-	{ __int128 ret; ret = __sync_val_compare_and_swap_16(vp16, v16,v16); }
+	{ char ret; ret = __sync_val_compare_and_swap(vpc, vc, vc); }
+	{ short ret; ret = __sync_val_compare_and_swap(vps, vs, vs); }
+	{ int ret; ret = __sync_val_compare_and_swap(vpi, vi, vi); }
+	{ long int ret; ret = __sync_val_compare_and_swap(vpl, vl, vl); }
+	{ long long int ret; ret = __sync_val_compare_and_swap(vpll, vll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_val_compare_and_swap(vplll, vlll, vlll); }
 	#endif
 	{ struct type * ret; ret = __sync_val_compare_and_swap(vpp, vp, vp); }
 
 
-	{ char ret; ret = __sync_lock_test_and_set(vp1, v1); }
-	{ char ret; ret = __sync_lock_test_and_set_1(vp1, v1); }
-	{ short ret; ret = __sync_lock_test_and_set(vp2, v2); }
-	{ short ret; ret = __sync_lock_test_and_set_2(vp2, v2); }
-	{ int ret; ret = __sync_lock_test_and_set(vp4, v4); }
-	{ int ret; ret = __sync_lock_test_and_set_4(vp4, v4); }
-	{ long long int ret; ret = __sync_lock_test_and_set(vp8, v8); }
-	{ long long int ret; ret = __sync_lock_test_and_set_8(vp8, v8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __sync_lock_test_and_set(vp16, v16); }
-	{ __int128 ret; ret = __sync_lock_test_and_set_16(vp16, v16); }
-	#endif
-
-	{ __sync_lock_release(vp1); }
-	{ __sync_lock_release_1(vp1); }
-	{ __sync_lock_release(vp2); }
-	{ __sync_lock_release_2(vp2); }
-	{ __sync_lock_release(vp4); }
-	{ __sync_lock_release_4(vp4); }
-	{ __sync_lock_release(vp8); }
-	{ __sync_lock_release_8(vp8); }
-	#if defined(__SIZEOF_INT128__)
-	{ __sync_lock_release(vp16); }
-	{ __sync_lock_release_16(vp16); }
+	{ char ret; ret = __sync_lock_test_and_set(vpc, vc); }
+	{ short ret; ret = __sync_lock_test_and_set(vps, vs); }
+	{ int ret; ret = __sync_lock_test_and_set(vpi, vi); }
+	{ long int ret; ret = __sync_lock_test_and_set(vpl, vl); }
+	{ long long int ret; ret = __sync_lock_test_and_set(vpll, vll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __sync_lock_test_and_set(vplll, vlll); }
+	#endif
+
+	{ __sync_lock_release(vpc); }
+	{ __sync_lock_release(vps); }
+	{ __sync_lock_release(vpi); }
+	{ __sync_lock_release(vpl); }
+	{ __sync_lock_release(vpll); }
+	#if defined(__SIZEOF_INT128__)
+	{ __sync_lock_release(vplll); }
 	#endif
 
@@ -230,246 +168,186 @@
 
 	{ _Bool ret; ret = __atomic_test_and_set(vpB, vB); }
-	{ _Bool ret; ret = __atomic_test_and_set(vp1, v1); }
+	{ _Bool ret; ret = __atomic_test_and_set(vpc, vc); }
 	{ __atomic_clear(vpB, vB); }
-	{ __atomic_clear(vp1, v1); }
-
-	{ char ret; ret = __atomic_exchange_n(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_exchange_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; __atomic_exchange(vp1, &v1, &ret, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_exchange_n(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_exchange_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; __atomic_exchange(vp2, &v2, &ret, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_exchange_n(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_exchange_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; __atomic_exchange(vp4, &v4, &ret, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_exchange_n(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_exchange_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; __atomic_exchange(vp8, &v8, &ret, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_exchange_n(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_exchange_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; __atomic_exchange(vp16, &v16, &ret, __ATOMIC_SEQ_CST); }
+	{ __atomic_clear(vpc, vc); }
+
+	{ char ret; ret = __atomic_exchange_n(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ char ret; __atomic_exchange(vpc, &vc, &ret, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_exchange_n(vps, vs, __ATOMIC_SEQ_CST); }
+	{ short ret; __atomic_exchange(vps, &vs, &ret, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_exchange_n(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ int ret; __atomic_exchange(vpi, &vi, &ret, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_exchange_n(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long int ret; __atomic_exchange(vpl, &vl, &ret, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_exchange_n(vpll, vll, __ATOMIC_SEQ_CST); }
+	{ long long int ret; __atomic_exchange(vpll, &vll, &ret, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_exchange_n(vplll, vlll, __ATOMIC_SEQ_CST); }
+	{ __int128 ret; __atomic_exchange(vplll, &vlll, &ret, __ATOMIC_SEQ_CST); }
 	#endif
 	{ struct type * ret; ret = __atomic_exchange_n(vpp, vp, __ATOMIC_SEQ_CST); }
 	{ struct type * ret; __atomic_exchange(vpp, &vp, &ret, __ATOMIC_SEQ_CST); }
 
-	{ char ret; ret = __atomic_load_n(vp1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_load_1(vp1, __ATOMIC_SEQ_CST); }
-	{ char ret; __atomic_load(vp1, &ret, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_load_n(vp2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_load_2(vp2, __ATOMIC_SEQ_CST); }
-	{ short ret; __atomic_load(vp2, &ret, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_load_n(vp4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_load_4(vp4, __ATOMIC_SEQ_CST); }
-	{ int ret; __atomic_load(vp4, &ret, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_load_n(vp8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_load_8(vp8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; __atomic_load(vp8, &ret, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_load_n(vp16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_load_16(vp16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; __atomic_load(vp16, &ret, __ATOMIC_SEQ_CST); }
+	{ char ret; ret = __atomic_load_n(vpc, __ATOMIC_SEQ_CST); }
+	{ char ret; __atomic_load(vpc, &ret, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_load_n(vps, __ATOMIC_SEQ_CST); }
+	{ short ret; __atomic_load(vps, &ret, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_load_n(vpi, __ATOMIC_SEQ_CST); }
+	{ int ret; __atomic_load(vpi, &ret, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_load_n(vpl, __ATOMIC_SEQ_CST); }
+	{ long int ret; __atomic_load(vpl, &ret, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_load_n(vpll, __ATOMIC_SEQ_CST); }
+	{ long long int ret; __atomic_load(vpll, &ret, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_load_n(vplll, __ATOMIC_SEQ_CST); }
+	{ __int128 ret; __atomic_load(vplll, &ret, __ATOMIC_SEQ_CST); }
 	#endif
 	{ struct type * ret; ret = __atomic_load_n(vpp, __ATOMIC_SEQ_CST); }
 	{ struct type * ret; __atomic_load(vpp, &ret, __ATOMIC_SEQ_CST); }
 
-	{ _Bool ret; ret = __atomic_compare_exchange_n(vp1, rp1, v1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange_1(vp1, rp1, v1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange(vp1, rp1, &v1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange_n(vp2, rp2, v2, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange_2(vp2, rp2, v2, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange(vp2, rp2, &v2, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange_n(vp4, rp4, v4, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange_4(vp4, rp4, v4, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange(vp4, rp4, &v4, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange_n(vp8, rp8, v8, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange_8(vp8, rp8, v8, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange(vp8, rp8, &v8, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ _Bool ret; ret = __atomic_compare_exchange_n(vp16, rp16, v16, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange_16(vp16, rp16, v16, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
-	{ _Bool ret; ret = __atomic_compare_exchange(vp16, rp16, &v16, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	{ _Bool ret; ret = __atomic_compare_exchange_n(vpc, rpc, vc, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	{ _Bool ret; ret = __atomic_compare_exchange(vpc, rpc, &vc, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	{ _Bool ret; ret = __atomic_compare_exchange_n(vps, rps, vs, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	{ _Bool ret; ret = __atomic_compare_exchange(vps, rps, &vs, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	{ _Bool ret; ret = __atomic_compare_exchange_n(vpi, rpi, vi, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	{ _Bool ret; ret = __atomic_compare_exchange(vpi, rpi, &vi, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	{ _Bool ret; ret = __atomic_compare_exchange_n(vpl, rpl, vl, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	{ _Bool ret; ret = __atomic_compare_exchange(vpl, rpl, &vl, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	{ _Bool ret; ret = __atomic_compare_exchange_n(vpll, rpll, vll, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	{ _Bool ret; ret = __atomic_compare_exchange(vpll, rpll, &vll, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ _Bool ret; ret = __atomic_compare_exchange_n(vplll, rplll, vlll, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
+	{ _Bool ret; ret = __atomic_compare_exchange(vplll, rplll, &vlll, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
 	#endif
 	{ _Bool ret; ret = __atomic_compare_exchange_n(vpp, rpp, vp, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
 	{ _Bool ret; ret = __atomic_compare_exchange(vpp, rpp, &vp, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }
 
-	{ __atomic_store_n(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ __atomic_store_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ __atomic_store(vp1, &v1, __ATOMIC_SEQ_CST); }
-	{ __atomic_store_n(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ __atomic_store_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ __atomic_store(vp2, &v2, __ATOMIC_SEQ_CST); }
-	{ __atomic_store_n(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ __atomic_store_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ __atomic_store(vp4, &v4, __ATOMIC_SEQ_CST); }
-	{ __atomic_store_n(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ __atomic_store_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ __atomic_store(vp8, &v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __atomic_store_n(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __atomic_store_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __atomic_store(vp16, &v16, __ATOMIC_SEQ_CST); }
+	{ __atomic_store_n(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ __atomic_store(vpc, &vc, __ATOMIC_SEQ_CST); }
+	{ __atomic_store_n(vps, vs, __ATOMIC_SEQ_CST); }
+	{ __atomic_store(vps, &vs, __ATOMIC_SEQ_CST); }
+	{ __atomic_store_n(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ __atomic_store(vpi, &vi, __ATOMIC_SEQ_CST); }
+	{ __atomic_store_n(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ __atomic_store(vpl, &vl, __ATOMIC_SEQ_CST); }
+	{ __atomic_store_n(vpll, vll, __ATOMIC_SEQ_CST); }
+	{ __atomic_store(vpll, &vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __atomic_store_n(vplll, vlll, __ATOMIC_SEQ_CST); }
+	{ __atomic_store(vplll, &vlll, __ATOMIC_SEQ_CST); }
 	#endif
 	{ __atomic_store_n(vpp, vp, __ATOMIC_SEQ_CST); }
 	{ __atomic_store(vpp, &vp, __ATOMIC_SEQ_CST); }
 
-	{ char ret; ret = __atomic_add_fetch(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_add_fetch_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_add_fetch(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_add_fetch_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_add_fetch(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_add_fetch_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_add_fetch(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_add_fetch_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_add_fetch(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_add_fetch_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ char ret; ret = __atomic_sub_fetch(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_sub_fetch_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_sub_fetch(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_sub_fetch_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_sub_fetch(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_sub_fetch_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_sub_fetch(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_sub_fetch_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_sub_fetch(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_sub_fetch_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ char ret; ret = __atomic_and_fetch(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_and_fetch_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_and_fetch(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_and_fetch_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_and_fetch(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_and_fetch_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_and_fetch(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_and_fetch_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_and_fetch(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_and_fetch_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ char ret; ret = __atomic_nand_fetch(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_nand_fetch_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_nand_fetch(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_nand_fetch_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_nand_fetch(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_nand_fetch_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_nand_fetch(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_nand_fetch_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_nand_fetch(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_nand_fetch_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ char ret; ret = __atomic_xor_fetch(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_xor_fetch_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_xor_fetch(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_xor_fetch_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_xor_fetch(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_xor_fetch_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_xor_fetch(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_xor_fetch_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_xor_fetch(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_xor_fetch_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ char ret; ret = __atomic_or_fetch(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_or_fetch_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_or_fetch(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_or_fetch_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_or_fetch(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_or_fetch_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_or_fetch(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_or_fetch_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_or_fetch(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_or_fetch_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ char ret; ret = __atomic_fetch_add(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_fetch_add_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_add(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_add_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_add(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_add_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_add(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_add_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_fetch_add(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_fetch_add_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ char ret; ret = __atomic_fetch_sub(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_fetch_sub_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_sub(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_sub_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_sub(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_sub_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_sub(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_sub_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_fetch_sub(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_fetch_sub_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ char ret; ret = __atomic_fetch_and(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_fetch_and_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_and(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_and_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_and(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_and_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_and(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_and_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_fetch_and(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_fetch_and_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ char ret; ret = __atomic_fetch_nand(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_fetch_nand_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_nand(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_nand_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_nand(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_nand_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_nand(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_nand_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_fetch_nand(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_fetch_nand_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ char ret; ret = __atomic_fetch_xor(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_fetch_xor_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_xor(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_xor_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_xor(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_xor_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_xor(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_xor_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_fetch_xor(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_fetch_xor_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ char ret; ret = __atomic_fetch_or(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ char ret; ret = __atomic_fetch_or_1(vp1, v1, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_or(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ short ret; ret = __atomic_fetch_or_2(vp2, v2, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_or(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ int ret; ret = __atomic_fetch_or_4(vp4, v4, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_or(vp8, v8, __ATOMIC_SEQ_CST); }
-	{ long long int ret; ret = __atomic_fetch_or_8(vp8, v8, __ATOMIC_SEQ_CST); }
-	#if defined(__SIZEOF_INT128__)
-	{ __int128 ret; ret = __atomic_fetch_or(vp16, v16, __ATOMIC_SEQ_CST); }
-	{ __int128 ret; ret = __atomic_fetch_or_16(vp16, v16, __ATOMIC_SEQ_CST); }
-	#endif
-
-	{ _Bool ret; ret = __atomic_always_lock_free(sizeof(int), vp4); }
-	{ _Bool ret; ret = __atomic_is_lock_free(sizeof(int), vp4); }
+	{ char ret; ret = __atomic_add_fetch(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_add_fetch(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_add_fetch(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_add_fetch(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_add_fetch(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_add_fetch(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ char ret; ret = __atomic_sub_fetch(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_sub_fetch(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_sub_fetch(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_sub_fetch(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_sub_fetch(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_sub_fetch(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ char ret; ret = __atomic_and_fetch(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_and_fetch(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_and_fetch(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_and_fetch(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_and_fetch(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_and_fetch(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ char ret; ret = __atomic_nand_fetch(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_nand_fetch(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_nand_fetch(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_nand_fetch(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_nand_fetch(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_nand_fetch(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ char ret; ret = __atomic_xor_fetch(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_xor_fetch(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_xor_fetch(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_xor_fetch(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_xor_fetch(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_xor_fetch(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ char ret; ret = __atomic_or_fetch(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_or_fetch(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_or_fetch(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_or_fetch(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_or_fetch(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_or_fetch(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ char ret; ret = __atomic_fetch_add(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_fetch_add(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_fetch_add(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_fetch_add(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_fetch_add(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_fetch_add(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ char ret; ret = __atomic_fetch_sub(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_fetch_sub(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_fetch_sub(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_fetch_sub(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_fetch_sub(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_fetch_sub(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ char ret; ret = __atomic_fetch_and(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_fetch_and(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_fetch_and(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_fetch_and(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_fetch_and(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_fetch_and(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ char ret; ret = __atomic_fetch_nand(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_fetch_nand(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_fetch_nand(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_fetch_nand(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_fetch_nand(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_fetch_nand(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ char ret; ret = __atomic_fetch_xor(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_fetch_xor(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_fetch_xor(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_fetch_xor(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_fetch_xor(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_fetch_xor(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ char ret; ret = __atomic_fetch_or(vpc, vc, __ATOMIC_SEQ_CST); }
+	{ short ret; ret = __atomic_fetch_or(vps, vs, __ATOMIC_SEQ_CST); }
+	{ int ret; ret = __atomic_fetch_or(vpi, vi, __ATOMIC_SEQ_CST); }
+	{ long int ret; ret = __atomic_fetch_or(vpl, vl, __ATOMIC_SEQ_CST); }
+	{ long long int ret; ret = __atomic_fetch_or(vpll, vll, __ATOMIC_SEQ_CST); }
+	#if defined(__SIZEOF_INT128__)
+	{ __int128 ret; ret = __atomic_fetch_or(vplll, vlll, __ATOMIC_SEQ_CST); }
+	#endif
+
+	{ _Bool ret; ret = __atomic_always_lock_free(sizeof(int), vpi); }
+	{ _Bool ret; ret = __atomic_is_lock_free(sizeof(int), vpi); }
 	{ __atomic_thread_fence(__ATOMIC_SEQ_CST); }
 	{ __atomic_signal_fence(__ATOMIC_SEQ_CST); }
Index: tests/concurrent/.expect/keywordErrors.txt
===================================================================
--- tests/concurrent/.expect/keywordErrors.txt	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
+++ tests/concurrent/.expect/keywordErrors.txt	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -0,0 +1,6 @@
+concurrent/keywordErrors.cfa:1:1 error: thread keyword requires threads to be in scope, add #include <thread.hfa>
+struct A: with body 1
+
+concurrent/keywordErrors.cfa:6:1 error: thread keyword requires threads to be in scope, add #include <thread.hfa>
+struct B: with body 1
+
Index: tests/concurrent/keywordErrors.cfa
===================================================================
--- tests/concurrent/keywordErrors.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
+++ tests/concurrent/keywordErrors.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -0,0 +1,6 @@
+thread A {};
+
+// This include isn't enough but used to fool the check
+#include <invoke.h>
+
+thread B {};
Index: tests/concurrent/preempt.cfa
===================================================================
--- tests/concurrent/preempt.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/concurrent/preempt.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -36,5 +36,5 @@
 		if( (counter % 7) == this.value ) {
 			__cfaabi_check_preemption();
-			int next = __atomic_add_fetch_4(&counter, 1, __ATOMIC_SEQ_CST);
+			int next = __atomic_add_fetch( &counter, 1, __ATOMIC_SEQ_CST );
 			__cfaabi_check_preemption();
 			if( (next % 100) == 0 ) printf("%d\n", (int)next);
Index: tests/concurrent/signal/wait.cfa
===================================================================
--- tests/concurrent/signal/wait.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/concurrent/signal/wait.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -98,5 +98,5 @@
 	}
 
-	__sync_fetch_and_sub_4( &waiter_left, 1);
+	__atomic_fetch_sub( &waiter_left, 1,  __ATOMIC_SEQ_CST );
 }
 
@@ -109,5 +109,5 @@
 	}
 
-	__sync_fetch_and_sub_4( &waiter_left, 1);
+	__atomic_fetch_sub( &waiter_left, 1,  __ATOMIC_SEQ_CST );
 }
 
@@ -120,5 +120,5 @@
 	}
 
-	__sync_fetch_and_sub_4( &waiter_left, 1);
+	__atomic_fetch_sub( &waiter_left, 1,  __ATOMIC_SEQ_CST );
 }
 
@@ -131,5 +131,5 @@
 	}
 
-	__sync_fetch_and_sub_4( &waiter_left, 1);
+	__atomic_fetch_sub( &waiter_left, 1,  __ATOMIC_SEQ_CST );
 }
 
Index: tests/concurrent/thread.cfa
===================================================================
--- tests/concurrent/thread.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/concurrent/thread.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -7,6 +7,6 @@
 thread Second { semaphore* lock; };
 
-void ?{}( First  & this, semaphore & lock ) { ((thread&)this){"Thread 1"}; this.lock = &lock; }
-void ?{}( Second & this, semaphore & lock ) { ((thread&)this){"Thread 2"}; this.lock = &lock; }
+void ?{}( First  & this, semaphore & lock ) { ((thread&)this){ "Thread 1" }; this.lock = &lock; }
+void ?{}( Second & this, semaphore & lock ) { ((thread&)this){ "Thread 2" }; this.lock = &lock; }
 
 void main(First& this) {
Index: tests/heap.cfa
===================================================================
--- tests/heap.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/heap.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Tue Nov  6 17:54:56 2018
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Jul 19 08:22:34 2019
-// Update Count     : 19
+// Last Modified On : Sun Nov 24 12:34:51 2019
+// Update Count     : 28
 // 
 
@@ -38,5 +38,6 @@
 	enum { NoOfAllocs = 5000, NoOfMmaps = 10 };
 	char * locns[NoOfAllocs];
-	int i;
+	size_t amount;
+	enum { limit = 64 * 1024 };							// check alignments up to here
 
 	// check alloc/free
@@ -74,5 +75,5 @@
 		size_t s = (i + 1) * 20;
 		char * area = (char *)malloc( s );
-		if ( area == 0 ) abort( "malloc/free out of memory" );
+		if ( area == 0p ) abort( "malloc/free out of memory" );
 		area[0] = '\345'; area[s - 1] = '\345';			// fill first/last
 		area[malloc_usable_size( area ) - 1] = '\345';	// fill ultimate byte
@@ -83,5 +84,5 @@
 		size_t s = i + 1;								// +1 to make initialization simpler
 		locns[i] = (char *)malloc( s );
-		if ( locns[i] == 0 ) abort( "malloc/free out of memory" );
+		if ( locns[i] == 0p ) abort( "malloc/free out of memory" );
 		locns[i][0] = '\345'; locns[i][s - 1] = '\345';	// fill first/last
 		locns[i][malloc_usable_size( locns[i] ) - 1] = '\345'; // fill ultimate byte
@@ -99,5 +100,5 @@
 		size_t s = i + default_mmap_start();			// cross over point
 		char * area = (char *)malloc( s );
-		if ( area == 0 ) abort( "malloc/free out of memory" );
+		if ( area == 0p ) abort( "malloc/free out of memory" );
 		area[0] = '\345'; area[s - 1] = '\345';			// fill first/last
 		area[malloc_usable_size( area ) - 1] = '\345';	// fill ultimate byte
@@ -108,5 +109,5 @@
 		size_t s = i + default_mmap_start();			// cross over point
 		locns[i] = (char *)malloc( s );
-		if ( locns[i] == 0 ) abort( "malloc/free out of memory" );
+		if ( locns[i] == 0p ) abort( "malloc/free out of memory" );
 		locns[i][0] = '\345'; locns[i][s - 1] = '\345';	// fill first/last
 		locns[i][malloc_usable_size( locns[i] ) - 1] = '\345'; // fill ultimate byte
@@ -124,5 +125,5 @@
 		size_t s = (i + 1) * 20;
 		char * area = (char *)calloc( 5, s );
-		if ( area == 0 ) abort( "calloc/free out of memory" );
+		if ( area == 0p ) abort( "calloc/free out of memory" );
 		if ( area[0] != '\0' || area[s - 1] != '\0' ||
 			 area[malloc_usable_size( area ) - 1] != '\0' ||
@@ -136,5 +137,5 @@
 		size_t s = i + 1;
 		locns[i] = (char *)calloc( 5, s );
-		if ( locns[i] == 0 ) abort( "calloc/free out of memory" );
+		if ( locns[i] == 0p ) abort( "calloc/free out of memory" );
 		if ( locns[i][0] != '\0' || locns[i][s - 1] != '\0' ||
 			 locns[i][malloc_usable_size( locns[i] ) - 1] != '\0' ||
@@ -155,5 +156,5 @@
 		size_t s = i + default_mmap_start();			// cross over point
 		char * area = (char *)calloc( 1, s );
-		if ( area == 0 ) abort( "calloc/free out of memory" );
+		if ( area == 0p ) abort( "calloc/free out of memory" );
 		if ( area[0] != '\0' || area[s - 1] != '\0' ) abort( "calloc/free corrupt storage4.1" );
 		if ( area[malloc_usable_size( area ) - 1] != '\0' ) abort( "calloc/free corrupt storage4.2" );
@@ -167,5 +168,5 @@
 		size_t s = i + default_mmap_start();			// cross over point
 		locns[i] = (char *)calloc( 1, s );
-		if ( locns[i] == 0 ) abort( "calloc/free out of memory" );
+		if ( locns[i] == 0p ) abort( "calloc/free out of memory" );
 		if ( locns[i][0] != '\0' || locns[i][s - 1] != '\0' ||
 			 locns[i][malloc_usable_size( locns[i] ) - 1] != '\0' ||
@@ -183,16 +184,14 @@
 	// check memalign/free (sbrk)
 
-	enum { limit = 64 * 1024 };							// check alignments up to here
-
 	for ( a; libAlign() ~= limit ~ a ) {				// generate powers of 2
 		//sout | alignments[a];
 		for ( s; 1 ~ NoOfAllocs ) {						// allocation of size 0 can return null
 			char * area = (char *)memalign( a, s );
-			if ( area == 0 ) abort( "memalign/free out of memory" );
-			//sout | i | " " | area;
+			if ( area == 0p ) abort( "memalign/free out of memory" );
+			//sout | i | area;
 			if ( (size_t)area % a != 0 || malloc_alignment( area ) != a ) { // check for initial alignment
 				abort( "memalign/free bad alignment : memalign(%d,%d) = %p", (int)a, s, area );
 			} // if
-			area[0] = '\345'; area[s - 1] = '\345';	// fill first/last byte
+			area[0] = '\345'; area[s - 1] = '\345';		// fill first/last byte
 			area[malloc_usable_size( area ) - 1] = '\345'; // fill ultimate byte
 			free( area );
@@ -207,6 +206,6 @@
 			size_t s = i + default_mmap_start();		// cross over point
 			char * area = (char *)memalign( a, s );
-			if ( area == 0 ) abort( "memalign/free out of memory" );
-			//sout | i | " " | area;
+			if ( area == 0p ) abort( "memalign/free out of memory" );
+			//sout | i | area;
 			if ( (size_t)area % a != 0 || malloc_alignment( area ) != a ) { // check for initial alignment
 				abort( "memalign/free bad alignment : memalign(%d,%d) = %p", (int)a, (int)s, area );
@@ -223,5 +222,5 @@
 		// initial N byte allocation
 		char * area = (char *)calloc( 5, i );
-		if ( area == 0 ) abort( "calloc/realloc/free out of memory" );
+		if ( area == 0p ) abort( "calloc/realloc/free out of memory" );
 		if ( area[0] != '\0' || area[i - 1] != '\0' ||
 			 area[malloc_usable_size( area ) - 1] != '\0' ||
@@ -231,5 +230,5 @@
 		for ( s; i ~ 256 * 1024 ~ 26 ) {				// start at initial memory request
 			area = (char *)realloc( area, s );			// attempt to reuse storage
-			if ( area == 0 ) abort( "calloc/realloc/free out of memory" );
+			if ( area == 0p ) abort( "calloc/realloc/free out of memory" );
 			if ( area[0] != '\0' || area[s - 1] != '\0' ||
 				 area[malloc_usable_size( area ) - 1] != '\0' ||
@@ -245,5 +244,5 @@
 		size_t s = i + default_mmap_start();			// cross over point
 		char * area = (char *)calloc( 1, s );
-		if ( area == 0 ) abort( "calloc/realloc/free out of memory" );
+		if ( area == 0p ) abort( "calloc/realloc/free out of memory" );
 		if ( area[0] != '\0' || area[s - 1] != '\0' ||
 			 area[malloc_usable_size( area ) - 1] != '\0' ||
@@ -253,5 +252,5 @@
 		for ( r; i ~ 256 * 1024 ~ 26 ) {				// start at initial memory request
 			area = (char *)realloc( area, r );			// attempt to reuse storage
-			if ( area == 0 ) abort( "calloc/realloc/free out of memory" );
+			if ( area == 0p ) abort( "calloc/realloc/free out of memory" );
 			if ( area[0] != '\0' || area[r - 1] != '\0' ||
 				 area[malloc_usable_size( area ) - 1] != '\0' ||
@@ -263,10 +262,10 @@
 	// check memalign/realloc/free
 
-	size_t amount = 2;
+	amount = 2;
 	for ( a; libAlign() ~= limit ~ a ) {				// generate powers of 2
 		// initial N byte allocation
 		char * area = (char *)memalign( a, amount );	// aligned N-byte allocation
-		if ( area == 0 ) abort( "memalign/realloc/free out of memory" ); // no storage ?
-		//sout | alignments[a] | " " | area;
+		if ( area == 0p ) abort( "memalign/realloc/free out of memory" ); // no storage ?
+		//sout | alignments[a] | area;
 		if ( (size_t)area % a != 0 || malloc_alignment( area ) != a ) { // check for initial alignment
 			abort( "memalign/realloc/free bad alignment : memalign(%d,%d) = %p", (int)a, (int)amount, area );
@@ -278,6 +277,6 @@
 			if ( area[0] != '\345' || area[s - 2] != '\345' ) abort( "memalign/realloc/free corrupt storage" );
 			area = (char *)realloc( area, s );			// attempt to reuse storage
-			if ( area == 0 ) abort( "memalign/realloc/free out of memory" ); // no storage ?
-			//sout | i | " " | area;
+			if ( area == 0p ) abort( "memalign/realloc/free out of memory" ); // no storage ?
+			//sout | i | area;
 			if ( (size_t)area % a != 0 ) {				// check for initial alignment
 				abort( "memalign/realloc/free bad alignment %p", area );
@@ -294,6 +293,6 @@
 		for ( s; 1 ~ limit ) {							// allocation of size 0 can return null
 			char * area = (char *)cmemalign( a, 1, s );
-			if ( area == 0 ) abort( "cmemalign/free out of memory" );
-			//sout | i | " " | area;
+			if ( area == 0p ) abort( "cmemalign/free out of memory" );
+			//sout | i | area;
 			if ( (size_t)area % a != 0 || malloc_alignment( area ) != a ) { // check for initial alignment
 				abort( "cmemalign/free bad alignment : cmemalign(%d,%d) = %p", (int)a, s, area );
@@ -313,6 +312,6 @@
 		// initial N byte allocation
 		char * area = (char *)cmemalign( a, 1, amount ); // aligned N-byte allocation
-		if ( area == 0 ) abort( "cmemalign/realloc/free out of memory" ); // no storage ?
-		//sout | alignments[a] | " " | area;
+		if ( area == 0p ) abort( "cmemalign/realloc/free out of memory" ); // no storage ?
+		//sout | alignments[a] | area;
 		if ( (size_t)area % a != 0 || malloc_alignment( area ) != a ) { // check for initial alignment
 			abort( "cmemalign/realloc/free bad alignment : cmemalign(%d,%d) = %p", (int)a, (int)amount, area );
@@ -327,6 +326,6 @@
 			if ( area[0] != '\345' || area[s - 2] != '\345' ) abort( "cmemalign/realloc/free corrupt storage2" );
 			area = (char *)realloc( area, s );			// attempt to reuse storage
-			if ( area == 0 ) abort( "cmemalign/realloc/free out of memory" ); // no storage ?
-			//sout | i | " " | area;
+			if ( area == 0p ) abort( "cmemalign/realloc/free out of memory" ); // no storage ?
+			//sout | i | area;
 			if ( (size_t)area % a != 0 || malloc_alignment( area ) != a ) { // check for initial alignment
 				abort( "cmemalign/realloc/free bad alignment %p", area );
@@ -339,4 +338,65 @@
 		free( area );
 	} // for
+
+	// check memalign/realloc with align/free
+
+	amount = 2;
+	for ( a; libAlign() ~= limit ~ a ) {				// generate powers of 2
+		// initial N byte allocation
+		char * area = (char *)memalign( a, amount );	// aligned N-byte allocation
+		if ( area == 0p ) abort( "memalign/realloc with align/free out of memory" ); // no storage ?
+		//sout | alignments[a] | area | endl;
+		if ( (size_t)area % a != 0 || malloc_alignment( area ) != a ) { // check for initial alignment
+			abort( "memalign/realloc with align/free bad alignment : memalign(%d,%d) = %p", (int)a, (int)amount, area );
+		} // if
+		area[0] = '\345'; area[amount - 2] = '\345';	// fill first/penultimate byte
+
+		// Do not start this loop index at 0 because realloc of 0 bytes frees the storage.
+		for ( s; amount ~ 256 * 1024 ) {				// start at initial memory request
+			if ( area[0] != '\345' || area[s - 2] != '\345' ) abort( "memalign/realloc/free corrupt storage" );
+			area = (char *)realloc( area, a * 2, s );	// attempt to reuse storage
+			if ( area == 0p ) abort( "memalign/realloc with align/free out of memory" ); // no storage ?
+			//sout | i | area | endl;
+			if ( (size_t)area % (a * 2) != 0 ) {		// check for initial alignment
+				abort( "memalign/realloc with align/free bad alignment %p", area );
+			} // if
+			area[s - 1] = '\345';						// fill last byte
+		} // for
+		free( area );
+	} // for
+
+	// check cmemalign/realloc with align/free
+
+	amount = 2;
+	for ( size_t a = libAlign() + libAlign(); a <= limit; a += a ) { // generate powers of 2
+		// initial N byte allocation
+		char *area = (char *)cmemalign( a, 1, amount );	// aligned N-byte allocation
+		if ( area == 0p ) abort( "cmemalign/realloc with align/free out of memory" ); // no storage ?
+		//sout | alignments[a] | area | endl;
+		if ( (size_t)area % a != 0 || malloc_alignment( area ) != a ) { // check for initial alignment
+			abort( "cmemalign/realloc with align/free bad alignment : cmemalign(%d,%d) = %p", (int)a, (int)amount, area );
+		} // if
+		if ( area[0] != '\0' || area[amount - 1] != '\0' ||
+			 area[malloc_usable_size( area ) - 1] != '\0' ||
+			 ! malloc_zero_fill( area ) ) abort( "cmemalign/realloc with align/free corrupt storage1" );
+		area[0] = '\345'; area[amount - 2] = '\345';	// fill first/penultimate byte
+
+		// Do not start this loop index at 0 because realloc of 0 bytes frees the storage.
+		for ( int s = amount; s < 256 * 1024; s += 1 ) { // start at initial memory request
+			if ( area[0] != '\345' || area[s - 2] != '\345' ) abort( "cmemalign/realloc with align/free corrupt storage2" );
+			area = (char *)realloc( area, a * 2, s );	// attempt to reuse storage
+			if ( area == 0p ) abort( "cmemalign/realloc with align/free out of memory" ); // no storage ?
+			//sout | i | area | endl;
+			if ( (size_t)area % (a * 2) != 0 || malloc_alignment( area ) != a * 2 ) { // check for initial alignment
+				abort( "cmemalign/realloc with align/free bad alignment %p %jd %jd", area, malloc_alignment( area ), a * 2 );
+			} // if
+			if ( area[s - 1] != '\0' ||
+				 area[malloc_usable_size( area ) - 1] != '\0' ||
+				 ! malloc_zero_fill( area ) ) abort( "cmemalign/realloc/free corrupt storage3" );
+			area[s - 1] = '\345';						// fill last byte
+		} // for
+		free( area );
+	} // for
+
 	//sout | "worker" | thisTask() | "successful completion";
 } // Worker main
Index: tests/linking/withthreads.cfa
===================================================================
--- tests/linking/withthreads.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/linking/withthreads.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -34,4 +34,4 @@
 // Local Variables: //
 // tab-width: 4 //
-// compile-command: "cfa nothreads.cfa" //
+// compile-command: "cfa withthreads.cfa" //
 // End: //
Index: tests/raii/dtor-early-exit.cfa
===================================================================
--- tests/raii/dtor-early-exit.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/raii/dtor-early-exit.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -217,4 +217,12 @@
 }
 
+void i() {
+	// potential loop
+	for() {
+		if(true) continue;
+		int t = 0;
+	}
+}
+
 // TODO: implement __label__ and uncomment these lines
 void computedGoto() {
Index: tests/references.cfa
===================================================================
--- tests/references.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/references.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -119,4 +119,14 @@
 		f( 3, a + b, (S){ 1.0, 7.0 }, (int [3]){ 1, 2, 3 } ); // two rvalue to reference
 	}
+
+	{
+		int a = 3;
+		int *p = &a;
+		asm (
+			"incl %[p]\n\t"
+			: [p] "+m" (*p)
+		);
+		printf("%d\n", a);
+	}
 }
 
Index: tests/test.py
===================================================================
--- tests/test.py	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/test.py	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,4 +10,8 @@
 import tempfile
 import time
+
+import os
+import psutil
+import signal
 
 ################################################################################
@@ -221,8 +225,20 @@
 	make('clean', output_file=subprocess.DEVNULL, error=subprocess.DEVNULL)
 
+	# since python prints stacks by default on a interrupt, redo the interrupt handling to be silent
+	def worker_init():
+		def sig_int(signal_num, frame):
+			pass
+
+		signal.signal(signal.SIGINT, sig_int)
+
 	# create the executor for our jobs and handle the signal properly
-	pool = multiprocessing.Pool(jobs)
+	pool = multiprocessing.Pool(jobs, worker_init)
 
 	failed = False
+
+	def stop(x, y):
+		print("Tests interrupted by user", file=sys.stderr)
+		sys.exit(1)
+	signal.signal(signal.SIGINT, stop)
 
 	# for each test to run
Index: tests/time.cfa
===================================================================
--- tests/time.cfa	(revision aca6a54c0c0604121cda1b4ecf7c0062e40d4553)
+++ tests/time.cfa	(revision 2fa5bd2f4e4db5f90913cf36b7cbabea394c69ce)
@@ -10,6 +10,6 @@
 // Created On       : Tue Mar 27 17:24:56 2018
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Dec 20 23:09:21 2018
-// Update Count     : 23
+// Last Modified On : Fri Nov 29 23:05:30 2019
+// Update Count     : 24
 //
 
@@ -20,5 +20,4 @@
 	Duration d1 = 3`h, d2 = 2`s, d3 = 3.375`s, d4 = 12`s, d5 = 1`s + 10_000`ns;
 	sout | d1 | d2 | d3 | d4 | d5;
-	int i;
 	d1 = 0;
 	sout | d1 | d2 | d3;
