Index: benchmark/io/http/main.cfa
===================================================================
--- benchmark/io/http/main.cfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ benchmark/io/http/main.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -150,4 +150,5 @@
 			if(errno == EADDRINUSE) {
 				if(waited == 0) {
+					if(!options.interactive) abort | "Port already in use in non-interactive mode. Aborting";
 					sout | "Waiting for port";
 				} else {
Index: benchmark/io/http/protocol.cfa
===================================================================
--- benchmark/io/http/protocol.cfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ benchmark/io/http/protocol.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -332,10 +332,4 @@
 	wait(this.f);
 
-	// Did something crazy happen?
-	if(this.f.result > this.len) {
-		mutex(serr) serr | "SPLICE IN spliced too much!";
-		return error(this.res, -ERANGE);
-	}
-
 	// Something failed?
 	if(this.f.result < 0) {
@@ -351,4 +345,10 @@
 			return error(this.res, -ECONNRESET);
 		}
+	}
+
+	// Did something crazy happen?
+	if(this.f.result > this.len) {
+		mutex(serr) serr | "SPLICE IN spliced too much!";
+		return error(this.res, -ERANGE);
 	}
 
@@ -401,10 +401,4 @@
 	wait(this.f);
 
-	// Did something crazy happen?
-	if(this.f.result > this.len) {
-		mutex(serr) serr | "SPLICE OUT spliced too much!";
-		return error(this.res, -ERANGE);
-	}
-
 	// Something failed?
 	if(this.f.result < 0) {
@@ -420,4 +414,10 @@
 			return error(this, -ECONNRESET);
 		}
+	}
+
+	// Did something crazy happen?
+	if(this.f.result > this.len) {
+		mutex(serr) serr | "SPLICE OUT spliced too much!" | this.f.result | ">" | this.len;
+		return error(this.res, -ERANGE);
 	}
 
@@ -544,5 +544,5 @@
 const char * original_http_msgs[] = {
 	"HTTP/1.1 200 OK\nServer: HttpForall\nDate: %s \nContent-Type: text/plain\nContent-Length: ",
-	"HTTP/1.1 200 OK\r\nServer: HttpForall\r\nDate\r\nConnection: keep-alive\r\nContent-Length: 15\r\nContent-Type: text/html: %s \r\n\r\nHello, World!\r\n",
+	"HTTP/1.1 200 OK\r\nServer: HttpForall\r\nConnection: keep-alive\r\nContent-Length: 15\r\nContent-Type: text/html\r\nDate: %s \r\n\r\nHello, World!\r\n",
 	"HTTP/1.1 400 Bad Request\nServer: HttpForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
 	"HTTP/1.1 404 Not Found\nServer: HttpForall\nDate: %s \nContent-Type: text/plain\nContent-Length: 0 \n\n",
Index: benchmark/io/http/worker.cfa
===================================================================
--- benchmark/io/http/worker.cfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ benchmark/io/http/worker.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -18,5 +18,5 @@
 void ?{}( Worker & this ) {
 	size_t cli = rand() % options.clopts.cltr_cnt;
-	((thread&)this){ "Server Worker Thread", *options.clopts.instance[cli], 512000 };
+	((thread&)this){ "Server Worker Thread", *options.clopts.instance[cli], 64000 };
 	options.clopts.thrd_cnt[cli]++;
 	this.pipe[0] = -1;
Index: driver/as.cc
===================================================================
--- driver/as.cc	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ driver/as.cc	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -5,12 +5,12 @@
 // file "LICENCE" distributed with Cforall.
 //
-// as.c -- map assembler file, scan for debug information. If found, expand file by one character and insert Cforall
-//         language code on the N line from the start of the debug information.
+// as.c -- map assembler file, scan for debug information, then language code, and skip N lines forward to code. If
+//         code is C dialect, possibly expand file by one character, and replace with Cforall language code.
 //
 // Author           : Peter A. Buhr
 // Created On       : Wed Aug  1 10:49:42 2018
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Sep  8 08:40:16 2018
-// Update Count     : 97
+// Last Modified On : Wed Dec  8 07:56:12 2021
+// Update Count     : 136
 //
 
@@ -18,20 +18,15 @@
 #include <cstdlib>										// exit
 #include <fcntl.h>										// open
-#include <unistd.h>
-#include <sys/stat.h>
+#include <cstring>										// strstr, memmove
+#include <unistd.h>										// ftruncate,execvp
+#include <sys/stat.h>									// fstat
 #include <sys/mman.h>									// mmap
-#include <string.h>
 
 //#define __DEBUG_H__
-
-#ifdef __DEBUG_H__
-#include <iostream>
-using namespace std;
-#endif // __DEBUG_H__
 
 int main( const int argc, const char * argv[] ) {
 	#ifdef __DEBUG_H__
 	for ( int i = 0; i < argc; i += 1 ) {
-		cerr << argv[i] << endl;
+		fprintf( stderr, "%s\n", argv[i] );
 	} // for
 	#endif // __DEBUG_H__
@@ -48,19 +43,33 @@
 		if ( start == (void *)-1 ) { perror( "mmap" ); exit( EXIT_FAILURE ); };
 
-		if ( char * cursor = strstr( start, ".Ldebug_info0:" ) ) { // debug information ?
-			// Expand file by one byte to hold 2 character Cforall language code.
-			if ( ftruncate( fd, size + 1 ) ) { perror( "ftruncate" ); exit( EXIT_FAILURE ); };
+		char * dcursor;
+		if ( (dcursor = strstr( start, ".Ldebug_info0:" ) ) ) { // debug information ?
 
-			for ( int i = 0; i < 8; i += 1 ) {			// move N (magic) lines forward
-				cursor = strstr( cursor, "\n" ) + 1;
-			} // for
+			if ( char * cursor = strstr( dcursor, ".long\t.LASF" ) ) { // language code ?
+				for ( int i = 0; i < 2; i += 1 ) {		// move N (magic) lines forward
+					cursor = strstr( cursor, "\n" ) + 1;
+				} // for
+				cursor -= 2;							// backup over "d\n", where d is a hex digit
+				// From elfcpp/dwarf.h in the binutils source tree.
+				// DW_LANG_C89 = 0x1, DW_LANG_C = 0x2, DW_LANG_C99 = 0xc, DW_LANG_C11 = 0x1d
+				if ( *(cursor - 2) == '0' && *(cursor - 1) == 'x' &&
+					 (*cursor == 'c' || *cursor == '1' || *cursor == '2') ) { // C99/C89/C
+					// Expand file by one byte to hold 2 character Cforall language code.
+					if ( ftruncate( fd, size + 1 ) ) { perror( "ftruncate" ); exit( EXIT_FAILURE ); };
+					memmove( cursor + 2, cursor + 1, start + size - cursor - 1 ); // move remaining text 1 character right
+				} else if ( *(cursor - 3) == '0' && *(cursor - 2) == 'x' && *(cursor - 1) == '1' && *cursor == 'd' ) { // C11
+				} else {
+					for ( int i = 0; i < 6; i += 1 ) {	// move N (magic) lines forward
+						cursor = strstr( cursor, "\n" ) + 1;
+					} // for
+					fprintf( stderr, "*** ERROR *** Invalid C language code found in assembler file: %s\n"
+							 "Assembler debug information:\n%.*s",
+							 argv[argc - 1], (int)(cursor - dcursor), dcursor );
+					exit( EXIT_FAILURE );
+				} // if
 
-			cursor -= 2;								// backup over "c\n" language value
-			if ( *(cursor - 1) != 'x' ) { fprintf( stderr, "invalid C language code\n" ); exit( EXIT_FAILURE ); };
-
-			memmove( cursor + 2, cursor + 1, start + size - cursor - 1 ); // move remaining text 1 character right
-
-			*(cursor) = '2';							// replace C language value with CFA
-			*(cursor + 1) = '5';
+				*(cursor - 1) = '2';					// replace C89/C/C99/C11 language code with CFA code
+				*cursor = '5';
+			} // if
 		} // if
 
Index: example/io/eventfds.c
===================================================================
--- example/io/eventfds.c	(revision 6c53a93454697f8b549b141d01803272ff074931)
+++ example/io/eventfds.c	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -0,0 +1,116 @@
+/*
+This is a simple "eventfd" example that uses io_uring
+It demonstrates that reads work as expected on eventfds.
+It uses liburing for simplicity.
+*/
+
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <fcntl.h>
+#include <liburing.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/eventfd.h>
+
+struct io_uring ring;
+
+
+int main(int argc,  char * argv[]) { /* demo entry point; argc and argv are not used */
+      int fd = eventfd(0, 0); /* initial counter value 0, no flags */
+      if(fd < 0) {
+            printf("Could not open event fd.\n");
+            return 2;
+      }
+
+      /* prep the 8-byte read buffer; NOTE: iov is initialised here but never used below */
+	char buf[sizeof(uint64_t)] = { 0 };
+      struct iovec iov = { buf, 8 };
+
+	{
+            /* Do one write up front so the two reads below can be compared (blocking behaviour) */
+		eventfd_t val;
+		val = 1;
+		eventfd_write( fd, val ); /* return value is not checked */
+	}
+
+      /* init liburing with 256 entries and no flags; the return value is not checked */
+      io_uring_queue_init(256, &ring, 0);
+
+      /* declare required structs */
+      struct io_uring_sqe * sqe;
+      struct io_uring_cqe * cqe;
+
+      /* get an sqe and fill in a READ operation (sqe is not checked for NULL) */
+      sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fd, buf, 8, 0);
+
+      sqe->user_data = fd; /* tag the request with the fd; the tag is never read back here */
+
+      /* tell the kernel we have an sqe ready for consumption */
+      io_uring_submit(&ring);
+
+	printf("First wait\n");
+
+      /* wait for the sqe to complete */
+      int ret = io_uring_wait_cqe(&ring, &cqe);
+
+      /* read and process cqe event */
+      if(ret == 0) {
+            signed int len = cqe->res; /* copy the result before the cqe is released */
+            io_uring_cqe_seen(&ring, cqe);
+		printf("%d ", len);
+            if(len > 0) {
+                  printf("%.*s", len, buf); /* prints the raw buffer bytes as text, at most len of them */
+            }
+            else if( len < 0 ) {
+                  fprintf(stderr, "readv/read returned error : %s\n", strerror(-len));
+            }
+      }
+      else {
+            printf("%d\n", ret);
+            io_uring_cqe_seen(&ring, cqe); /* NOTE(review): cqe is used although the wait failed - confirm it is valid here */
+      }
+
+      /* ============================================ */
+      /* Do it again so that we can compare behaviour. */
+      /* ============================================ */
+
+      /* get an sqe and fill in a READ operation */
+      sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read(sqe, fd, buf, 8, 0);
+
+      sqe->user_data = fd;
+
+	printf("Second wait\n");
+
+      /* tell the kernel we have an sqe ready for consumption */
+      io_uring_submit(&ring);
+
+      /* wait for the sqe to complete; NOTE(review): no second eventfd write is made, so this presumably blocks - confirm intended */
+      ret = io_uring_wait_cqe(&ring, &cqe);
+
+      /* read and process cqe event (same handling as after the first wait) */
+      if(ret == 0) {
+            signed int len = cqe->res;
+            io_uring_cqe_seen(&ring, cqe);
+		printf("%d ", len);
+            if(len > 0) {
+                  printf("%.*s", len, buf);
+            }
+            else if( len < 0 ) {
+                  fprintf(stderr, "readv/read returned error : %s\n", strerror(-len));
+            }
+      }
+      else {
+            printf("%d\n", ret);
+            io_uring_cqe_seen(&ring, cqe);
+      }
+
+      io_uring_queue_exit(&ring); /* tear down the ring before closing the fd */
+
+      close(fd);
+}
Index: example/range_parser.cfa
===================================================================
--- example/range_parser.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
+++ example/range_parser.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -0,0 +1,71 @@
+#include <stdio.h>
+#include <fstream.hfa>
+#include <coroutine.hfa>
+
+coroutine RangeParser {
+	const char * text;	// unparsed remainder of the range list; advanced as values are consumed
+	int com;		// value exchanged with the resumer: set by push(), read by next(), -1 once parsing ends
+};
+
+void ?{}(RangeParser & this, const char * text) {	// constructor: remember the text to parse
+	this.text = text;	// the pointer is stored, not copied; com is left unset here
+}
+
+static inline void push(RangeParser & this, int val) { this.com = val; suspend; }	// publish val in com, then suspend the coroutine
+static inline int next(RangeParser & this) { return resume(this).com; }	// resume the parser and return the com it left behind
+
+void main(RangeParser & this) {
+	// Coroutine body: push every value described by the comma-separated list in this.text, then set com to -1.
+	for() {
+		int start = -1, stop = -1, start_len = -1, stop_len = -1;
+		int ret = sscanf(this.text, "%d%n-%d%n", &start, &start_len, &stop, &stop_len);	// %d: the destinations are int
+		switch(ret) {
+		case 0:
+			// Not a range, maybe a comma?
+			if(this.text[0] == ',') {
+				this.text ++;
+				continue;
+			}
+
+			serr | "Error: unexpected character in next range: '" | this.text |"'";
+			exit(2);
+		case 1:
+			// Only one value, push it!
+			push(this, start);
+			this.text += start_len;	// skip the characters consumed for the single value
+			break;
+		case 2:
+			if(start > stop) {
+				serr | "Error: next range out of order '" | this.text |"'";
+				exit(2);
+			}
+			for(int i = start; i <= stop; i++) {	// both bounds are inclusive
+				push(this, i);
+			}
+			this.text += stop_len;	// skip the whole "start-stop" token
+			break;
+		default:
+			serr | "Error reading next block: '" | this.text |"', returned" | ret;
+			exit(2);
+		}
+
+		if(this.text[0] == '\0') break;	// whole string consumed
+	}
+	this.com = -1;	// end marker: the driver loop stops on a negative value
+}
+
+int main(int argc, char * argv[]) {
+	// Exactly one argument, the range list, is required.
+	if(argc != 2) {
+		serr | "Usage:" | argv[0] | "range";
+		return 1;
+	}
+
+	// Each next() yields one value; a negative value marks the end of the list.
+	RangeParser rp = { argv[1] };
+	for(int i = next(rp); i >= 0; i = next(rp)) {
+		// nonl keeps all values on one output line
+		sout | i | nonl;
+	}
+	sout | nl;
+}
Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/Makefile.am	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -84,4 +84,5 @@
 	time.hfa \
 	bits/weakso_locks.hfa \
+	algorithms/range_iterator.hfa \
 	containers/maybe.hfa \
 	containers/pair.hfa \
Index: libcfa/src/algorithms/range_iterator.cfa
===================================================================
--- libcfa/src/algorithms/range_iterator.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
+++ libcfa/src/algorithms/range_iterator.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -0,0 +1,62 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// range_iterator.cfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Nov 30 13:06:22 2021
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include "range_iterator.hfa"
+
+#include <stdio.h>
+
+#include <fstream.hfa>
+
+void main(RangeIter & this) {
+	// Generator body: expose every value of the comma-separated list in this.text through com, then set com to -1.
+	for() {
+		this._start = this._stop = -1;
+		int start_len = -1, stop_len = -1;
+		int ret = sscanf(this.text, "%d%n-%d%n", &this._start, &start_len, &this._stop, &stop_len);	// %d: the destinations are int
+		switch(ret) {
+		case 0:
+			// Not a range, maybe a comma?
+			if(this.text[0] == ',') {
+				this.text ++;
+				continue;
+			}
+
+			serr | "Error: unexpected character in next range: '" | this.text |"'";
+			exit(2);
+		case 1:
+			this.text += start_len;	// advance before suspending
+			// Only one value, push it!
+			this.com = this._start;
+			suspend;
+			break;
+		case 2:
+			if(this._start > this._stop) {
+				serr | "Error: next range out of order '" | this.text |"'";
+				exit(2);
+			}
+			this.text += stop_len;	// skip the whole "start-stop" token
+			for(this.com = this._start; this.com <= this._stop; this.com++) {	// both bounds are inclusive
+				suspend;
+			}
+			break;
+		default:
+			serr | "Error reading next block: '" | this.text |"', returned" | ret;
+			exit(2);
+		}
+
+		if(this.text[0] == '\0') break;	// whole string consumed
+	}
+	this.com = -1;	// end marker: moveNext() reports false for a negative com
+}
Index: libcfa/src/algorithms/range_iterator.hfa
===================================================================
--- libcfa/src/algorithms/range_iterator.hfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
+++ libcfa/src/algorithms/range_iterator.hfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -0,0 +1,27 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// range_iterator.hfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Tue Nov 30 13:06:22 2021
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+generator RangeIter {
+	const char * text;	// unparsed remainder of the range list
+	int com;		// current value; a negative value makes moveNext() return false
+	int _start;		// first value of the range being expanded
+	int _stop;		// last value (inclusive) of the range being expanded
+};
+
+static inline void ?{}(RangeIter & this, const char * text) {	// constructor: remember the text to iterate over
+	this.text = text;	// the pointer is stored, not copied; com, _start and _stop are not set here
+}
+
+static inline bool moveNext(RangeIter & this) { resume(this); return this.com >= 0; }	// advance one step; true while com holds a non-negative value
Index: libcfa/src/concurrency/io.cfa
===================================================================
--- libcfa/src/concurrency/io.cfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/concurrency/io.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -33,4 +33,5 @@
 		#include <sys/syscall.h>
 		#include <sys/eventfd.h>
+		#include <sys/uio.h>
 
 		#include <linux/io_uring.h>
@@ -133,5 +134,5 @@
 	}
 
-	void __cfa_io_flush( processor * proc ) {
+	bool __cfa_io_flush( processor * proc, int min_comp ) {
 		/* paranoid */ verify( ! __preemption_enabled() );
 		/* paranoid */ verify( proc );
@@ -141,13 +142,8 @@
 		$io_context & ctx = *proc->io.ctx;
 
-		// for(i; 2) {
-		// 	unsigned idx = proc->rdq.id + i;
-		// 	cltr->ready_queue.lanes.tscs[idx].tv = -1ull;
-		// }
-
 		__ioarbiter_flush( ctx );
 
 		__STATS__( true, io.calls.flush++; )
-		int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, 0, 0, (sigset_t *)0p, _NSIG / 8);
+		int ret = syscall( __NR_io_uring_enter, ctx.fd, ctx.sq.to_submit, min_comp, min_comp > 0 ? IORING_ENTER_GETEVENTS : 0, (sigset_t *)0p, _NSIG / 8);
 		if( ret < 0 ) {
 			switch((int)errno) {
@@ -157,9 +153,5 @@
 				// Update statistics
 				__STATS__( false, io.calls.errors.busy ++; )
-				// for(i; 2) {
-				// 	unsigned idx = proc->rdq.id + i;
-				// 	cltr->ready_queue.lanes.tscs[idx].tv = rdtscl();
-				// }
-				return;
+				return false;
 			default:
 				abort( "KERNEL ERROR: IO_URING SYSCALL - (%d) %s\n", (int)errno, strerror(errno) );
@@ -182,10 +174,8 @@
 
 		ctx.proc->io.pending = false;
-
-		__cfa_io_drain( proc );
-		// for(i; 2) {
-		// 	unsigned idx = proc->rdq.id + i;
-		// 	cltr->ready_queue.lanes.tscs[idx].tv = rdtscl();
-		// }
+		ready_schedule_lock();
+		bool ret = __cfa_io_drain( proc );
+		ready_schedule_unlock();
+		return ret;
 	}
 
@@ -291,5 +281,4 @@
 	}
 
-
 	//=============================================================================================
 	// submission
@@ -314,7 +303,5 @@
 		ctx->proc->io.dirty   = true;
 		if(sq.to_submit > 30 || !lazy) {
-			ready_schedule_lock();
-			__cfa_io_flush( ctx->proc );
-			ready_schedule_unlock();
+			__cfa_io_flush( ctx->proc, 0 );
 		}
 	}
@@ -515,3 +502,51 @@
 		}
 	}
+
+	#if defined(CFA_WITH_IO_URING_IDLE)
+		bool __kernel_read(processor * proc, io_future_t & future, iovec & iov, int fd) {
+			// Queue one read of `fd` into the buffer described by `iov` on proc's io context.
+			// Returns false when no submission entry can be allocated, true once the entry is handed to __submit.
+			$io_context * ctx = proc->io.ctx;
+			/* paranoid */ verify( ! __preemption_enabled() );
+			/* paranoid */ verify( proc == __cfaabi_tls.this_processor );
+			/* paranoid */ verify( ctx );
+
+			__u32 idx;
+			struct io_uring_sqe * sqe;
+			// We can proceed to the fast path
+			if( !__alloc(ctx, &idx, 1) ) return false;
+			// Allocation was successful
+			__fill( &sqe, 1, &idx, ctx );
+
+			sqe->user_data = (uintptr_t)&future;	// the address of the future travels with the request
+			sqe->flags = 0;
+			sqe->fd = fd;
+			sqe->off = 0;
+			sqe->ioprio = 0;
+			sqe->fsync_flags = 0;
+			sqe->__pad2[0] = 0;
+			sqe->__pad2[1] = 0;
+			sqe->__pad2[2] = 0;
+
+			#if defined(CFA_HAVE_IORING_OP_READ)
+				sqe->opcode = IORING_OP_READ;
+				sqe->addr = (uint64_t)iov.iov_base;
+				sqe->len = iov.iov_len;
+			#elif defined(CFA_HAVE_READV) && defined(CFA_HAVE_IORING_OP_READV)
+				sqe->opcode = IORING_OP_READV;
+				sqe->addr = (uintptr_t)&iov;
+				sqe->len = 1;
+			#else
+				#error CFA_WITH_IO_URING_IDLE but none of CFA_HAVE_READV, CFA_HAVE_IORING_OP_READV or CFA_HAVE_IORING_OP_READ defined
+			#endif
+
+			asm volatile("": : :"memory");	// compiler barrier between filling the sqe and submitting it
+			/* paranoid */ verify( sqe->user_data == (uintptr_t)&future );
+			__submit( ctx, &idx, 1, true );
+
+			/* paranoid */ verify( proc == __cfaabi_tls.this_processor );
+			/* paranoid */ verify( ! __preemption_enabled() );
+			return true;	// success path: the function is declared bool and must not fall off the end
+		}
+	#endif
 #endif
Index: libcfa/src/concurrency/io/setup.cfa
===================================================================
--- libcfa/src/concurrency/io/setup.cfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/concurrency/io/setup.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -32,5 +32,5 @@
 
 	void __cfa_io_start( processor * proc ) {}
-	void __cfa_io_flush( processor * proc ) {}
+	bool __cfa_io_flush( processor * proc, int ) { return false; }	// stub for this no-op configuration: nothing is flushed, so report false
 	void __cfa_io_stop ( processor * proc ) {}
 
@@ -111,5 +111,5 @@
 		this.ext_sq.empty = true;
 		(this.ext_sq.queue){};
-		__io_uring_setup( this, cl.io.params, proc->idle );
+		__io_uring_setup( this, cl.io.params, proc->idle_fd );
 		__cfadbg_print_safe(io_core, "Kernel I/O : Created ring for io_context %u (%p)\n", this.fd, &this);
 	}
@@ -220,19 +220,21 @@
 		cq.cqes = (struct io_uring_cqe *)(((intptr_t)cq.ring_ptr) + params.cq_off.cqes);
 
-		// Step 4 : eventfd
-		// io_uring_register is so f*cking slow on some machine that it
-		// will never succeed if preemption isn't hard blocked
-		__cfadbg_print_safe(io_core, "Kernel I/O : registering %d for completion with ring %d\n", procfd, fd);
-
-		__disable_interrupts_hard();
-
-		int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &procfd, 1);
-		if (ret < 0) {
-			abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno));
-		}
-
-		__enable_interrupts_hard();
-
-		__cfadbg_print_safe(io_core, "Kernel I/O : registered %d for completion with ring %d\n", procfd, fd);
+		#if !defined(CFA_WITH_IO_URING_IDLE)
+			// Step 4 : eventfd
+			// io_uring_register is so f*cking slow on some machine that it
+			// will never succeed if preemption isn't hard blocked
+			__cfadbg_print_safe(io_core, "Kernel I/O : registering %d for completion with ring %d\n", procfd, fd);
+
+			__disable_interrupts_hard();
+
+			int ret = syscall( __NR_io_uring_register, fd, IORING_REGISTER_EVENTFD, &procfd, 1);
+			if (ret < 0) {
+				abort("KERNEL ERROR: IO_URING EVENTFD REGISTER - %s\n", strerror(errno));
+			}
+
+			__enable_interrupts_hard();
+
+			__cfadbg_print_safe(io_core, "Kernel I/O : registered %d for completion with ring %d\n", procfd, fd);
+		#endif
 
 		// some paranoid checks
Index: libcfa/src/concurrency/io/types.hfa
===================================================================
--- libcfa/src/concurrency/io/types.hfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/concurrency/io/types.hfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -185,10 +185,6 @@
 
 	// Wait for the future to be fulfilled
-	bool wait( io_future_t & this ) {
-		return wait(this.self);
-	}
-
-	void reset( io_future_t & this ) {
-		return reset(this.self);
-	}
+	bool wait     ( io_future_t & this ) { return wait     (this.self); }	// forwards to wait() on the self member
+	void reset    ( io_future_t & this ) { return reset    (this.self); }	// forwards to reset() on the self member
+	bool available( io_future_t & this ) { return available(this.self); }	// forwards to available() on the self member
 }
Index: libcfa/src/concurrency/kernel.cfa
===================================================================
--- libcfa/src/concurrency/kernel.cfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/concurrency/kernel.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -27,4 +27,5 @@
 extern "C" {
 	#include <sys/eventfd.h>
+	#include <sys/uio.h>
 }
 
@@ -34,4 +35,5 @@
 #include "strstream.hfa"
 #include "device/cpu.hfa"
+#include "io/types.hfa"
 
 //Private includes
@@ -124,13 +126,17 @@
 static void __wake_one(cluster * cltr);
 
-static void mark_idle (__cluster_proc_list & idles, processor & proc);
+static void idle_sleep(processor * proc, io_future_t & future, iovec & iov);
+static bool mark_idle (__cluster_proc_list & idles, processor & proc);
 static void mark_awake(__cluster_proc_list & idles, processor & proc);
-static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list idles );
 
 extern void __cfa_io_start( processor * );
 extern bool __cfa_io_drain( processor * );
-extern void __cfa_io_flush( processor * );
+extern bool __cfa_io_flush( processor *, int min_comp );
 extern void __cfa_io_stop ( processor * );
 static inline bool __maybe_io_drain( processor * );
+
+#if defined(CFA_WITH_IO_URING_IDLE)
+	extern bool __kernel_read(processor * proc, io_future_t & future, iovec &, int fd);
+#endif
 
 extern void __disable_interrupts_hard();
@@ -148,4 +154,5 @@
 	/* paranoid */ verify( __preemption_enabled() );
 }
+
 
 //=============================================================================================
@@ -163,4 +170,9 @@
 	verify(this);
 
+	io_future_t future; // used for idle sleep when io_uring is present
+	future.self.ptr = 1p;  // mark it as already fulfilled so we know if there is a pending request or not
+	eventfd_t idle_val;
+	iovec idle_iovec = { &idle_val, sizeof(idle_val) };
+
 	__cfa_io_start( this );
 
@@ -196,7 +208,5 @@
 
 			if( !readyThread ) {
-				ready_schedule_lock();
-				__cfa_io_flush( this );
-				ready_schedule_unlock();
+				__cfa_io_flush( this, 0 );
 
 				readyThread = __next_thread_slow( this->cltr );
@@ -213,5 +223,5 @@
 
 				// Push self to idle stack
-				mark_idle(this->cltr->procs, * this);
+				if(!mark_idle(this->cltr->procs, * this)) continue MAIN_LOOP;
 
 				// Confirm the ready-queue is empty
@@ -229,15 +239,98 @@
 				}
 
-				#if !defined(__CFA_NO_STATISTICS__)
-					if(this->print_halts) {
-						__cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl());
+				idle_sleep( this, future, idle_iovec );
+
+				// We were woken up, remove self from idle
+				mark_awake(this->cltr->procs, * this);
+
+				// DON'T just proceed, start looking again
+				continue MAIN_LOOP;
+			}
+
+			/* paranoid */ verify( readyThread );
+
+			// Reset io dirty bit
+			this->io.dirty = false;
+
+			// We found a thread run it
+			__run_thread(this, readyThread);
+
+			// Are we done?
+			if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
+
+			if(this->io.pending && !this->io.dirty) {
+				__cfa_io_flush( this, 0 );
+			}
+
+			#else
+				#warning new kernel loop
+			SEARCH: {
+				/* paranoid */ verify( ! __preemption_enabled() );
+
+				// First, lock the scheduler since we are searching for a thread
+				ready_schedule_lock();
+
+				// Try to get the next thread
+				readyThread = pop_fast( this->cltr );
+				if(readyThread) { ready_schedule_unlock(); break SEARCH; }
+
+				// If we can't find a thread, might as well flush any outstanding I/O
+				if(this->io.pending) { __cfa_io_flush( this, 0 ); }
+
+				// Spin a little on I/O, just in case
+				for(5) {
+					__maybe_io_drain( this );
+					readyThread = pop_fast( this->cltr );
+					if(readyThread) { ready_schedule_unlock(); break SEARCH; }
+				}
+
+				// no luck, try stealing a few times
+				for(5) {
+					if( __maybe_io_drain( this ) ) {
+						readyThread = pop_fast( this->cltr );
+					} else {
+						readyThread = pop_slow( this->cltr );
 					}
-				#endif
-
-				__cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle);
+					if(readyThread) { ready_schedule_unlock(); break SEARCH; }
+				}
+
+				// still no luck, search for a thread
+				readyThread = pop_search( this->cltr );
+				if(readyThread) { ready_schedule_unlock(); break SEARCH; }
+
+				// Don't block if we are done
+				if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) {
+					ready_schedule_unlock();
+					break MAIN_LOOP;
+				}
+
+				__STATS( __tls_stats()->ready.sleep.halts++; )
+
+				// Push self to idle stack
+				ready_schedule_unlock();
+				if(!mark_idle(this->cltr->procs, * this)) goto SEARCH;
+				ready_schedule_lock();
+
+				// Confirm the ready-queue is empty
+				__maybe_io_drain( this );
+				readyThread = pop_search( this->cltr );
+				ready_schedule_unlock();
+
+				if( readyThread ) {
+					// A thread was found, cancel the halt
+					mark_awake(this->cltr->procs, * this);
+
+					__STATS( __tls_stats()->ready.sleep.cancels++; )
+
+					// continue the main loop
+					break SEARCH;
+				}
+
+				__STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl()); )
+				__cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle_fd);
 
 				{
 					eventfd_t val;
-					ssize_t ret = read( this->idle, &val, sizeof(val) );
+					ssize_t ret = read( this->idle_fd, &val, sizeof(val) );
 					if(ret < 0) {
 						switch((int)errno) {
@@ -255,9 +348,5 @@
 				}
 
-				#if !defined(__CFA_NO_STATISTICS__)
-					if(this->print_halts) {
-						__cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl());
-					}
-				#endif
+					__STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl()); )
 
 				// We were woken up, remove self from idle
@@ -268,115 +357,4 @@
 			}
 
-			/* paranoid */ verify( readyThread );
-
-			// Reset io dirty bit
-			this->io.dirty = false;
-
-			// We found a thread run it
-			__run_thread(this, readyThread);
-
-			// Are we done?
-			if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) break MAIN_LOOP;
-
-			if(this->io.pending && !this->io.dirty) {
-				ready_schedule_lock();
-				__cfa_io_flush( this );
-				ready_schedule_unlock();
-			}
-
-			#else
-				#warning new kernel loop
-			SEARCH: {
-				/* paranoid */ verify( ! __preemption_enabled() );
-
-				// First, lock the scheduler since we are searching for a thread
-				ready_schedule_lock();
-
-				// Try to get the next thread
-				readyThread = pop_fast( this->cltr );
-				if(readyThread) { ready_schedule_unlock(); break SEARCH; }
-
-				// If we can't find a thread, might as well flush any outstanding I/O
-				if(this->io.pending) { __cfa_io_flush( this ); }
-
-				// Spin a little on I/O, just in case
-				for(5) {
-					__maybe_io_drain( this );
-					readyThread = pop_fast( this->cltr );
-					if(readyThread) { ready_schedule_unlock(); break SEARCH; }
-				}
-
-				// no luck, try stealing a few times
-				for(5) {
-					if( __maybe_io_drain( this ) ) {
-						readyThread = pop_fast( this->cltr );
-					} else {
-						readyThread = pop_slow( this->cltr );
-					}
-					if(readyThread) { ready_schedule_unlock(); break SEARCH; }
-				}
-
-				// still no luck, search for a thread
-				readyThread = pop_search( this->cltr );
-				if(readyThread) { ready_schedule_unlock(); break SEARCH; }
-
-				// Don't block if we are done
-				if( __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST) ) {
-					ready_schedule_unlock();
-					break MAIN_LOOP;
-				}
-
-				__STATS( __tls_stats()->ready.sleep.halts++; )
-
-				// Push self to idle stack
-				ready_schedule_unlock();
-				mark_idle(this->cltr->procs, * this);
-				ready_schedule_lock();
-
-				// Confirm the ready-queue is empty
-				__maybe_io_drain( this );
-				readyThread = pop_search( this->cltr );
-				ready_schedule_unlock();
-
-				if( readyThread ) {
-					// A thread was found, cancel the halt
-					mark_awake(this->cltr->procs, * this);
-
-					__STATS( __tls_stats()->ready.sleep.cancels++; )
-
-					// continue the main loop
-					break SEARCH;
-				}
-
-				__STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl()); )
-				__cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle);
-
-				{
-					eventfd_t val;
-					ssize_t ret = read( this->idle, &val, sizeof(val) );
-					if(ret < 0) {
-						switch((int)errno) {
-						case EAGAIN:
-						#if EAGAIN != EWOULDBLOCK
-							case EWOULDBLOCK:
-						#endif
-						case EINTR:
-							// No need to do anything special here, just assume it's a legitimate wake-up
-							break;
-						default:
-							abort( "KERNEL : internal error, read failure on idle eventfd, error(%d) %s.", (int)errno, strerror( (int)errno ) );
-						}
-					}
-				}
-
-					__STATS( if(this->print_halts) __cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl()); )
-
-				// We were woken up, remove self from idle
-				mark_awake(this->cltr->procs, * this);
-
-				// DON'T just proceed, start looking again
-				continue MAIN_LOOP;
-			}
-
 		RUN_THREAD:
 			/* paranoid */ verify( ! __preemption_enabled() );
@@ -393,5 +371,5 @@
 
 			if(this->io.pending && !this->io.dirty) {
-				__cfa_io_flush( this );
+				__cfa_io_flush( this, 0 );
 			}
 
@@ -403,4 +381,9 @@
 
 		__cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this);
+	}
+
+	for(int i = 0; !available(future); i++) {
+		if(i > 1000) __cfaabi_dbg_write( "ERROR: kernel has bin spinning on a flush after exit loop.\n", 60);
+		__cfa_io_flush( this, 1 );
 	}
 
@@ -766,16 +749,13 @@
 
 	// Check if there is a sleeping processor
-	processor * p;
-	unsigned idle;
-	unsigned total;
-	[idle, total, p] = query_idles(this->procs);
+	int fd = __atomic_load_n(&this->procs.fd, __ATOMIC_SEQ_CST);
 
 	// If no one is sleeping, we are done
-	if( idle == 0 ) return;
+	if( fd == 0 ) return;
 
 	// We found a processor, wake it up
 	eventfd_t val;
 	val = 1;
-	eventfd_write( p->idle, val );
+	eventfd_write( fd, val );
 
 	#if !defined(__CFA_NO_STATISTICS__)
@@ -802,17 +782,67 @@
 		eventfd_t val;
 		val = 1;
-		eventfd_write( this->idle, val );
+		eventfd_write( this->idle_fd, val );
 	__enable_interrupts_checked();
 }
 
-static void mark_idle(__cluster_proc_list & this, processor & proc) {
-	/* paranoid */ verify( ! __preemption_enabled() );
-	lock( this );
+static void idle_sleep(processor * this, io_future_t & future, iovec & iov) {
+	#if !defined(CFA_WITH_IO_URING_IDLE)
+		#if !defined(__CFA_NO_STATISTICS__)
+			if(this->print_halts) {
+				__cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 0\n", this->unique_id, rdtscl());
+			}
+		#endif
+
+		__cfadbg_print_safe(runtime_core, "Kernel : core %p waiting on eventfd %d\n", this, this->idle_fd);
+
+		{
+			eventfd_t val;
+			ssize_t ret = read( this->idle_fd, &val, sizeof(val) );
+			if(ret < 0) {
+				switch((int)errno) {
+				case EAGAIN:
+				#if EAGAIN != EWOULDBLOCK
+					case EWOULDBLOCK:
+				#endif
+				case EINTR:
+					// No need to do anything special here, just assume it's a legitimate wake-up
+					break;
+				default:
+					abort( "KERNEL : internal error, read failure on idle eventfd, error(%d) %s.", (int)errno, strerror( (int)errno ) );
+				}
+			}
+		}
+
+		#if !defined(__CFA_NO_STATISTICS__)
+			if(this->print_halts) {
+				__cfaabi_bits_print_safe( STDOUT_FILENO, "PH:%d - %lld 1\n", this->unique_id, rdtscl());
+			}
+		#endif
+	#else
+		// Only queue a new read if none is already pending
+		if(available(future)) {
+			// There is no pending read, we need to add one
+			reset(future);
+
+			__kernel_read(this, future, iov, this->idle_fd );
+		}
+
+		__cfa_io_flush( this, 1 );
+	#endif
+}
+
+static bool mark_idle(__cluster_proc_list & this, processor & proc) {
+	/* paranoid */ verify( ! __preemption_enabled() );
+	if(!try_lock( this )) return false;
 		this.idle++;
 		/* paranoid */ verify( this.idle <= this.total );
 		remove(proc);
 		insert_first(this.idles, proc);
+
+		__atomic_store_n(&this.fd, proc.idle_fd, __ATOMIC_SEQ_CST);
 	unlock( this );
 	/* paranoid */ verify( ! __preemption_enabled() );
+
+	return true;
 }
 
@@ -824,25 +854,12 @@
 		remove(proc);
 		insert_last(this.actives, proc);
+
+		{
+			int fd = 0;
+			if(!this.idles`isEmpty) fd = this.idles`first.idle_fd;
+			__atomic_store_n(&this.fd, fd, __ATOMIC_SEQ_CST);
+		}
+
 	unlock( this );
-	/* paranoid */ verify( ! __preemption_enabled() );
-}
-
-static [unsigned idle, unsigned total, * processor] query_idles( & __cluster_proc_list this ) {
-	/* paranoid */ verify( ! __preemption_enabled() );
-	/* paranoid */ verify( ready_schedule_islocked() );
-
-	for() {
-		uint64_t l = __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST);
-		if( 1 == (l % 2) ) { Pause(); continue; }
-		unsigned idle    = this.idle;
-		unsigned total   = this.total;
-		processor * proc = &this.idles`first;
-		// Compiler fence is unnecessary, but gcc-8 and older incorrectly reorder code without it
-		asm volatile("": : :"memory");
-		if(l != __atomic_load_n(&this.lock, __ATOMIC_SEQ_CST)) { Pause(); continue; }
-		return [idle, total, proc];
-	}
-
-	/* paranoid */ verify( ready_schedule_islocked() );
 	/* paranoid */ verify( ! __preemption_enabled() );
 }
@@ -906,10 +923,10 @@
 		if(head == tail) return false;
 		#if OLD_MAIN
-		ready_schedule_lock();
-		ret = __cfa_io_drain( proc );
-		ready_schedule_unlock();
+			ready_schedule_lock();
+			ret = __cfa_io_drain( proc );
+			ready_schedule_unlock();
 		#else
 			ret = __cfa_io_drain( proc );
-	#endif
+		#endif
 	#endif
 	return ret;
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/concurrency/kernel.hfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -100,5 +100,5 @@
 
 	// Idle lock (kernel semaphore)
-	int idle;
+	int idle_fd;
 
 	// Termination synchronisation (user semaphore)
@@ -195,5 +195,8 @@
 struct __cluster_proc_list {
 	// Spin lock protecting the queue
-	volatile uint64_t lock;
+	__spinlock_t lock;
+
+	// FD to use to wake a processor
+	volatile int fd;
 
 	// Total number of processors
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -527,6 +527,6 @@
 	this.local_data = 0p;
 
-	this.idle = eventfd(0, 0);
-	if (idle < 0) {
+	this.idle_fd = eventfd(0, 0);
+	if (idle_fd < 0) {
 		abort("KERNEL ERROR: PROCESSOR EVENTFD - %s\n", strerror(errno));
 	}
@@ -542,5 +542,5 @@
 // Not a ctor, it just preps the destruction but should not destroy members
 static void deinit(processor & this) {
-	close(this.idle);
+	close(this.idle_fd);
 }
 
@@ -584,5 +584,5 @@
 // Cluster
 static void ?{}(__cluster_proc_list & this) {
-	this.lock  = 0;
+	this.fd    = 0;
 	this.idle  = 0;
 	this.total = 0;
Index: libcfa/src/concurrency/kernel_private.hfa
===================================================================
--- libcfa/src/concurrency/kernel_private.hfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/concurrency/kernel_private.hfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -39,4 +39,14 @@
 }
 
+// Defines whether or not we *want* to use io_uring_enter as the idle_sleep blocking call
+#define CFA_WANT_IO_URING_IDLE
+
+// Defines whether or not we *can* use io_uring_enter as the idle_sleep blocking call
+#if defined(CFA_WANT_IO_URING_IDLE) && defined(CFA_HAVE_LINUX_IO_URING_H)
+	#if defined(CFA_HAVE_IORING_OP_READ) || (defined(CFA_HAVE_READV) && defined(CFA_HAVE_IORING_OP_READV))
+		#define CFA_WITH_IO_URING_IDLE
+	#endif
+#endif
+
 //-----------------------------------------------------------------------------
 // Scheduler
@@ -149,8 +159,4 @@
 	__atomic_store_n(ll, (bool)false, __ATOMIC_RELEASE);
 }
-
-
-
-
 
 //-----------------------------------------------------------------------
@@ -268,16 +274,27 @@
 	ready_schedule_lock();
 
-	// Simple counting lock, acquired, acquired by incrementing the counter
-	// to an odd number
-	for() {
-		uint64_t l = this.lock;
-		if(
-			(0 == (l % 2))
-			&& __atomic_compare_exchange_n(&this.lock, &l, l + 1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
-		) return;
-		Pause();
-	}
-
-	/* paranoid */ verify( ! __preemption_enabled() );
+	lock( this.lock __cfaabi_dbg_ctx2 );
+
+	/* paranoid */ verify( ! __preemption_enabled() );
+}
+
+static inline bool try_lock(__cluster_proc_list & this) {
+	/* paranoid */ verify( ! __preemption_enabled() );
+
+	// Start by locking the global RWlock so that we know no-one is
+	// adding/removing processors while we mess with the idle lock
+	ready_schedule_lock();
+
+	if(try_lock( this.lock __cfaabi_dbg_ctx2 )) {
+		// success
+		/* paranoid */ verify( ! __preemption_enabled() );
+		return true;
+	}
+
+	// failed to lock
+	ready_schedule_unlock();
+
+	/* paranoid */ verify( ! __preemption_enabled() );
+	return false;
 }
 
@@ -285,7 +302,5 @@
 	/* paranoid */ verify( ! __preemption_enabled() );
 
-	/* paranoid */ verify( 1 == (this.lock % 2) );
-	// Simple couting lock, release by incrementing to an even number
-	__atomic_fetch_add( &this.lock, 1, __ATOMIC_SEQ_CST );
+	unlock(this.lock);
 
 	// Release the global lock, which we acquired when locking
Index: libcfa/src/device/cpu.cfa
===================================================================
--- libcfa/src/device/cpu.cfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/device/cpu.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -30,4 +30,6 @@
 	#include <fcntl.h>
 }
+
+#include "algorithms/range_iterator.hfa"
 
 // search a string for character 'character' but looking atmost at len
@@ -135,5 +137,6 @@
 		count++;
 	}
-	iterate_dir(path, lambda);
+	int ret = iterate_dir(path, lambda);
+	if(ret == ENOTDIR) return 0;
 
 	/* paranoid */ verifyf(count == max + 1, "Inconsistent %s count, counted %d, but max %s was %d", prefix, count, prefix, (int)max);
@@ -143,5 +146,5 @@
 
 // Count number of cpus in the system
-static int count_cpus(void) {
+static [int, const char *] count_cpus(void) {
 	const char * fpath = "/sys/devices/system/cpu/online";
 	int fd = open(fpath, 0, O_RDONLY);
@@ -159,7 +162,5 @@
 
 	const char * _;
-	int cnt = read_width(buff, r - 1, &_);
-	/* paranoid */ verify(cnt == count_prefix_dirs("/sys/devices/system/cpu", "cpu"));
-	return cnt;
+	return [read_width(buff, r - 1, &_), strndup(buff, r - 1)];
 }
 
@@ -226,7 +227,7 @@
 
 struct raw_cache_instance {
-	idx_range_t range;
-	unsigned width;
-	unsigned char level;
+	idx_range_t range;	// A text description of the cpus covered
+	unsigned width;		// The number of cpus covered
+	unsigned char level;	// the cache level
 	// FIXME add at least size and type
 };
@@ -235,8 +236,13 @@
 static void ^?{}(raw_cache_instance & this) { free(this.range);}
 
-raw_cache_instance ** build_raw_cache_table(unsigned cpus, unsigned idxs, unsigned cache_levels)
+// Returns a 2D array of instances of size [cpu count][cache levels]
+// where cache level doesn't include instruction caches
+raw_cache_instance ** build_raw_cache_table(unsigned cpus_c, idx_range_t cpus, unsigned idxs, unsigned cache_levels)
 {
-	raw_cache_instance ** raw = alloc(cpus);
-	for(i; cpus) {
+	raw_cache_instance ** raw = alloc(cpus_c, '\0'`fill);
+
+	RangeIter rc = { cpus };
+	while(moveNext(rc)) {
+		unsigned i = rc.com;
 		raw[i] = alloc(cache_levels);
 		void addcache(unsigned fidx, unsigned char level, idx_range_t range, size_t len) {
@@ -263,16 +269,23 @@
 
 // returns an allocate list of all the different distinct last level caches
-static [*llc_map_t, size_t cnt] distinct_llcs(unsigned cpus, unsigned llc_idx, raw_cache_instance ** raw) {
+static [*llc_map_t, size_t cnt] distinct_llcs(idx_range_t cpus, unsigned llc_idx, raw_cache_instance ** raw) {
 	// Allocate at least one element
 	llc_map_t* ranges = alloc();
 	size_t range_cnt = 1;
 
+	RangeIter rc = { cpus };
+	__attribute__((unused)) bool ret =
+	moveNext(rc);
+	/* paranoid */ verify( ret );
+	/* paranoid */ verify( rc.com >= 0 );
+
 	// Initialize with element 0
-	ranges->raw = &raw[0][llc_idx];
+	ranges->raw = &raw[rc.com][llc_idx];
 	ranges->count = 0;
 	ranges->start = -1u;
 
 	// Go over all other cpus
-	CPU_LOOP: for(i; 1~cpus) {
+	CPU_LOOP: while(moveNext(rc)) {
+		unsigned i = rc.com;
 		// Check if the range is already there
 		raw_cache_instance * candidate = &raw[i][llc_idx];
@@ -304,8 +317,10 @@
 }
 
-static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) {
-	cpu_pairing_t * pairings = alloc(cpus);
-
-	CPU_LOOP: for(i; cpus) {
+static [[]cpu_pairing_t] get_cpu_pairings(unsigned cpus_c, idx_range_t cpus, raw_cache_instance ** raw, llc_map_t * maps, size_t map_cnt) {
+	cpu_pairing_t * pairings = alloc(cpus_c);
+
+	RangeIter rc = { cpus };
+	CPU_LOOP: while(moveNext(rc)) {
+		unsigned i = rc.com;
 		pairings[i].cpu = i;
 		idx_range_t want = raw[i][0].range;
@@ -327,5 +342,18 @@
 extern "C" {
 	void __cfaabi_device_startup( void ) {
-		int cpus = count_cpus();
+		int cpus_c;
+		const char * cpus;
+		[cpus_c, cpus] = count_cpus();
+		#if defined(__CFA_WITH_VERIFY__)
+		// Verify that the mapping is self consistant.
+		{
+			RangeIter rc = { cpus };
+			while(moveNext(rc)) {
+				unsigned i = rc.com;
+				verify(cpus_c > i);
+			}
+		}
+		#endif
+
 		int idxs = count_cache_indexes();
 
@@ -333,5 +361,5 @@
 		unsigned cache_levels = 0;
 		unsigned llc = 0;
-		{
+		if (idxs != 0) {
 			unsigned char prev = -1u;
 			void first(unsigned idx, unsigned char level, const char * map, size_t len) {
@@ -345,5 +373,5 @@
 
 		// Read in raw data
-		raw_cache_instance ** raw = build_raw_cache_table(cpus, idxs, cache_levels);
+		raw_cache_instance ** raw = build_raw_cache_table(cpus_c, cpus, idxs, cache_levels);
 
 		// Find number of distinct cache instances
@@ -362,18 +390,20 @@
 				width2 += maps[i].raw->width;
 			}
-			verify(width1 == cpus);
-			verify(width2 == cpus);
+			verify(width1 == cpus_c);
+			verify(width2 == cpus_c);
 		}
 		#endif
 
 		// Get mappings from cpu to cache instance
-		cpu_pairing_t * pairings = get_cpu_pairings(cpus, raw, maps, map_cnt);
+		cpu_pairing_t * pairings = get_cpu_pairings(cpus_c, cpus, raw, maps, map_cnt);
 
 		// Sort by cache instance
-		qsort(pairings, cpus);
+		qsort(pairings, cpus_c);
 
 		{
 			unsigned it = 0;
-			for(i; cpus) {
+			RangeIter rc = { cpus };
+			while(moveNext(rc)) {
+				unsigned i = rc.com;
 				unsigned llc_id = pairings[i].id;
 				if(maps[llc_id].start == -1u) {
@@ -384,11 +414,14 @@
 				}
 			}
-			/* paranoid */ verify(it == cpus);
+			/* paranoid */ verify(it == cpus_c);
 		}
 
 		// From the mappings build the actual cpu map we want
-		struct cpu_map_entry_t * entries = alloc(cpus);
-		for(i; cpus) { entries[i].count = 0; }
-		for(i; cpus) {
+		struct cpu_map_entry_t * entries = alloc(cpus_c);
+		for(i; cpus_c) { entries[i].count = 0; }
+
+		RangeIter rc = { cpus };
+		while(moveNext(rc)) {
+			unsigned i = rc.com;
 			/* paranoid */ verify(pairings[i].id < map_cnt);
 			unsigned c = pairings[i].cpu;
@@ -406,6 +439,6 @@
 		free(pairings);
 
-		for(i; cpus) {
-			for(j; cache_levels) {
+		for(i; cpus_c) {
+			if( raw[i] ) for(j; cache_levels) {
 				^(raw[i][j]){};
 			}
@@ -415,5 +448,6 @@
 
 		cpu_info.llc_map = entries;
-		cpu_info.hthrd_count = cpus;
+		cpu_info.hthrd_count = cpus_c;
+		cpu_info.llc_count = map_cnt;
 	}
 
Index: libcfa/src/device/cpu.hfa
===================================================================
--- libcfa/src/device/cpu.hfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/device/cpu.hfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -23,9 +23,12 @@
 
 struct cpu_info_t {
-	 // array of size [hthrd_count]
+	// Array of size [hthrd_count]
 	const cpu_map_entry_t * llc_map;
 
-	 // Number of _hardware_ threads present in the system
+	// Number of _hardware_ threads present in the system
 	size_t hthrd_count;
+
+	// Number of distinct last level caches
+	size_t llc_count;
 };
 
Index: libcfa/src/heap.cfa
===================================================================
--- libcfa/src/heap.cfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/heap.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -10,6 +10,6 @@
 // Created On       : Tue Dec 19 21:58:35 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Mon Aug  9 19:03:02 2021
-// Update Count     : 1040
+// Last Modified On : Sun Jan  2 23:29:41 2022
+// Update Count     : 1058
 //
 
@@ -263,26 +263,26 @@
 #ifdef __STATISTICS__
 // Heap statistics counters.
-static unsigned int malloc_zero_calls, malloc_calls;
-static unsigned long long int malloc_storage;
-static unsigned int aalloc_zero_calls, aalloc_calls;
-static unsigned long long int aalloc_storage;
-static unsigned int calloc_zero_calls, calloc_calls;
-static unsigned long long int calloc_storage;
-static unsigned int memalign_zero_calls, memalign_calls;
-static unsigned long long int memalign_storage;
-static unsigned int amemalign_zero_calls, amemalign_calls;
-static unsigned long long int amemalign_storage;
-static unsigned int cmemalign_zero_calls, cmemalign_calls;
-static unsigned long long int cmemalign_storage;
-static unsigned int resize_zero_calls, resize_calls;
-static unsigned long long int resize_storage;
-static unsigned int realloc_zero_calls, realloc_calls;
-static unsigned long long int realloc_storage;
-static unsigned int free_zero_calls, free_calls;
-static unsigned long long int free_storage;
+static unsigned int malloc_calls, malloc_0_calls;
+static unsigned long long int malloc_storage_request, malloc_storage_alloc;
+static unsigned int aalloc_calls, aalloc_0_calls;
+static unsigned long long int aalloc_storage_request, aalloc_storage_alloc;
+static unsigned int calloc_calls, calloc_0_calls;
+static unsigned long long int calloc_storage_request, calloc_storage_alloc;
+static unsigned int memalign_calls, memalign_0_calls;
+static unsigned long long int memalign_storage_request, memalign_storage_alloc;
+static unsigned int amemalign_calls, amemalign_0_calls;
+static unsigned long long int amemalign_storage_request, amemalign_storage_alloc;
+static unsigned int cmemalign_calls, cmemalign_0_calls;
+static unsigned long long int cmemalign_storage_request, cmemalign_storage_alloc;
+static unsigned int resize_calls, resize_0_calls;
+static unsigned long long int resize_storage_request, resize_storage_alloc;
+static unsigned int realloc_calls, realloc_0_calls;
+static unsigned long long int realloc_storage_request, realloc_storage_alloc;
+static unsigned int free_calls, free_null_calls;
+static unsigned long long int free_storage_request, free_storage_alloc;
 static unsigned int mmap_calls;
-static unsigned long long int mmap_storage;
+static unsigned long long int mmap_storage_request, mmap_storage_alloc;
 static unsigned int munmap_calls;
-static unsigned long long int munmap_storage;
+static unsigned long long int munmap_storage_request, munmap_storage_alloc;
 static unsigned int sbrk_calls;
 static unsigned long long int sbrk_storage;
@@ -294,29 +294,29 @@
 	char helpText[1024];
 	__cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
-								"\nHeap statistics:\n"
-								"  malloc    0-calls %'u; >0-calls %'u; storage %'llu bytes\n"
-								"  aalloc    0-calls %'u; >0-calls %'u; storage %'llu bytes\n"
-								"  calloc    0-calls %'u; >0-calls %'u; storage %'llu bytes\n"
-								"  memalign  0-calls %'u; >0-calls %'u; storage %'llu bytes\n"
-								"  amemalign 0-calls %'u; >0-calls %'u; storage %'llu bytes\n"
-								"  cmemalign 0-calls %'u; >0-calls %'u; storage %'llu bytes\n"
-								"  resize    0-calls %'u; >0-calls %'u; storage %'llu bytes\n"
-								"  realloc   0-calls %'u; >0-calls %'u; storage %'llu bytes\n"
-								"  free      0-calls %'u; >0-calls %'u; storage %'llu bytes\n"
-								"  mmap      calls %'u; storage %'llu bytes\n"
-								"  munmap    calls %'u; storage %'llu bytes\n"
-								"  sbrk      calls %'u; storage %'llu bytes\n",
-								malloc_zero_calls, malloc_calls, malloc_storage,
-								aalloc_zero_calls, aalloc_calls, aalloc_storage,
-								calloc_zero_calls, calloc_calls, calloc_storage,
-								memalign_zero_calls, memalign_calls, memalign_storage,
-								amemalign_zero_calls, amemalign_calls, amemalign_storage,
-								cmemalign_zero_calls, cmemalign_calls, cmemalign_storage,
-								resize_zero_calls, resize_calls, resize_storage,
-								realloc_zero_calls, realloc_calls, realloc_storage,
-								free_zero_calls, free_calls, free_storage,
-								mmap_calls, mmap_storage,
-								munmap_calls, munmap_storage,
-								sbrk_calls, sbrk_storage
+								"\nHeap statistics: (storage request / allocation + header)\n"
+								"  malloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  aalloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  calloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  memalign  >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  amemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  cmemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  resize    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  realloc   >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+								"  free      !null calls %'u; null calls %'u; storage %'llu / %'llu bytes\n"
+								"  sbrk      calls %'u; storage %'llu bytes\n"
+								"  mmap      calls %'u; storage %'llu / %'llu bytes\n"
+								"  munmap    calls %'u; storage %'llu / %'llu bytes\n",
+								malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc,
+								aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc,
+								calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc,
+								memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc,
+								amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc,
+								cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc,
+								resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc,
+								realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc,
+								free_calls, free_null_calls, free_storage_request, free_storage_alloc,
+								sbrk_calls, sbrk_storage,
+								mmap_calls, mmap_storage_request, mmap_storage_alloc,
+								munmap_calls, munmap_storage_request, munmap_storage_alloc
 		);
 } // printStats
@@ -329,29 +329,29 @@
 						"<sizes>\n"
 						"</sizes>\n"
-						"<total type=\"malloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"aalloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"calloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"memalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"amemalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"cmemalign\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"resize\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"realloc\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"free\" 0 count=\"%'u;\" >0 count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"mmap\" count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"munmap\" count=\"%'u;\" size=\"%'llu\"/> bytes\n"
+						"<total type=\"malloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"aalloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"calloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"memalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"amemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"cmemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"resize\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"realloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"free\" !null=\"%'u;\" 0 null=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
 						"<total type=\"sbrk\" count=\"%'u;\" size=\"%'llu\"/> bytes\n"
+						"<total type=\"mmap\" count=\"%'u;\" size=\"%'llu / %'llu\" / > bytes\n"
+						"<total type=\"munmap\" count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
 						"</malloc>",
-						malloc_zero_calls, malloc_calls, malloc_storage,
-						aalloc_zero_calls, aalloc_calls, aalloc_storage,
-						calloc_zero_calls, calloc_calls, calloc_storage,
-						memalign_zero_calls, memalign_calls, memalign_storage,
-						amemalign_zero_calls, amemalign_calls, amemalign_storage,
-						cmemalign_zero_calls, cmemalign_calls, cmemalign_storage,
-						resize_zero_calls, resize_calls, resize_storage,
-						realloc_zero_calls, realloc_calls, realloc_storage,
-						free_zero_calls, free_calls, free_storage,
-						mmap_calls, mmap_storage,
-						munmap_calls, munmap_storage,
-						sbrk_calls, sbrk_storage
+						malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc,
+						aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc,
+						calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc,
+						memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc,
+						amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc,
+						cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc,
+						resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc,
+						realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc,
+						free_calls, free_null_calls, free_storage_request, free_storage_alloc,
+						sbrk_calls, sbrk_storage,
+						mmap_calls, mmap_storage_request, mmap_storage_alloc,
+						munmap_calls, munmap_storage_request, munmap_storage_alloc
 		);
 	__cfaabi_bits_write( fileno( stream ), helpText, len );	// ensures all bytes written or exit
@@ -577,5 +577,6 @@
 		#ifdef __STATISTICS__
 		__atomic_add_fetch( &mmap_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &mmap_storage, tsize, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &mmap_storage_request, size, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &mmap_storage_alloc, tsize, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 
@@ -626,5 +627,6 @@
 		#ifdef __STATISTICS__
 		__atomic_add_fetch( &munmap_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &munmap_storage, size, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &munmap_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &munmap_storage_alloc, size, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 		if ( munmap( header, size ) == -1 ) {
@@ -642,5 +644,6 @@
 		#ifdef __STATISTICS__
 		__atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &free_storage, size, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &free_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &free_storage_alloc, size, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 
@@ -819,7 +822,7 @@
 		if ( likely( size > 0 ) ) {
 			__atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &malloc_storage, size, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &malloc_storage_request, size, __ATOMIC_SEQ_CST );
 		} else {
-			__atomic_add_fetch( &malloc_zero_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &malloc_0_calls, 1, __ATOMIC_SEQ_CST );
 		} // if
 		#endif // __STATISTICS__
@@ -835,7 +838,7 @@
 		if ( likely( size > 0 ) ) {
 			__atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &aalloc_storage, size, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &aalloc_storage_request, size, __ATOMIC_SEQ_CST );
 		} else {
-			__atomic_add_fetch( &aalloc_zero_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &aalloc_0_calls, 1, __ATOMIC_SEQ_CST );
 		} // if
 		#endif // __STATISTICS__
@@ -850,5 +853,5 @@
 	  if ( unlikely( size ) == 0 ) {			// 0 BYTE ALLOCATION RETURNS NULL POINTER
 			#ifdef __STATISTICS__
-			__atomic_add_fetch( &calloc_zero_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &calloc_0_calls, 1, __ATOMIC_SEQ_CST );
 			#endif // __STATISTICS__
 			return 0p;
@@ -856,5 +859,5 @@
 		#ifdef __STATISTICS__
 		__atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &calloc_storage, dim * elemSize, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &calloc_storage_request, dim * elemSize, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 
@@ -891,5 +894,5 @@
 	  if ( unlikely( size == 0 ) ) {					// special cases
 			#ifdef __STATISTICS__
-			__atomic_add_fetch( &resize_zero_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &resize_0_calls, 1, __ATOMIC_SEQ_CST );
 			#endif // __STATISTICS__
 			free( oaddr );
@@ -902,5 +905,5 @@
 	  if ( unlikely( oaddr == 0p ) ) {
 			#ifdef __STATISTICS__
-			__atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
 			#endif // __STATISTICS__
 			return mallocNoStats( size );
@@ -921,5 +924,5 @@
 
 		#ifdef __STATISTICS__
-		__atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 
@@ -936,5 +939,5 @@
 	  if ( unlikely( size == 0 ) ) {					// special cases
 			#ifdef __STATISTICS__
-			__atomic_add_fetch( &realloc_zero_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &realloc_0_calls, 1, __ATOMIC_SEQ_CST );
 			#endif // __STATISTICS__
 			free( oaddr );
@@ -947,5 +950,5 @@
 	  if ( unlikely( oaddr == 0p ) ) {
 			#ifdef __STATISTICS__
-			__atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
 			#endif // __STATISTICS__
 			return mallocNoStats( size );
@@ -969,5 +972,5 @@
 
 		#ifdef __STATISTICS__
-	  	__atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
+	  	__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 
@@ -1000,7 +1003,7 @@
 		if ( likely( size > 0 ) ) {
 			__atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &memalign_storage, size, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &memalign_storage_request, size, __ATOMIC_SEQ_CST );
 		} else {
-			__atomic_add_fetch( &memalign_zero_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &memalign_0_calls, 1, __ATOMIC_SEQ_CST );
 		} // if
 		#endif // __STATISTICS__
@@ -1016,7 +1019,7 @@
 		if ( likely( size > 0 ) ) {
 			__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &cmemalign_storage, size, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &cmemalign_storage_request, size, __ATOMIC_SEQ_CST );
 		} else {
-			__atomic_add_fetch( &cmemalign_zero_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &cmemalign_0_calls, 1, __ATOMIC_SEQ_CST );
 		} // if
 		#endif // __STATISTICS__
@@ -1031,5 +1034,5 @@
 	  if ( unlikely( size ) == 0 ) {					// 0 BYTE ALLOCATION RETURNS NULL POINTER
 			#ifdef __STATISTICS__
-			__atomic_add_fetch( &cmemalign_zero_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &cmemalign_0_calls, 1, __ATOMIC_SEQ_CST );
 			#endif // __STATISTICS__
 			return 0p;
@@ -1037,5 +1040,5 @@
 		#ifdef __STATISTICS__
 		__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &cmemalign_storage, dim * elemSize, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &cmemalign_storage_request, dim * elemSize, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 
@@ -1101,5 +1104,5 @@
 	  if ( unlikely( addr == 0p ) ) {					// special case
 			#ifdef __STATISTICS__
-			__atomic_add_fetch( &free_zero_calls, 1, __ATOMIC_SEQ_CST );
+			__atomic_add_fetch( &free_null_calls, 1, __ATOMIC_SEQ_CST );
 			#endif // __STATISTICS__
 
@@ -1280,5 +1283,5 @@
   if ( unlikely( size == 0 ) ) {						// special cases
 		#ifdef __STATISTICS__
-		__atomic_add_fetch( &resize_zero_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &resize_0_calls, 1, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 		free( oaddr );
@@ -1294,5 +1297,5 @@
 		#ifdef __STATISTICS__
 		__atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 		return memalignNoStats( nalign, size );
@@ -1329,5 +1332,5 @@
 
 	#ifdef __STATISTICS__
-	__atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
+	__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
 	#endif // __STATISTICS__
 
@@ -1342,5 +1345,5 @@
   if ( unlikely( size == 0 ) ) {						// special cases
 		#ifdef __STATISTICS__
-		__atomic_add_fetch( &realloc_zero_calls, 1, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &realloc_0_calls, 1, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 		free( oaddr );
@@ -1356,5 +1359,5 @@
 		#ifdef __STATISTICS__
 		__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
+		__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
 		#endif // __STATISTICS__
 		return memalignNoStats( nalign, size );
@@ -1380,5 +1383,5 @@
 	#ifdef __STATISTICS__
 	__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
-	__atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
+	__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
 	#endif // __STATISTICS__
 
Index: libcfa/src/stdlib.cfa
===================================================================
--- libcfa/src/stdlib.cfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/stdlib.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -10,6 +10,6 @@
 // Created On       : Thu Jan 28 17:10:29 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Nov 12 07:46:09 2020
-// Update Count     : 503
+// Last Modified On : Mon Jan  3 09:36:27 2022
+// Update Count     : 519
 //
 
@@ -221,7 +221,34 @@
 //---------------------------------------
 
-bool threading_enabled(void) __attribute__((weak)) {
-	return false;
-}
+static uint32_t seed = 0;								// current seed
+static thread_local uint32_t state;						// random state
+
+void set_seed( uint32_t seed_ ) { state = seed = seed_; }
+uint32_t get_seed() { return seed; }
+
+#define GENERATOR LCG
+
+inline uint32_t MarsagliaXor( uint32_t & state ) {
+	if ( unlikely( seed == 0 ) ) set_seed( rdtscl() );
+	else if ( unlikely( state == 0 ) ) state = seed;
+	state ^= state << 6;
+	state ^= state >> 21;
+	state ^= state << 7;
+	return state;
+} // MarsagliaXor
+
+inline uint32_t LCG( uint32_t & state ) {				// linear congruential generator
+	if ( unlikely( seed == 0 ) ) set_seed( rdtscl() );
+	else if ( unlikely( state == 0 ) ) state = seed;
+	return state = 36969 * (state & 65535) + (state >> 16); // 36969 is NOT prime!
+} // LCG
+
+uint32_t prng( PRNG & prng ) with( prng ) { callcnt += 1; return GENERATOR( state ); }
+
+uint32_t prng( void ) { return GENERATOR( state ); }
+
+//---------------------------------------
+
+bool threading_enabled( void ) __attribute__(( weak )) { return false; }
 
 // Local Variables: //
Index: libcfa/src/stdlib.hfa
===================================================================
--- libcfa/src/stdlib.hfa	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ libcfa/src/stdlib.hfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -10,6 +10,6 @@
 // Created On       : Thu Jan 28 17:12:35 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Apr 20 21:20:03 2021
-// Update Count     : 575
+// Last Modified On : Sun Jan  2 22:53:57 2022
+// Update Count     : 594
 //
 
@@ -43,33 +43,30 @@
 //---------------------------------------
 
-// Macro because of returns
-#define ARRAY_ALLOC$( allocation, alignment, dim ) \
-	if ( _Alignof(T) <= libAlign() ) return (T *)(void *)allocation( dim, (size_t)sizeof(T) ); /* C allocation */ \
-	else return (T *)alignment( _Alignof(T), dim, sizeof(T) )
-
 static inline forall( T & | sized(T) ) {
 	// CFA safe equivalents, i.e., implicit size specification
 
 	T * malloc( void ) {
-		if ( _Alignof(T) <= libAlign() ) return (T *)(void *)malloc( (size_t)sizeof(T) ); // C allocation
+		if ( _Alignof(T) <= libAlign() ) return (T *)malloc( sizeof(T) ); // C allocation
 		else return (T *)memalign( _Alignof(T), sizeof(T) );
 	} // malloc
 
 	T * aalloc( size_t dim ) {
-		ARRAY_ALLOC$( aalloc, amemalign, dim );
+		if ( _Alignof(T) <= libAlign() ) return (T *)aalloc( dim, sizeof(T) ); // C allocation
+		else return (T *)amemalign( _Alignof(T), dim, sizeof(T) );
 	} // aalloc
 
 	T * calloc( size_t dim ) {
-		ARRAY_ALLOC$( calloc, cmemalign, dim );
+		if ( _Alignof(T) <= libAlign() ) return (T *)calloc( dim, sizeof(T) ); // C allocation
+		else return (T *)cmemalign( _Alignof(T), dim, sizeof(T) );
 	} // calloc
 
 	T * resize( T * ptr, size_t size ) {				// CFA resize, eliminate return-type cast
-		if ( _Alignof(T) <= libAlign() ) return (T *)(void *)resize( (void *)ptr, size ); // CFA resize
-		else return (T *)(void *)resize( (void *)ptr, _Alignof(T), size ); // CFA resize
+		if ( _Alignof(T) <= libAlign() ) return (T *)resize( (void *)ptr, size ); // CFA resize
+		else return (T *)resize( (void *)ptr, _Alignof(T), size ); // CFA resize
 	} // resize
 
 	T * realloc( T * ptr, size_t size ) {				// CFA realloc, eliminate return-type cast
-		if ( _Alignof(T) <= libAlign() ) return (T *)(void *)realloc( (void *)ptr, size ); // C realloc
-		else return (T *)(void *)realloc( (void *)ptr, _Alignof(T), size ); // CFA realloc
+		if ( _Alignof(T) <= libAlign() ) return (T *)realloc( (void *)ptr, size ); // C realloc
+		else return (T *)realloc( (void *)ptr, _Alignof(T), size ); // CFA realloc
 	} // realloc
 
@@ -169,6 +166,7 @@
 		return ret;
 	}
+	S_fill(T) 		?`fill ( zero_t ) = void; // FIX ME: remove this once ticket 214 is resolved
+	S_fill(T) 		?`fill ( T * a ) 				{ return (S_fill(T)){ 'T', '0', 0, a }; } // FIX ME: remove this once ticket 214 is resolved
 	S_fill(T) 		?`fill ( char c ) 				{ return (S_fill(T)){ 'c', c };	}
-	S_fill(T) 		?`fill ( T * a ) 				{ return (S_fill(T)){ 'T', '0', 0, a }; }
 	S_fill(T) 		?`fill ( T a[], size_t nmemb ) 	{ return (S_fill(T)){ 'a', '0', nmemb * sizeof(T), a }; }
 
@@ -362,9 +360,9 @@
 
 static inline {
-	long int random( long int l, long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l) + l; } // [l,u)
-	long int random( long int u ) { if ( u < 0 ) return random( u, 0 ); else return random( 0, u ); } // [0,u)
+	long int random( long int l, long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l + 1) + l; } // [l,u]
+	long int random( long int u ) { return random( 0, u - 1 ); } // [0,u)
 	unsigned long int random( void ) { return lrand48(); }
 	unsigned long int random( unsigned long int u ) { return lrand48() % u; } // [0,u)
-	unsigned long int random( unsigned long int l, unsigned long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l) + l; } // [l,u)
+	unsigned long int random( unsigned long int l, unsigned long int u ) { if ( u < l ) [u, l] = [l, u]; return lrand48() % (u - l + 1) + l; } // [l,u]
 
 	char random( void ) { return (unsigned long int)random(); }
@@ -387,4 +385,33 @@
 //---------------------------------------
 
+struct PRNG {
+	uint32_t callcnt;									// call count
+	uint32_t seed;										// current seed
+	uint32_t state;										// random state
+}; // PRNG
+
+extern uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )); // [0,UINT_MAX]
+static inline {
+	void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; } // set seed
+	void ?{}( PRNG & prng ) { set_seed( prng, rdtscl() ); }	// random seed
+	void ?{}( PRNG & prng, uint32_t seed ) { set_seed( prng, seed ); } // fixed seed
+	uint32_t get_seed( PRNG & prng ) __attribute__(( warn_unused_result )) with( prng ) { return seed; } // get seed
+	uint32_t prng( PRNG & prng, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( prng ) % u; } // [0,u)
+	uint32_t prng( PRNG & prng, uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( prng, u - l + 1 ) + l; } // [l,u]
+	uint32_t calls( PRNG & prng ) __attribute__(( warn_unused_result )) with( prng ) { return callcnt; }
+} // distribution
+
+extern void set_seed( uint32_t seed );					// set per thread seed
+extern uint32_t get_seed();								// get seed
+extern uint32_t prng( void ) __attribute__(( warn_unused_result )); // [0,UINT_MAX]
+static inline {
+	uint32_t prng( uint32_t u ) __attribute__(( warn_unused_result ));
+	uint32_t prng( uint32_t u ) { return prng() % u; }	// [0,u)
+	uint32_t prng( uint32_t l, uint32_t u ) __attribute__(( warn_unused_result ));
+	uint32_t prng( uint32_t l, uint32_t u ) { return prng( u - l + 1 ) + l; } // [l,u]
+} // distribution
+
+//---------------------------------------
+
 extern bool threading_enabled( void ) OPTIONAL_THREAD;
 
Index: src/AST/Decl.hpp
===================================================================
--- src/AST/Decl.hpp	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ src/AST/Decl.hpp	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -270,8 +270,8 @@
 	: AggregateDecl( loc, name, std::move(attrs), linkage ), kind( kind ) {}
 
-	bool is_coroutine() { return kind == Coroutine; }
-	bool is_generator() { return kind == Generator; }
-	bool is_monitor  () { return kind == Monitor  ; }
-	bool is_thread   () { return kind == Thread   ; }
+	bool is_coroutine() const { return kind == Coroutine; }
+	bool is_generator() const { return kind == Generator; }
+	bool is_monitor  () const { return kind == Monitor  ; }
+	bool is_thread   () const { return kind == Thread   ; }
 
 	const Decl * accept( Visitor & v ) const override { return v.visit( this ); }
Index: src/AST/Expr.cpp
===================================================================
--- src/AST/Expr.cpp	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ src/AST/Expr.cpp	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -9,7 +9,7 @@
 // Author           : Aaron B. Moss
 // Created On       : Wed May 15 17:00:00 2019
-// Last Modified By : Peter A. Buhr
-// Created On       : Thr Jun 13 13:38:00 2019
-// Update Count     : 6
+// Last Modified By : Andrew Beach
+// Last Modified On : Tue Nov 30 14:23:00 2021
+// Update Count     : 7
 //
 
@@ -141,32 +141,35 @@
 	/// The type of the address of a type.
 	/// Caller is responsible for managing returned memory
-	Type * addrType( const Type * type ) {
-		if ( const ReferenceType * refType = dynamic_cast< const ReferenceType * >( type ) ) {
-			return new ReferenceType{ addrType( refType->base ), refType->qualifiers };
+	Type * addrType( const ptr<Type> & type ) {
+		if ( auto refType = type.as< ReferenceType >() ) {
+			return new ReferenceType( addrType( refType->base ), refType->qualifiers );
 		} else {
-			return new PointerType{ type };
+			return new PointerType( type );
 		}
 	}
-}
-
-AddressExpr::AddressExpr( const CodeLocation & loc, const Expr * a ) : Expr( loc ), arg( a ) {
-	if ( arg->result ) {
-		if ( arg->get_lvalue() ) {
-			// lvalue, retains all levels of reference, and gains a pointer inside the references
-			Type * res = addrType( arg->result );
-			result = res;
+
+	/// The type of the address of an expression.
+	/// Caller is responsible for managing returned memory
+	Type * addrExprType( const CodeLocation & loc, const Expr * arg ) {
+		assert( arg );
+		// If the expression's type is unknown, the address type is unknown.
+		if ( nullptr == arg->result ) {
+			return nullptr;
+		// An lvalue is transformed directly.
+		} else if ( arg->get_lvalue() ) {
+			return addrType( arg->result );
+		// Strip a layer of reference to "create" an lvalue expression.
+		} else if ( auto refType = arg->result.as< ReferenceType >() ) {
+			return addrType( refType->base );
 		} else {
-			// taking address of non-lvalue, must be a reference, loses one layer of reference
-			if ( const ReferenceType * refType =
-					dynamic_cast< const ReferenceType * >( arg->result.get() ) ) {
-				Type * res = addrType( refType->base );
-				result = res;
-			} else {
-				SemanticError( loc, arg->result.get(),
-					"Attempt to take address of non-lvalue expression: " );
-			}
+			SemanticError( loc, arg->result.get(),
+				"Attempt to take address of non-lvalue expression: " );
 		}
 	}
 }
+
+AddressExpr::AddressExpr( const CodeLocation & loc, const Expr * a ) :
+	Expr( loc, addrExprType( loc, a ) ), arg( a )
+{}
 
 // --- LabelAddressExpr
Index: src/AST/Print.cpp
===================================================================
--- src/AST/Print.cpp	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ src/AST/Print.cpp	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -5,5 +5,5 @@
 // file "LICENCE" distributed with Cforall.
 //
-// Print.cpp --
+// Print.cpp -- Print an AST (or sub-tree) to a stream.
 //
 // Author           : Thierry Delisle
Index: src/AST/Print.hpp
===================================================================
--- src/AST/Print.hpp	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ src/AST/Print.hpp	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -5,5 +5,5 @@
 // file "LICENCE" distributed with Cforall.
 //
-// Print.hpp --
+// Print.hpp -- Print an AST (or sub-tree) to a stream.
 //
 // Author           : Thierry Delisle
@@ -35,11 +35,11 @@
 template< typename Coll >
 void printAll( std::ostream & os, const Coll & c, Indenter indent = {} ) {
-    for ( const auto & i : c ) {
-        if ( ! i ) continue;
-        
-        os << indent;
-        print( os, i, indent );
-        os << std::endl;
-    }
+	for ( const auto & i : c ) {
+		if ( ! i ) continue;
+
+		os << indent;
+		print( os, i, indent );
+		os << std::endl;
+	}
 }
 
Index: src/Concurrency/Keywords.cc
===================================================================
--- src/Concurrency/Keywords.cc	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ src/Concurrency/Keywords.cc	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -979,4 +979,5 @@
 			// If this is the destructor for a monitor it must be mutex
 			if(isDtor) {
+				// This reflects MutexKeyword::validate, except does not produce an error.
 				Type* ty = decl->get_functionType()->get_parameters().front()->get_type();
 
Index: src/Concurrency/Keywords.h
===================================================================
--- src/Concurrency/Keywords.h	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ src/Concurrency/Keywords.h	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -5,5 +5,5 @@
 // file "LICENCE" distributed with Cforall.
 //
-// Keywords.h --
+// Keywords.h -- Implement concurrency constructs from their keywords.
 //
 // Author           : Thierry Delisle
@@ -19,4 +19,7 @@
 
 class Declaration;
+namespace ast {
+	class TranslationUnit;
+}
 
 namespace Concurrency {
@@ -24,4 +27,11 @@
 	void implementMutexFuncs( std::list< Declaration * > & translationUnit );
 	void implementThreadStarter( std::list< Declaration * > & translationUnit );
+
+/// Implement the sue-like keywords and the suspend keyword.
+void implementKeywords( ast::TranslationUnit & translationUnit );
+/// Implement the mutex parameters and mutex statement.
+void implementMutex( ast::TranslationUnit & translationUnit );
+/// Add the thread starter code to constructors.
+void implementThreadStarter( ast::TranslationUnit & translationUnit );
 };
 
Index: src/Concurrency/KeywordsNew.cpp
===================================================================
--- src/Concurrency/KeywordsNew.cpp	(revision 6c53a93454697f8b549b141d01803272ff074931)
+++ src/Concurrency/KeywordsNew.cpp	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -0,0 +1,586 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// KeywordsNew.cpp -- Implement concurrency constructs from their keywords.
+//
+// Author           : Andrew Beach
+// Created On       : Tue Nov 16  9:53:00 2021
+// Last Modified By : Andrew Beach
+// Last Modified On : Wed Dec  1 11:24:00 2021
+// Update Count     : 1
+//
+
+#include "Concurrency/Keywords.h"
+
+#include "AST/Copy.hpp"
+#include "AST/Decl.hpp"
+#include "AST/Pass.hpp"
+#include "AST/Stmt.hpp"
+#include "AST/TranslationUnit.hpp"
+#include "CodeGen/OperatorTable.h"
+#include "Common/utility.h"
+#include "InitTweak/InitTweak.h"
+
+namespace Concurrency {
+
+namespace {
+
+inline static bool isThread( const ast::DeclWithType * decl ) {
+	auto baseType = decl->get_type()->stripDeclarator();
+	auto instType = dynamic_cast<const ast::StructInstType *>( baseType );
+	if ( nullptr == instType ) { return false; }
+	return instType->base->is_thread();
+}
+
+// --------------------------------------------------------------------------
+struct MutexKeyword final {
+	const ast::FunctionDecl * postvisit( const ast::FunctionDecl * decl );
+	void postvisit( const ast::StructDecl * decl );
+	const ast::Stmt * postvisit( const ast::MutexStmt * stmt );
+
+	static std::vector<const ast::DeclWithType *> findMutexArgs(
+			const ast::FunctionDecl * decl, bool & first );
+	static void validate( const ast::DeclWithType * decl );
+
+	ast::CompoundStmt * addDtorStatements( const ast::FunctionDecl* func, const ast::CompoundStmt *, const std::vector<const ast::DeclWithType *> &);
+	ast::CompoundStmt * addStatements( const ast::FunctionDecl* func, const ast::CompoundStmt *, const std::vector<const ast::DeclWithType *> &);
+	ast::CompoundStmt * addStatements( const ast::CompoundStmt * body, const std::vector<ast::ptr<ast::Expr>> & args );
+	ast::CompoundStmt * addThreadDtorStatements( const ast::FunctionDecl* func, const ast::CompoundStmt * body, const std::vector<const ast::DeclWithType *> & args );
+
+private:
+	const ast::StructDecl * monitor_decl = nullptr;
+	const ast::StructDecl * guard_decl = nullptr;
+	const ast::StructDecl * dtor_guard_decl = nullptr;
+	const ast::StructDecl * thread_guard_decl = nullptr;
+	const ast::StructDecl * lock_guard_decl = nullptr;
+
+	static ast::ptr<ast::Type> generic_func;
+};
+
+const ast::FunctionDecl * MutexKeyword::postvisit(
+		const ast::FunctionDecl * decl ) {
+	bool is_first_argument_mutex = false;
+	const std::vector<const ast::DeclWithType *> mutexArgs =
+		findMutexArgs( decl, is_first_argument_mutex );
+	bool const isDtor = CodeGen::isDestructor( decl->name );
+
+	// Does this function have any mutex arguments that connect to monitors?
+	if ( mutexArgs.empty() ) {
+		// If this is the destructor for a monitor it must be mutex.
+		if ( isDtor ) {
+			// This reflects MutexKeyword::validate, but no error messages.
+			const ast::Type * type = decl->type->params.front();
+
+			// If it's a copy, it's not a mutex.
+			const ast::ReferenceType * refType = dynamic_cast<const ast::ReferenceType *>( type );
+			if ( nullptr == refType ) {
+				return decl;
+			}
+
+			// If it is not pointing directly to a type, it's not a mutex.
+			auto base = refType->base;
+			if ( base.as<ast::ReferenceType>() ) return decl;
+			if ( base.as<ast::PointerType>() ) return decl;
+
+			// If it is not a struct, it's not a mutex.
+			auto baseStruct = base.as<ast::StructInstType>();
+			if ( nullptr == baseStruct ) return decl;
+
+			// If it is a monitor or a thread, its destructor must use a mutex parameter.
+			if( baseStruct->base->is_monitor() || baseStruct->base->is_thread() ) {
+				SemanticError( decl, "destructors for structures declared as \"monitor\" must use mutex parameters\n" );
+			}
+		}
+		return decl;
+	}
+
+	// Monitors can't be constructed with mutual exclusion.
+	if ( CodeGen::isConstructor( decl->name ) && is_first_argument_mutex ) {
+		SemanticError( decl, "constructors cannot have mutex parameters\n" );
+	}
+
+	// It makes no sense to have multiple mutex parameters for the destructor.
+	if ( isDtor && mutexArgs.size() != 1 ) {
+		SemanticError( decl, "destructors can only have 1 mutex argument\n" );
+	}
+
+	// Make sure all the mutex arguments are monitors.
+	for ( auto arg : mutexArgs ) {
+		validate( arg );
+	}
+
+	// Check to see if there is a body that needs to be instrumented.
+	const ast::CompoundStmt * body = decl->stmts;
+	if ( !body ) return decl;
+
+	// Check that the required headers have been seen.
+	if ( !monitor_decl || !guard_decl || !dtor_guard_decl ) {
+		SemanticError( decl, "mutex keyword requires monitors to be in scope, add #include <monitor.hfa>\n" );
+	}
+
+	// Instrument the body.
+	ast::CompoundStmt * newBody = nullptr;
+	if ( isDtor && isThread( mutexArgs.front() ) ) {
+		if ( !thread_guard_decl ) {
+			SemanticError( decl, "thread destructor requires threads to be in scope, add #include <thread.hfa>\n" );
+		}
+		newBody = addThreadDtorStatements( decl, body, mutexArgs );
+	} else if ( isDtor ) {
+		newBody = addDtorStatements( decl, body, mutexArgs );
+	} else {
+		newBody = addStatements( decl, body, mutexArgs );
+	}
+	assert( newBody );
+	return ast::mutate_field( decl, &ast::FunctionDecl::stmts, newBody );
+}
+
+void MutexKeyword::postvisit( const ast::StructDecl * decl ) {
+	if ( !decl->body ) {
+		return;
+	} else if ( decl->name == "monitor$" ) {
+		assert( !monitor_decl );
+		monitor_decl = decl;
+	} else if ( decl->name == "monitor_guard_t" ) {
+		assert( !guard_decl );
+		guard_decl = decl;
+	} else if ( decl->name == "monitor_dtor_guard_t" ) {
+		assert( !dtor_guard_decl );
+		dtor_guard_decl = decl;
+	} else if ( decl->name == "thread_dtor_guard_t" ) {
+		assert( !thread_guard_decl );
+		thread_guard_decl = decl;
+	} else if ( decl->name == "__mutex_stmt_lock_guard" ) {
+		assert( !lock_guard_decl );
+		lock_guard_decl = decl;
+	}
+}
+
+// Replace a mutex statement by a compound statement that begins with the lock-guard declarations built by addStatements.
+const ast::Stmt * MutexKeyword::postvisit( const ast::MutexStmt * stmt ) {
+	if ( !lock_guard_decl ) SemanticError( stmt->location, "mutex stmt requires a header, add #include <mutex_stmt.hfa>\n" ); // guard struct not seen: report it rather than pass a null declaration on
+	ast::CompoundStmt * body = new ast::CompoundStmt( stmt->location, { stmt->stmt } );
+	return addStatements( body, stmt->mutexObjs ); // return what addStatements produced; do not assume body was edited in place
+}
+
+std::vector<const ast::DeclWithType *> MutexKeyword::findMutexArgs(
+		const ast::FunctionDecl * decl, bool & first ) {
+	std::vector<const ast::DeclWithType *> mutexArgs;
+
+	bool once = true;
+	for ( auto arg : decl->params ) {
+		const ast::Type * type = arg->get_type();
+		if ( type->is_mutex() ) {
+			if ( once ) first = true;
+			mutexArgs.push_back( arg.get() );
+		}
+		once = false;
+	}
+	return mutexArgs;
+}
+
+void MutexKeyword::validate( const ast::DeclWithType * decl ) {
+	const ast::Type * type = decl->get_type();
+
+	// If it's a copy, it's not a mutex.
+	const ast::ReferenceType * refType = dynamic_cast<const ast::ReferenceType *>( type );
+	if ( nullptr == refType ) {
+		SemanticError( decl, "Mutex argument must be of reference type " );
+	}
+
+	// If it is not pointing directly to a type, it's not a mutex.
+	auto base = refType->base;
+	if ( base.as<ast::ReferenceType>() || base.as<ast::PointerType>() ) {
+		SemanticError( decl, "Mutex argument have exactly one level of indirection " );
+	}
+
+	// If it is not a struct, it's not a mutex.
+	auto baseStruct = base.as<ast::StructInstType>();
+	if ( nullptr == baseStruct ) return;
+
+	// Make sure that only the outer reference is mutex.
+	if( baseStruct->is_mutex() ) {
+		SemanticError( decl, "mutex keyword may only appear once per argument " );
+	}
+}
+
+ast::CompoundStmt * MutexKeyword::addDtorStatements(
+		const ast::FunctionDecl* func, const ast::CompoundStmt * body,
+		const std::vector<const ast::DeclWithType *> & args ) {
+	ast::Type * argType = ast::shallowCopy( args.front()->get_type() );
+	argType->set_mutex( false );
+
+	ast::CompoundStmt * mutBody = ast::mutate( body );
+
+	// Generated code goes near the beginning of body:
+	const CodeLocation & location = mutBody->location;
+
+	const ast::ObjectDecl * monitor = new ast::ObjectDecl(
+		location,
+		"__monitor",
+		new ast::PointerType( new ast::StructInstType( monitor_decl ) ),
+		new ast::SingleInit(
+			location,
+			new ast::UntypedExpr(
+				location,
+				new ast::NameExpr( location, "get_monitor" ),
+				{ new ast::CastExpr(
+					location,
+					new ast::VariableExpr( location, args.front() ),
+					argType, ast::ExplicitCast
+				) }
+			)
+		),
+		ast::Storage::Classes(),
+		ast::Linkage::Cforall
+	);
+
+	assert( generic_func );
+
+	// In reverse order:
+	// monitor_dtor_guard_t __guard = { &__monitor, func, false };
+	mutBody->push_front(
+		new ast::DeclStmt( location, new ast::ObjectDecl(
+			location,
+			"__guard",
+			new ast::StructInstType( dtor_guard_decl ),
+			new ast::ListInit(
+				location,
+				{
+					new ast::SingleInit( location,
+						new ast::AddressExpr(
+							new ast::VariableExpr( location, monitor ) ) ),
+					new ast::SingleInit( location,
+						new ast::CastExpr( location,
+							new ast::VariableExpr( location, func ),
+							generic_func,
+							ast::ExplicitCast ) ),
+					new ast::SingleInit( location,
+						ast::ConstantExpr::from_bool( location, false ) ),
+				},
+				{},
+				ast::MaybeConstruct
+			),
+			ast::Storage::Classes(),
+			ast::Linkage::Cforall
+		))
+	);
+
+	// monitor$ * __monitor = get_monitor(a);
+	mutBody->push_front( new ast::DeclStmt( location, monitor ) );
+
+	return mutBody;
+}
+
+ast::CompoundStmt * MutexKeyword::addStatements(
+		const ast::FunctionDecl* func, const ast::CompoundStmt * body,
+		const std::vector<const ast::DeclWithType * > & args ) {
+	ast::CompoundStmt * mutBody = ast::mutate( body );
+
+	// Code is generated near the beginning of the compound statement.
+	const CodeLocation & location = mutBody->location;
+
+	// Make pointer to the monitors.
+	ast::ObjectDecl * monitors = new ast::ObjectDecl(
+		location,
+		"__monitors",
+		new ast::ArrayType(
+			new ast::PointerType(
+				new ast::StructInstType( monitor_decl )
+			),
+			ast::ConstantExpr::from_ulong( location, args.size() ),
+			ast::FixedLen,
+			ast::DynamicDim
+		),
+		new ast::ListInit(
+			location,
+			map_range<std::vector<ast::ptr<ast::Init>>>(
+				args,
+				[]( const ast::DeclWithType * decl ) {
+					return new ast::SingleInit(
+						decl->location,
+						new ast::UntypedExpr(
+							decl->location,
+							new ast::NameExpr( decl->location, "get_monitor" ),
+							{
+								new ast::CastExpr(
+									decl->location,
+									new ast::VariableExpr( decl->location, decl ),
+									decl->get_type(),
+									ast::ExplicitCast
+								)
+							}
+						)
+					);
+				}
+			)
+		),
+		ast::Storage::Classes(),
+		ast::Linkage::Cforall
+	);
+
+	assert( generic_func );
+
+	// In Reverse Order:
+	mutBody->push_front(
+		new ast::DeclStmt( location, new ast::ObjectDecl(
+			location,
+			"__guard",
+			new ast::StructInstType( guard_decl ),
+			new ast::ListInit(
+				location,
+				{
+					new ast::SingleInit( location,
+						new ast::VariableExpr( location, monitors ) ),
+					new ast::SingleInit( location,
+						ast::ConstantExpr::from_ulong( location, args.size() ) ),
+					new ast::SingleInit( location, new ast::CastExpr(
+						location,
+						new ast::VariableExpr( location, func ),
+						generic_func,
+						ast::ExplicitCast
+					) ),
+				},
+				{},
+				ast::MaybeConstruct
+			),
+			ast::Storage::Classes(),
+			ast::Linkage::Cforall
+		))
+	);
+
+	// monitor$ * __monitors[] = { get_monitor(a), get_monitor(b) };
+	mutBody->push_front( new ast::DeclStmt( location, monitors ) );
+
+	return mutBody;
+}
+
+ast::CompoundStmt * MutexKeyword::addStatements(
+		const ast::CompoundStmt * body,
+		const std::vector<ast::ptr<ast::Expr>> & args ) {
+	ast::CompoundStmt * mutBody = ast::mutate( body );
+
+	// Code is generated near the beginning of the compound statement.
+	const CodeLocation & location = mutBody->location;
+
+	// Make pointer to the monitors.
+	ast::ObjectDecl * monitors = new ast::ObjectDecl(
+		location,
+		"__monitors",
+		new ast::ArrayType(
+			new ast::PointerType(
+				new ast::TypeofType(
+					new ast::UntypedExpr(
+						location,
+						new ast::NameExpr( location, "__get_type" ),
+						{ args.front() }
+					)
+				)
+			),
+			ast::ConstantExpr::from_ulong( location, args.size() ),
+			ast::FixedLen,
+			ast::DynamicDim
+		),
+		new ast::ListInit(
+			location,
+			map_range<std::vector<ast::ptr<ast::Init>>>(
+				args, [](const ast::Expr * expr) {
+					return new ast::SingleInit(
+						expr->location,
+						new ast::UntypedExpr(
+							expr->location,
+							new ast::NameExpr( expr->location, "__get_ptr" ),
+							{ expr }
+						)
+					);
+				}
+			)
+		),
+		ast::Storage::Classes(),
+		ast::Linkage::Cforall
+	);
+
+	ast::StructInstType * lock_guard_struct =
+			new ast::StructInstType( lock_guard_decl );
+	ast::TypeExpr * lock_type_expr = new ast::TypeExpr(
+		location,
+		new ast::TypeofType(
+			new ast::UntypedExpr(
+				location,
+				new ast::NameExpr( location, "__get_type" ),
+				{ args.front() }
+			)
+		)
+	);
+
+	lock_guard_struct->params.push_back( lock_type_expr );
+
+	// In reverse order:
+	// __mutex_stmt_lock_guard __guard = { __monitors, # }; (guard type parameterized by typeof( __get_type(a) ))
+	mutBody->push_front(
+		new ast::DeclStmt(
+			location,
+			new ast::ObjectDecl(
+				location,
+				"__guard",
+				lock_guard_struct,
+				new ast::ListInit(
+					location,
+					{
+						new ast::SingleInit(
+							location,
+							new ast::VariableExpr( location, monitors ) ),
+						new ast::SingleInit(
+							location,
+							ast::ConstantExpr::from_ulong( location, args.size() ) ),
+					},
+					{},
+					ast::MaybeConstruct
+				),
+				ast::Storage::Classes(),
+				ast::Linkage::Cforall
+			)
+		)
+	);
+
+	// typeof( __get_type(a) ) * __monitors[] = { __get_ptr(a), __get_ptr(b) };
+	mutBody->push_front( new ast::DeclStmt( location, monitors ) );
+
+	return mutBody;
+}
+
+ast::CompoundStmt * MutexKeyword::addThreadDtorStatements(
+		const ast::FunctionDecl*, const ast::CompoundStmt * body,
+		const std::vector<const ast::DeclWithType * > & args ) {
+	assert( args.size() == 1 );
+	const ast::DeclWithType * arg = args.front();
+	const ast::Type * argType = arg->get_type();
+	assert( argType->is_mutex() );
+
+	ast::CompoundStmt * mutBody = ast::mutate( body );
+
+	// The code is generated near the front of the body.
+	const CodeLocation & location = mutBody->location;
+
+	// thread_dtor_guard_t __guard = { this, intptr( 0 ) };
+	mutBody->push_front( new ast::DeclStmt(
+		location,
+		new ast::ObjectDecl(
+			location,
+			"__guard",
+			new ast::StructInstType( thread_guard_decl ),
+			new ast::ListInit(
+				location,
+				{
+					new ast::SingleInit( location,
+						new ast::CastExpr( location,
+							new ast::VariableExpr( location, arg ), argType ) ),
+					new ast::SingleInit(
+						location,
+						new ast::UntypedExpr(
+							location,
+							new ast::NameExpr( location, "intptr" ), {
+								ast::ConstantExpr::from_int( location, 0 ),
+							}
+						) ),
+				},
+				{},
+				ast::MaybeConstruct
+			),
+			ast::Storage::Classes(),
+			ast::Linkage::Cforall
+		)
+	));
+
+	return mutBody;
+}
+
+ast::ptr<ast::Type> MutexKeyword::generic_func =
+	new ast::FunctionType( ast::VariableArgs );
+
+// --------------------------------------------------------------------------
+struct ThreadStarter final {
+	void previsit( const ast::StructDecl * decl );
+	const ast::FunctionDecl * postvisit( const ast::FunctionDecl * decl );
+
+private:
+	bool thread_ctor_seen = false;
+	const ast::StructDecl * thread_decl = nullptr;
+};
+
+void ThreadStarter::previsit( const ast::StructDecl * decl ) {
+	if ( decl->body && decl->name == "thread$" ) {
+		assert( !thread_decl );
+		thread_decl = decl;
+	}
+}
+
+const ast::FunctionDecl * ThreadStarter::postvisit( const ast::FunctionDecl * decl ) {
+	if ( !CodeGen::isConstructor( decl->name ) ) return decl;
+
+	// Search for the thread constructor.
+	// (Are the "prefixes" of these two blocks the same?)
+	const ast::Type * typeof_this = InitTweak::getTypeofThis( decl->type );
+	auto ctored_type = dynamic_cast<const ast::StructInstType *>( typeof_this );
+	if ( ctored_type && ctored_type->base == thread_decl ) {
+		thread_ctor_seen = true;
+	}
+
+	// Modify this declaration; the extra checks for whether it should be modified come first.
+	const ast::ptr<ast::DeclWithType> & param = decl->params.front();
+	auto type = dynamic_cast<const ast::StructInstType *>(
+		InitTweak::getPointerBase( param->get_type() ) );
+	if ( nullptr == type ) return decl;
+	if ( !type->base->is_thread() ) return decl;
+	if ( !thread_decl || !thread_ctor_seen ) {
+		SemanticError( type->base->location, "thread keyword requires threads to be in scope, add #include <thread.hfa>" );
+	}
+	const ast::CompoundStmt * stmt = decl->stmts;
+	if ( nullptr == stmt ) return decl;
+
+	// Now do the actual modification:
+	ast::CompoundStmt * mutStmt = ast::mutate( stmt );
+	const CodeLocation & location = mutStmt->location;
+	mutStmt->push_back(
+		new ast::ExprStmt(
+			location,
+			new ast::UntypedExpr(
+				location,
+				new ast::NameExpr( location, "__thrd_start" ),
+				{
+					new ast::VariableExpr( location, param ),
+					new ast::NameExpr( location, "main" ),
+				}
+			)
+		)
+	);
+
+	return ast::mutate_field( decl, &ast::FunctionDecl::stmts, mutStmt );
+}
+
+} // namespace
+
+// --------------------------------------------------------------------------
+
+void implementKeywords( ast::TranslationUnit & translationUnit ) {
+	(void)translationUnit;
+	assertf(false, "Apply Keywords not implemented." );
+}
+
+void implementMutex( ast::TranslationUnit & translationUnit ) {
+	ast::Pass<MutexKeyword>::run( translationUnit );
+}
+
+void implementThreadStarter( ast::TranslationUnit & translationUnit ) {
+	ast::Pass<ThreadStarter>::run( translationUnit );
+}
+
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// mode: c++ //
+// compile-command: "make install" //
+// End: //
Index: src/Concurrency/module.mk
===================================================================
--- src/Concurrency/module.mk	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ src/Concurrency/module.mk	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -15,5 +15,13 @@
 ###############################################################################
 
-SRC += Concurrency/Keywords.cc Concurrency/Keywords.h Concurrency/Waitfor.cc Concurrency/Waitfor.h
-SRCDEMANGLE += Concurrency/Keywords.cc
+SRC_CONCURRENCY = \
+	Concurrency/KeywordsNew.cpp \
+	Concurrency/Keywords.cc
 
+SRC += $(SRC_CONCURRENCY) \
+	Concurrency/Keywords.h \
+	Concurrency/Waitfor.cc \
+	Concurrency/Waitfor.h
+
+SRCDEMANGLE += $(SRC_CONCURRENCY)
+
Index: src/InitTweak/InitTweak.cc
===================================================================
--- src/InitTweak/InitTweak.cc	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ src/InitTweak/InitTweak.cc	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -9,7 +9,7 @@
 // Author           : Rob Schluntz
 // Created On       : Fri May 13 11:26:36 2016
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Jun 16 20:57:22 2021
-// Update Count     : 18
+// Last Modified By : Andrew Beach
+// Last Modified On : Fri Nov 19 19:22:00 2021
+// Update Count     : 19
 //
 
@@ -540,4 +540,14 @@
 	}
 
+	const ast::Type * getTypeofThis( const ast::FunctionType * ftype ) { // New-AST overload: returns the base of the first parameter's reference type.
+		assertf( ftype, "getTypeofThis: nullptr ftype" ); // A null function type is a caller error.
+		const std::vector<ast::ptr<ast::Type>> & params = ftype->params;
+		assertf( !params.empty(), "getTypeofThis: ftype with 0 parameters: %s",
+				toString( ftype ).c_str() ); // params.front() below requires at least one parameter.
+		const ast::ReferenceType * refType =
+			params.front().strict_as<ast::ReferenceType>(); // NOTE(review): strict_as presumably fails hard when the first parameter is not a reference type -- confirm.
+		return refType->base; // The type the reference refers to.
+	}
+
 	ObjectDecl * getParamThis( FunctionType * ftype ) {
 		assertf( ftype, "getParamThis: nullptr ftype" );
Index: src/InitTweak/InitTweak.h
===================================================================
--- src/InitTweak/InitTweak.h	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ src/InitTweak/InitTweak.h	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -10,6 +10,6 @@
 // Created On       : Fri May 13 11:26:36 2016
 // Last Modified By : Andrew Beach
-// Last Modified On : Fri Jul 19 14:18:00 2019
-// Update Count     : 6
+// Last Modified On : Fri Nov 19 14:18:00 2021
+// Update Count     : 7
 //
 
@@ -35,4 +35,5 @@
 	/// returns the base type of the first parameter to a constructor/destructor/assignment function
 	Type * getTypeofThis( FunctionType * ftype );
+	const ast::Type * getTypeofThis( const ast::FunctionType * ftype );
 
 	/// returns the first parameter of a constructor/destructor/assignment function
Index: src/Validate/CompoundLiteral.cpp
===================================================================
--- src/Validate/CompoundLiteral.cpp	(revision 6c53a93454697f8b549b141d01803272ff074931)
+++ src/Validate/CompoundLiteral.cpp	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -0,0 +1,70 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2018 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// CompoundLiteral.cpp -- Use variables to implement compound literals.
+//
+// Author           : Andrew Beach
+// Created On       : Mon Nov 15 16:33:00 2021
+// Last Modified By : Andrew Beach
+// Last Modified On : Tue Nov 16  9:47:00 2021
+// Update Count     : 0
+//
+
+#include "CompoundLiteral.hpp"
+
+#include "AST/Decl.hpp"
+#include "AST/Expr.hpp"
+#include "AST/Pass.hpp"
+#include "AST/TranslationUnit.hpp"
+#include "Common/UniqueName.h"
+
+namespace Validate {
+
+namespace {
+
+struct CompoundLiteral final : // Visitor run through ast::Pass that replaces compound literal expressions with variables.
+		public ast::WithDeclsToAdd<> { // Supplies declsToAddBefore, which postvisit uses to emit the new declarations.
+	ast::Storage::Classes storageClasses; // Storage classes copied from the most recently visited ObjectDecl.
+
+	void previsit( const ast::ObjectDecl * decl ); // Records decl->storage.
+	const ast::Expr * postvisit( const ast::CompoundLiteralExpr * expr ); // Replaces the literal with a reference to a new temporary.
+};
+
+void CompoundLiteral::previsit( const ast::ObjectDecl * decl ) { // Remember the storage classes of the object declaration being entered.
+	storageClasses = decl->storage; // Used by postvisit for the temporaries it creates. NOTE(review): never reset when the declaration is left -- confirm that is intended.
+}
+
+const ast::Expr * CompoundLiteral::postvisit( // Returns the expression that takes the place of the compound literal.
+		const ast::CompoundLiteralExpr * expr ) {
+	static UniqueName litName( "_compLit" ); // One generator shared by every call; presumably yields a distinct "_compLit"-based name each time.
+
+	// Transform: [storageClasses] ... (struct S){...} ...
+	// Into:      [storageClasses] struct S _compLit = {...};
+	//                             ... _compLit ...
+	ast::ObjectDecl * temp = new ast::ObjectDecl(
+		expr->location, // The temporary reuses the literal's source location,
+		litName.newName(),
+		expr->result, // its result type,
+		expr->init, // and its initializer.
+		storageClasses // Storage classes last recorded by previsit.
+		);
+	declsToAddBefore.push_back( temp ); // Queue the temporary's declaration so it is emitted ahead of its use.
+	return new ast::VariableExpr( expr->location, temp ); // The literal becomes a reference to the temporary.
+}
+
+} // namespace
+
+void handleCompoundLiterals( ast::TranslationUnit & translationUnit ) { // Public entry point: use variables to implement compound literals.
+	ast::Pass<CompoundLiteral>::run( translationUnit ); // Runs the CompoundLiteral visitor over the given translation unit.
+}
+
+} // namespace Validate
+
+// Local Variables: //
+// tab-width: 4 //
+// mode: c++ //
+// compile-command: "make install" //
+// End: //
Index: src/Validate/CompoundLiteral.hpp
===================================================================
--- src/Validate/CompoundLiteral.hpp	(revision 6c53a93454697f8b549b141d01803272ff074931)
+++ src/Validate/CompoundLiteral.hpp	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -0,0 +1,33 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2018 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// CompoundLiteral.hpp -- Use variables to implement compound literals.
+//
+// Author           : Andrew Beach
+// Created On       : Mon Nov 15 16:37:00 2021
+// Last Modified By : Andrew Beach
+// Last Modified On : Mon Nov 15 17:56:00 2021
+// Update Count     : 0
+//
+
+#pragma once
+
+namespace ast {
+	class TranslationUnit;
+}
+
+namespace Validate {
+
+/// Use variables to implement compound literals.
+void handleCompoundLiterals( ast::TranslationUnit & translationUnit );
+
+}
+
+// Local Variables: //
+// tab-width: 4 //
+// mode: c++ //
+// compile-command: "make install" //
+// End: //
Index: src/Validate/module.mk
===================================================================
--- src/Validate/module.mk	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ src/Validate/module.mk	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -16,4 +16,6 @@
 
 SRC_VALIDATE = \
+	Validate/CompoundLiteral.cpp \
+	Validate/CompoundLiteral.hpp \
 	Validate/HandleAttributes.cc \
 	Validate/HandleAttributes.h \
Index: src/main.cc
===================================================================
--- src/main.cc	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ src/main.cc	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -10,6 +10,6 @@
 // Created On       : Fri May 15 23:12:02 2015
 // Last Modified By : Andrew Beach
-// Last Modified On : Fri Nov 12 11:06:00 2021
-// Update Count     : 658
+// Last Modified On : Tue Nov 30 10:25:00 2021
+// Update Count     : 659
 //
 
@@ -50,4 +50,5 @@
 #include "Common/UnimplementedError.h"      // for UnimplementedError
 #include "Common/utility.h"                 // for deleteAll, filter, printAll
+#include "Concurrency/Keywords.h"           // for implementMutex, implement...
 #include "Concurrency/Waitfor.h"            // for generateWaitfor
 #include "ControlStruct/ExceptDecl.h"       // for translateExcept
@@ -73,4 +74,5 @@
 #include "Tuples/Tuples.h"                  // for expandMemberTuples, expan...
 #include "Validate/FindSpecialDecls.h"      // for findGlobalDecls
+#include "Validate/CompoundLiteral.hpp"     // for handleCompoundLiterals
 #include "Validate/InitializerLength.hpp"   // for setLengthFromInitializer
 #include "Validate/LabelAddressFixer.hpp"   // for fixLabelAddresses
@@ -325,5 +327,4 @@
 		PASS( "Validate-C", SymTab::validate_C( translationUnit ) );
 		PASS( "Validate-D", SymTab::validate_D( translationUnit ) );
-		PASS( "Validate-E", SymTab::validate_E( translationUnit ) );
 
 		CodeTools::fillLocations( translationUnit );
@@ -338,4 +339,7 @@
 			forceFillCodeLocations( transUnit );
 
+			PASS( "Implement Mutex", Concurrency::implementMutex( transUnit ) );
+			PASS( "Implement Thread Start", Concurrency::implementThreadStarter( transUnit ) );
+			PASS( "Compound Literal", Validate::handleCompoundLiterals( transUnit ) );
 			PASS( "Set Length From Initializer", Validate::setLengthFromInitializer( transUnit ) );
 			PASS( "Find Global Decls", Validate::findGlobalDecls( transUnit ) );
@@ -402,4 +406,5 @@
 			translationUnit = convert( move( transUnit ) );
 		} else {
+			PASS( "Validate-E", SymTab::validate_E( translationUnit ) );
 			PASS( "Validate-F", SymTab::validate_F( translationUnit ) );
 
Index: tests/.expect/random.x64.txt
===================================================================
--- tests/.expect/random.x64.txt	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ tests/.expect/random.x64.txt	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -1,17 +1,17 @@
 õ
 =
-V
+K
 -911259971
 6
--4
+11
 1232105397
 0
-18
+11
 -914096085
 1
-15
+20
 2077092859
 1
-11
+12
 0.677254
 0.678106775246139
Index: tests/.expect/random.x86.txt
===================================================================
--- tests/.expect/random.x86.txt	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ tests/.expect/random.x86.txt	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -1,17 +1,17 @@
 õ
 =
-V
+K
 -911259971
 6
--4
+11
 1232105397
 0
-18
+11
 -914096085
 1
-15
+20
 2077092859
 1
-11
+12
 0.677254
 0.678106775246139
Index: tests/algorithms/.expect/range_test.txt
===================================================================
--- tests/algorithms/.expect/range_test.txt	(revision 6c53a93454697f8b549b141d01803272ff074931)
+++ tests/algorithms/.expect/range_test.txt	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -0,0 +1,6 @@
+0 1 2 3 4 5 6
+0
+0 1 2 3 4 5 6 22 23 24 25 26 27 28 29 30
+0 2 4 6 8 10
+0 1 2 3 4 5 6 8 9 10 11 12 13 14 16 32
+0 1 2 3 4 6 8 9 10 11 12 14 16 17 18 19 20
Index: tests/algorithms/range_test.cfa
===================================================================
--- tests/algorithms/range_test.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
+++ tests/algorithms/range_test.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -0,0 +1,34 @@
+//
+// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
+//
+// The contents of this file are covered under the licence agreement in the
+// file "LICENCE" distributed with Cforall.
+//
+// range_test.cfa --
+//
+// Author           : Thierry Delisle
+// Created On       : Thu Dec 02 17:13:24 2021
+// Last Modified By :
+// Last Modified On :
+// Update Count     :
+//
+
+#include <fstream.hfa>
+#include <algorithms/range_iterator.hfa>
+
+void print(const char * range){ // Prints every value produced by iterating over the range string, then ends the line.
+	RangeIter r = { range }; // Iterator constructed from the textual range description.
+	while(moveNext(r)) { // Loop until moveNext reports false.
+		sout | r.com | nonl; // Print the iterator's com field; nonl presumably suppresses the newline so values share one line -- confirm.
+	}
+	sout | nl; // Terminate the output line.
+}
+
+int main() { // Prints the expansion of several range strings, one per output line.
+	print("0-6"); // Single range.
+	print("0"); // Single value.
+	print("0-6,22-30"); // Two ranges.
+	print("0,2,4,6,8,10"); // Comma-separated single values.
+	print("0-6,8-14,16,32"); // Ranges followed by single values.
+	print("0-4,6,8-12,14,16-20"); // Ranges and single values interleaved.
+}
Index: tests/concurrent/.expect/ctor-check.txt
===================================================================
--- tests/concurrent/.expect/ctor-check.txt	(revision 6c53a93454697f8b549b141d01803272ff074931)
+++ tests/concurrent/.expect/ctor-check.txt	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -0,0 +1,8 @@
+concurrent/ctor-check.cfa:11:1 error: constructors cannot have mutex parameters
+?{}: function
+... with parameters
+  lvalue reference to instance of struct Empty with body
+... returning nothing
+ with body
+  Compound Statement:
+
Index: tests/concurrent/ctor-check.cfa
===================================================================
--- tests/concurrent/ctor-check.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
+++ tests/concurrent/ctor-check.cfa	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -0,0 +1,15 @@
+#include <monitor.hfa>
+
+monitor Empty {};
+
+struct Test {};
+
+// Should work:
+void ?{}(Test & this, Empty & mutex mon) {}
+
+// Should not work:
+void ?{}(Empty & mutex this) {}
+
+int main(void) {
+	printf("done\n");
+}
Index: tests/pybin/tools.py
===================================================================
--- tests/pybin/tools.py	(revision e2853eb86b033c1a5f683bbe898f3733235771dc)
+++ tests/pybin/tools.py	(revision 6c53a93454697f8b549b141d01803272ff074931)
@@ -374,5 +374,5 @@
 
 	if not os.path.isfile(core):
-		return 1, "ERR No core dump"
+		return 1, "ERR No core dump (limit soft: {} hard: {})".format(*resource.getrlimit(resource.RLIMIT_CORE))
 
 	try:
