Index: Jenkinsfile
===================================================================
--- Jenkinsfile	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ Jenkinsfile	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -161,7 +161,11 @@
 		Tools.BuildStage('Test: full', Settings.RunAllTests) {
 			dir (BuildDir) {
+					jopt = ""
+					if( Settings.Architecture.node == 'x86' ) {
+						jopt = "-j2"
+					}
 					//Run the tests from the tests directory
-					sh """make --no-print-directory -C tests timeouts="--timeout=600 --global-timeout=14400" all-tests debug=yes archiveerrors=${BuildDir}/tests/crashes/full-debug"""
-					sh """make --no-print-directory -C tests timeouts="--timeout=600 --global-timeout=14400" all-tests debug=no  archiveerrors=${BuildDir}/tests/crashes/full-nodebug"""
+					sh """make ${jopt} --no-print-directory -C tests timeouts="--timeout=600 --global-timeout=14400" all-tests debug=yes archiveerrors=${BuildDir}/tests/crashes/full-debug"""
+					sh """make ${jopt} --no-print-directory -C tests timeouts="--timeout=600 --global-timeout=14400" all-tests debug=no  archiveerrors=${BuildDir}/tests/crashes/full-nodebug"""
 			}
 		}
Index: benchmark/io/http/Makefile.am
===================================================================
--- benchmark/io/http/Makefile.am	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ benchmark/io/http/Makefile.am	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -50,2 +50,18 @@
 .dummy_hackxx.cpp:
 	@echo "int bar() { return 0; }" > ${@}
+
+# add dependency of cfa files
+nodist_httpforall_OBJECTS = $(addsuffix .o, $(basename $(filter %.cfa,$(nodist_httpforall_SOURCES))))
+$(nodist_httpforall_OBJECTS) : @CFACC@ @CFACPP@
+
+# .deps inclusion is not done automatically by automake for new languages
+nodist_httpforall_DEPENDS = $(join \
+	$(addsuffix $(DEPDIR)/ , $(dir $(nodist_httpforall_OBJECTS) ) ), \
+	$(notdir ${nodist_httpforall_OBJECTS:.o=.Po}) \
+)
+
+-include $(nodist_httpforall_DEPENDS)
+# debugging aid: print the object list and the dependency-file list computed above
+list_libdeps:
+	echo "objects: " $(nodist_httpforall_OBJECTS)
+	echo "depends: " $(nodist_httpforall_DEPENDS)
Index: benchmark/io/sendfile/consumer.c
===================================================================
--- benchmark/io/sendfile/consumer.c	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
+++ benchmark/io/sendfile/consumer.c	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -0,0 +1,142 @@
+// Simple sink program that opens a tcp socket on a random port
+// and then reads everything the socket has to write and then ends.
+
+#define _GNU_SOURCE
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <errno.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+enum { // process exit codes
+	USAGE_ERROR = 1,
+	SOCKET_ERROR,
+	BIND_INUSE,
+	BIND_ERROR,
+	GETNAME_ERROR,
+	LISTEN_ERROR,
+	ACCEPT_ERROR,
+	READ_ERROR
+};
+
+enum { buffer_len = 10240 }; // size in bytes of the receive buffer
+char buffer[buffer_len]; // target of every recv in main; its contents are never inspected, only counted
+
+enum { TIMEGRAN = 1000000000LL }; // nanoseconds per second, used to turn timespec values into seconds
+
+// Sink server: binds a TCP socket to the requested port (or a kernel-chosen one), then drains each accepted connection and prints throughput statistics.
+int main(int argc, char * argv[]) {
+	int port = 0; // 0 leaves the choice of port to bind; the port actually bound is printed below
+	switch(argc) {
+	case 1: break;
+	case 2:
+		{
+			const char * const str = argv[1];
+			char* endptr;
+			errno = 0; // strtoul reports overflow only through errno, so clear any stale value first
+			unsigned long ret = strtoul(str, &endptr, 10);
+			if(endptr == str || '\0' != *endptr) { fprintf( stderr, "Invalid argument \"%s\" (not a number)\n", str ); goto USAGE;}
+			if(errno == ERANGE || ret > 65535)  { fprintf( stderr, "Invalid argument \"%s\" (value too large)\n", str ); goto USAGE;}
+			port = (int)ret;
+			break;
+		}
+	USAGE:
+	default:
+		fprintf( stderr, "USAGE: %s [port]\n", argv[0] );
+		exit( USAGE_ERROR );
+	}
+
+	printf( "%d: Listening on port %d\n", getpid(), port );
+	int listenfd = socket(AF_INET, SOCK_STREAM, 0);
+	if(listenfd < 0) {
+		fprintf( stderr, "socket error: (%d) %s\n", (int)errno, strerror(errno) );
+		exit( SOCKET_ERROR );
+	}
+
+	int ret = 0;
+	struct sockaddr_in address;
+	socklen_t addrlen = sizeof(address);
+	memset( (char *)&address, '\0', addrlen );
+	address.sin_family = AF_INET;
+	address.sin_addr.s_addr = htonl( INADDR_ANY );
+	address.sin_port = htons( port );
+
+	ret = bind( listenfd, (struct sockaddr *) &address, addrlen );
+	if(ret < 0) {
+		if(errno == EADDRINUSE) {
+			fprintf( stderr, "Port already in use in non-interactive mode. Aborting\n" );
+			exit( BIND_INUSE );
+		}
+		fprintf( stderr, "bind error: (%d) %s\n", (int)errno, strerror(errno) );
+		exit( BIND_ERROR );
+	}
+
+	ret = getsockname( listenfd, (struct sockaddr *) &address, &addrlen ); // read back the port that was really bound
+	if(ret < 0) {
+		fprintf( stderr, "getname error: (%d) %s\n", (int)errno, strerror(errno) );
+		exit(GETNAME_ERROR);
+	}
+
+	printf( "actial port: %d\n", ntohs(address.sin_port) );
+
+	ret = listen( listenfd, 1 );
+	if(ret < 0) {
+		fprintf( stderr, "listen error: (%d) %s\n", (int)errno, strerror(errno) );
+		exit( LISTEN_ERROR ); // was a literal 5, which is GETNAME_ERROR's value
+	}
+
+	for(;;) {
+		struct sockaddr_in cli_addr;
+		socklen_t clilen = sizeof(cli_addr);
+		int fd = accept( listenfd, (struct sockaddr *) &cli_addr, &clilen );
+		if(fd < 0) {
+			fprintf( stderr, "accept error: (%d) %s\n", (int)errno, strerror(errno) );
+			exit( ACCEPT_ERROR );
+		}
+
+		int error = 0; // exit code to use once this connection's statistics are printed; 0 keeps serving
+		size_t calls = 0;
+		size_t bytes = 0;
+
+		struct timespec after, before;
+		clock_gettime(CLOCK_MONOTONIC, &before);
+
+		for(;;) {
+			ret = recv(fd, buffer, buffer_len, 0);
+			if(ret == 0 ) goto EXIT; // peer closed the connection
+			if(ret < 0 ) {
+				if( errno == EAGAIN || errno == EWOULDBLOCK) continue;
+				if( errno == ECONNRESET ) { printf("Connection reset\n"); goto EXIT; }
+				if( errno == EPIPE ) { printf("Pipe closed\n"); goto EXIT; }
+				fprintf( stderr, "recv error: (%d) %s\n", (int)errno, strerror(errno) );
+				error = READ_ERROR;
+				goto EXIT;
+			}
+			calls++;
+			bytes += ret;
+		}
+		EXIT:;
+
+		clock_gettime(CLOCK_MONOTONIC, &after);
+
+		uint64_t tb = ((int64_t)before.tv_sec * TIMEGRAN) + before.tv_nsec;
+		uint64_t ta = ((int64_t)after.tv_sec * TIMEGRAN) + after.tv_nsec;
+		double secs = ((double)ta - tb) / TIMEGRAN;
+
+		printf("Received %'zu bytes in %'zu reads, %f seconds\n", bytes, calls, secs);
+		printf(" - %'3.3f bytes per second\n", (((double)bytes) / secs));
+		printf(" - %'3.3f bytes per calls\n", (((double)bytes) / calls));
+
+		close(fd);
+		if(error != 0) exit( error );
+	}
+	close(listenfd);
+	return 0;
+}
Index: benchmark/io/sendfile/producer.c
===================================================================
--- benchmark/io/sendfile/producer.c	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
+++ benchmark/io/sendfile/producer.c	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -0,0 +1,509 @@
+// program that sends a file many times, as fast as it can
+// compares sendfile to splice
+
+#define _GNU_SOURCE
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <errno.h>
+#include <locale.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/sendfile.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+
+#include <liburing.h>
+
+enum { // process exit codes
+	USAGE_ERROR = 1,
+	HOST_ERROR,
+	PIPE_ERROR,
+	FSTAT_ERROR,
+	SOCKET_ERROR,
+	CONNECT_ERROR,
+	SENDFILE_ERROR,
+	SPLICEIN_ERROR,
+	SPLICEOUT_ERROR,
+	URINGWAIT_ERROR
+};
+
+enum { buffer_len = 10240 };
+char buffer[buffer_len]; // NOTE(review): not referenced by any function in this file
+
+enum { TIMEGRAN = 1000000000LL, TIMES = 100000 }; // TIMEGRAN: nanoseconds per second; TIMES: how often run() invokes its sender
+
+int pipefd[2]; // pipe created in main; the splice-based senders stage file data in it
+struct io_uring ring; // initialised in main; used by naive_splice and my_ringlink
+
+char * buf; // heap buffer of the file's size, allocated in main and used by my_readwrit
+
+struct stats { // counters a sender accumulates over one run()
+	size_t calls; // printed by run() as the number of files sent
+	size_t bytes; // total bytes sent
+	struct {
+		struct {
+			size_t cnt; // how many transfers moved only part of what was requested
+			size_t bytes; // bytes moved by those partial transfers
+		} r, w; // r: input (file) side, w: output (socket) side
+	} shorts;
+};
+static void my_sendfile(int out, int in, size_t size, struct stats *); // sendfile(2)
+static void my_splice  (int out, int in, size_t size, struct stats *); // splice(2) through pipefd
+static void my_iouring (int out, int in, size_t size, struct stats *); // same loop as my_splice, each splice issued via naive_splice
+static void my_ringlink(int out, int in, size_t size, struct stats *); // both splices queued on the ring, the first flagged IOSQE_IO_LINK
+static void my_readwrit(int out, int in, size_t size, struct stats *); // pread(2) + write(2) through buf
+typedef void (*sender_t)(int out, int in, size_t size, struct stats *); // common signature so run() can drive any sender
+
+static void run(sender_t sender, struct addrinfo * addr, int infd, size_t size);
+
+// Entry point: opens the file, resolves host:port, then sends the file TIMES times with each transfer method and prints statistics.
+int main(int argc, char * argv[]) {
+	setlocale(LC_ALL, ""); // adopt the user's locale so the %' flag in the statistics can group digits
+	const char * file_path;
+	struct addrinfo * addr;
+	int file_fd;
+	int ret;
+	switch(argc) {
+	case 3:
+		{
+			// Open the file
+			const char * const path = argv[2];
+			ret = open(path, O_RDONLY); // flags are the second argument; no mode is needed without O_CREAT
+			if(ret < 0) {
+				fprintf( stderr, "cannot open file '%s': %s\n\n", path, strerror(errno) );
+				goto USAGE;
+			}
+
+			file_path = path;
+			file_fd = ret;
+
+			// connect to the address
+			char * state = 0;
+			char * str = argv[1];
+			const char * const host = strtok_r(str, ":", &state);
+			if(NULL == host) {
+				fprintf( stderr, "Invalid host:port specification, no host.\n\n" );
+				goto USAGE;
+			}
+
+			const char * const port = strtok_r(NULL, ":", &state);
+			if(NULL == port) {
+				fprintf( stderr, "Invalid host:port specification, no port.\n\n" );
+				goto USAGE;
+			}
+
+			printf("looking up '%s:%s'\n", host, port);
+
+			struct addrinfo hints = {};
+			struct addrinfo * pResultList = NULL;
+
+			hints.ai_family = AF_INET;
+			hints.ai_socktype = SOCK_STREAM;
+			hints.ai_flags = AI_NUMERICSERV;
+
+			ret = getaddrinfo(host, port, &hints, &pResultList);
+
+			switch(ret) { // cases that break fall out to the usage message; the others exit with HOST_ERROR
+			case 0:
+				addr = pResultList;
+				goto DONE;
+
+			case EAI_ADDRFAMILY:
+				fprintf( stderr, "The specified network host does not have any network addresses in the requested address family.\n\n" );
+				break;
+
+			case EAI_AGAIN:
+				fprintf( stderr, "The name server returned a temporary failure indication. Try again later.\n\n" );
+				exit( HOST_ERROR );
+
+			case EAI_BADFLAGS:
+				fprintf( stderr, "hints.ai_flags  contains invalid flags; or, hints.ai_flags included AI_CANONNAME and name was NULL.\n\n" );
+				exit( HOST_ERROR );
+
+			case EAI_FAIL:
+				fprintf( stderr, "The name server returned a permanent failure indication.\n\n" );
+				break;
+
+			case EAI_FAMILY:
+				fprintf( stderr, "The requested address family is not supported.\n\n" );
+				exit( HOST_ERROR );
+
+			case EAI_MEMORY:
+				fprintf( stderr, "Out of memory.\n\n" );
+				exit( HOST_ERROR );
+
+			case EAI_NODATA:
+				fprintf( stderr, "The specified network host exists, but does not have any network addresses defined.\n\n" );
+				break;
+
+			case EAI_NONAME:
+				fprintf( stderr, "The unkonwn host or invalid port.\n\n" );
+				break;
+
+			case EAI_SERVICE:
+				fprintf( stderr, "The requested service is not available for the requested socket type.\n\n" );
+				break;
+
+			case EAI_SOCKTYPE:
+				fprintf( stderr, "The requested  socket  type  is  not  supported.\n\n" );
+				exit( HOST_ERROR );
+
+			case EAI_SYSTEM:
+				// Other system error, check errno for details.
+			default:
+				fprintf( stderr, "Unnown hostname error: (%d) %s\n\n", (int)errno, strerror(errno) );
+				exit( HOST_ERROR );
+			}
+			if(pResultList) freeaddrinfo(pResultList);
+			goto USAGE;
+		}
+	USAGE:
+	default:
+		fprintf( stderr, "USAGE: %s host:port file\n", argv[0] );
+		exit( USAGE_ERROR );
+	}
+
+	DONE:
+	ret = io_uring_queue_init(16, &ring, 0); // no dedicated exit code exists for ring setup, so the io_uring one is reused
+	if(ret < 0) { fprintf( stderr, "io_uring_queue_init error: (%d) %s\n\n", (int)-ret, strerror(-ret) ); exit( URINGWAIT_ERROR ); }
+
+	size_t file_size = 0;
+	{
+		struct stat info; // named so it does not shadow the global buf
+		ret = fstat(file_fd, &info);
+		if(0 != ret) {
+			fprintf( stderr, "fstat error: (%d) %s\n\n", (int)errno, strerror(errno) );
+			exit( FSTAT_ERROR );
+		}
+		file_size = info.st_size;
+	}
+
+	{
+		char addr_str[INET_ADDRSTRLEN];
+		struct sockaddr_in * address = (struct sockaddr_in *) addr->ai_addr;
+		inet_ntop( AF_INET, &address->sin_addr, addr_str, INET_ADDRSTRLEN );
+		printf("sending '%s' (%zu bytes) to '%s:%i'\n", file_path, file_size, addr_str, ntohs(address->sin_port));
+	}
+
+	ret = pipe(pipefd);
+	if( ret < 0 ) {
+		fprintf( stderr, "pipe error: (%d) %s\n\n", (int)errno, strerror(errno) );
+		exit( PIPE_ERROR );
+	}
+
+	buf = malloc(file_size);
+
+	printf("--- read + write ---\n");
+	run(my_readwrit, addr, file_fd, file_size);
+	printf("--- splice ---\n");
+	run(my_splice  , addr, file_fd, file_size);
+	printf("--- sendfile ---\n");
+	run(my_sendfile, addr, file_fd, file_size);
+	printf("--- io_uring ---\n");
+	run(my_iouring, addr, file_fd, file_size);
+	printf("--- io_uring + link ---\n");
+	run(my_ringlink, addr, file_fd, file_size);
+
+	close(pipefd[0]);
+	close(pipefd[1]);
+	close(file_fd);
+	return 0;
+}
+
+// Connects a fresh socket to addr, sends the file TIMES times through sender, then prints throughput and short-transfer statistics.
+static void run(sender_t sender, struct addrinfo * addr, int infd, size_t size) {
+	int sock = socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol);
+	if(sock < 0) {
+		fprintf( stderr, "socket error: (%d) %s\n\n", (int)errno, strerror(errno) );
+		exit( SOCKET_ERROR );
+	}
+
+	int ret = connect(sock, addr->ai_addr, addr->ai_addrlen);
+	if(ret < 0) {
+		fprintf( stderr, "connect error: (%d) %s\n\n", (int)errno, strerror(errno) );
+		exit( CONNECT_ERROR );
+	}
+
+	struct stats st;
+	st.calls = 0;
+	st.bytes = 0;
+	st.shorts.r.cnt = 0;
+	st.shorts.r.bytes = 0;
+	st.shorts.w.cnt = 0;
+	st.shorts.w.bytes = 0;
+
+	struct timespec after, before;
+
+	clock_gettime(CLOCK_MONOTONIC, &before);
+
+	for(long long int i = 0; i < TIMES; i++) {
+		sender( sock, infd, size, &st );
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &after);
+
+	close(sock);
+
+	uint64_t tb = ((int64_t)before.tv_sec * TIMEGRAN) + before.tv_nsec;
+	uint64_t ta = ((int64_t)after.tv_sec * TIMEGRAN) + after.tv_nsec;
+	double secs = ((double)ta - tb) / TIMEGRAN;
+
+	printf("Sent %'zu bytes in %'zu files, %f seconds\n", st.bytes, st.calls, secs);
+	printf(" - %'3.3f bytes per second\n", (((double)st.bytes) / secs));
+	printf(" - %'f seconds per file\n", secs / st.calls);
+	printf(" - %'3.3f bytes per calls\n", (((double)st.bytes) / st.calls));
+	if(st.shorts.r.cnt ){
+		printf(" - %'zu short reads\n", st.shorts.r.cnt);
+		printf(" - %'3.3f bytes per short read\n", (((double)st.shorts.r.bytes) / st.shorts.r.cnt));
+	} else printf("No short reads\n");
+	if(st.shorts.w.cnt ){ // write-side counters: label them as writes, not reads
+		printf(" - %'zu short writes\n", st.shorts.w.cnt);
+		printf(" - %'3.3f bytes per short write\n", (((double)st.shorts.w.bytes) / st.shorts.w.cnt));
+	} else printf("No short writes\n");
+}
+
+// Sends the first `size` bytes of `in` to `out` with sendfile(2), retrying after short transfers.
+static void my_sendfile(int out, int in, size_t size, struct stats * st) {
+	off_t off = 0;
+	for(;;) {
+		ssize_t ret = sendfile(out, in, &off, size); // sendfile itself moves off past the bytes it read, so off is not adjusted here
+		if(ret < 0) {
+			fprintf( stderr, "sendfile error: (%d) %s\n\n", (int)errno, strerror(errno) );
+			exit( SENDFILE_ERROR );
+		}
+
+		st->bytes += ret;
+		size -= ret;
+		if( size == 0 ) break;
+		if( ret == 0 ) { fprintf( stderr, "sendfile error: unexpected end of file\n\n" ); exit( SENDFILE_ERROR ); } // would otherwise spin forever
+		st->shorts.r.cnt++;
+		st->shorts.r.bytes += ret;
+	}
+	st->calls++; // counted once per file, as in the other senders, because run() prints calls as files
+}
+
+// Sends `size` bytes of `in` to `out` with splice(2), staging the data in the global pipe and retrying after short transfers.
+static void my_splice  (int out, int in, size_t size, struct stats * st) {
+	unsigned flags = 0; //SPLICE_F_MOVE; // | SPLICE_F_MORE;
+	off_t offset = 0; // never adjusted here: splice is relied on to move it forward after each read
+	size_t writes = 0;
+	for(;;) {
+		ssize_t reti = splice(in, &offset, pipefd[1], NULL, size, flags);
+		if( reti < 0 ) {
+			fprintf( stderr, "splice in error: (%d) %s\n\n", (int)errno, strerror(errno) );
+			exit( SPLICEIN_ERROR );
+		}
+		if( 0 == reti && 0 != size ) { fprintf( stderr, "splice in error: unexpected end of file\n\n" ); exit( SPLICEIN_ERROR ); } // would otherwise spin forever
+
+		size -= reti;
+		size_t in_pipe = reti; // bytes just placed in the pipe that still have to reach out
+		for(;;) {
+			ssize_t reto = splice(pipefd[0], NULL, out, NULL, in_pipe, flags);
+			if( reto < 0 ) {
+				fprintf( stderr, "splice out error: (%d) %s\n\n", (int)errno, strerror(errno) );
+				exit( SPLICEOUT_ERROR );
+			}
+			in_pipe -= reto;
+			writes += reto;
+			if(0 == in_pipe) break;
+			st->shorts.w.cnt++;
+			st->shorts.w.bytes += reto;
+		}
+		if(0 == size) break;
+		st->shorts.r.cnt++;
+		st->shorts.r.bytes += reti;
+	}
+	st->calls++;
+	st->bytes += writes;
+}
+
+// splice(2) look-alike on the global ring: submits one SQE, awaits its CQE; returns the bytes moved, or -1 with errno set.
+static ssize_t naive_splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags) {
+	struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_splice(sqe, fd_in, NULL != off_in ? *off_in: -1, fd_out, NULL != off_out ? *off_out: -1, len, flags);
+	io_uring_submit(&ring);
+
+	struct io_uring_cqe * cqe = NULL;
+	/* wait for the sqe to complete */
+	int ret = io_uring_wait_cqe_nr(&ring, &cqe, 1);
+	if(0 != ret) {
+		fprintf( stderr, "io_uring_wait error: (%d) %s\n\n", (int)-ret, strerror(-ret) );
+		exit( URINGWAIT_ERROR );
+	}
+
+	/* copy the result out, then hand the cqe back to the ring on every path */
+	ssize_t val = cqe->res;
+	io_uring_cqe_seen(&ring, cqe);
+	if( val < 0 ) {
+		printf("Completion Error : %s\n", strerror( (int)-val ));
+		errno = (int)-val; // callers test for a negative return and then report errno
+		return -1;
+	}
+
+	// the offsets were passed to the ring by value, so move the caller's copies forward here; a retry then resumes where this transfer stopped
+	if(NULL != off_in ) *off_in  += val;
+	if(NULL != off_out) *off_out += val;
+	return val;
+}
+
+// Same transfer loop as my_splice, but every splice is issued through the ring by naive_splice.
+static void my_iouring (int out, int in, size_t size, struct stats * st) {
+	unsigned flags = 0; //SPLICE_F_MOVE; // | SPLICE_F_MORE;
+	off_t offset = 0; // read position handed to naive_splice by pointer; never adjusted in this function
+	size_t writes = 0;
+	for(;;) {
+		ssize_t reti = naive_splice(in, &offset, pipefd[1], NULL, size, flags);
+		if( reti < 0 ) {
+			fprintf( stderr, "splice in error: (%d) %s\n\n", (int)errno, strerror(errno) );
+			exit( SPLICEIN_ERROR );
+		}
+		if( 0 == reti && 0 != size ) { fprintf( stderr, "splice in error: unexpected end of file\n\n" ); exit( SPLICEIN_ERROR ); } // would otherwise spin forever
+
+		size -= reti;
+		size_t in_pipe = reti; // bytes just placed in the pipe that still have to reach out
+		for(;;) {
+			ssize_t reto = naive_splice(pipefd[0], NULL, out, NULL, in_pipe, flags);
+			if( reto < 0 ) {
+				fprintf( stderr, "splice out error: (%d) %s\n\n", (int)errno, strerror(errno) );
+				exit( SPLICEOUT_ERROR );
+			}
+			in_pipe -= reto;
+			writes += reto;
+			if(0 == in_pipe) break;
+			st->shorts.w.cnt++;
+			st->shorts.w.bytes += reto;
+		}
+		if(0 == size) break;
+		st->shorts.r.cnt++;
+		st->shorts.r.bytes += reti;
+	}
+	st->calls++;
+	st->bytes += writes;
+}
+
+// Sends `size` bytes of `in` to `out` by queueing a file->pipe splice and a pipe->socket splice on the global ring and processing their completions.
+static void my_ringlink(int out, int in, size_t size, struct stats * st) {
+	enum { SPLICE_IN, SPLICE_OUT }; // tags stored in sqe->user_data to tell the two kinds of completion apart
+	size_t in_pipe = size; // bytes still to be delivered to out; starts at the full size, despite the name
+	off_t offset = 0; // file position of the next SPLICE_IN; advanced by hand when one completes
+	bool has_in = false; // true while a SPLICE_IN has been queued and its completion not yet seen
+	bool has_out = false; // true while a SPLICE_OUT has been queued and its completion not yet seen
+	while(true) {
+		if(!has_in && size > 0) {
+			struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
+			io_uring_prep_splice(sqe, in, offset, pipefd[1], -1, size, 0);
+			sqe->user_data = SPLICE_IN;
+			sqe->flags = IOSQE_IO_LINK; // presumably chains the SQE queued next behind this one — confirm against the io_uring docs
+			has_in = true;
+		}
+		if(!has_out) {
+			struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
+			io_uring_prep_splice(sqe, pipefd[0], -1, out, -1, in_pipe, 0); // asks for everything still owed, not just what the pipe holds
+			sqe->user_data = SPLICE_OUT;
+			has_out = true;
+		}
+
+		int ret = io_uring_submit_and_wait(&ring, 1);
+		if(ret < 0) {
+			fprintf( stderr, "io_uring_submit error: (%d) %s\n\n", (int)-ret, strerror(-ret) );
+			exit( URINGWAIT_ERROR );
+		}
+
+		/* process completions until io_uring_wait_cqe_nr reports -EAGAIN */
+		while(true) {
+			struct io_uring_cqe * cqe = NULL;
+			/* fetch the next completion; wait_nr is 0, presumably so this never blocks - TODO confirm */
+			int ret = io_uring_wait_cqe_nr(&ring, &cqe, 0);
+
+			/* read and process cqe event */
+			switch(ret) {
+			case 0:
+				if( cqe->res < 0 ) {
+					printf("Completion Error : %s\n", strerror( -cqe->res ));
+					exit( URINGWAIT_ERROR );
+				}
+
+				ssize_t write = cqe->res; // bytes moved by this operation (the local name shadows write(2))
+				int which = cqe->user_data;
+				io_uring_cqe_seen(&ring, cqe); // cqe is not touched after this point
+				switch( which ) {
+				case SPLICE_IN:
+					has_in = false;
+					size -= write;
+					offset += write;
+					if(0 == size) break; // whole file read: not a short read
+					st->shorts.r.cnt++;
+					st->shorts.r.bytes += write;
+					break;
+				case SPLICE_OUT:
+					has_out = false;
+					in_pipe -= write;
+					st->bytes += write;
+					if(0 == in_pipe) break; // everything delivered: not a short write
+					st->shorts.w.cnt++;
+					st->shorts.w.bytes += write;
+					break;
+				default:
+					printf("Completion Error : unknown user data\n");
+					exit( URINGWAIT_ERROR );
+				}
+				continue; // look for another completion
+			case -EAGAIN:
+				goto OUTER; // treated as "no more completions for now"
+			default:
+				fprintf( stderr, "io_uring_get_cqe error: (%d) %s\n\n", (int)-ret, strerror(-ret) );
+				exit( URINGWAIT_ERROR );
+			}
+		}
+		OUTER:
+		if(0 == in_pipe) break; // the file is finished once every byte has reached out
+	}
+	st->calls++;
+}
+
+// Copies `size` bytes from `in` to `out` through the global buf with pread(2) and write(2), resuming after short transfers.
+static void my_readwrit(int out, int in, size_t size, struct stats * st) {
+	off_t offset = 0;
+	size_t writes = 0;
+	for(;;) {
+		ssize_t reti = pread(in, buf, size, offset);
+		if( reti < 0 ) {
+			printf("Read in Error : (%d) %s\n\n", (int)errno, strerror(errno) );
+			exit( 1 );
+		}
+		if( 0 == reti && 0 != size ) { printf("Read in Error : unexpected end of file\n\n"); exit( 1 ); } // would otherwise spin forever
+		offset += reti;
+		size -= reti;
+
+		size_t in_buf = reti; // bytes of this read that still have to be written
+		for(;;) {
+			ssize_t reto = write(out, buf + (reti - in_buf), in_buf); // resume after the bytes already written instead of resending the start of buf
+			if( reto < 0 ) {
+				printf("Write out Error : (%d) %s\n\n", (int)errno, strerror(errno) );
+				exit( 1 );
+			}
+			in_buf -= reto;
+			writes += reto;
+			if(0 == in_buf) break;
+			st->shorts.w.cnt++;
+			st->shorts.w.bytes += reto;
+		}
+		if(0 == size) break;
+		st->shorts.r.cnt++;
+		st->shorts.r.bytes += reti;
+	}
+	st->calls++;
+	st->bytes += writes;
+}
Index: doc/user/user.tex
===================================================================
--- doc/user/user.tex	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ doc/user/user.tex	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -11,11 +11,11 @@
 %% Created On       : Wed Apr  6 14:53:29 2016
 %% Last Modified By : Peter A. Buhr
-%% Last Modified On : Sun Oct 10 12:45:00 2021
-%% Update Count     : 5095
+%% Last Modified On : Sat Feb 12 17:04:03 2022
+%% Update Count     : 5376
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 % requires tex packages: texlive-base texlive-latex-base tex-common texlive-humanities texlive-latex-extra texlive-fonts-recommended
 
-\documentclass[twoside,11pt]{article}
+\documentclass[twoside]{article}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -40,5 +40,5 @@
 % blue highlighting ß...ß (sharp s symbol) emacs: C-q M-_
 % green highlighting ¢...¢ (cent symbol) emacs: C-q M-"
-% LaTex escape §...§ (section symbol) emacs: C-q M-'
+% LaTex escape §...§ (section symbol) emacs: C-q M-'
 % keyword escape ¶...¶ (pilcrow symbol) emacs: C-q M-^
 % math escape $...$ (dollar symbol)
@@ -85,4 +85,5 @@
 \newcommand{\B}[1]{{\Textbf[blue]{#1}}}
 \newcommand{\G}[1]{{\Textbf[OliveGreen]{#1}}}
+\newcommand{\Sp}{\R{\textvisiblespace}}
 \newcommand{\KWC}{K-W C\xspace}
 
@@ -156,6 +157,6 @@
 One of the main design philosophies of \CFA is to ``\Index{describe not prescribe}'', which means \CFA tries to provide a pathway from low-level C programming to high-level \CFA programming, but it does not force programmers to ``do the right thing''.
 Programmers can cautiously add \CFA extensions to their C programs in any order and at any time to incrementally move towards safer, higher-level programming.
-A programmer is always free to reach back to C from \CFA, for any reason, and in many cases, new \CFA features can be locally switched back to there C counterpart.
-There is no notion or requirement for \emph{rewriting} a legacy C program in \CFA;
+A programmer is always free to reach back to C from \CFA, for any reason, and in many cases, new \CFA features can be locally switched back to their C counterpart.
+There is no notion or requirement for \emph{rewriting} a legacy C program to \CFA;
 instead, a programmer evolves a legacy program into \CFA by incrementally incorporating \CFA features.
 As well, new programs can be written in \CFA using a combination of C and \CFA features.
@@ -163,13 +164,13 @@
 
 \Index*[C++]{\CC{}}~\cite{c++:v1} had a similar goal 30 years ago, allowing object-oriented programming to be incrementally added to C.
-However, \CC currently has the disadvantages of a strong object-oriented bias, multiple legacy design-choices that cannot be updated, and active divergence of the language model from C, requiring significant effort and training to incrementally add \CC to a C-based project.
+However, \CC currently has the disadvantages of a strong object-oriented bias, multiple legacy design-choices that are difficult to update, and active divergence of the language model from C, requiring significant effort and training to incrementally add \CC to a C-based project.
 In contrast, \CFA has 30 years of hindsight and a clean starting point.
 
 Like \Index*[C++]{\CC{}}, there may be both old and new ways to achieve the same effect.
 For example, the following programs compare the C, \CFA, and \CC I/O mechanisms, where the programs output the same result.
-\begin{center}
+\begin{flushleft}
 \begin{tabular}{@{}l@{\hspace{1em}}l@{\hspace{1em}}l@{}}
-\multicolumn{1}{c@{\hspace{1em}}}{\textbf{C}}	& \multicolumn{1}{c}{\textbf{\CFA}}	& \multicolumn{1}{c}{\textbf{\CC}}	\\
-\begin{cfa}
+\multicolumn{1}{@{}c@{\hspace{1em}}}{\textbf{C}}	& \multicolumn{1}{c}{\textbf{\CFA}}	& \multicolumn{1}{c@{}}{\textbf{\CC}}	\\
+\begin{cfa}[tabsize=3]
 #include <stdio.h>$\indexc{stdio.h}$
 
@@ -180,5 +181,5 @@
 \end{cfa}
 &
-\begin{cfa}
+\begin{cfa}[tabsize=3]
 #include <fstream>$\indexc{fstream}$
 
@@ -189,14 +190,14 @@
 \end{cfa}
 &
-\begin{cfa}
+\begin{cfa}[tabsize=3]
 #include <iostream>$\indexc{iostream}$
 using namespace std;
 int main() {
 	int x = 0, y = 1, z = 2;
-	®cout<<x<<" "<<y<<" "<<z<<endl;®
+	®cout << x << ' ' << y << ' ' << z << endl;®
 }
 \end{cfa}
 \end{tabular}
-\end{center}
+\end{flushleft}
 While \CFA I/O \see{\VRef{s:StreamIOLibrary}} looks similar to \Index*[C++]{\CC{}}, there are important differences, such as automatic spacing between variables and an implicit newline at the end of the expression list, similar to \Index*{Python}~\cite{Python}.
 
@@ -238,5 +239,5 @@
 however, it largely extended the C language, and did not address many of C's existing problems.\footnote{%
 Two important existing problems addressed were changing the type of character literals from ©int© to ©char© and enumerator from ©int© to the type of its enumerators.}
-\Index*{Fortran}~\cite{Fortran08}, \Index*{Ada}~\cite{Ada12}, and \Index*{Cobol}~\cite{Cobol14} are examples of programming languages that took an evolutionary approach, where modern language-features (\eg objects, concurrency) are added and problems fixed within the framework of the existing language.
+\Index*{Fortran}~\cite{Fortran08}, \Index*{Cobol}~\cite{Cobol14}, and \Index*{Ada}~\cite{Ada12} are examples of programming languages that took an evolutionary approach, where modern language-features (\eg objects, concurrency) are added and problems fixed within the framework of the existing language.
 \Index*{Java}~\cite{Java8}, \Index*{Go}~\cite{Go}, \Index*{Rust}~\cite{Rust} and \Index*{D}~\cite{D} are examples of the revolutionary approach for modernizing C/\CC, resulting in a new language rather than an extension of the descendent.
 These languages have different syntax and semantics from C, do not interoperate directly with C, and are not systems languages because of restrictive memory-management or garbage collection.
@@ -333,5 +334,5 @@
 long double _Complex ®abs®( long double _Complex );
 \end{cfa}
-The problem is \Index{name clash} between the C name ©abs© and the \CFA names ©abs©, resulting in two name linkages\index{C linkage}: ©extern "C"© and ©extern "Cforall"© (default).
+The problem is a \Index{name clash} between the C name ©abs© and the \CFA names ©abs©, resulting in two name linkages\index{C linkage}: ©extern "C"© and ©extern "Cforall"© (default).
 Overloaded names must use \newterm{name mangling}\index{mangling!name} to create unique names that are different from unmangled C names.
 Hence, there is the same need as in \CC to know if a name is a C or \CFA name, so it can be correctly formed.
@@ -377,5 +378,5 @@
 The program is linked with the debugging version of the runtime system.
 The debug version performs runtime checks to aid the debugging phase of a \CFA program, but can substantially slow program execution.
-The runtime checks should only be removed after the program is completely debugged.
+The runtime checks should only be removed after a program is completely debugged.
 \textbf{This option is the default.}
 
@@ -452,4 +453,8 @@
 cfa $test$.cfa -XCFA -P -XCFA parse -XCFA -n # show program parse without prelude
 \end{lstlisting}
+Alternatively, multiple flags can be specified separated with commas and \emph{without} spaces.
+\begin{lstlisting}[language=sh,{moredelim=**[is][\protect\color{red}]{®}{®}}]
+cfa $test$.cfa -XCFA®,®-Pparse®,®-n # show program parse without prelude
+\end{lstlisting}
 \begin{description}[topsep=5pt,itemsep=0pt,parsep=0pt]
 \item
@@ -533,6 +538,5 @@
 double ®``®forall = 3.5;
 \end{cfa}
-
-Existing C programs with keyword clashes can be converted by enclosing keyword identifiers in backquotes, and eventually the identifier name can be changed to a non-keyword name.
+Existing C programs with keyword clashes can be converted by prefixing the keyword identifiers with double backquotes, and eventually the identifier name can be changed to a non-keyword name.
 \VRef[Figure]{f:HeaderFileInterposition} shows how clashes in existing C header-files \see{\VRef{s:StandardHeaders}} can be handled using preprocessor \newterm{interposition}: ©#include_next© and ©-I filename©.
 Several common C header-files with keyword clashes are fixed in the standard \CFA header-library, so there is a seamless programming-experience.
@@ -627,6 +631,6 @@
 \subsection{\texorpdfstring{\LstKeywordStyle{if} / \LstKeywordStyle{while} Statement}{if / while Statement}}
 
-The ©if©/©while© expression allows declarations, similar to ©for© declaration expression.\footnote{
-Declarations in the ©do©-©while© condition are not useful because they appear after the loop body.}
+The \Indexc{if}/\Indexc{while} expression allows declarations, similar to \Indexc{for} declaration expression.\footnote{
+Declarations in the \Indexc{do}-©while© condition are not useful because they appear after the loop body.}
 \begin{cfa}
 if ( ®int x = f()® ) ... $\C{// x != 0}$
@@ -640,6 +644,6 @@
 while ( ®struct S { int i; } x = { f() }; x.i < 4® ) ... $\C{// relational expression}$
 \end{cfa}
-Unless a relational expression is specified, each variable is compared not equal to 0, which is the standard semantics for the ©if©/©while© expression, and the results are combined using the logical ©&&© operator.
-The scope of the declaration(s) is local to the ©if© statement but exist within both the \emph{then} and \emph{else} clauses.
+Unless a relational expression is specified, each variable is compared not equal to 0, which is the standard semantics for the ©if©/©while© expression, and the results are combined using the logical \Indexc{&&} operator.
+The scope of the declaration(s) is local to the ©if©/©while© statement, \ie in both \emph{then} and \emph{else} clauses for ©if©, and loop body for ©while©.
 \CC only provides a single declaration always compared ©!=© to 0.
 
@@ -649,8 +653,8 @@
 \label{s:caseClause}
 
-C restricts the ©case© clause of a ©switch© statement to a single value.
+C restricts the \Indexc{case} clause of a \Indexc{switch} statement to a single value.
 For multiple ©case© clauses associated with the same statement, it is necessary to have multiple ©case© clauses rather than multiple values.
-Requiring a ©case© clause for each value does not seem to be in the spirit of brevity normally associated with C.
-Therefore, the ©case© clause is extended with a list of values, as in:
+Requiring a ©case© clause for each value is not in the spirit of brevity normally associated with C.
+Therefore, the ©case© clause is extended with a list of values.
 \begin{cquote}
 \begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
@@ -703,19 +707,19 @@
 \subsection{\texorpdfstring{\LstKeywordStyle{switch} Statement}{switch Statement}}
 
-C allows a number of questionable forms for the ©switch© statement:
+C allows a number of questionable forms for the \Indexc{switch} statement:
 \begin{enumerate}
 \item
-By default, the end of a ©case© clause\footnote{
+By default, the end of a \Indexc{case} clause\footnote{
 In this section, the term \emph{case clause} refers to either a ©case© or ©default© clause.}
 \emph{falls through} to the next ©case© clause in the ©switch© statement;
-to exit a ©switch© statement from a ©case© clause requires explicitly terminating the clause with a transfer statement, most commonly ©break©:
+to exit a ©switch© statement from a ©case© clause requires explicitly terminating the clause with a transfer statement, most commonly \Indexc{break}:
 \begin{cfa}
 switch ( i ) {
   case 1:
 	...
-	// fall-through
+	$\R{\LstCommentStyle{// fall-through}}$
   case 2:
 	...
-	break;	// exit switch statement
+	®break;®	// exit switch statement
 }
 \end{cfa}
@@ -763,5 +767,5 @@
 }
 \end{cfa}
-This situation better handled without fall-through by allowing a list of case values \see{\VRef{s:caseClause}}.
+This situation is better handled by a list of case values \see{\VRef{s:caseClause}}.
 While fall-through itself is not a problem, the problem occurs when fall-through is the default, as this semantics is unintuitive to many programmers and is different from most programming languages with a ©switch© statement.
 Hence, default fall-through semantics results in a large number of programming errors as programmers often \emph{forget} the ©break© statement at the end of a ©case© clause, resulting in inadvertent fall-through.
@@ -777,11 +781,4 @@
 		...
 	} // if
-  case 2:
-	while ( j < 5 ) {
-		...
-	  ®case 3:®		// transfer into "while" statement
-		...
-	} // while
-} // switch
 \end{cfa}
 This usage branches into control structures, which is known to cause both comprehension and technical difficulties.
@@ -789,5 +786,6 @@
 The technical problem results from the inability to ensure declaration and initialization of variables when blocks are not entered at the beginning.
 There are few arguments for this kind of control flow, and therefore, there is a strong impetus to eliminate it.
-Nevertheless, C does have an idiom where this capability is used, known as ``\Index*{Duff's device}''~\cite{Duff83}:
+
+This C idiom is known as ``\Index*{Duff's device}''~\cite{Duff83}, from this example:
 \begin{cfa}
 register int n = (count + 7) / 8;
@@ -858,5 +856,5 @@
 still works.
 Nevertheless, reversing the default action would have a non-trivial effect on case actions that compound, such as the above example of processing shell arguments.
-Therefore, to preserve backwards compatibility, it is necessary to introduce a new kind of ©switch© statement, called ©choose©, with no implicit fall-through semantics and an explicit fall-through if the last statement of a case-clause ends with the new keyword ©fallthrough©/©fallthru©, \eg:
+Therefore, to preserve backwards compatibility, it is necessary to introduce a new kind of ©switch© statement, called \Indexc{choose}, with no implicit fall-through semantics and an explicit fall-through if the last statement of a case-clause ends with the new keyword \Indexc{fallthrough}/\Indexc{fallthru}, \eg:
 \begin{cfa}
 ®choose® ( i ) {
@@ -885,5 +883,5 @@
 Therefore, no change is made for this issue.
 \item
-Dealing with unreachable code in a ©switch©/©choose© body is solved by restricting declarations and associated initialization to the start of statement body, which is executed \emph{before} the transfer to the appropriate ©case© clause\footnote{
+Dealing with unreachable code in a ©switch©/©choose© body is solved by restricting declarations and initialization to the start of statement body, which is executed \emph{before} the transfer to the appropriate ©case© clause\footnote{
 Essentially, these declarations are hoisted before the ©switch©/©choose© statement and both declarations and statement are surrounded by a compound statement.} and precluding statements before the first ©case© clause.
 Further declarations at the same nesting level as the statement body are disallowed to ensure every transfer into the body is sound.
@@ -908,5 +906,5 @@
 \subsection{Non-terminating and Labelled \texorpdfstring{\LstKeywordStyle{fallthrough}}{Non-terminating and Labelled fallthrough}}
 
-The ©fallthrough© clause may be non-terminating within a ©case© clause or have a target label to common code from multiple case clauses.
+The \Indexc{fallthrough} clause may be non-terminating within a \Indexc{case} clause or have a target label to common code from multiple case clauses.
 \begin{center}
 \begin{tabular}{@{}lll@{}}
@@ -960,8 +958,8 @@
 \end{tabular}
 \end{center}
-The target label must be below the ©fallthrough© and may not be nested in a control structure, and
-the target label must be at the same or higher level as the containing ©case© clause and located at
-the same level as a ©case© clause; the target label may be case ©default©, but only associated
-with the current ©switch©/©choose© statement.
+The target label must be below the \Indexc{fallthrough} and may not be nested in a control structure, and
+the target label must be at the same or higher level as the containing \Indexc{case} clause and located at
+the same level as a ©case© clause; the target label may be case \Indexc{default}, but only associated
+with the current \Indexc{switch}/\Indexc{choose} statement.
 
 \begin{figure}
@@ -1076,5 +1074,5 @@
 Looping a fixed number of times, possibly with a loop index, occurs frequently.
 \CFA condenses simply looping to facilitate coding speed and safety.
-The ©for©/©while©/©do-while© loop-control is augmented as follows \see{examples in \VRef[Figure]{f:LoopControlExamples}}:
+The \Indexc{for}, \Indexc{while}, and \Indexc{do} loop-control is augmented as follows \see{examples in \VRef[Figure]{f:LoopControlExamples}}:
 \begin{itemize}[itemsep=0pt]
 \item
@@ -1145,9 +1143,9 @@
 \subsection{\texorpdfstring{Labelled \LstKeywordStyle{continue} / \LstKeywordStyle{break} Statement}{Labelled continue / break Statement}}
 
-C ©continue© and ©break© statements, for altering control flow, are restricted to one level of nesting for a particular control structure.
+C \Indexc{continue} and \Indexc{break} statements, for altering control flow, are restricted to one level of nesting for a particular control structure.
 This restriction forces programmers to use \Indexc{goto} to achieve the equivalent control-flow for more than one level of nesting.
 To prevent having to switch to the ©goto©, \CFA extends the \Indexc{continue}\index{continue@©continue©!labelled}\index{labelled!continue@©continue©} and \Indexc{break}\index{break@©break©!labelled}\index{labelled!break@©break©} with a target label to support static multi-level exit\index{multi-level exit}\index{static multi-level exit}~\cite{Buhr85}, as in Java.
-For both ©continue© and ©break©, the target label must be directly associated with a ©for©, ©while© or ©do© statement;
-for ©break©, the target label can also be associated with a ©switch©, ©if© or compound (©{}©) statement.
+For both ©continue© and ©break©, the target label must be directly associated with a \Indexc{for}, \Indexc{while} or \Indexc{do} statement;
+for ©break©, the target label can also be associated with a \Indexc{switch}, \Indexc{if} or compound (©{}©) statement.
 \VRef[Figure]{f:MultiLevelExit} shows a comparison between labelled ©continue© and ©break© and the corresponding C equivalent using ©goto© and labels.
 The innermost loop has 8 exit points, which cause continuation or termination of one or more of the 7 \Index{nested control-structure}s.
@@ -1224,5 +1222,5 @@
 \end{figure}
 
-Both labelled ©continue© and ©break© are a ©goto©\index{goto@©goto©!restricted} restricted in the following ways:
+Both labelled \Indexc{continue} and \Indexc{break} are a \Indexc{goto}\index{goto@©goto©!restricted} restricted in the following ways:
 \begin{itemize}
 \item
@@ -1240,4 +1238,52 @@
 
 
+\subsection{\texorpdfstring{Extended \LstKeywordStyle{else}}{Extended else}}
+\label{s:ExtendedElse}
+\index{extended ©else©}
+
+The ©if© statement has an optional ©else© clause executed if the conditional is false.
+This concept is extended to the \Indexc{while}, \Indexc{for}, and \Indexc{do} looping constructs (like Python).
+Hence, if the loop conditional becomes false, looping stops and the corresponding ©else© clause is executed, if present.
+
+The following example is a linear search for the key 3 in an array, where finding the key is handled with a ©break© and not finding with the ©else© clause on the loop construct.
+\begin{cquote}
+\begin{cfa}
+int a[10];
+\end{cfa}
+\begin{tabular}{@{}lll@{}}
+\begin{cfa}
+
+while ( int i = 0; i < 10 ) {
+  if ( a[i] == 3 ) break; // found
+	i += 1;
+} ®else® { // i == 10
+	sout | "not found";
+}
+\end{cfa}
+&
+\begin{cfa}
+
+for ( i; 10 ) {
+  if ( a[i] == 3 ) break; // found
+
+} ®else® { // i == 10
+	sout | "not found";
+}
+\end{cfa}
+&
+\begin{cfa}
+int i = 0;
+do {
+  if ( a[i] == 3 ) break; // found
+	i += 1;
+} while( i < 10 ) ®else® { // i == 10
+	sout | "not found";
+}
+\end{cfa}
+\end{tabular}
+\end{cquote}
+Note, \Index{dangling else} now occurs with \Indexc{if}, \Indexc{while}, \Indexc{for}, \Indexc{do}, and \Indexc{waitfor}.
+
+
 %\subsection{\texorpdfstring{\protect\lstinline{with} Statement}{with Statement}}
 \subsection{\texorpdfstring{\LstKeywordStyle{with} Statement}{with Statement}}
@@ -1266,17 +1312,18 @@
 Therefore, reducing aggregate qualification is a useful language design goal.
 
-C allows unnamed nested aggregates that open their scope into the containing aggregate.
+C partially addresses the problem by eliminating qualification for enumerated types and unnamed \emph{nested} aggregates, which open their scope into the containing aggregate.
 This feature is used to group fields for attributes and/or with ©union© aggregates.
 \begin{cfa}
 struct S {
-	struct { int g,  h; } __attribute__(( aligned(64) ));
+	struct $\R{\LstCommentStyle{/* unnamed */}}$ { int g,  h; } __attribute__(( aligned(64) ));
 	int tag;
-	union {
+	union $\R{\LstCommentStyle{/* unnamed */}}$ {
 		struct { char c1,  c2; } __attribute__(( aligned(128) ));
 		struct { int i1,  i2; };
 		struct { double d1,  d2; };
 	};
-};
-s.g; s.h; s.tag; s.c1; s.c2; s.i1; s.i2; s.d1; s.d2;
+} s;
+enum { R, G, B };
+s.g; s.h;   s.tag = R;   s.c1; s.c2;   s.i1 = G; s.i2 = B;   s.d1; s.d2;
 \end{cfa}
 
@@ -1323,6 +1370,6 @@
 \end{cfa}
 where qualification is only necessary to disambiguate the shadowed variable ©i©.
-
-In detail, the ©with© statement may appear as the body of a function or nested within a function body.
+In detail, the ©with© statement may form a function body or be nested within a function body.
+
 The ©with© clause takes a list of expressions, where each expression provides an aggregate type and object.
 (Enumerations are already opened.)
@@ -1333,4 +1380,5 @@
 \end{cfa}
 The expression object is the implicit qualifier for the open structure-fields.
+
 \CFA's ability to overload variables \see{\VRef{s:VariableOverload}} and use the left-side of assignment in type resolution means most fields with the same name but different types are automatically disambiguated, eliminating qualification.
 All expressions in the expression list are open in parallel within the compound statement.
@@ -1362,5 +1410,5 @@
 \end{cfa}
 A cast or qualification can be used to disambiguate variables within a ©with© \emph{statement}.
-A cast can be used to disambiguate among overload variables in a ©with© \emph{expression}:
+A cast can also be used to disambiguate among overload variables in a ©with© \emph{expression}:
 \begin{cfa}
 with ( w ) { ... }							$\C{// ambiguous, same name and no context}$
@@ -1371,24 +1419,22 @@
 Finally, there is an interesting problem between parameters and the function-body ©with©, \eg:
 \begin{cfa}
-void ?{}( S & s, int i ) with ( s ) { $\C{// constructor}$
-	®s.i = i;®  j = 3;  m = 5.5; $\C{// initialize fields}$
-}
-\end{cfa}
-Here, the assignment ©s.i = i© means ©s.i = s.i©, which is meaningless, and there is no mechanism to qualify the parameter ©i©, making the assignment impossible using the function-body ©with©.
-To solve this problem, parameters are treated like an initialized aggregate:
-\begin{cfa}
-struct Params {
-	S & s;
-	int i;
+void f( S & s, char c ) with ( s ) {
+	®s.c = c;®  i = 3;  d = 5.5;			$\C{// initialize fields}$
+}
+\end{cfa}
+Here, the assignment ©s.c = c© means ©s.c = s.c©, which is meaningless, and there is no mechanism to qualify the parameter ©c©, making the assignment impossible using the function-body ©with©.
+To solve this problem, parameters \emph{not} explicitly opened are treated like an initialized aggregate:
+\begin{cfa}
+struct Params {								$\C{// s explicitly opened so S \& s elided}$
+	char c;
 } params;
 \end{cfa}
 and implicitly opened \emph{after} a function-body open, to give them higher priority:
 \begin{cfa}
-void ?{}( S & s, int ®i® ) with ( s ) ®with( $\emph{\R{params}}$ )® { // syntax not allowed, illustration only
-	s.i = ®i®; j = 3; m = 5.5;
+void f( S & s, char ®c® ) with ( s ) ®with( $\emph{\R{params}}$ )® { // syntax not allowed, illustration only
+	s.c = ®c;®  i = 3;  d = 5.5;
 }
 \end{cfa}
 This implicit semantic matches with programmer expectation.
-
 
 
@@ -3397,6 +3443,6 @@
 This requirement is the same as for comma expressions in argument lists.
 
-Type qualifiers, \ie const and volatile, may modify a tuple type.
-The meaning is the same as for a type qualifier modifying an aggregate type [Int99, x 6.5.2.3(7),x 6.7.3(11)], \ie the qualifier is distributed across all of the types in the tuple, \eg:
+Type qualifiers, \ie ©const© and ©volatile©, may modify a tuple type.
+The meaning is to distribute the qualifier across all of the types in the tuple, \eg:
 \begin{cfa}
 const volatile [ int, float, const int ] x;
@@ -3597,6 +3643,6 @@
 Stream ©exit© implicitly returns ©EXIT_FAILURE© to the shell.
 \begin{cfa}
-®exit®   | "x (" | x | ") negative value."; // terminate and return EXIT_FAILURE to shell
-®abort® | "x (" | x | ") negative value."; // terminate and generate stack trace and core file
+®exit®   | "x (" | x | ") negative value.";   // terminate and return EXIT_FAILURE to shell
+®abort® | "x (" | x | ") negative value.";   // terminate and generate stack trace and core file
 \end{cfa}
 Note, \CFA stream variables ©stdin©, ©stdout©, ©stderr©, ©exit©, and ©abort© overload C variables ©stdin©, ©stdout©, ©stderr©, and functions ©exit© and ©abort©, respectively.
@@ -4267,8 +4313,8 @@
 	sout | '1' | '2' | '3';
 	sout | 1 | "" | 2 | "" | 3;
-	sout | "x (" | 1 | "x [" | 2 | "x {" | 3 | "x =" | 4 | "x $" | 5 | "x £" | 6 | "x ¥"
-		| 7 | "x ¡" | 8 | "x ¿" | 9 | "x «" | 10;
+	sout | "x (" | 1 | "x [" | 2 | "x {" | 3 | "x =" | 4 | "x $" | 5 | "x £" | 6 | "x ¥"
+		| 7 | "x ¡" | 8 | "x ¿" | 9 | "x «" | 10;
 	sout | 1 | ", x" | 2 | ". x" | 3 | "; x" | 4 | "! x" | 5 | "? x" | 6 | "% x"
-		| 7 | "¢ x" | 8 | "» x" | 9 | ") x" | 10 | "] x" | 11 | "} x";
+		| 7 | "¢ x" | 8 | "» x" | 9 | ") x" | 10 | "] x" | 11 | "} x";
 	sout | "x`" | 1 | "`x'" | 2 | "'x\"" | 3 | "\"x:" | 4 | ":x " | 5 | " x\t" | 6 | "\tx";
 	sout | "x ( " | 1 | " ) x" | 2 | " , x" | 3 | " :x: " | 4;
@@ -4446,6 +4492,6 @@
 The common usage is the short form of the mutex statement\index{ostream@©ostream©!mutex@©mutex©} to lock a stream during a single cascaded I/O expression, \eg:
 \begin{cfa}
-$\emph{thread\(_1\)}$ : ®mutex()® sout | "abc " | "def ";
-$\emph{thread\(_2\)}$ : ®mutex()® sout | "uvw " | "xyz ";
+$\emph{thread\(_1\)}$ : ®mutex( sout )® sout | "abc " | "def ";
+$\emph{thread\(_2\)}$ : ®mutex( sout )® sout | "uvw " | "xyz ";
 \end{cfa}
 Now, the order of the thread execution is still non-deterministic, but the output is constrained to two possible lines in either order.
@@ -4470,5 +4516,5 @@
 ®mutex( sout )® {
 	sout | 1;
-	®mutex() sout® | 2 | 3;				$\C{// unnecessary, but ok because of recursive lock}$
+	®mutex( sout ) sout® | 2 | 3;				$\C{// unnecessary, but ok because of recursive lock}$
 	sout | 4;
 } // implicitly release sout lock
@@ -4482,5 +4528,5 @@
 	int x, y, z, w;
 	sin | x;
-	®mutex() sin® | y | z;				$\C{// unnecessary, but ok because of recursive lock}$
+	®mutex( sin )® sin | y | z;					$\C{// unnecessary, but ok because of recursive lock}$
 	sin | w;
 } // implicitly release sin lock
@@ -4491,5 +4537,5 @@
 \Textbf{WARNING:} The general problem of \Index{nested locking} can occur if routines are called in an I/O sequence that block, \eg:
 \begin{cfa}
-®mutex() sout® | "data:" | rtn( mon );	$\C{// mutex call on monitor}$
+®mutex( sout )® sout | "data:" | rtn( mon );	$\C{// mutex call on monitor}$
 \end{cfa}
 If the thread executing the I/O expression blocks in the monitor with the ©sout© lock, other threads writing to ©sout© also block until the thread holding the lock is unblocked and releases it.
@@ -4498,6 +4544,71 @@
 \begin{cfa}
 int ®data® = rtn( mon );
-mutex() sout | "data:" | ®data®;
-\end{cfa}
+mutex( sout ) sout | "data:" | ®data®;
+\end{cfa}
+
+
+\subsection{Locale}
+\index{stream!locale}
+\index{locale!stream}
+
+Cultures use different syntax, called a \newterm{locale}, for printing numbers so they are easier to read, \eg:
+\begin{cfa}
+12®,®345®.®123		$\C[1.25in]{// comma separator, period decimal-point}$
+12®.®345®,®123		$\C{// period separator, comma decimal-point}$
+12$\Sp$345®,®123®.®	$\C{// space separator, comma decimal-point, period terminator}\CRT$
+\end{cfa}
+A locale is selected with function ©setlocale©, and the corresponding locale package \emph{must} be installed on the underlying system;
+©setlocale© returns ©0p© if the requested locale is unavailable.
+Furthermore, a locale covers the syntax for many cultural items, \eg address, measurement, money, etc.
+This discussion applies to item ©LC_NUMERIC© for formatting non-monetary integral and floating-point values.
+\VRef[Figure]{f:StreamLocale} shows selecting different cultural syntax, which may be associated with one or more countries.
+
+\begin{figure}
+\begin{cfa}
+#include <fstream.hfa>
+#include <locale.h>							$\C{// setlocale}$
+#include <stdlib.h>							$\C{// getenv}$
+
+int main() {
+	void print() {
+		sout | 12 | 123 | 1234 | 12345 | 123456 | 1234567;
+		sout | 12. | 123.1 | 1234.12 | 12345.123 | 123456.1234 | 1234567.12345;
+		sout | nl;
+	}
+	sout | "Default locale off";
+	print();
+	sout | "Locale on" | ®setlocale( LC_NUMERIC, getenv( "LANG" ) )®;  // enable local locale
+	print();
+	sout | "German" | ®setlocale( LC_NUMERIC, "de_DE.UTF-8" )®;  // enable German locale
+	print();
+	sout | "Ukraine" | ®setlocale( LC_NUMERIC, "uk_UA.utf8" )®;  // enable Ukraine locale
+	print();
+	sout | "Default locale off" | ®setlocale( LC_NUMERIC, "C" )®;  // disable locale
+	print();
+}
+
+Default locale off
+12 123 1234 12345 123456 1234567
+12. 123.1 1234.12 12345.123 123456.1234 1234567.12345
+
+Locale on en_US.UTF-8
+12 123 1®,®234 12®,®345 123®,®456 1®,®234®,®567
+12®.® 123®.®1 1®,®234®.®12 12®,®345®.®123 123®,®456®.®1234 1®,®234®,®567®.®12345
+
+German de_DE.UTF-8
+12 123 1®.®234 12®.®345 123®.®456 1®.®234®.®567
+12®.® 123®,®1®.® 1®.®234®,®12 12®.®345®,®123 123®.®456®,®1234 1®.®234®.®567®,®12345
+
+Ukraine uk_UA.utf8
+12 123 1 234 12 345 123 456 1 234 567
+12®.® 123®,®1®.® 1$\Sp$234®,®12®.® 12$\Sp$ 345®,®123®.® 123$\Sp$ 456®,®1234®.® 1$\Sp$ 234$\Sp$567®,®12345®.®
+
+Default locale off C
+12 123 1234 12345 123456 1234567
+12. 123.1 1234.12 12345.123 123456.1234 1234567.12345
+\end{cfa}
+\caption{Stream Locale}
+\label{f:StreamLocale}
+\end{figure}
 
 
@@ -4555,4 +4666,5 @@
 \end{figure}
 
+
 \begin{comment}
 \section{Types}
@@ -4637,5 +4749,5 @@
 
 
-\subsection{Structures}
+\section{Structures}
 
 Structures in \CFA are basically the same as structures in C.
@@ -5270,5 +5382,5 @@
 \subsection{Coroutine}
 
-\Index{Coroutines} are the precursor to tasks.
+\Index{Coroutines} are the precursor to threads.
 \VRef[Figure]{f:FibonacciCoroutine} shows a coroutine that computes the \Index*{Fibonacci} numbers.
 
@@ -5372,9 +5484,9 @@
 
 
-\subsection{Tasks}
+\subsection{Threads}
 
 \CFA also provides a simple mechanism for creating and utilizing user level threads.
-A task provides mutual exclusion like a monitor, and also has its own execution state and a thread of control.
-Similar to a monitor, a task is defined like a structure:
+A thread provides mutual exclusion like a monitor, and also has its own execution state and a thread of control.
+Similar to a monitor, a thread is defined like a structure:
 
 \begin{figure}
@@ -5420,6 +5532,6 @@
 }
 \end{cfa}
-\caption{Simple Tasks}
-\label{f:SimpleTasks}
+\caption{Simple Threads}
+\label{f:SimpleThreads}
 \end{figure}
 
@@ -6788,6 +6900,6 @@
 In \CFA, there are ambiguous cases with dereference and operator identifiers, \eg ©int *?*?()©, where the string ©*?*?© can be interpreted as:
 \begin{cfa}
-*?$\R{\textvisiblespace}$*? $\C{// dereference operator, dereference operator}$
-*$\R{\textvisiblespace}$?*? $\C{// dereference, multiplication operator}$
+*?$\Sp$*? $\C{// dereference operator, dereference operator}$
+*$\Sp$?*? $\C{// dereference, multiplication operator}$
 \end{cfa}
 By default, the first interpretation is selected, which does not yield a meaningful parse.
@@ -6813,6 +6925,6 @@
 Therefore, it is necessary to disambiguate these cases with a space:
 \begin{cfa}
-i++$\R{\textvisiblespace}$? i : 0;
-i?$\R{\textvisiblespace}$++i : 0;
+i++$\Sp$? i : 0;
+i?$\Sp$++i : 0;
 \end{cfa}
 
@@ -7430,17 +7542,17 @@
 char random( void );$\indexc{random}$
 char random( char u ); $\C{// [0,u)}$
-char random( char l, char u ); $\C{// [l,u)}$
+char random( char l, char u ); $\C{// [l,u]}$
 int random( void );
 int random( int u ); $\C{// [0,u)}$
-int random( int l, int u ); $\C{// [l,u)}$
+int random( int l, int u ); $\C{// [l,u]}$
 unsigned int random( void );
 unsigned int random( unsigned int u ); $\C{// [0,u)}$
-unsigned int random( unsigned int l, unsigned int u ); $\C{// [l,u)}$
+unsigned int random( unsigned int l, unsigned int u ); $\C{// [l,u]}$
 long int random( void );
 long int random( long int u ); $\C{// [0,u)}$
-long int random( long int l, long int u ); $\C{// [l,u)}$
+long int random( long int l, long int u ); $\C{// [l,u]}$
 unsigned long int random( void );
 unsigned long int random( unsigned long int u ); $\C{// [0,u)}$
-unsigned long int random( unsigned long int l, unsigned long int u ); $\C{// [l,u)}$
+unsigned long int random( unsigned long int l, unsigned long int u ); $\C{// [l,u]}$
 float random( void );						 $\C{// [0.0, 1.0)}$
 double random( void );						 $\C{// [0.0, 1.0)}$
@@ -8106,4 +8218,185 @@
 
 
+\section{Pseudo Random Number Generator}
+\label{s:PRNG}
+
+Random numbers are values generated independently, \ie new values do not depend on previous values (independent trials), \eg lottery numbers, shuffled cards, dice roll, coin flip.
+While a primary goal of programming is computing values that are \emph{not} random, random values are useful in simulation, cryptography, games, etc.
+A random-number generator is an algorithm computing independent values.
+If the algorithm uses deterministic computation (predictable sequence of values), it generates \emph{pseudo} random numbers versus \emph{true} random numbers.
+
+All \newterm{pseudo random-number generators} (\newterm{PRNG}) involve some technique to scramble bits of a value, \eg multiplicative recurrence:
+\begin{cfa}
+rand = 36973 * (rand & 65535) + (rand >> 16); // scramble bits
+\end{cfa}
+Multiplication of large values adds new least-significant bits and drops most-significant bits.
+\begin{quote}
+\begin{tabular}{@{}r|l@{}}
+bits 63--32 (most)	& bits 31--0 (least)	\\
+\hline
+0x0					& 0x3e8e36				\\
+0x5f				& 0x718c25e1			\\
+0xad3e				& 0x7b5f1dbe			\\
+0xbc3b				& 0xac69ff19			\\
+0x1070f				& 0x2d258dc6			\\
+\end{tabular}
+\end{quote}
+By dropping bits 63--32, bits 31--0 become scrambled after each multiply.
+The least-significant bits \emph{appear} random but the same bits are always generated given a fixed starting value, called the \newterm{seed} (value 0x3e8e36 above).
+Hence, if a program uses the same seed, the same sequence of pseudo-random values is generated from the PRNG.
+Often the seed is set to another random value like a program's process identifier (©getpid©\index{getpid@©getpid©}) or time when the program is run;
+hence, one random value bootstraps another.
+Finally, a PRNG usually generates a range of large values, \eg ©[0, UINT_MAX]©, which are scaled using the modulus operator, \eg ©prng() % 5© produces random values in the range 0--4.
+
+\CFA provides sequential and concurrent PRNGs.
+\begin{itemize}
+\item
+For sequential programs, like coroutining, the PRNG is used to randomize behaviour or values during execution, \eg in games, a character makes a random move or an object takes on a random value.
+\begin{cfa}
+struct PRNG { ... }; $\C[3.75in]{// opaque type}$
+void ?{}( PRNG & prng ); $\C{// random seed}$
+void ?{}( PRNG & prng, uint32_t seed ); $\C{// fixed seed}$
+void set_seed( PRNG & prng, uint32_t seed ); $\C{// set seed}$
+uint32_t get_seed( PRNG & prng ); $\C{// get seed}$
+uint32_t prng( PRNG & prng ); $\C{// [0,UINT\_MAX]}$
+uint32_t prng( PRNG & prng, uint32_t u ); $\C{// [0,u)}$
+uint32_t prng( PRNG & prng, uint32_t l, uint32_t u ); $\C{// [l,u]}$
+uint32_t calls( PRNG & prng ); $\C{// number of calls}\CRT$
+\end{cfa}
+Sequential execution is repeatable given the same starting seeds for all ©PRNG©s.
+In this scenario, it is useful to have multiple ©PRNG©s, \eg one per player or object, so a type is provided to generate multiple instances.
+\VRef[Figure]{f:SequentialPRNG} shows an example that creates two sequential ©PRNG©s, sets both to the same seed (1009), and illustrates the three forms for generating random values, where both ©PRNG©s generate the same sequence of values.
+
+\begin{figure}
+\begin{cfa}
+PRNG prng1, prng2;
+®set_seed( prng1, 1009 )®;   ®set_seed( prng2, 1009 )®;
+for ( 10 ) {
+	// Do not cascade prng calls because side-effect functions are called in arbitrary order.
+	sout | nlOff | ®prng( prng1 )®;  sout | ®prng( prng1, 5 )®;  sout | ®prng( prng1, 0, 5 )® | '\t';
+	sout | ®prng( prng2 )®;  sout | ®prng( prng2, 5 )®;  sout | ®prng( prng2, 0, 5 )® | nlOn;
+}
+\end{cfa}
+\begin{cquote}
+\begin{tabular}{@{}ll@{}}
+\begin{cfa}
+37301721 2 2
+1681308562 1 3
+290112364 3 2
+1852700364 4 3
+733221210 1 3
+1775396023 2 3
+123981445 2 3
+2062557687 2 0
+283934808 1 0
+672325890 1 3
+\end{cfa}
+&
+\begin{cfa}
+37301721 2 2
+1681308562 1 3
+290112364 3 2
+1852700364 4 3
+733221210 1 3
+1775396023 2 3
+123981445 2 3
+2062557687 2 0
+283934808 1 0
+672325890 1 3
+\end{cfa}
+\end{tabular}
+\end{cquote}
+\vspace{-10pt}
+\caption{Sequential PRNG}
+\label{f:SequentialPRNG}
+\end{figure}
+
+\item
+For concurrent programs, it is important the PRNG is thread-safe and not a point of contention.
+A PRNG in concurrent programs is often used to randomize execution in short-running programs, \eg ©yield( prng() % 5 )©.
+
+Because concurrent execution is non-deterministic, seeding the concurrent PRNG is less important, as repeatable execution is impossible.
+Hence, there is one system-wide PRNG (global seed) but each \CFA thread has its own non-contended PRNG state.
+If the global seed is set, threads start with this seed, until it is reset and then threads start with the reset seed.
+Hence, these threads generate the same sequence of random numbers from their specific starting seed.
+If the global seed is \emph{not} set, threads start with a random seed, until the global seed is set.
+Hence, these threads generate different sequences of random numbers.
+If each thread needs its own seed, use a sequential ©PRNG© in each thread.
+
+There are two versions of the PRNG functions to manipulate the thread-local PRNG-state, which are differentiated by performance.
+\begin{cfa}
+void set_seed( uint32_t seed ); $\C[3.75in]{// set global seed}$
+uint32_t get_seed(); $\C{// get global seed}$
+// SLOWER
+uint32_t prng(); $\C{// [0,UINT\_MAX]}$
+uint32_t prng( uint32_t u ); $\C{// [0,u)}$
+uint32_t prng( uint32_t l, uint32_t u ); $\C{// [l,u]}$
+// FASTER
+uint32_t prng( $thread\LstStringStyle{\textdollar}$ & th );	$\C{// [0,UINT\_MAX]}$
+uint32_t prng( $thread\LstStringStyle{\textdollar}$ & th, uint32_t u );	$\C{// [0,u)}$
+uint32_t prng( $thread\LstStringStyle{\textdollar}$ & th, uint32_t l, uint32_t u );	$\C{// [l,u]}\CRT$
+\end{cfa}
+The slower ©prng© functions call ©active_thread© internally to access the thread-local PRNG-state, while the faster ©prng© functions are passed a pointer to the active thread.
+If the thread pointer is known, \eg in a thread ©main©, eliminating the call to ©active_thread© significantly reduces the cost for accessing the thread's PRNG state.
+\VRef[Figure]{f:ConcurrentPRNG} shows an example using the slower/faster concurrent PRNG in the program main and a thread.
+
+\begin{figure}
+\begin{cfa}
+thread T {};
+void main( ®T & th® ) {  // thread address
+	for ( i; 10 ) {
+		sout | nlOff | ®prng()®;  sout | ®prng( 5 )®;  sout | ®prng( 0, 5 )® | '\t';  // SLOWER
+		sout | nlOff | ®prng( th )®;  sout | ®prng( th, 5 )®;  sout | ®prng( th, 0, 5 )® | nlOn;  // FASTER
+	}
+}
+int main() {
+	set_seed( 1009 );
+	$\R{thread\LstStringStyle{\textdollar}}$ ®& th = *active_thread()®;  // program-main thread-address
+	for ( i; 10 ) {
+		sout | nlOff | ®prng()®; sout | ®prng( 5 )®; sout | ®prng( 0, 5 )® | '\t';  // SLOWER
+		sout | nlOff | ®prng( th )®; sout | ®prng( th, 5 )®; sout | ®prng( th, 0, 5 )® | nlOn;  // FASTER
+	}
+	sout | nl;
+	T t; // run thread
+}
+\end{cfa}
+\begin{cquote}
+\begin{tabular}{@{}ll@{}}
+\begin{cfa}
+37301721 2 2
+290112364 3 2
+733221210 1 3
+123981445 2 3
+283934808 1 0
+1414344101 1 3
+871831898 3 4
+2142057611 4 4
+802117363 0 4
+2346353643 1 3
+\end{cfa}
+&
+\begin{cfa}
+1681308562 1 3
+1852700364 4 3
+1775396023 2 3
+2062557687 2 0
+672325890 1 3
+873424536 3 4
+866783532 0 1
+17310256 2 5
+492964499 0 0
+2143013105 3 2
+\end{cfa}
+\end{tabular}
+\begin{cfa}
+// same output as above from thread t
+\end{cfa}
+\end{cquote}
+\caption{Concurrent PRNG}
+\label{f:ConcurrentPRNG}
+\end{figure}
+\end{itemize}
+
+
 \section{Multi-precision Integers}
 \label{s:MultiPrecisionIntegers}
@@ -8310,5 +8603,5 @@
 \end{tabular}
 \end{cquote}
-\small
+
 \begin{cfa}
 Factorial Numbers
Index: libcfa/src/Makefile.am
===================================================================
--- libcfa/src/Makefile.am	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ libcfa/src/Makefile.am	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -72,5 +72,4 @@
 	common.hfa \
 	fstream.hfa \
-	heap.hfa \
 	iostream.hfa \
 	iterator.hfa \
@@ -102,5 +101,7 @@
 	startup.hfa \
 	virtual.c \
-	virtual.h
+	virtual.h \
+	heap.cc \
+	heap.h
 
 # not all platforms support concurrency, add option do disable it
@@ -172,4 +173,5 @@
 
 -include $(libdeps)
+-include $(DEPDIR)/heap.Plo
 
 thread_libdeps = $(join \
Index: libcfa/src/concurrency/coroutine.cfa
===================================================================
--- libcfa/src/concurrency/coroutine.cfa	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ libcfa/src/concurrency/coroutine.cfa	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -85,6 +85,9 @@
 // minimum feasible stack size in bytes
 static const size_t MinStackSize = 1000;
-extern size_t __page_size;				// architecture pagesize HACK, should go in proper runtime singleton
-extern int __map_prot;
+
+extern "C" {
+	extern size_t __cfa_page_size;				// architecture pagesize HACK, should go in proper runtime singleton
+	extern int __map_prot;
+}
 
 void __stack_prepare( __stack_info_t * this, size_t create_size );
@@ -157,22 +160,22 @@
 [void *, size_t] __stack_alloc( size_t storageSize ) {
 	const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
-	assert(__page_size != 0l);
+	assert(__cfa_page_size != 0l);
 	size_t size = libCeiling( storageSize, 16 ) + stack_data_size;
-	size = ceiling(size, __page_size);
+	size = ceiling(size, __cfa_page_size);
 
 	// If we are running debug, we also need to allocate a guardpage to catch stack overflows.
 	void * storage;
 	#if CFA_COROUTINE_USE_MMAP
-		storage = mmap(0p, size + __page_size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+		storage = mmap(0p, size + __cfa_page_size, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
 		if(storage == ((void*)-1)) {
 			abort( "coroutine stack creation : internal error, mmap failure, error(%d) %s.", errno, strerror( errno ) );
 		}
-		if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
+		if ( mprotect( storage, __cfa_page_size, PROT_NONE ) == -1 ) {
 			abort( "coroutine stack creation : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
 		} // if
-		storage = (void *)(((intptr_t)storage) + __page_size);
+		storage = (void *)(((intptr_t)storage) + __cfa_page_size);
 	#else
 		__cfaabi_dbg_debug_do(
-			storage = memalign( __page_size, size + __page_size );
+			storage = memalign( __cfa_page_size, size + __cfa_page_size );
 		);
 		__cfaabi_dbg_no_debug_do(
@@ -181,8 +184,8 @@
 
 		__cfaabi_dbg_debug_do(
-			if ( mprotect( storage, __page_size, PROT_NONE ) == -1 ) {
+			if ( mprotect( storage, __cfa_page_size, PROT_NONE ) == -1 ) {
 				abort( "__stack_alloc : internal error, mprotect failure, error(%d) %s.", (int)errno, strerror( (int)errno ) );
 			}
-			storage = (void *)(((intptr_t)storage) + __page_size);
+			storage = (void *)(((intptr_t)storage) + __cfa_page_size);
 		);
 	#endif
@@ -198,12 +201,12 @@
 	#if CFA_COROUTINE_USE_MMAP
 		size_t size = ((intptr_t)this->storage->base) - ((intptr_t)this->storage->limit) + sizeof(__stack_t);
-		storage = (void *)(((intptr_t)storage) - __page_size);
-		if(munmap(storage, size + __page_size) == -1) {
+		storage = (void *)(((intptr_t)storage) - __cfa_page_size);
+		if(munmap(storage, size + __cfa_page_size) == -1) {
 			abort( "coroutine stack destruction : internal error, munmap failure, error(%d) %s.", errno, strerror( errno ) );
 		}
 	#else
 		__cfaabi_dbg_debug_do(
-			storage = (char*)(storage) - __page_size;
-			if ( mprotect( storage, __page_size, __map_prot ) == -1 ) {
+			storage = (char*)(storage) - __cfa_page_size;
+			if ( mprotect( storage, __cfa_page_size, __map_prot ) == -1 ) {
 				abort( "(coStack_t *)%p.^?{}() : internal error, mprotect failure, error(%d) %s.", &this, errno, strerror( errno ) );
 			}
Index: libcfa/src/concurrency/kernel.hfa
===================================================================
--- libcfa/src/concurrency/kernel.hfa	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ libcfa/src/concurrency/kernel.hfa	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -173,5 +173,5 @@
 
 static inline void  ?{}(__timestamp_t & this) { this.tv = 0; this.ma = 0; }
-static inline void ^?{}(__timestamp_t & this) {}
+static inline void ^?{}(__timestamp_t &) {}
 
 struct __attribute__((aligned(128))) __ready_queue_caches_t;
Index: libcfa/src/concurrency/kernel/startup.cfa
===================================================================
--- libcfa/src/concurrency/kernel/startup.cfa	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ libcfa/src/concurrency/kernel/startup.cfa	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -18,20 +18,20 @@
 
 // C Includes
-#include <errno.h>              // errno
+#include <errno.h>										// errno
 #include <signal.h>
-#include <string.h>             // strerror
-#include <unistd.h>             // sysconf
+#include <string.h>										// strerror
+#include <unistd.h>										// sysconf
 
 extern "C" {
-      #include <limits.h>       // PTHREAD_STACK_MIN
-	#include <unistd.h>       // syscall
-	#include <sys/eventfd.h>  // eventfd
-      #include <sys/mman.h>     // mprotect
-      #include <sys/resource.h> // getrlimit
+	#include <limits.h>									// PTHREAD_STACK_MIN
+	#include <unistd.h>									// syscall
+	#include <sys/eventfd.h>							// eventfd
+	#include <sys/mman.h>								// mprotect
+	#include <sys/resource.h>							// getrlimit
 }
 
 // CFA Includes
 #include "kernel_private.hfa"
-#include "startup.hfa"          // STARTUP_PRIORITY_XXX
+#include "startup.hfa"									// STARTUP_PRIORITY_XXX
 #include "limits.hfa"
 #include "math.hfa"
@@ -102,5 +102,5 @@
 extern void __wake_proc(processor *);
 extern int cfa_main_returned;							// from interpose.cfa
-extern uint32_t __global_random_seed;
+uint32_t __global_random_prime = 4_294_967_291u, __global_random_mask = false;
 
 //-----------------------------------------------------------------------------
@@ -122,8 +122,7 @@
 extern "C" {
 	struct { __dllist_t(cluster) list; __spinlock_t lock; } __cfa_dbg_global_clusters;
-}
-
-extern size_t __page_size;
-extern int __map_prot;
+	extern size_t __cfa_page_size;
+	extern int __map_prot;
+}
 
 //-----------------------------------------------------------------------------
@@ -490,5 +489,5 @@
 	preferred = ready_queue_new_preferred();
 	last_proc = 0p;
-	random_state = __global_random_seed;
+	random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl();
 	#if defined( __CFA_WITH_VERIFY__ )
 		canary = 0x0D15EA5E0D15EA5Ep;
@@ -574,5 +573,4 @@
 }
 
-extern size_t __page_size;
 void ^?{}(processor & this) with( this ){
 	/* paranoid */ verify( !__atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) );
@@ -740,17 +738,17 @@
 	void * stack;
 	#if CFA_PROCESSOR_USE_MMAP
-		stacksize = ceiling( stacksize, __page_size ) + __page_size;
+		stacksize = ceiling( stacksize, __cfa_page_size ) + __cfa_page_size;
 		stack = mmap(0p, stacksize, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
 		if(stack == ((void*)-1)) {
 			abort( "pthread stack creation : internal error, mmap failure, error(%d) %s.", errno, strerror( errno ) );
 		}
-		if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
+		if ( mprotect( stack, __cfa_page_size, PROT_NONE ) == -1 ) {
 			abort( "pthread stack creation : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
 		} // if
 	#else
 		__cfaabi_dbg_debug_do(
-			stack = memalign( __page_size, stacksize + __page_size );
+			stack = memalign( __cfa_page_size, stacksize + __cfa_page_size );
 			// pthread has no mechanism to create the guard page in user supplied stack.
-			if ( mprotect( stack, __page_size, PROT_NONE ) == -1 ) {
+			if ( mprotect( stack, __cfa_page_size, PROT_NONE ) == -1 ) {
 				abort( "mprotect : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
 			} // if
@@ -779,5 +777,5 @@
 		check( pthread_attr_getstacksize( &attr, &stacksize ), "pthread_attr_getstacksize" );
 		assert( stacksize >= PTHREAD_STACK_MIN );
-		stacksize += __page_size;
+		stacksize += __cfa_page_size;
 
 		if(munmap(stack, stacksize) == -1) {
@@ -787,5 +785,5 @@
 		__cfaabi_dbg_debug_do(
 			// pthread has no mechanism to create the guard page in user supplied stack.
-			if ( mprotect( stack, __page_size, __map_prot ) == -1 ) {
+			if ( mprotect( stack, __cfa_page_size, __map_prot ) == -1 ) {
 				abort( "mprotect : internal error, mprotect failure, error(%d) %s.", errno, strerror( errno ) );
 			} // if
Index: libcfa/src/concurrency/thread.cfa
===================================================================
--- libcfa/src/concurrency/thread.cfa	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ libcfa/src/concurrency/thread.cfa	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Jan 15 14:34:58 2022
-// Update Count     : 45
+// Last Modified On : Sat Feb 12 15:24:18 2022
+// Update Count     : 66
 //
 
@@ -25,5 +25,5 @@
 #include "invoke.h"
 
-extern uint32_t __global_random_seed;
+extern uint32_t __global_random_seed, __global_random_prime, __global_random_mask;
 
 //-----------------------------------------------------------------------------
@@ -45,5 +45,5 @@
 	preferred = ready_queue_new_preferred();
 	last_proc = 0p;
-	random_state = __global_random_seed;
+	random_state = __global_random_mask ? __global_random_prime : __global_random_prime ^ rdtscl();
 	#if defined( __CFA_WITH_VERIFY__ )
 		canary = 0x0D15EA5E0D15EA5Ep;
@@ -176,6 +176,9 @@
 
 void set_seed( uint32_t seed ) {
- 	active_thread()->random_state = __global_random_seed = seed;
-	GENERATOR( active_thread()->random_state );
+	uint32_t & state = active_thread()->random_state;
+	state = __global_random_seed = seed;
+	GENERATOR( state );
+	__global_random_prime = state;
+	__global_random_mask = true;
 } // set_seed
 
Index: libcfa/src/concurrency/thread.hfa
===================================================================
--- libcfa/src/concurrency/thread.hfa	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ libcfa/src/concurrency/thread.hfa	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -10,6 +10,6 @@
 // Created On       : Tue Jan 17 12:27:26 2017
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Wed Feb  9 22:10:14 2022
-// Update Count     : 14
+// Last Modified On : Fri Feb 11 16:34:07 2022
+// Update Count     : 20
 //
 
@@ -131,9 +131,15 @@
 
 //----------
+// prng
 static inline {
 	uint32_t prng( thread$ & th ) __attribute__(( warn_unused_result )) { return LCG( th.random_state ); } // [0,UINT_MAX]
 	uint32_t prng( thread$ & th, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( th ) % u; } // [0,u)
 	uint32_t prng( thread$ & th, uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( th, u - l + 1 ) + l; } // [l,u]
-} // prng
+	forall( T & | is_thread(T) ) {
+		uint32_t prng( T & th ) __attribute__(( warn_unused_result )) { return prng( (thread &)th ); } // [0,UINT_MAX]
+		uint32_t prng( T & th, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( th ) % u; } // [0,u)
+		uint32_t prng( T & th, uint32_t l, uint32_t u ) __attribute__(( warn_unused_result )) { return prng( th, u - l + 1 ) + l; } // [l,u]
+	} // distribution
+} // distribution
 
 // Local Variables: //
Index: libcfa/src/heap.cc
===================================================================
--- libcfa/src/heap.cc	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
+++ libcfa/src/heap.cc	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -0,0 +1,1717 @@
+#include "heap.h"
+
+#include <algorithm>									// lower_bound, min
+#include <cstring>										// strlen, memset, memcpy
+#include <climits>										// ULONG_MAX
+#include <cstdarg>										// va_start, va_end
+#include <cerrno>										// errno, ENOMEM, EINVAL
+#include <cassert>
+#include <unistd.h>										// STDERR_FILENO, sbrk, sysconf, write
+#include <sys/mman.h>									// mmap, munmap
+#include <cstdint>										// uintptr_t, uint64_t, uint32_t
+
+#define TLS												// when defined, TLSMODEL below selects the initial-exec TLS model
+#define AWAYSPIN										// toggle spinlock / lockfree stack
+#define FASTLOOKUP										// use O(1) table lookup from allocation size to bucket size
+#define CFA_THREADSAFE_HEAP								// when defined, ENABLE/DISABLE_INTERRUPTS below call the runtime
+
+#ifdef __CFA_DEBUG__
+#define __DEBUG__										// local debug switch follows the CFA debug build flag
+#endif // __CFA_DEBUG__
+
+#define LIKELY(x) __builtin_expect(!!(x), 1)			// branch hint: condition expected true
+#define UNLIKELY(x) __builtin_expect(!!(x), 0)			// branch hint: condition expected false
+
+#define str(s) #s										// stringify argument as written
+#define xstr(s) str(s)									// stringify argument after macro expansion
+#define WARNING( s ) xstr( GCC diagnostic ignored str( -W ## s ) ) // pragma text used by NOWARNING to ignore warning s
+#define NOWARNING( statement, warning ) \
+	_Pragma( "GCC diagnostic push" ) \
+	_Pragma( WARNING( warning ) ) \
+	statement ;	\
+	_Pragma ( "GCC diagnostic pop" )
+
+enum { __ALIGN__ = 16,									// minimum allocation alignment, bytes
+	   __DEFAULT_HEAP_EXPANSION__ = 2 * 1024 * 1024,	// sbrk extension amount when full
+	   __DEFAULT_MMAP_START__ = 512 * 1024 + 1,			// crossover allocation size from sbrk to mmap
+	   __DEFAULT_HEAP_UNFREED__ = 0,					// amount subtracted to adjust for unfreed program storage
+}; // enum
+
+#ifdef TLS
+#define TLSMODEL __attribute__(( tls_model("initial-exec") )) // attribute applied to the thread-local heap variables
+#else
+#define TLSMODEL										// expands to nothing => compiler default TLS model
+#endif // TLS
+
+#ifdef CFA_THREADSAFE_HEAP
+extern "C" {
+	void enable_interrupts();							// defined outside this file
+	void disable_interrupts();							// defined outside this file
+}
+#define ENABLE_INTERRUPTS (void)enable_interrupts()
+#define DISABLE_INTERRUPTS (void)disable_interrupts()
+#else
+#define ENABLE_INTERRUPTS								// no-op when heap is not built thread safe
+#define DISABLE_INTERRUPTS								// no-op when heap is not built thread safe
+#endif // CFA_THREADSAFE_HEAP
+
+//######################### Helpers #########################
+
+
+// Called by macro assert in assert.h. Replace to prevent recursive call to malloc.
+/*
+void __assert_fail( const char assertion[], const char file[], unsigned int line, const char function[] ) {
+	extern const char * __progname;						// global name of running executable (argv[0])
+	char helpText[1024];
+	int len = snprintf( helpText, sizeof(helpText), "Internal assertion error \"%s\" from program \"%s\" in \"%s\" at line %d in file \"%s.\n",
+						assertion, __progname, function, line, file );
+	NOWARNING( write( STDERR_FILENO, helpText, len ), unused-result );
+	abort();
+	// CONTROL NEVER REACHES HERE!
+} // __assert_fail
+FIXME */
+
+// Print a printf-style message on stderr (newline appended when the format does not end with one), then terminate.
+void abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
+void abort( const char fmt[], ... ) {					// overload real abort
+	va_list args;
+	va_start( args, fmt );
+	vfprintf( stderr, fmt, args );
+	va_end( args );										// args is indeterminate after vfprintf => do not reuse it below
+	size_t len = strlen( fmt );							// empty format has no last character => never index fmt[-1]
+	if ( len == 0 || fmt[len - 1] != '\n' ) fputs( "\n", stderr ); // add newline if missing at the end of the format text
+	abort();											// call the real abort
+	// CONTROL NEVER REACHES HERE!
+} // abort
+
+static inline bool Pow2( unsigned long int value ) {
+	// (value & (value - 1)) clears the lowest set bit, so the result is 0 only when at most one bit is set; NOTE: true for 0
+	return (value & (value - 1)) == 0;
+} // Pow2
+
+static inline unsigned long int Floor( unsigned long int value, unsigned long int align ) {
+	assert( Pow2( align ) );
+	// for a power of 2, -align has every bit at or above align set, so the mask clears the low bits => value rounded down to a multiple of align
+	return value & -align;
+} // Floor
+
+static inline unsigned long int Ceiling( unsigned long int value, unsigned long int align ) {
+	assert( Pow2( align ) );
+	// "negate, round down, negate" is the same as round up => value rounded up to a multiple of align (unsigned wrap-around arithmetic)
+	return -Floor( -value, align );
+} // Ceiling
+
+template< typename T > static inline T AtomicFetchAdd( volatile T & counter, int increment ) {
+	return __atomic_fetch_add( &counter, increment, __ATOMIC_SEQ_CST ); // sequentially-consistent add, result is the value before the add
+} // AtomicFetchAdd
+
+
+//######################### Spin Lock #########################
+
+
+#define CACHE_ALIGN 128									// Intel recommendation
+#define CALIGN __attribute__(( aligned(CACHE_ALIGN) ))	// align declaration to CACHE_ALIGN bytes
+
+// pause to prevent excess processor bus usage
+#if defined( __i386 ) || defined( __x86_64 )
+	#define Pause() __asm__ __volatile__ ( "pause" : : : )
+#elif defined(__ARM_ARCH)
+	#define Pause() __asm__ __volatile__ ( "YIELD" : : : )
+#else
+	#error unsupported architecture
+#endif
+
+typedef volatile uintptr_t SpinLock_t CALIGN;			// aligned addressable word-size; 0 => unlocked (see spin_acquire)
+
+void spin_acquire( volatile SpinLock_t * lock ) {		// busy wait until the lock is obtained, pausing longer after each failed attempt
+	enum { SPIN_START = 4, SPIN_END = 64 * 1024, };		// initial and maximum number of Pause iterations between attempts
+	unsigned int spin = SPIN_START;
+
+	for ( unsigned int i = 1;; i += 1 ) {				// i is only referenced by the commented-out slow backoff below
+	  if ( *lock == 0 && __atomic_test_and_set( lock, __ATOMIC_SEQ_CST ) == 0 ) break; // Fence, plain read first so test-and-set only runs when lock looks free
+		for ( volatile unsigned int s = 0; s < spin; s += 1 ) Pause(); // exponential spin
+		spin += spin;									// powers of 2
+		//if ( i % 64 == 0 ) spin += spin;				// slowly increase by powers of 2
+		if ( spin > SPIN_END ) spin = SPIN_END;			// cap spinning
+	} // for
+} // spin_acquire
+
+void spin_release( volatile SpinLock_t * lock ) {		// release lock obtained by spin_acquire
+	__atomic_clear( lock, __ATOMIC_SEQ_CST );			// Fence
+} // spin_release
+
+
+//####################### Heap Statistics ####################
+
+
+#ifdef __STATISTICS__
+enum { CntTriples = 12 };								// number of counter groups below (2 call counters + 2 storage counters each)
+struct HeapStatistics {									// per-routine call and storage counters, also viewable as an array for iteration
+	enum { MALLOC, AALLOC, CALLOC, MEMALIGN, AMEMALIGN, CMEMALIGN, RESIZE, REALLOC };
+	union {
+		struct {										// named view: one group of four counters per routine/event
+			unsigned int malloc_calls, malloc_0_calls;
+			unsigned long long int malloc_storage_request, malloc_storage_alloc;
+			unsigned int aalloc_calls, aalloc_0_calls;
+			unsigned long long int aalloc_storage_request, aalloc_storage_alloc;
+			unsigned int calloc_calls, calloc_0_calls;
+			unsigned long long int calloc_storage_request, calloc_storage_alloc;
+			unsigned int memalign_calls, memalign_0_calls;
+			unsigned long long int memalign_storage_request, memalign_storage_alloc;
+			unsigned int amemalign_calls, amemalign_0_calls;
+			unsigned long long int amemalign_storage_request, amemalign_storage_alloc;
+			unsigned int cmemalign_calls, cmemalign_0_calls;
+			unsigned long long int cmemalign_storage_request, cmemalign_storage_alloc;
+			unsigned int resize_calls, resize_0_calls;
+			unsigned long long int resize_storage_request, resize_storage_alloc;
+			unsigned int realloc_calls, realloc_0_calls;
+			unsigned long long int realloc_storage_request, realloc_storage_alloc;
+			unsigned int free_calls, free_null_calls;
+			unsigned long long int free_storage_request, free_storage_alloc;
+			unsigned int away_pulls, away_pushes;
+			unsigned long long int away_storage_request, away_storage_alloc;
+			unsigned int mmap_calls, mmap_0_calls;		// no zero calls
+			unsigned long long int mmap_storage_request, mmap_storage_alloc;
+			unsigned int munmap_calls, munmap_0_calls;	// no zero calls
+			unsigned long long int munmap_storage_request, munmap_storage_alloc;
+		};
+		struct {										// overlay for iteration
+			unsigned int cnt1, cnt2;
+			unsigned long long int cnt3, cnt4;
+		} counters[CntTriples];
+	};
+
+	HeapStatistics() {									// zero every counter through the array overlay
+		for ( unsigned int i = 0; i < CntTriples; i += 1 ) {
+			counters[i].cnt1 = counters[i].cnt2 = counters[i].cnt3 = counters[i].cnt4 = 0;
+		} // for
+	} // HeapStatistics::HeapStatistics
+
+	friend HeapStatistics & operator+=( HeapStatistics & lhs, const HeapStatistics & rhs ) { // counter-wise accumulation of rhs into lhs
+		for ( unsigned int i = 0; i < CntTriples; i += 1 ) {
+			lhs.counters[i].cnt1 += rhs.counters[i].cnt1;
+			lhs.counters[i].cnt2 += rhs.counters[i].cnt2;
+			lhs.counters[i].cnt3 += rhs.counters[i].cnt3;
+			lhs.counters[i].cnt4 += rhs.counters[i].cnt4;
+		} // for
+		return lhs;
+	} // HeapStatistics::operator+=
+}; // HeapStatistics
+
+// Guards the overlay: the named counters must occupy exactly CntTriples array elements.
+static_assert( sizeof(HeapStatistics) == CntTriples * sizeof(HeapStatistics::counters[0] ),
+			   "Heap statistics counter-triplets does not match with array size" );
+#endif // __STATISTICS__
+
+
+//####################### Heap Structure ####################
+
+
+struct Heap {											// one heap manager: bucketed free lists plus links onto the heapMaster lists
+	struct FreeHeader;									// forward declaration
+
+	struct Storage {									// layout of an allocation: header, padding to __ALIGN__, then user data
+		struct Header {									// header
+			union Kind {								// real and fake headers overlay the same storage
+				struct RealHeader {
+					union {
+						struct {						// 4-byte word => 8-byte header, 8-byte word => 16-byte header
+							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
+							uint64_t padding;			// unused, force home/blocksize to overlay alignment in fake header
+							#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
+
+							union {
+								// 2nd low-order bit => zero filled, 3rd low-order bit => mmapped
+								FreeHeader * home;		// allocated block points back to home locations (must overlay alignment)
+								size_t blockSize;		// size for munmap (must overlay alignment)
+								Storage * next;			// freed block points to next freed block of same size
+							};
+							size_t size;				// allocation size in bytes
+
+							#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
+							uint64_t padding;			// unused, force home/blocksize to overlay alignment in fake header
+							#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
+						};
+					};
+				} real; // RealHeader
+				struct FakeHeader {
+					#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+					uint32_t alignment;					// 1st low-order bit => fake header & alignment
+					#endif // __ORDER_LITTLE_ENDIAN__
+
+					uint32_t offset;
+
+					#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+					uint32_t alignment;					// 1st low-order bit => fake header & alignment
+					#endif // __ORDER_BIG_ENDIAN__
+				} fake; // FakeHeader
+			} kind; // Kind
+		} header; // Header
+
+		char pad[__ALIGN__ - sizeof( Header )];			// pads the header out to __ALIGN__ bytes
+		char data[0];									// storage
+	}; // Storage
+
+	static_assert( __ALIGN__ >= sizeof( Storage ), "minimum alignment < sizeof( Storage )" );
+
+	struct FreeHeader {									// one bucket: free lists for a single block size
+		#ifdef AWAYSPIN
+		SpinLock_t awayLock;							// LOCK(S) MUST BE FIRST FIELD(S) FOR ALIGNMENT
+		#endif // AWAYSPIN
+
+		Storage * freeList;								// thread free list
+		Storage * awayList;								// other thread return list
+
+		Heap * homeManager;								// heap owner (free storage to bucket, from bucket to heap)
+		size_t blockSize;								// size of allocations on this list
+
+		bool operator<( const size_t bsize ) const { return blockSize < bsize; } // orders a bucket against a requested size
+	}; // FreeHeader
+
+	// Recursive definitions: HeapManager needs size of bucket array and bucket area needs sizeof HeapManager storage.
+	// Break recursion by hardcoding number of buckets and statically checking number is correct after bucket array defined.
+	enum {
+		#ifdef FASTLOOKUP
+		LookupSizes = 65'536 + sizeof(Storage),			// number of fast lookup sizes '
+		#endif // FASTLOOKUP
+		NoBucketSizes = 91,								// number of bucket sizes
+	}; // enum
+
+	FreeHeader freeLists[NoBucketSizes];				// buckets for different allocation sizes
+	void * heapBuffer;									// set to nullptr by heapManagerCtor
+	size_t heapReserve;									// set to 0 by heapManagerCtor
+
+	Heap * nextHeapManager;								// intrusive link of existing heaps; traversed to collect statistics
+	Heap * nextFreeHeapManager;							// intrusive link of free heaps from terminated threads; reused by new threads
+
+	#ifdef __DEBUG__
+	long long int allocUnfreed;							// running total of allocations minus frees; can be negative
+	#endif // __DEBUG__
+
+	#ifdef __STATISTICS__
+	HeapStatistics stats;								// local statistic table for this heap
+	#endif // __STATISTICS__
+
+	static void heapManagerCtor(						// initialize the calling thread's heap (parameter exists only in debug builds)
+		#ifdef __DEBUG__
+		size_t size
+		#endif // __DEBUG__
+	);
+	static void heapManagerDtor();						// invoked from ~ThreadManager
+}; // Heap
+
+
+struct ThreadManager {									// thread-local object whose destructor runs Heap::heapManagerDtor
+	int dummy;											// used to trigger allocation of storage
+	~ThreadManager() { Heap::heapManagerDtor(); }		// called automagically when thread terminates
+}; // ThreadManager
+
+
+struct HeapMaster {										// program-wide heap state shared by all per-thread heaps
+	SpinLock_t masterExtLock;							// protects allocation-buffer extension
+	SpinLock_t masterMgrLock;							// protects freeHeapManagersList, heapManagersList, heapManagersStorage, heapManagersStorageEnd
+
+	#ifdef FASTLOOKUP
+	unsigned char lookup[Heap::LookupSizes];			// O(1) lookup for small sizes
+	#endif // FASTLOOKUP
+
+	static const unsigned int bucketSizes[];			// initialized statically, outside constructor
+	void * heapBegin;									// start of heap
+	void * heapEnd;										// logical end of heap
+	size_t heapRemaining;								// amount of storage not allocated in the current chunk
+	size_t pageSize;									// architecture pagesize
+	size_t heapExpand;									// sbrk advance
+	size_t mmapStart;									// cross over point for mmap
+	unsigned int maxBucketsUsed;						// maximum number of buckets in use
+	static const off_t mmapFd;							// fake or actual fd for anonymous file. initialized statically, outside constructor
+
+	Heap * heapManagersList;							// heap-list head
+	Heap * freeHeapManagersList;						// free-list head
+
+	// Heap superblocks are not linked; heaps in superblocks are linked via intrusive links.
+	Heap * heapManagersStorage;							// next heap to use in heap superblock
+	Heap * heapManagersStorageEnd;						// logical heap outside of superblock's end
+
+	#ifdef __STATISTICS__
+	unsigned long int threads_started, threads_exited;  // counts threads that have started and exited
+	unsigned long int reused_heap, new_heap;  			// counts reusability of heaps
+	unsigned int sbrk_calls;
+	unsigned long long int sbrk_storage;
+	int stats_fd;										// set to STDERR_FILENO by heapMasterCtor
+	HeapStatistics stats;								// global stats for thread-local heaps to add their counters when exiting
+	#endif // __STATISTICS__
+
+	// Prevents two threads from constructing heapMaster.
+	static volatile bool heapMasterBootFlag;			// trigger for first heap
+
+	#ifdef __DEBUG__
+	long long int allocUnfreed;							// master-level unfreed total, combined with per-heap totals in heapAppStop
+	#endif // __DEBUG__
+
+	static void heapMasterCtor();
+	static void heapMasterDtor();
+}; // HeapMaster
+
+extern "C" {
+	int __map_prot = PROT_READ | PROT_WRITE | PROT_EXEC; 	// common mmap/mprotect protection
+	size_t __cfa_page_size;									// architecture pagesize, set from sysconf in heapMasterCtor
+}
+
+volatile bool HeapMaster::heapMasterBootFlag = false;	// set true at the end of heapMasterCtor
+static HeapMaster heapMaster;							// program global
+
+// Thread-local storage is allocated lazily when the storage is accessed.
+static thread_local size_t PAD1 CALIGN TLSMODEL __attribute__(( unused )); // protect false sharing
+static thread_local ThreadManager threadManager CALIGN TLSMODEL;
+// Do not put heapManager in ThreadManager because thread-local destructor results in extra access code.
+static thread_local Heap * heapManager CALIGN TLSMODEL;	// this thread's heap, assigned in Heap::heapManagerCtor
+static thread_local bool heapManagerBootFlag CALIGN TLSMODEL = false; // set true at the end of Heap::heapManagerCtor
+static thread_local size_t PAD2 CALIGN TLSMODEL __attribute__(( unused )); // protect further false sharing
+
+
+#ifdef __DEBUG__
+extern "C" {
+	void heapAppStart( void ) {							// reset the calling thread's unfreed-storage total
+		assert( heapManager );
+		heapManager->allocUnfreed = 0;
+	} // heapAppStart
+
+	void heapAppStop( void ) {							// warn on stderr when total unfreed storage is positive
+		long long int allocUnfreed = heapMaster.allocUnfreed;
+		for ( Heap * heap = heapMaster.heapManagersList; heap; heap = heap->nextHeapManager ) { // sum over every heap on the heap list
+			allocUnfreed += heap->allocUnfreed;
+		} // for
+
+		allocUnfreed -= malloc_unfreed();				// subtract the adjustment reported by malloc_unfreed (defined outside this view)
+		if ( allocUnfreed > 0 ) {
+			// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
+			char helpText[512];
+			int len = snprintf( helpText, sizeof(helpText), "Runtime warning (UNIX pid:%ld) : program terminating with %llu(0x%llx) bytes of storage allocated but not freed.\n"
+								"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
+								(long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid
+			NOWARNING( write( STDERR_FILENO, helpText, len ), unused-result );
+		} // if
+	} // heapAppStop
+} // extern "C"
+#endif // __DEBUG__
+
+
+// declare helper functions for HeapMaster
+void noMemory();										// forward, called by "builtin_new" when malloc returns 0
+
+void HeapMaster::heapMasterCtor() {						// singleton pattern: one-time initialization of heap master
+	__cfa_page_size = sysconf( _SC_PAGESIZE );
+
+	assert( heapMaster.mmapFd == -1 );
+	assert( heapMaster.bucketSizes[0] == (16 + sizeof(Heap::Storage)) );
+
+	heapMaster.masterExtLock = 0;
+	heapMaster.masterMgrLock = 0;
+
+	char * end = (char *)sbrk( 0 ), * start = (char *)Ceiling( (long unsigned int)end, __ALIGN__ ); // current break, and break rounded up to alignment
+	sbrk( start - end );								// move start of heap to multiple of alignment; sbrk returns the OLD break, so its result is not the heap start
+	heapMaster.heapBegin = heapMaster.heapEnd = start;	// heap begins at the aligned address
+	heapMaster.heapRemaining = 0;
+	heapMaster.heapExpand = malloc_expansion();
+	heapMaster.mmapStart = malloc_mmap_start();
+
+	// find the index of the first bucket size greater than or equal to the mmapStart size
+	heapMaster.maxBucketsUsed = std::lower_bound( heapMaster.bucketSizes, heapMaster.bucketSizes + (Heap::NoBucketSizes - 1), heapMaster.mmapStart ) - heapMaster.bucketSizes; // binary search
+
+	assert( (heapMaster.mmapStart >= __cfa_page_size) && (heapMaster.bucketSizes[Heap::NoBucketSizes - 1] >= heapMaster.mmapStart) );
+	assert( heapMaster.maxBucketsUsed < Heap::NoBucketSizes ); // subscript failure ?
+	assert( heapMaster.mmapStart <= heapMaster.bucketSizes[heapMaster.maxBucketsUsed] ); // search failure ?
+
+	heapMaster.heapManagersList = nullptr;
+	heapMaster.freeHeapManagersList = nullptr;
+
+	heapMaster.heapManagersStorage = nullptr;
+	heapMaster.heapManagersStorageEnd = nullptr;
+
+	#ifdef __STATISTICS__
+	heapMaster.threads_started = heapMaster.threads_exited = 0;
+	heapMaster.reused_heap = heapMaster.new_heap = 0;
+	heapMaster.sbrk_calls = heapMaster.sbrk_storage = 0;
+	heapMaster.stats_fd = STDERR_FILENO;
+	#endif // __STATISTICS__
+
+	#ifdef __DEBUG__
+	heapMaster.allocUnfreed = 0;
+	#endif // __DEBUG__
+
+	#ifdef FASTLOOKUP
+	for ( unsigned int i = 0, idx = 0; i < Heap::LookupSizes; i += 1 ) { // lookup[i] = index of smallest bucket whose size is >= i
+		if ( i > heapMaster.bucketSizes[idx] ) idx += 1;
+		heapMaster.lookup[i] = idx;
+		assert( i <= heapMaster.bucketSizes[idx] );
+		assert( (i <= 32 && idx == 0) || (i > heapMaster.bucketSizes[idx - 1]) );
+	} // for
+	#endif // FASTLOOKUP
+
+	std::set_new_handler( noMemory );					// do not throw exception as the default
+
+	HeapMaster::heapMasterBootFlag = true;				// checked by heapManagerCtor to skip this initialization
+} // HeapMaster::heapMasterCtor
+
+
+#define NO_MEMORY_MSG "insufficient heap memory available for allocating %zd new bytes."
+
+void Heap::heapManagerCtor(
+		#ifdef __DEBUG__
+		size_t size
+		#endif // __DEBUG__
+) {														// obtain (reuse or create) and initialize the calling thread's heap
+	if ( UNLIKELY( ! HeapMaster::heapMasterBootFlag ) ) HeapMaster::heapMasterCtor();
+
+	// Trigger thread_local storage implicit allocation (causes recursive call)
+	volatile int dummy __attribute__(( unused )) = threadManager.dummy;
+
+	spin_acquire( &heapMaster.masterMgrLock );			// protect heapMaster counters
+	// The atomic test-and-set instruction is a fence so heapManagerBootFlag is read after the magic recursive call to
+	// initialize thread-local storage. Hence, heapManagerBootFlag is NOT declared as volatile.
+  if ( heapManagerBootFlag ) {							// singleton
+		spin_release( &heapMaster.masterMgrLock );
+		return;											// always return on recursive initiation
+	} // if
+
+	assert( ! heapManagerBootFlag );
+
+	// get storage for heap manager
+
+	if ( heapMaster.freeHeapManagersList ) {			// free heap for reused ?
+		heapManager = heapMaster.freeHeapManagersList;
+		heapMaster.freeHeapManagersList = heapManager->nextFreeHeapManager;
+
+		#ifdef __STATISTICS__
+		heapMaster.reused_heap += 1;
+		#endif // __STATISTICS__
+	} else {											// free heap not found, create new
+		// Heap size is about 12K, FreeHeader (128 bytes because of cache alignment) * NoBucketSizes (91) => 128 heaps * 12K ~= 1.5M byte superblock.
+		// Where 128-heap superblock handles a medium sized multi-processor server.
+		enum { HeapDim = 128 };							// number of heaps in superblock
+		size_t remaining = heapMaster.heapManagersStorageEnd - heapMaster.heapManagersStorage; // remaining free heaps in superblock
+		if ( ! heapMaster.heapManagersStorage || remaining == 0 ) { // no superblock yet or superblock exhausted => map a new one
+			size_t size = HeapDim * sizeof( Heap );
+			heapMaster.heapManagersStorage = (Heap *)mmap( 0, size, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, heapMaster.mmapFd, 0 );
+			if ( UNLIKELY( heapMaster.heapManagersStorage == MAP_FAILED ) ) { // failed ?
+				if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, size ); // no memory
+				// Do not call strerror( errno ) as it may call malloc.
+				abort( "heapManagerCtor() : internal error, mmap failure, size:%zu error %d.",
+					   size, errno );
+			} // if
+			heapMaster.heapManagersStorageEnd = &heapMaster.heapManagersStorage[HeapDim]; // outside array
+		} // if
+
+		heapManager = heapMaster.heapManagersStorage;
+		heapMaster.heapManagersStorage = heapMaster.heapManagersStorage + 1; // bump next heap
+
+		heapManager->nextHeapManager = heapMaster.heapManagersList;
+		heapMaster.heapManagersList = heapManager;
+
+		#ifdef __STATISTICS__
+		heapMaster.new_heap += 1;
+		#endif // __STATISTICS__
+	} // if
+
+	#ifdef __STATISTICS__
+	heapMaster.threads_started += 1;
+	#endif // __STATISTICS__
+
+	#ifdef __DEBUG__
+	heapManager->allocUnfreed -= size;					// size here is the routine parameter (debug builds only)
+	#endif // __DEBUG__
+
+	spin_release( &heapMaster.masterMgrLock );
+
+	// NOTE(review): the reset below also runs for a heap reused from the free list, emptying its free lists -- confirm intended.
+	for ( unsigned int j = 0; j < Heap::NoBucketSizes; j += 1 ) { // initialize free lists
+		heapManager->freeLists[j] = (Heap::FreeHeader){
+			#ifdef AWAYSPIN
+			.awayLock = 0,
+			#endif // AWAYSPIN
+			.freeList = nullptr,
+			.awayList = nullptr,
+			.homeManager = heapManager,
+			.blockSize = heapMaster.bucketSizes[j],
+		};
+	} // for
+	heapManager->heapBuffer = nullptr;
+	heapManager->heapReserve = 0;
+	heapManager->nextFreeHeapManager = nullptr;
+	heapManagerBootFlag = true;
+} // Heap::heapManagerCtor
+
+
+// Detach the calling thread's heap manager and put it on heapMaster.freeHeapManagersList so a later thread can reuse
+// it. Does nothing if this thread never booted a heap (heapManagerBootFlag is false). Runs with interrupts disabled
+// and holds masterMgrLock while the free list and the exit counter are updated.
+void Heap::heapManagerDtor() {
+  if ( UNLIKELY( ! heapManagerBootFlag ) ) return;
+
+	DISABLE_INTERRUPTS;
+
+	spin_acquire( &heapMaster.masterMgrLock );
+
+	// place heap on list of free heaps for reusability
+	heapManager->nextFreeHeapManager = heapMaster.freeHeapManagersList;
+	heapMaster.freeHeapManagersList = heapManager;
+
+	// SKULLDUGGERY: The thread heap ends BEFORE the last free(s) occur from the thread-local storage allocations for
+	// the thread. These final frees must be handled in doFree for this thread and its terminated heap. However, this
+	// heap has just been put on the heap freelist, and hence there is a race between returning the thread-local
+	// storage and a new thread using this heap. The current thread detects it is executing its last free in doFree via
+	// heapManager being null. The trick is for this thread to place the last free onto the heap's away-list, as the
+	// free-storage header still points at this heap. Now, even if other threads are pushing to the away list, it is
+	// safe because of the locking.
+	heapManager = nullptr;
+
+	#ifdef __STATISTICS__
+	heapMaster.threads_exited += 1;
+	#endif // __STATISTICS__
+
+	spin_release( &heapMaster.masterMgrLock );
+
+	ENABLE_INTERRUPTS;
+} // Heap::heapManagerDtor
+
+
+// File descriptor argument handed to mmap; the mmap calls in this file combine it with MAP_ANONYMOUS.
+const off_t HeapMaster::mmapFd = -1;
+// Table of free-list bucket sizes in ascending order (it is searched with a binary search).
+// Bucket size must be multiple of 16.
+// Powers of 2 are common allocation sizes, so make powers of 2 generate the minimum required size: those entries add
+// sizeof(Heap::Storage) so that a power-of-2 request plus its header fits the bucket exactly.
+// The trailing comment on each row is the number of entries in that row.
+const unsigned int HeapMaster::bucketSizes[] = {		// different bucket sizes
+	16 + sizeof(Heap::Storage), 32 + sizeof(Heap::Storage), 48 + sizeof(Heap::Storage), 64 + sizeof(Heap::Storage), // 4
+	96 + sizeof(Heap::Storage), 112 + sizeof(Heap::Storage), 128 + sizeof(Heap::Storage), // 3
+	160, 192, 224, 256 + sizeof(Heap::Storage), // 4
+	320, 384, 448, 512 + sizeof(Heap::Storage), // 4
+	640, 768, 896, 1'024 + sizeof(Heap::Storage), // 4
+	1'536, 2'048 + sizeof(Heap::Storage), // 2
+	2'560, 3'072, 3'584, 4'096 + sizeof(Heap::Storage), // 4
+	6'144, 8'192 + sizeof(Heap::Storage), // 2
+	9'216, 10'240, 11'264, 12'288, 13'312, 14'336, 15'360, 16'384 + sizeof(Heap::Storage), // 8
+	18'432, 20'480, 22'528, 24'576, 26'624, 28'672, 30'720, 32'768 + sizeof(Heap::Storage), // 8
+	36'864, 40'960, 45'056, 49'152, 53'248, 57'344, 61'440, 65'536 + sizeof(Heap::Storage), // 8
+	73'728, 81'920, 90'112, 98'304, 106'496, 114'688, 122'880, 131'072 + sizeof(Heap::Storage), // 8
+	147'456, 163'840, 180'224, 196'608, 212'992, 229'376, 245'760, 262'144 + sizeof(Heap::Storage), // 8
+	294'912, 327'680, 360'448, 393'216, 425'984, 458'752, 491'520, 524'288 + sizeof(Heap::Storage), // 8
+	655'360, 786'432, 917'504, 1'048'576 + sizeof(Heap::Storage), // 4
+	1'179'648, 1'310'720, 1'441'792, 1'572'864, 1'703'936, 1'835'008, 1'966'080, 2'097'152 + sizeof(Heap::Storage), // 8
+	2'621'440, 3'145'728, 3'670'016, 4'194'304 + sizeof(Heap::Storage), // 4
+};
+
+// Compile-time check that the table above has exactly Heap::NoBucketSizes entries.
+static_assert( Heap::NoBucketSizes == sizeof(HeapMaster::bucketSizes) / sizeof(HeapMaster::bucketSizes[0]), "size of bucket array wrong" );
+
+
+//####################### Memory Allocation Routines' Helpers ####################
+
+
+#ifdef __STATISTICS__
+// Accumulate into "stats" the master's statistics plus those of every heap on heapMaster.heapManagersList, holding
+// masterMgrLock for the duration of the walk. Returns the same "stats" reference.
+static inline HeapStatistics & collectStats( HeapStatistics & stats ) {
+	spin_acquire( &heapMaster.masterMgrLock );
+
+	stats += heapMaster.stats;
+	Heap * heap = heapMaster.heapManagersList;
+	while ( heap ) {									// walk list of all heaps
+		stats += heap->stats;
+		heap = heap->nextHeapManager;
+	} // while
+
+	spin_release( &heapMaster.masterMgrLock );
+	return stats;
+} // collectStats
+
+// Format the heap statistics in "stats" (plus the master's sbrk/thread/heap counters) as text and write them to
+// heapMaster.stats_fd.
+// Use "write" because streams may be shutdown when calls are made.
+static void printStats( HeapStatistics & stats ) {
+	// Sized for the fixed text plus every counter at its maximum width with thousands separators; 1024 bytes is too
+	// small once the storage counters grow.
+	char helpText[2048];
+	int len = snprintf( helpText, sizeof(helpText),
+						"\nHeap statistics: (storage request / allocation)\n"
+						"  malloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+						"  aalloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+						"  calloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+						"  memalign  >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+						"  amemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+						"  cmemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+						"  resize    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+						"  realloc   >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
+						"  free      !null calls %'u; null calls %'u; storage %'llu / %'llu bytes\n"
+						"  away      pulls %'u; pushes %'u; storage %'llu / %'llu bytes\n"
+						"  sbrk      calls %'u; storage %'llu bytes\n"
+						"  mmap      calls %'u; storage %'llu / %'llu bytes\n"
+						"  munmap    calls %'u; storage %'llu / %'llu bytes\n"
+						"  threads   started %'lu; exited %'lu\n"
+						"  heaps     new %'lu; reused %'lu\n",
+						stats.malloc_calls, stats.malloc_0_calls, stats.malloc_storage_request, stats.malloc_storage_alloc,
+						stats.aalloc_calls, stats.aalloc_0_calls, stats.aalloc_storage_request, stats.aalloc_storage_alloc,
+						stats.calloc_calls, stats.calloc_0_calls, stats.calloc_storage_request, stats.calloc_storage_alloc,
+						stats.memalign_calls, stats.memalign_0_calls, stats.memalign_storage_request, stats.memalign_storage_alloc,
+						stats.amemalign_calls, stats.amemalign_0_calls, stats.amemalign_storage_request, stats.amemalign_storage_alloc,
+						stats.cmemalign_calls, stats.cmemalign_0_calls, stats.cmemalign_storage_request, stats.cmemalign_storage_alloc,
+						stats.resize_calls, stats.resize_0_calls, stats.resize_storage_request, stats.resize_storage_alloc,
+						stats.realloc_calls, stats.realloc_0_calls, stats.realloc_storage_request, stats.realloc_storage_alloc,
+						stats.free_calls, stats.free_null_calls, stats.free_storage_request, stats.free_storage_alloc,
+						stats.away_pulls, stats.away_pushes, stats.away_storage_request, stats.away_storage_alloc,
+						heapMaster.sbrk_calls, heapMaster.sbrk_storage,
+						stats.mmap_calls, stats.mmap_storage_request, stats.mmap_storage_alloc,
+						stats.munmap_calls, stats.munmap_storage_request, stats.munmap_storage_alloc,
+						heapMaster.threads_started, heapMaster.threads_exited,
+						heapMaster.new_heap, heapMaster.reused_heap
+		);
+  if ( len < 0 ) return;								// formatting error => nothing to print
+	// snprintf returns the length the full text WOULD have had; on truncation only sizeof(helpText) - 1 characters are
+	// in the buffer, so never ask write to read more than that.
+	if ( (size_t)len >= sizeof(helpText) ) len = sizeof(helpText) - 1;
+	NOWARNING( write( heapMaster.stats_fd, helpText, len ), unused-result );
+} // printStats
+
+
+// Format the heap statistics in "stats" (plus the master's sbrk/thread counters) in an XML-like layout and write them
+// to the file descriptor underlying "stream" using write.
+// Returns the number of characters handed to write, or the negative snprintf result if formatting failed.
+static int printStatsXML( HeapStatistics & stats, FILE * stream ) {
+	// Sized for the fixed text plus large counter values; 1024 bytes is too small once the storage counters grow.
+	char helpText[2048];
+	int len = snprintf( helpText, sizeof(helpText),
+						"<malloc version=\"1\">\n"
+						"<heap nr=\"0\">\n"
+						"<sizes>\n"
+						"</sizes>\n"
+						"<total type=\"malloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"aalloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"calloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"memalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"amemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"cmemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"resize\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"realloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"free\" !null=\"%'u;\" 0 null=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"away\" pulls=\"%'u;\" 0 pushes=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"sbrk\" count=\"%'u;\" size=\"%'llu\"/> bytes\n"
+						"<total type=\"mmap\" count=\"%'u;\" size=\"%'llu / %'llu\" / > bytes\n"
+						"<total type=\"munmap\" count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
+						"<total type=\"threads\" started=\"%'lu;\" exited=\"%'lu\"/>\n"
+						"</malloc>",
+						stats.malloc_calls, stats.malloc_0_calls, stats.malloc_storage_request, stats.malloc_storage_alloc,
+						stats.aalloc_calls, stats.aalloc_0_calls, stats.aalloc_storage_request, stats.aalloc_storage_alloc,
+						stats.calloc_calls, stats.calloc_0_calls, stats.calloc_storage_request, stats.calloc_storage_alloc,
+						stats.memalign_calls, stats.memalign_0_calls, stats.memalign_storage_request, stats.memalign_storage_alloc,
+						stats.amemalign_calls, stats.amemalign_0_calls, stats.amemalign_storage_request, stats.amemalign_storage_alloc,
+						stats.cmemalign_calls, stats.cmemalign_0_calls, stats.cmemalign_storage_request, stats.cmemalign_storage_alloc,
+						stats.resize_calls, stats.resize_0_calls, stats.resize_storage_request, stats.resize_storage_alloc,
+						stats.realloc_calls, stats.realloc_0_calls, stats.realloc_storage_request, stats.realloc_storage_alloc,
+						stats.free_calls, stats.free_null_calls, stats.free_storage_request, stats.free_storage_alloc,
+						stats.away_pulls, stats.away_pushes, stats.away_storage_request, stats.away_storage_alloc,
+						heapMaster.sbrk_calls, heapMaster.sbrk_storage,
+						stats.mmap_calls, stats.mmap_storage_request, stats.mmap_storage_alloc,
+						stats.munmap_calls, stats.munmap_storage_request, stats.munmap_storage_alloc,
+						heapMaster.threads_started, heapMaster.threads_exited
+		);
+  if ( len < 0 ) return len;							// formatting error => nothing to print
+	// snprintf returns the length the full text WOULD have had; on truncation only sizeof(helpText) - 1 characters are
+	// in the buffer, so never ask write to read more than that.
+	if ( (size_t)len >= sizeof(helpText) ) len = sizeof(helpText) - 1;
+	NOWARNING( write( fileno(stream), helpText, len ), unused-result );
+	return len;
+} // printStatsXML
+#endif // __STATISTICS__
+
+
+// Abort the program with a message reporting how far the program break (sbrk( 0 )) is from heapMaster.heapBegin.
+inline void noMemory() {
+	// The pointer difference is a ptrdiff_t; convert it explicitly to match the %zu conversion.
+	abort( "Heap memory exhausted at %zu bytes.\n"
+		   "Possible cause is very large memory allocation and/or large amount of unfreed storage allocated by the program or system/library routines.",
+		   (size_t)((char *)(sbrk( 0 )) - (char *)(heapMaster.heapBegin)) );
+} // noMemory
+
+
+// Set the request size at which allocation switches from the bucket free lists to mmap.
+// Returns false, changing nothing, when "value" is below __cfa_page_size or above the largest bucket size; otherwise
+// stores it in heapMaster.mmapStart, recomputes heapMaster.maxBucketsUsed, and returns true.
+static bool setMmapStart( size_t value ) {
+	const bool inRange = __cfa_page_size <= value && value <= heapMaster.bucketSizes[Heap::NoBucketSizes - 1];
+  if ( ! inRange ) return false;
+	heapMaster.mmapStart = value;						// set global
+
+	// find the index of the first bucket whose size is greater than or equal to the mmapStart size
+	const unsigned int * first = heapMaster.bucketSizes;
+	const unsigned int * last = first + (Heap::NoBucketSizes - 1);
+	heapMaster.maxBucketsUsed = std::lower_bound( first, last, heapMaster.mmapStart ) - first; // binary search
+	assert( heapMaster.maxBucketsUsed < Heap::NoBucketSizes ); // subscript failure ?
+	assert( heapMaster.mmapStart <= heapMaster.bucketSizes[heapMaster.maxBucketsUsed] ); // search failure ?
+	return true;
+} // setMmapStart
+
+// <-------+----------------------------------------------------> bsize (bucket size)
+// |header |addr
+//==================================================================================
+//                   align/offset |
+// <-----------------<------------+-----------------------------> bsize (bucket size)
+//                   |fake-header | addr
+// headerAddr: address of the header located sizeof(Heap::Storage) bytes before a user address.
+// realHeader: step back from a fake header to the real header using the offset stored in the fake header.
+// Macro arguments are parenthesized so that any expression can be passed safely.
+#define headerAddr( addr ) ((Heap::Storage::Header *)( (char *)(addr) - sizeof(Heap::Storage) ))
+#define realHeader( header ) ((Heap::Storage::Header *)((char *)(header) - (header)->kind.fake.offset))
+
+// <-------<<--------------------- dsize ---------------------->> bsize (bucket size)
+// |header |addr
+//==================================================================================
+//                   align/offset |
+// <------------------------------<<---------- dsize --------->>> bsize (bucket size)
+//                   |fake-header |addr
+// dataStorage: bytes available to the user in a bucket of bsize bytes, i.e. bsize less the distance from the real
+// header to the user address.
+#define dataStorage( bsize, addr, header ) ((bsize) - ( (char *)(addr) - (char *)(header) ))
+
+
+// Abort unless "alignment" is at least __ALIGN__ and Pow2( alignment ) holds.
+static inline void checkAlign( size_t alignment ) {
+	if ( UNLIKELY( ! ( alignment >= __ALIGN__ && Pow2( alignment ) ) ) ) {
+		abort( "Alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, __ALIGN__ );
+	} // if
+} // checkAlign
+
+
+// Abort with a bad-address message naming the operation ("name") and the user address ("addr") when "check" is true;
+// otherwise do nothing.
+static inline void checkHeader( bool check, const char name[], void * addr ) {
+  if ( ! UNLIKELY( check ) ) return;					// address is fine
+
+	abort( "Attempt to %s storage %p with address outside the heap.\n"
+		   "Possible cause is duplicate free on same block or overwriting of memory.",
+		   name, addr );
+} // checkHeader
+
+
+// Determine whether "header" is a fake header (low bit of kind.fake.alignment set). If so, return the stored
+// alignment (flag removed) in "alignment" and move "header" back to the real header; otherwise report the default
+// alignment __ALIGN__ and leave "header" untouched.
+static inline void fakeHeader( Heap::Storage::Header *& header, size_t & alignment ) {
+	const bool fake = (header->kind.fake.alignment & 1) == 1;
+
+	if ( UNLIKELY( fake ) ) {							// fake header ?
+		alignment = header->kind.fake.alignment & -2;	// strip the flag bit
+		#ifdef __DEBUG__
+		checkAlign( alignment );						// sanity check stored alignment
+		#endif // __DEBUG__
+		header = realHeader( header );					// step back to the real header
+	} else {
+		alignment = __ALIGN__;							// ordinary block, default alignment
+	} // if
+} // fakeHeader
+
+
+// Decode the administrative information stored in front of user address "addr".
+//   name      - operation name used in debug abort messages (unused otherwise)
+//   header    - out: the real header of the block (after stepping over a fake header, if any)
+//   freeHead  - out: free-list header of the block's bucket; ONLY set for non-mmapped storage
+//   size      - out: mmap size for mmapped storage, otherwise the bucket's block size
+//   alignment - out: alignment recorded in a fake header, or __ALIGN__ when there is none
+// Returns true when addr lies outside [heapMaster.heapBegin, heapMaster.heapEnd], which is treated as mmapped
+// storage, and false otherwise.
+static inline bool headers( const char name[] __attribute__(( unused )), void * addr, Heap::Storage::Header *& header, Heap::FreeHeader *& freeHead, size_t & size, size_t & alignment ) {
+	header = headerAddr( addr );
+
+  if ( UNLIKELY( addr < heapMaster.heapBegin || heapMaster.heapEnd < addr ) ) { // mmapped ?
+		fakeHeader( header, alignment );
+		size = header->kind.real.blockSize & -3;		// mmap size (mask off the bit with value 2, the zero-fill mark set by calloc)
+		return true;
+	} // if
+
+	#ifdef __DEBUG__
+	checkHeader( header < heapMaster.heapBegin, name, addr ); // bad low address ?
+	#endif // __DEBUG__
+
+	// header may be safe to dereference
+	fakeHeader( header, alignment );
+	#ifdef __DEBUG__
+	checkHeader( header < heapMaster.heapBegin || heapMaster.heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
+	#endif // __DEBUG__
+
+	// the home field shares the bit with value 2 as a mark, so mask it off to recover the free-list pointer
+	freeHead = (Heap::FreeHeader *)((size_t)header->kind.real.home & -3);
+	#ifdef __DEBUG__
+	// the free-list header must lie inside the freeLists array of the heap it claims as its home
+	Heap * homeManager = freeHead->homeManager;
+	if ( UNLIKELY( freeHead < &homeManager->freeLists[0] || &homeManager->freeLists[Heap::NoBucketSizes - 1] < freeHead ) ) {
+		abort( "Attempt to %s storage %p with corrupted header.\n"
+			   "Possible cause is duplicate free on same block or overwriting of header information.",
+			   name, addr );
+	} // if
+	#endif // __DEBUG__
+	size = freeHead->blockSize;
+	return false;
+} // headers
+
+
+// Carve "size" bytes off the end of the master heap area and return their starting address (the previous
+// heapMaster.heapEnd). When fewer than "size" bytes remain, the program break is first moved with sbrk by the larger
+// of "size" and heapMaster.heapExpand, rounded up with Ceiling to __ALIGN__; the program aborts if sbrk fails.
+// The whole operation is performed while holding heapMaster.masterExtLock.
+static inline void * master_extend( size_t size ) {
+	spin_acquire( &heapMaster.masterExtLock );
+
+	ptrdiff_t rem = heapMaster.heapRemaining - size;
+	if ( UNLIKELY( rem < 0 ) ) {
+		// If the size requested is bigger than the current remaining storage, increase the size of the heap.
+
+		size_t increase = Ceiling( size > heapMaster.heapExpand ? size : heapMaster.heapExpand, __ALIGN__ );
+		if ( UNLIKELY( sbrk( increase ) == (void *)-1 ) ) {	// failed, no memory ?
+			spin_release( &heapMaster.masterExtLock );
+			abort( NO_MEMORY_MSG, size );				// give up
+		} // if
+		#ifdef __STATISTICS__
+		heapMaster.sbrk_calls += 1;
+		heapMaster.sbrk_storage += increase;
+		#endif // __STATISTICS__
+		rem = heapMaster.heapRemaining + increase - size; // old remainder plus new storage, less this request
+	} // if
+
+	Heap::Storage * block = (Heap::Storage *)heapMaster.heapEnd; // request starts at the current end of the heap
+	heapMaster.heapRemaining = rem;
+	heapMaster.heapEnd = (char *)heapMaster.heapEnd + size;
+
+	spin_release( &heapMaster.masterExtLock );
+	return block;
+} // master_extend
+
+
+// Carve "size" bytes out of the current heap manager's private reserve (heapBuffer/heapReserve) and return their
+// starting address. When the reserve is too small, what is left of it (if at least the smallest bucket size) is
+// pushed onto a free list of this heap, and a new reserve of the larger of "size" and heapMaster.heapExpand / 10
+// (rounded up with Ceiling to __ALIGN__) is obtained from master_extend.
+// No lock is taken here; only the calling thread's heapManager is modified.
+static inline void * manager_extend( size_t size ) {
+	ptrdiff_t rem = heapManager->heapReserve - size;
+
+	if ( UNLIKELY( rem < 0 ) ) {						// negative
+		// If the size requested is bigger than the current remaining reserve, use the current reserve to populate
+		// smaller freeLists, and increase the reserve.
+
+		rem = heapManager->heapReserve;					// positive
+
+		if ( rem >= heapMaster.bucketSizes[0] ) {		// leftover big enough for the smallest bucket ?
+			Heap::FreeHeader * freeHead =
+			#ifdef FASTLOOKUP
+				rem < Heap::LookupSizes ? &(heapManager->freeLists[heapMaster.lookup[rem]]) :
+			#endif // FASTLOOKUP
+			std::lower_bound( heapManager->freeLists, heapManager->freeLists + heapMaster.maxBucketsUsed, rem ); // binary search
+
+			// the search yields a bucket >= rem; step down one bucket when the leftover does not fill it
+			if ( UNLIKELY( freeHead->blockSize > (size_t)rem ) ) freeHead -= 1;
+			Heap::Storage * block = (Heap::Storage *)heapManager->heapBuffer;
+
+			block->header.kind.real.next = freeHead->freeList;	// push on stack
+			freeHead->freeList = block;
+		} // if
+
+		size_t increase = Ceiling( size > ( heapMaster.heapExpand / 10 ) ? size : ( heapMaster.heapExpand / 10 ), __ALIGN__ );
+		heapManager->heapBuffer = master_extend(increase);
+		rem = increase - size;
+	} // if
+
+	Heap::Storage * block = (Heap::Storage *)heapManager->heapBuffer; // request starts at the front of the reserve
+	heapManager->heapReserve = rem;
+	heapManager->heapBuffer = (char *)heapManager->heapBuffer + size;
+
+	return block;
+} // manager_extend
+
+
+// Allocate storage for a request of "size" bytes and return the address of the user data.
+//   * size + header below heapMaster.mmapStart: take a block from the matching bucket free list of the current heap
+//     manager; if that list is empty, take over the bucket's away list; if that is empty too, carve a new block with
+//     manager_extend.
+//   * otherwise: mmap a region rounded up to a multiple of __cfa_page_size (abort if mmap fails).
+// Returns nullptr only for a request larger than ULONG_MAX - __cfa_page_size on the mmap path.
+// "counter" (statistics builds only) selects which entry of stats.counters is updated.
+// NOTE(review): callers in this file disable interrupts and boot heapManager before calling - confirm for any new caller.
+static inline void * doMalloc( size_t size
+							  #ifdef __STATISTICS__
+							  , unsigned int counter
+							  #endif // __STATISTICS__
+		) {
+	 Heap::Storage * block;
+
+	// Look up size in the size list.  Make sure the user request includes space for the header that must be allocated
+	// along with the block and is a multiple of the alignment size.
+	size_t tsize = size + sizeof(Heap::Storage);
+
+	#ifdef __STATISTICS__
+	heapManager->stats.counters[counter].cnt1 += 1;
+	heapManager->stats.counters[counter].cnt3 += size;
+	#endif // __STATISTICS__
+
+	if ( LIKELY( tsize < heapMaster.mmapStart ) ) {		// small size => sbrk
+		Heap::FreeHeader * freeHead =
+			#ifdef FASTLOOKUP
+			LIKELY( tsize < Heap::LookupSizes ) ? &(heapManager->freeLists[heapMaster.lookup[tsize]]) :
+			#endif // FASTLOOKUP
+			std::lower_bound( heapManager->freeLists, heapManager->freeLists + heapMaster.maxBucketsUsed, tsize ); // binary search
+
+		assert( freeHead <= &heapManager->freeLists[heapMaster.maxBucketsUsed] ); // subscripting error ?
+		assert( tsize <= freeHead->blockSize );			// search failure ?
+		tsize = freeHead->blockSize;					// total space needed for request
+		#ifdef __STATISTICS__
+		heapManager->stats.counters[counter].cnt4 += tsize;
+		#endif // __STATISTICS__
+
+		block = freeHead->freeList;						// remove node from stack
+		if ( UNLIKELY( block == nullptr ) ) {			// no free block ?
+			// Freelist for that size is empty, so carve it out of the heap, if there is enough left, or get some more
+			// and then carve it off.
+
+			// First take over the whole away list (blocks pushed onto this bucket by doFree's away path).
+			#ifdef AWAYSPIN
+			spin_acquire( &freeHead->awayLock );
+			block = freeHead->awayList;
+			freeHead->awayList = nullptr;
+			spin_release( &freeHead->awayLock );
+			#else
+			block = __atomic_exchange_n( &freeHead->awayList, nullptr, __ATOMIC_SEQ_CST );
+			#endif // AWAYSPIN
+			if ( LIKELY( block == nullptr ) ) {			// away list also empty?
+				block = (Heap::Storage *)manager_extend( tsize ); // mutual exclusion on call
+			} else {									// merge awayList into freeHead
+				#ifdef __STATISTICS__
+				heapManager->stats.away_pulls += 1;
+				#endif // __STATISTICS__
+				freeHead->freeList = block->header.kind.real.next; // first block is returned, rest become the free list
+			} // if
+		} else {
+			freeHead->freeList = block->header.kind.real.next;
+		} // if
+
+		block->header.kind.real.home = freeHead;		// pointer back to free list of appropriate size
+	} else {											// large size => mmap
+  if ( UNLIKELY( size > ULONG_MAX - __cfa_page_size ) ) return nullptr; // error check
+		tsize = Ceiling( tsize, __cfa_page_size );	// must be multiple of page size
+		#ifdef __STATISTICS__
+		heapManager->stats.counters[counter].cnt4 += tsize;
+		heapManager->stats.mmap_calls += 1;
+		heapManager->stats.mmap_storage_request += size;
+		heapManager->stats.mmap_storage_alloc += tsize;
+		#endif // __STATISTICS__
+
+		block = (Heap::Storage *)::mmap( 0, tsize, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, heapMaster.mmapFd, 0 );
+		if ( UNLIKELY( block == MAP_FAILED ) ) {		// failed ?
+			if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, tsize ); // no memory
+			// Do not call strerror( errno ) as it may call malloc.
+			abort( "(Heap &)0x%p.doMalloc() : internal error, mmap failure, size:%zu %lu %lu error %d.",
+				   &heapManager, tsize, size, heapMaster.mmapStart, errno );
+		} // if
+		block->header.kind.real.blockSize = tsize;		// storage size for munmap
+	} // if
+
+	block->header.kind.real.size = size;				// store allocation size
+	void * addr = &(block->data);						// adjust off header to user bytes
+	assert( ((uintptr_t)addr & (__ALIGN__ - 1)) == 0 ); // minimum alignment ?
+
+	#ifdef __DEBUG__
+	heapManager->allocUnfreed += size;
+	#endif // __DEBUG__
+
+	return addr;
+} // doMalloc
+
+
+// Release the storage at user address "addr".
+//   * mmapped storage is returned to the system with munmap (abort on failure);
+//   * a block owned by the calling thread's heap is pushed onto its bucket free list;
+//   * any other block is pushed onto the away list of its owner's bucket. This includes the frees a thread performs
+//     after heapManagerDtor has set heapManager to null.
+// Runs with interrupts disabled and boots a heap manager for the thread if none has been created yet.
+static inline void doFree( void * addr ) {
+	DISABLE_INTERRUPTS;
+
+	if ( UNLIKELY( ! heapManagerBootFlag ) ) Heap::heapManagerCtor( // trigger for first heap
+		#ifdef __DEBUG__
+		0
+		#endif // __DEBUG__
+		);
+
+	Heap::Storage::Header * header;
+	Heap::FreeHeader * freeHead;
+	size_t size, alignment;								// not used (see realloc)
+
+	bool mapped = headers( "free", addr, header, freeHead, size, alignment );
+
+	// Capture the request size while the header is still owned by this thread: after munmap the header is no longer
+	// mapped, and after an away push the owning thread may already have reallocated the block.
+	size_t rsize __attribute__(( unused )) = header->kind.real.size;
+
+	if ( UNLIKELY( mapped ) ) {						// mmapped ?
+		// NOTE(review): heapManager is dereferenced by the statistics/debug code on this path; confirm mmapped
+		// storage cannot be freed by a thread after heapManagerDtor has set heapManager to null.
+		#ifdef __STATISTICS__
+		heapManager->stats.munmap_calls += 1;
+		heapManager->stats.munmap_storage_request += rsize;
+		heapManager->stats.munmap_storage_alloc += size;
+		#endif // __STATISTICS__
+		if ( UNLIKELY( munmap( header, size ) == -1 ) ) {
+			abort( "Attempt to deallocate storage %p not allocated or with corrupt header.\n"
+				   "Possible cause is invalid pointer.",
+				   addr );
+		} // if
+	} else {
+		if ( LIKELY( heapManager == freeHead->homeManager ) ) {	// belongs to this thread
+			header->kind.real.next = freeHead->freeList; // push on stack
+			freeHead->freeList = (Heap::Storage *)header;
+		} else {										// return to thread owner
+			#ifdef AWAYSPIN
+			spin_acquire( &freeHead->awayLock );
+			header->kind.real.next = freeHead->awayList; // push to bucket away list
+			freeHead->awayList = (Heap::Storage *)header;
+			spin_release( &freeHead->awayLock );
+			#else										// lock free
+			header->kind.real.next = freeHead->awayList; // link new node to top node
+			// CAS resets header->kind.real.next = freeHead->awayList on failure
+			while ( ! __atomic_compare_exchange_n( &freeHead->awayList, &header->kind.real.next, (Heap::Storage *)header,
+												   false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST ) );
+			#endif // AWAYSPIN
+
+			// detect free after thread-local storage destruction and use global stats in that case
+			if ( UNLIKELY( heapManager == nullptr ) ) {
+				#ifdef __STATISTICS__
+				AtomicFetchAdd( heapMaster.stats.free_storage_request, rsize );
+				AtomicFetchAdd( heapMaster.stats.free_storage_alloc, size );
+				#endif // __STATISTICS__
+				// away push counters are not incremented because this is a self-away push, and there is no
+				// corresponding pull counter that needs to match.
+				ENABLE_INTERRUPTS;
+				return;
+			} // if
+
+			#ifdef __STATISTICS__
+			heapManager->stats.away_pushes += 1;
+			heapManager->stats.away_storage_request += rsize;
+			heapManager->stats.away_storage_alloc += size;
+			#endif // __STATISTICS__
+		} // if
+	} // if
+
+	#ifdef __STATISTICS__
+	heapManager->stats.free_storage_request += rsize;
+	heapManager->stats.free_storage_alloc += size;
+	#endif // __STATISTICS__
+
+	#ifdef __DEBUG__
+	heapManager->allocUnfreed -= rsize;
+	#endif // __DEBUG__
+
+	ENABLE_INTERRUPTS;
+} // doFree
+
+
+// Common allocation entry point: boot the thread's heap manager if necessary, reject degenerate sizes, and delegate
+// to doMalloc, all with interrupts disabled.
+// Returns nullptr for a 0-byte request and for a request so large that adding the header would overflow.
+// "counter" (statistics builds only) selects which entry of stats.counters is updated.
+static inline void * mallocNoStats( size_t size
+									#ifdef __STATISTICS__
+									, unsigned int counter
+									#endif // __STATISTICS__
+		) {
+	DISABLE_INTERRUPTS;
+
+	if ( UNLIKELY( ! heapManagerBootFlag ) ) Heap::heapManagerCtor( // trigger for first heap
+		#ifdef __DEBUG__
+		size
+		#endif // __DEBUG__
+		);
+
+	// The branch hint must wrap the whole comparison: UNLIKELY( size ) == 0 tells the compiler a non-zero size is the
+	// unlikely case.
+  if ( UNLIKELY( size == 0 ) ||							// 0 BYTE ALLOCATION RETURNS NULL POINTER
+	   UNLIKELY( size > ULONG_MAX - sizeof(Heap::Storage) ) ) { // error check
+		#ifdef __STATISTICS__
+		heapManager->stats.counters[counter].cnt2 += 1;
+		#endif // __STATISTICS__
+		ENABLE_INTERRUPTS;
+		return nullptr;
+	} // if
+
+	void * addr = doMalloc( size
+							#ifdef __STATISTICS__
+							, counter
+							#endif // __STATISTICS__
+		);
+
+	ENABLE_INTERRUPTS;
+	return addr;
+} // mallocNoStats
+
+
+// Common aligned-allocation entry point: return "size" bytes whose address is a multiple of "alignment".
+// Returns nullptr for a 0-byte request, for a request whose padded size would overflow, and when doMalloc returns
+// nullptr. For alignment <= __ALIGN__ this is an ordinary doMalloc. Otherwise extra storage is allocated and a fake
+// header, recording the alignment and the offset back to the real header, is placed immediately before the aligned
+// user address. Debug builds validate "alignment" with checkAlign.
+// "counter" (statistics builds only) selects which entry of stats.counters is updated.
+static inline void * memalignNoStats( size_t alignment, size_t size
+									  #ifdef __STATISTICS__
+									  , unsigned int counter
+									  #endif // __STATISTICS__
+		) {
+	DISABLE_INTERRUPTS;
+
+	if ( UNLIKELY( ! heapManagerBootFlag ) ) Heap::heapManagerCtor( // trigger for first heap
+		#ifdef __DEBUG__
+		size
+		#endif // __DEBUG__
+		);
+
+	// The branch hint must wrap the whole comparison: UNLIKELY( size ) == 0 tells the compiler a non-zero size is the
+	// unlikely case.
+  if ( UNLIKELY( size == 0 ) ||							// 0 BYTE ALLOCATION RETURNS NULL POINTER
+	   UNLIKELY( size > ULONG_MAX - sizeof(Heap::Storage) ) ) { // error check
+		#ifdef __STATISTICS__
+		heapManager->stats.counters[counter].cnt2 += 1;
+		#endif // __STATISTICS__
+
+		ENABLE_INTERRUPTS;
+		return nullptr;
+	} // if
+
+	#ifdef __DEBUG__
+	checkAlign( alignment );							// check alignment
+	#endif // __DEBUG__
+
+	// if alignment <= default alignment, do normal malloc as two headers are unnecessary
+  if ( UNLIKELY( alignment <= __ALIGN__ ) ) {
+		void * addr = doMalloc( size
+			#ifdef __STATISTICS__
+			, counter
+			#endif // __STATISTICS__
+		);
+
+		ENABLE_INTERRUPTS;
+		return addr;
+  }
+
+
+	// Allocate enough storage to guarantee an address on the alignment boundary, and sufficient space before it for
+	// administrative storage. NOTE, WHILE THERE ARE 2 HEADERS, THE FIRST ONE IS IMPLICITLY CREATED BY DOMALLOC.
+	//      .-------------v-----------------v----------------v----------,
+	//      | Real Header | ... padding ... |   Fake Header  | data ... |
+	//      `-------------^-----------------^-+--------------^----------'
+	//      |<--------------------------------' offset/align |<-- alignment boundary
+
+	// subtract __ALIGN__ because it is already the minimum alignment
+	// add sizeof(Heap::Storage) for fake header
+	size_t tsize = size + alignment - __ALIGN__ + sizeof(Heap::Storage);
+	// alignment > __ALIGN__ here, so the padding is positive and a result smaller than size means the sum wrapped
+  if ( UNLIKELY( tsize < size ) ) {						// padded size overflows ?
+		ENABLE_INTERRUPTS;
+		return nullptr;
+	} // if
+
+	char * addr = (char *)doMalloc( tsize
+									#ifdef __STATISTICS__
+									, counter
+									#endif // __STATISTICS__
+		);
+
+  if ( UNLIKELY( addr == nullptr ) ) {					// doMalloc rejected the request ?
+		ENABLE_INTERRUPTS;
+		return nullptr;									// do not build headers at a null address
+	} // if
+
+	// address in the block of the "next" alignment address
+	char * user = (char *)Ceiling( (uintptr_t)(addr + sizeof(Heap::Storage)), alignment );
+
+	// address of header from malloc
+	Heap::Storage::Header * realHeader = headerAddr( addr );
+	realHeader->kind.real.size = size;					// correct size to eliminate above alignment offset
+	// address of fake header * before* the alignment location
+	Heap::Storage::Header * fakeHeader = headerAddr( user );
+	// SKULLDUGGERY: insert the offset to the start of the actual storage block and remember alignment
+	fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)realHeader;
+	// SKULLDUGGERY: odd alignment implies fake header
+	fakeHeader->kind.fake.alignment = alignment | 1;
+
+	ENABLE_INTERRUPTS;
+	return user;
+} // memalignNoStats
+
+// Operators new and new [] call malloc; delete calls free
+
+
+//####################### Memory Allocation Routines ####################
+
+
+extern "C" {
+	// Allocates size bytes and returns a pointer to the allocated memory.  The contents are undefined. If size is 0,
+	// then malloc() returns a unique pointer value that can later be successfully passed to free().
+	void * malloc( size_t size ) {
+		return mallocNoStats( size
+							  #ifdef __STATISTICS__
+							  , HeapStatistics::MALLOC
+							  #endif // __STATISTICS__
+			);
+	} // malloc
+
+
+	// Same as malloc() except size bytes is an array of dim elements each of elemSize bytes.
+	// Returns nullptr when dim * elemSize is not representable in a size_t, rather than allocating a block that is
+	// smaller than the caller asked for.
+	void * aalloc( size_t dim, size_t elemSize ) {
+		size_t size;
+	  if ( UNLIKELY( __builtin_mul_overflow( dim, elemSize, &size ) ) ) return nullptr; // array size overflows ?
+
+		return mallocNoStats( size
+							  #ifdef __STATISTICS__
+							  , HeapStatistics::AALLOC
+							  #endif // __STATISTICS__
+			);
+	} // aalloc
+
+
+	// Same as aalloc() with memory set to zero.
+	// Returns nullptr when dim * elemSize is not representable in a size_t, or when the underlying allocation returns
+	// nullptr. On success the block's header is marked as zero filled (bit with value 2 of blockSize).
+	void * calloc( size_t dim, size_t elemSize ) {
+		size_t size;
+	  if ( UNLIKELY( __builtin_mul_overflow( dim, elemSize, &size ) ) ) return nullptr; // array size overflows ?
+
+		char * addr = (char *)mallocNoStats( size
+											 #ifdef __STATISTICS__
+											 , HeapStatistics::CALLOC
+											 #endif // __STATISTICS__
+			);
+
+		if ( UNLIKELY( addr == NULL ) ) return NULL; // stop further processing if 0p is returned
+
+		Heap::Storage::Header * header;
+		Heap::FreeHeader * freeHead;
+		size_t bsize, alignment;
+
+		#ifndef __DEBUG__
+		bool mapped =
+		#endif // __DEBUG__
+			headers( "calloc", addr, header, freeHead, bsize, alignment );
+
+		#ifndef __DEBUG__
+		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
+		if ( LIKELY( ! mapped ) )
+		#endif // __DEBUG__
+			// <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined
+			// `-header`-addr                      `-size
+			memset( addr, '\0', size );					// set to zeros
+
+		header->kind.real.blockSize |= 2;				// mark as zero filled
+		return addr;
+	} // calloc
+
+
+	// Change the size of the memory block pointed to by oaddr to size bytes. The contents are undefined.  If oaddr is
+	// nullptr, then the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and oaddr is
+	// not nullptr, then the call is equivalent to free(oaddr) and nullptr is returned. Unless oaddr is nullptr, it must
+	// have been returned by an earlier call to one of the allocation routines in this file. The existing block is kept
+	// when it has default alignment and is between size and 2 * size bytes; otherwise oaddr is freed and a new block is
+	// allocated. Sticky properties (alignment, zero fill) are NOT preserved.
+	void * resize( void * oaddr, size_t size ) {
+	  if ( UNLIKELY( oaddr == nullptr ) ) {				// special cases
+			return mallocNoStats( size
+								  #ifdef __STATISTICS__
+								  , HeapStatistics::RESIZE
+								  #endif // __STATISTICS__
+				);
+		} // if
+
+		// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
+	  if ( UNLIKELY( size == 0 ) ) {					// special cases
+			#ifdef __STATISTICS__
+			heapManager->stats.resize_0_calls += 1;
+			#endif // __STATISTICS__
+			doFree( oaddr );							// free previous storage
+			return nullptr;
+		} // if
+
+		Heap::Storage::Header * header;
+		Heap::FreeHeader * freeHead;
+		size_t bsize, oalign;
+		headers( "resize", oaddr, header, freeHead, bsize, oalign );
+
+		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+		// same size, DO NOT preserve STICKY PROPERTIES.
+		if ( oalign == __ALIGN__ && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
+			#ifdef __STATISTICS__
+			heapManager->stats.resize_calls += 1;
+			#endif // __STATISTICS__
+			// Clear both low mark bits: bit value 1 (fake-header/alignment flag) and bit value 2 (zero-fill mark set
+			// by calloc). Masking with -2 clears only the former and leaves the block marked as zero filled.
+			header->kind.real.blockSize &= -4;			// no alignment and turn off 0 fill
+			header->kind.real.size = size;				// reset allocation size
+			return oaddr;
+		} // if
+
+		// change size, DO NOT preserve STICKY PROPERTIES.
+		doFree( oaddr );								// free previous storage
+		return mallocNoStats( size						// create new area
+							  #ifdef __STATISTICS__
+							  , HeapStatistics::RESIZE
+							  #endif // __STATISTICS__
+			);
+	} // resize
+
+
+	// Same as resize() but the contents are unchanged in the range from the start of the region up to the minimum of
+	// the old and new sizes.
+	void * realloc( void * oaddr, size_t size ) {
+	  if ( UNLIKELY( oaddr == nullptr ) ) {				// no old storage => plain allocation of size bytes
+			return mallocNoStats( size
+								  #ifdef __STATISTICS__
+								  , HeapStatistics::REALLOC
+								  #endif // __STATISTICS__
+				);
+		} // if
+
+		// Non-null oaddr with size 0: the old storage is freed and nullptr is returned.
+	  if ( UNLIKELY( size == 0 ) ) {					// special cases
+			#ifdef __STATISTICS__
+			heapManager->stats.realloc_0_calls += 1;
+			#endif // __STATISTICS__
+			doFree( oaddr );							// free previous storage
+			return nullptr;
+		} // if
+
+		Heap::Storage::Header * header;
+		Heap::FreeHeader * freeHead;
+		size_t bsize, oalign;
+		headers( "realloc", oaddr, header, freeHead, bsize, oalign );
+
+		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+		size_t osize = header->kind.real.size;			// old allocation size
+		bool ozfill = (header->kind.real.blockSize & 2); // old allocation zero filled
+	  if ( UNLIKELY( size <= odsize ) && odsize <= size * 2 ) { // allow up to 50% wasted storage
+			#ifdef __STATISTICS__
+			heapManager->stats.realloc_calls += 1;
+			heapManager->stats.realloc_storage_request += size;
+			#endif // __STATISTICS__
+
+	  		header->kind.real.size = size;				// reset allocation size
+	  		if ( UNLIKELY( ozfill ) && size > osize ) {	// previous request zero fill and new request larger ?
+	  			memset( (char *)oaddr + osize, '\0', size - osize ); // initialize added storage
+	  		} // if
+			return oaddr;
+		} // if
+
+		// change size and copy old content to new storage
+
+		void * naddr;
+		if ( LIKELY( oalign <= __ALIGN__ ) ) {			// previous request not aligned ? (common case, hint only)
+			naddr = mallocNoStats( size					// create new area
+								   #ifdef __STATISTICS__
+								   , HeapStatistics::REALLOC
+								   #endif // __STATISTICS__
+				);
+		} else {
+			naddr = memalignNoStats( oalign, size		// create new aligned area
+									 #ifdef __STATISTICS__
+									 , HeapStatistics::REALLOC
+									 #endif // __STATISTICS__
+				);
+		} // if
+
+		headers( "realloc", naddr, header, freeHead, bsize, oalign );
+		// Only the bytes valid in both allocations are copied; a zero-fill extension is handled below.
+		memcpy( naddr, oaddr, std::min( osize, size ) ); // copy bytes
+		doFree( oaddr );								// free previous storage
+
+		if ( UNLIKELY( ozfill ) ) {						// previous request zero fill ?
+			header->kind.real.blockSize |= 2;			// mark new request as zero filled
+			if ( size > osize ) {						// new request larger ?
+				memset( (char *)naddr + osize, '\0', size - osize ); // initialize added storage
+			} // if
+		} // if
+		return naddr;
+	} // realloc
+
+
+	// Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete)
+	void * memalign( size_t alignment, size_t size ) {
+		return memalignNoStats( alignment, size			// all work is delegated to memalignNoStats
+								#ifdef __STATISTICS__
+								, HeapStatistics::MEMALIGN	// extra argument is passed only in statistics builds
+								#endif // __STATISTICS__
+			);
+	} // memalign
+
+
+	// Same as aalloc() with memory alignment.
+	void * amemalign( size_t alignment, size_t dim, size_t elemSize ) {
+		return memalignNoStats( alignment, dim * elemSize	// NOTE(review): product is not checked for overflow here
+								#ifdef __STATISTICS__
+								, HeapStatistics::AMEMALIGN	// extra argument is passed only in statistics builds
+								#endif // __STATISTICS__
+			);
+	} // amemalign
+
+
+	// Same as calloc() with memory alignment.
+	void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) {
+		size_t size = dim * elemSize;					// NOTE(review): product is not checked for overflow here
+		char * addr = (char *)memalignNoStats( alignment, size
+											   #ifdef __STATISTICS__
+											   , HeapStatistics::CMEMALIGN
+											   #endif // __STATISTICS__
+			);
+
+		if ( UNLIKELY( addr == NULL ) ) return NULL; // no storage returned => no header to inspect or mark
+
+		Heap::Storage::Header * header;
+		Heap::FreeHeader * freeHead;
+		size_t bsize;
+
+		#ifndef __DEBUG__
+		bool mapped =									// result of headers() is only needed in non-debug builds
+		#endif // __DEBUG__
+			headers( "cmemalign", addr, header, freeHead, bsize, alignment );
+
+		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
+		#ifndef __DEBUG__
+		if ( LIKELY( ! mapped ) )						// non-debug: skip the memset for mapped storage
+		#endif // __DEBUG__
+			// <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined
+			// `-header`-addr                      `-size
+			memset( addr, '\0', size );					// set to zeros
+
+		header->kind.real.blockSize |= 2;				// mark as zero filled (flag read by malloc_zero_fill/realloc)
+		return addr;
+	} // cmemalign
+
+
+	// Same as memalign(), but ISO/IEC 2011 C11 Section 7.22.2 states: the value of size shall be an integral multiple
+	// of alignment. This requirement is universally ignored.
+	void * aligned_alloc( size_t alignment, size_t size ) {
+		return memalign( alignment, size );				// size is not checked to be a multiple of alignment
+	} // aligned_alloc
+
+
+	// Allocates size bytes and places the address of the allocated memory in *memptr. The address of the allocated
+	// memory shall be a multiple of alignment, which must be a power of two and a multiple of sizeof(void *). If size
+	// is 0, then posix_memalign() returns either nullptr, or a unique pointer value that can later be successfully passed to
+	// free(3).
+	int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
+	  if ( UNLIKELY( alignment < __ALIGN__ || ! Pow2( alignment ) ) ) return EINVAL; // bad alignment: *memptr is left untouched
+		*memptr = memalign( alignment, size );			// result of memalign is stored without a null check
+		return 0;										// success is reported unconditionally after the alignment check
+	} // posix_memalign
+
+
+	// Allocates size bytes and returns a pointer to the allocated memory. The memory address shall be a multiple of the
+	// page size.  It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
+	void * valloc( size_t size ) {
+		return memalign( __cfa_page_size, size );		// alignment is __cfa_page_size; size is passed through unchanged
+	} // valloc
+
+
+	// Same as valloc but rounds size to multiple of page size.
+	void * pvalloc( size_t size ) {						// page-aligned allocation with the size adjusted by Ceiling
+		return memalign( __cfa_page_size, Ceiling( size, __cfa_page_size ) ); // both alignment and size use __cfa_page_size
+	} // pvalloc
+
+
+	// Frees the memory space pointed to by ptr, which must have been returned by a previous call to malloc(), calloc()
+	// or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is
+	// nullptr, no operation is performed.
+	void free( void * addr ) {
+		// Statistics builds only: counters are updated between DISABLE_INTERRUPTS and ENABLE_INTERRUPTS.
+		#ifdef __STATISTICS__
+		DISABLE_INTERRUPTS;
+		if ( UNLIKELY( ! heapManagerBootFlag ) ) Heap::heapManagerCtor( // trigger for first heap
+			#ifdef __DEBUG__
+			0
+			#endif // __DEBUG__
+			);
+		#endif // __STATISTICS__
+
+		// A null heapManager (e.g., free after thread-local storage destruction) falls back to the global heapMaster stats.
+		if ( UNLIKELY( addr == nullptr ) ) {			// special case
+			#ifdef __STATISTICS__
+			if ( LIKELY( heapManager ) ) heapManager->stats.free_null_calls += 1;
+			else AtomicFetchAdd( heapMaster.stats.free_null_calls, 1 );
+			ENABLE_INTERRUPTS;
+			#endif // __STATISTICS__
+			return;
+		} // if
+
+		#ifdef __STATISTICS__
+		if ( LIKELY( heapManager ) ) heapManager->stats.free_calls += 1;
+		else AtomicFetchAdd( heapMaster.stats.free_calls, 1 );
+		ENABLE_INTERRUPTS;
+		#endif // __STATISTICS__
+
+		doFree( addr );									// actual release happens after ENABLE_INTERRUPTS
+	} // free
+
+
+	// Returns the alignment of an allocation.
+	size_t malloc_alignment( void * addr ) {
+	  if ( UNLIKELY( addr == nullptr ) ) return __ALIGN__; // null allocation => minimum alignment
+		Heap::Storage::Header * hdr = headerAddr( addr );
+		bool fake = (hdr->kind.fake.alignment & 1) == 1; // low-order bit set => fake header
+		size_t align = __ALIGN__;						// default: real header => minimum alignment
+		// A fake header stores the requested alignment with the flag in its low-order bit.
+		if ( UNLIKELY( fake ) ) align = hdr->kind.fake.alignment & -2; // remove flag from value
+		return align;
+	} // malloc_alignment
+
+
+	// Returns true if the allocation is zero filled, e.g., allocated by calloc().
+	bool malloc_zero_fill( void * addr ) {
+	  if ( UNLIKELY( addr == nullptr ) ) return false;	// nothing allocated => not zero filled
+		Heap::Storage::Header * hdr = headerAddr( addr );
+		bool fake = (hdr->kind.fake.alignment & 1) == 1; // low-order bit set => fake header
+		if ( UNLIKELY( fake ) ) hdr = realHeader( hdr ); // back up from fake to real header
+		bool zfill = (hdr->kind.real.blockSize & 2) != 0; // bit value 2 of blockSize is the zero-fill flag
+		return zfill;
+	} // malloc_zero_fill
+
+
+	// Returns original total allocation size (not bucket size) => array size is dimension * sizeof(T).
+	size_t malloc_size( void * addr ) {
+	  if ( UNLIKELY( addr == nullptr ) ) return 0;		// null allocation has 0 size
+		Heap::Storage::Header * hdr = headerAddr( addr );
+		bool fake = (hdr->kind.fake.alignment & 1) == 1; // low-order bit set => fake header
+		if ( UNLIKELY( fake ) ) hdr = realHeader( hdr ); // back up from fake to real header
+		size_t asize = hdr->kind.real.size;				// allocation size recorded in the real header
+		return asize;
+	} // malloc_size
+
+
+	// Returns the number of usable bytes in the block pointed to by ptr, a pointer to a block of memory allocated by
+	// malloc or a related function.
+	size_t malloc_usable_size( void * addr ) {
+	  if ( UNLIKELY( addr == nullptr ) ) return 0;		// null allocation has 0 size
+		Heap::Storage::Header * header;					// filled in by headers()
+		Heap::FreeHeader * freeHead;					// filled in by headers(), not used afterwards
+		size_t bsize, alignment;						// only bsize is used afterwards
+
+		headers( "malloc_usable_size", addr, header, freeHead, bsize, alignment );
+		return dataStorage( bsize, addr, header );		// data storage in bucket, same quantity resize/realloc compare against
+	} // malloc_usable_size
+
+
+	// Prints (on default standard error) statistics about memory allocated by malloc and related functions.
+	void malloc_stats() {
+		#ifdef __STATISTICS__
+		HeapStatistics stats;							// local accumulator handed to collectStats
+		printStats( collectStats( stats ) );
+		#else // ! __STATISTICS__ => write a fixed notice to stderr instead
+		#define MALLOC_STATS_MSG "malloc_stats statistics disabled.\n"
+		NOWARNING( write( STDERR_FILENO, MALLOC_STATS_MSG, sizeof( MALLOC_STATS_MSG ) - 1 /* size includes '\0' */ ), unused-result );
+		#endif // __STATISTICS__
+	} // malloc_stats
+
+
+	// Changes the file descriptor where malloc_stats() writes statistics.
+	int malloc_stats_fd( int fd __attribute__(( unused )) ) {
+		#ifdef __STATISTICS__
+		int temp = heapMaster.stats_fd;					// remember previous descriptor
+		heapMaster.stats_fd = fd;						// install new descriptor
+		return temp;									// caller gets the previous descriptor back
+		#else // ! __STATISTICS__ => fd is ignored
+		return -1;										// unsupported
+		#endif // __STATISTICS__
+	} // malloc_stats_fd
+
+
+	// Prints an XML string that describes the current state of the memory-allocation implementation in the caller.
+	// The string is printed on the file stream stream.  The exported string includes information about all arenas (see
+	// malloc).
+	int malloc_info( int options, FILE * stream __attribute__(( unused )) ) {
+	  if ( options != 0 ) { errno = EINVAL; return -1; } // only options == 0 is accepted
+		#ifdef __STATISTICS__
+		HeapStatistics stats;							// local accumulator handed to collectStats
+		return printStatsXML( collectStats( stats ), stream ); // result of printStatsXML is returned as is
+		#else // ! __STATISTICS__ => nothing is written to stream
+		return 0;										// unsupported
+		#endif // __STATISTICS__
+	} // malloc_info
+
+
+	// Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument
+	// specifies the parameter to be modified, and value specifies the new value for that parameter.
+	int mallopt( int option, int value ) {
+		// Negative values are rejected for every option.
+	  if ( value < 0 ) return 0;
+		int accepted = 0;								// 0 => error, unsupported
+		if ( option == M_TOP_PAD ) {					// heap-expansion amount, Ceiling'ed w.r.t. page size
+			heapMaster.heapExpand = Ceiling( value, __cfa_page_size );
+			accepted = 1;
+		} else if ( option == M_MMAP_THRESHOLD ) {		// mmap threshold
+			accepted = setMmapStart( value ) ? 1 : 0;	// setMmapStart decides whether the value is usable
+		} // if
+		return accepted;
+	} // mallopt
+
+
+	// Attempt to release free memory at the top of the heap (by calling sbrk with a suitable argument).
+	int malloc_trim( size_t ) {							// argument is ignored
+		return 0;										// => impossible to release memory
+	} // malloc_trim
+
+
+	// Records the current state of all malloc internal bookkeeping variables (but not the actual contents of the heap
+	// or the state of malloc_hook functions pointers).  The state is recorded in a system-dependent opaque data
+	// structure dynamically allocated via malloc, and a pointer to that data structure is returned as the function
+	// result.  (The caller must free this memory.)
+	void * malloc_get_state( void ) {
+		return nullptr;									// unsupported: no state is recorded, nothing for the caller to free
+	} // malloc_get_state
+
+
+	// Restores the state of all malloc internal bookkeeping variables to the values recorded in the opaque data
+	// structure pointed to by state.
+	int malloc_set_state( void * ) {					// argument is ignored
+		return 0;										// unsupported: no state is restored
+	} // malloc_set_state
+
+	// Returns the default amount (bytes) to extend the heap size once all the current storage in the heap is allocated.
+	size_t malloc_expansion() { return __DEFAULT_HEAP_EXPANSION__; }
+
+	// Returns the default crossover point between allocations occurring in the sbrk area or separately mmapped.
+	size_t malloc_mmap_start() { return __DEFAULT_MMAP_START__; }
+
+	// Returns the default amount subtracted to adjust for unfreed program storage (debug only).
+	size_t malloc_unfreed() { return __DEFAULT_HEAP_UNFREED__; }
+} // extern "C"
+
+
+#ifdef __cforall
+void * resize( void * oaddr, size_t nalign, size_t size )
+#else
+extern "C" {
+void * _X6resizeFPv_Pvmm__1( void * oaddr, size_t nalign, size_t size )
+#endif
+{
+  if ( UNLIKELY( oaddr == nullptr ) ) {				// no old storage => plain aligned allocation
+		return memalignNoStats( nalign, size
+								#ifdef __STATISTICS__
+								, HeapStatistics::RESIZE
+								#endif // __STATISTICS__
+			);
+	} // if
+
+	// Non-null oaddr with size 0: the old storage is freed and nullptr is returned.
+  if ( UNLIKELY( size == 0 ) ) {						// special cases
+		#ifdef __STATISTICS__
+		heapManager->stats.resize_0_calls += 1;
+		#endif // __STATISTICS__
+		doFree( oaddr );								// free previous storage
+		return nullptr;
+	} // if
+
+	#ifdef __DEBUG__
+	checkAlign( nalign );								// check alignment (debug builds only)
+	#endif // __DEBUG__
+
+	// Attempt to reuse existing alignment.
+	Heap::Storage::Header * header = headerAddr( oaddr );
+	bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
+	size_t oalign;
+	if ( UNLIKELY( isFakeHeader ) ) {
+		oalign = header->kind.fake.alignment & -2;		// old alignment
+		if ( UNLIKELY( (uintptr_t)oaddr % nalign == 0	// lucky match ?
+			 && ( oalign <= nalign						// going down
+				  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
+			) ) {
+			headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
+			Heap::FreeHeader * freeHead;
+			size_t bsize;								// outer oalign is reused, previously shadowed by a second declaration
+			headers( "resize", oaddr, header, freeHead, bsize, oalign );
+			size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
+
+			if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage
+				headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
+
+				header->kind.real.blockSize &= -4;		// clear low bit (as before) and 0-fill flag (value 2); -2 left 0 fill set
+				header->kind.real.size = size;			// reset allocation size
+				return oaddr;
+			} // if
+		} // if
+	} else if ( ! isFakeHeader							// old real header (aligned on libAlign) ?
+				&& nalign == __ALIGN__ ) {				// new alignment also on libAlign => no fake header needed
+		return resize( oaddr, size );					// duplicate special case checks
+	} // if
+
+	// change size, DO NOT preserve STICKY PROPERTIES.
+	doFree( oaddr );									// free previous storage
+	return memalignNoStats( nalign, size				// create new aligned area
+							#ifdef __STATISTICS__
+							, HeapStatistics::RESIZE
+							#endif // __STATISTICS__
+		);
+} // resize
+#ifndef __cforall
+}
+#endif
+
+
+#ifdef __cforall
+void * realloc( void * oaddr, size_t nalign, size_t size )
+#else
+extern "C" {
+void * _X7reallocFPv_Pvmm__1( void * oaddr, size_t nalign, size_t size )
+#endif
+{
+  if ( UNLIKELY( oaddr == nullptr ) ) {				// no old storage => plain aligned allocation
+		return memalignNoStats( nalign, size
+								#ifdef __STATISTICS__
+								, HeapStatistics::REALLOC
+								#endif // __STATISTICS__
+			);
+	} // if
+
+	// Non-null oaddr with size 0: the old storage is freed and nullptr is returned.
+  if ( UNLIKELY( size == 0 ) ) {						// special cases
+		#ifdef __STATISTICS__
+		heapManager->stats.realloc_0_calls += 1;
+		#endif // __STATISTICS__
+		doFree( oaddr );								// free previous storage
+		return nullptr;
+	} // if
+
+	#ifdef __DEBUG__
+	checkAlign( nalign );								// check alignment (debug builds only)
+	#endif // __DEBUG__
+
+	// Attempt to reuse existing alignment.
+	Heap::Storage::Header * header = headerAddr( oaddr );
+	bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
+	size_t oalign;
+	if ( UNLIKELY( isFakeHeader ) ) {
+		oalign = header->kind.fake.alignment & -2;		// old alignment
+		if ( UNLIKELY( (uintptr_t)oaddr % nalign == 0	// lucky match ?
+			 && ( oalign <= nalign						// going down
+				  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
+			) ) {
+			headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
+			return realloc( oaddr, size );				// duplicate special case checks
+		} // if
+	} else if ( ! isFakeHeader							// old real header; test is always true in this else branch
+				&& nalign == __ALIGN__ ) {				// new alignment also on libAlign => no fake header needed
+		return realloc( oaddr, size );					// duplicate special case checks
+	} // if
+
+	Heap::FreeHeader * freeHead;
+	size_t bsize;
+	headers( "realloc", oaddr, header, freeHead, bsize, oalign );
+
+	// change size and copy old content to new storage
+
+	size_t osize = header->kind.real.size;				// old allocation size
+	bool ozfill = (header->kind.real.blockSize & 2);	// old allocation zero filled
+
+	void * naddr = memalignNoStats( nalign, size		// create new aligned area
+									#ifdef __STATISTICS__
+									, HeapStatistics::REALLOC
+									#endif // __STATISTICS__
+		);
+
+	headers( "realloc", naddr, header, freeHead, bsize, oalign ); // header now refers to the new storage
+	memcpy( naddr, oaddr, std::min( osize, size ) );	// copy bytes
+	doFree( oaddr );									// free previous storage
+
+	if ( UNLIKELY( ozfill ) ) {							// previous request zero fill ?
+		header->kind.real.blockSize |= 2;				// mark new request as zero filled
+		if ( size > osize ) {							// new request larger ?
+			memset( (char *)naddr + osize, '\0', size - osize ); // initialize added storage
+		} // if
+	} // if
+	return naddr;
+} // realloc
+#ifndef __cforall
+}
+#endif
+
+// zip -r HeapPerThread.zip heap/HeapPerThread.h heap/HeapPerThread.cc heap/Makefile heap/affinity.h heap/test.cc heap/away.cc
+
+// g++-10 -Wall -Wextra -g -O3 -DNDEBUG -D__STATISTICS__ -DTLS HeapPerThread.cc -fPIC -shared -o HeapPerThread.so
+
+// Local Variables: //
+// tab-width: 4 //
+// compile-command: "g++-10 -Wall -Wextra -g -O3 -DNDEBUG -D__STATISTICS__ HeapPerThread.cc -c" //
+// End: //
Index: libcfa/src/heap.cfa
===================================================================
--- libcfa/src/heap.cfa	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ 	(revision )
@@ -1,1416 +1,0 @@
-//
-// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-//
-// heap.cfa --
-//
-// Author           : Peter A. Buhr
-// Created On       : Tue Dec 19 21:58:35 2017
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Sun Jan  2 23:29:41 2022
-// Update Count     : 1058
-//
-
-#include <unistd.h>										// sbrk, sysconf
-#include <stdlib.h>										// EXIT_FAILURE
-#include <stdbool.h>									// true, false
-#include <stdio.h>										// snprintf, fileno
-#include <errno.h>										// errno
-#include <string.h>										// memset, memcpy
-#include <limits.h>										// ULONG_MAX
-#include <malloc.h>										// memalign, malloc_usable_size
-#include <sys/mman.h>									// mmap, munmap
-
-#include "bits/align.hfa"								// libAlign
-#include "bits/defs.hfa"								// likely, unlikely
-#include "bits/locks.hfa"								// __spinlock_t
-#include "startup.hfa"									// STARTUP_PRIORITY_MEMORY
-#include "math.hfa"										// min
-#include "bitmanip.hfa"									// is_pow2, ceiling2
-
-static bool traceHeap = false;
-
-inline bool traceHeap() { return traceHeap; }
-
-bool traceHeapOn() {
-	bool temp = traceHeap;
-	traceHeap = true;
-	return temp;
-} // traceHeapOn
-
-bool traceHeapOff() {
-	bool temp = traceHeap;
-	traceHeap = false;
-	return temp;
-} // traceHeapOff
-
-bool traceHeapTerm() { return false; }
-
-
-static bool prtFree = false;
-
-bool prtFree() {
-	return prtFree;
-} // prtFree
-
-bool prtFreeOn() {
-	bool temp = prtFree;
-	prtFree = true;
-	return temp;
-} // prtFreeOn
-
-bool prtFreeOff() {
-	bool temp = prtFree;
-	prtFree = false;
-	return temp;
-} // prtFreeOff
-
-
-enum {
-	// Define the default extension heap amount in units of bytes. When the uC++ supplied heap reaches the brk address,
-	// the brk address is extended by the extension amount.
-	__CFA_DEFAULT_HEAP_EXPANSION__ = (10 * 1024 * 1024),
-
-	// Define the mmap crossover point during allocation. Allocations less than this amount are allocated from buckets;
-	// values greater than or equal to this value are mmap from the operating system.
-	__CFA_DEFAULT_MMAP_START__ = (512 * 1024 + 1),
-};
-
-size_t default_mmap_start() __attribute__(( weak )) {
-	return __CFA_DEFAULT_MMAP_START__;
-} // default_mmap_start
-
-size_t default_heap_expansion() __attribute__(( weak )) {
-	return __CFA_DEFAULT_HEAP_EXPANSION__;
-} // default_heap_expansion
-
-
-#ifdef __CFA_DEBUG__
-static size_t allocUnfreed;								// running total of allocations minus frees
-
-static void prtUnfreed() {
-	if ( allocUnfreed != 0 ) {
-		// DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
-		char helpText[512];
-		int len = snprintf( helpText, sizeof(helpText), "CFA warning (UNIX pid:%ld) : program terminating with %zu(0x%zx) bytes of storage allocated but not freed.\n"
-							"Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
-							(long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid
-		__cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
-	} // if
-} // prtUnfreed
-
-extern int cfa_main_returned;							// from interpose.cfa
-extern "C" {
-	void heapAppStart() {								// called by __cfaabi_appready_startup
-		allocUnfreed = 0;
-	} // heapAppStart
-
-	void heapAppStop() {								// called by __cfaabi_appready_startdown
-		fclose( stdin ); fclose( stdout );
-		if ( cfa_main_returned ) prtUnfreed();			// do not check unfreed storage if exit called
-	} // heapAppStop
-} // extern "C"
-#endif // __CFA_DEBUG__
-
-
-// statically allocated variables => zero filled.
-size_t __page_size;										// architecture pagesize
-int __map_prot;											// common mmap/mprotect protection
-static size_t heapExpand;								// sbrk advance
-static size_t mmapStart;								// cross over point for mmap
-static unsigned int maxBucketsUsed;						// maximum number of buckets in use
-
-
-#define SPINLOCK 0
-#define LOCKFREE 1
-#define BUCKETLOCK SPINLOCK
-#if BUCKETLOCK == SPINLOCK
-#elif BUCKETLOCK == LOCKFREE
-#include <stackLockFree.hfa>
-#else
-	#error undefined lock type for bucket lock
-#endif // LOCKFREE
-
-// Recursive definitions: HeapManager needs size of bucket array and bucket area needs sizeof HeapManager storage.
-// Break recusion by hardcoding number of buckets and statically checking number is correct after bucket array defined.
-enum { NoBucketSizes = 91 };							// number of buckets sizes
-
-struct HeapManager {
-	struct Storage {
-		struct Header {									// header
-			union Kind {
-				struct RealHeader {
-					union {
-						struct {						// 4-byte word => 8-byte header, 8-byte word => 16-byte header
-							#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
-							uint64_t padding;			// unused, force home/blocksize to overlay alignment in fake header
-							#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_POINTER__ == 4
-
-							union {
-								// FreeHeader * home;		// allocated block points back to home locations (must overlay alignment)
-								// 2nd low-order bit => zero filled
-								void * home;			// allocated block points back to home locations (must overlay alignment)
-								size_t blockSize;		// size for munmap (must overlay alignment)
-								#if BUCKETLOCK == SPINLOCK
-								Storage * next;			// freed block points next freed block of same size
-								#endif // SPINLOCK
-							};
-							size_t size;				// allocation size in bytes
-
-							#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
-							uint64_t padding;			// unused, force home/blocksize to overlay alignment in fake header
-							#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_POINTER__ == 4
-						};
-						#if BUCKETLOCK == LOCKFREE
-						Link(Storage) next;				// freed block points next freed block of same size (double-wide)
-						#endif // LOCKFREE
-					};
-				} real; // RealHeader
-
-				struct FakeHeader {
-					#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-					uint32_t alignment;					// 1st low-order bit => fake header & alignment
-					#endif // __ORDER_LITTLE_ENDIAN__
-
-					uint32_t offset;
-
-					#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-					uint32_t alignment;					// low-order bits of home/blockSize used for tricks
-					#endif // __ORDER_BIG_ENDIAN__
-				} fake; // FakeHeader
-			} kind; // Kind
-		} header; // Header
-		char pad[libAlign() - sizeof( Header )];
-		char data[0];									// storage
-	}; // Storage
-
-	static_assert( libAlign() >= sizeof( Storage ), "libAlign() < sizeof( Storage )" );
-
-	struct FreeHeader {
-		#if BUCKETLOCK == SPINLOCK
-		__spinlock_t lock;								// must be first field for alignment
-		Storage * freeList;
-		#else
-		StackLF(Storage) freeList;
-		#endif // BUCKETLOCK
-		size_t blockSize;								// size of allocations on this list
-	}; // FreeHeader
-
-	// must be first fields for alignment
-	__spinlock_t extlock;								// protects allocation-buffer extension
-	FreeHeader freeLists[NoBucketSizes];				// buckets for different allocation sizes
-
-	void * heapBegin;									// start of heap
-	void * heapEnd;										// logical end of heap
-	size_t heapRemaining;								// amount of storage not allocated in the current chunk
-}; // HeapManager
-
-#if BUCKETLOCK == LOCKFREE
-static inline {
-	Link(HeapManager.Storage) * ?`next( HeapManager.Storage * this ) { return &this->header.kind.real.next; }
-	void ?{}( HeapManager.FreeHeader & ) {}
-	void ^?{}( HeapManager.FreeHeader & ) {}
-} // distribution
-#endif // LOCKFREE
-
-static inline size_t getKey( const HeapManager.FreeHeader & freeheader ) { return freeheader.blockSize; }
-
-
-#define FASTLOOKUP
-#define __STATISTICS__
-
-// Size of array must harmonize with NoBucketSizes and individual bucket sizes must be multiple of 16.
-// Smaller multiples of 16 and powers of 2 are common allocation sizes, so make them generate the minimum required bucket size.
-// malloc(0) returns 0p, so no bucket is necessary for 0 bytes returning an address that can be freed.
-static const unsigned int bucketSizes[] @= {			// different bucket sizes
-	16 + sizeof(HeapManager.Storage), 32 + sizeof(HeapManager.Storage), 48 + sizeof(HeapManager.Storage), 64 + sizeof(HeapManager.Storage), // 4
-	96 + sizeof(HeapManager.Storage), 112 + sizeof(HeapManager.Storage), 128 + sizeof(HeapManager.Storage), // 3
-	160, 192, 224, 256 + sizeof(HeapManager.Storage), // 4
-	320, 384, 448, 512 + sizeof(HeapManager.Storage), // 4
-	640, 768, 896, 1_024 + sizeof(HeapManager.Storage), // 4
-	1_536, 2_048 + sizeof(HeapManager.Storage), // 2
-	2_560, 3_072, 3_584, 4_096 + sizeof(HeapManager.Storage), // 4
-	6_144, 8_192 + sizeof(HeapManager.Storage), // 2
-	9_216, 10_240, 11_264, 12_288, 13_312, 14_336, 15_360, 16_384 + sizeof(HeapManager.Storage), // 8
-	18_432, 20_480, 22_528, 24_576, 26_624, 28_672, 30_720, 32_768 + sizeof(HeapManager.Storage), // 8
-	36_864, 40_960, 45_056, 49_152, 53_248, 57_344, 61_440, 65_536 + sizeof(HeapManager.Storage), // 8
-	73_728, 81_920, 90_112, 98_304, 106_496, 114_688, 122_880, 131_072 + sizeof(HeapManager.Storage), // 8
-	147_456, 163_840, 180_224, 196_608, 212_992, 229_376, 245_760, 262_144 + sizeof(HeapManager.Storage), // 8
-	294_912, 327_680, 360_448, 393_216, 425_984, 458_752, 491_520, 524_288 + sizeof(HeapManager.Storage), // 8
-	655_360, 786_432, 917_504, 1_048_576 + sizeof(HeapManager.Storage), // 4
-	1_179_648, 1_310_720, 1_441_792, 1_572_864, 1_703_936, 1_835_008, 1_966_080, 2_097_152 + sizeof(HeapManager.Storage), // 8
-	2_621_440, 3_145_728, 3_670_016, 4_194_304 + sizeof(HeapManager.Storage), // 4
-};
-
-static_assert( NoBucketSizes == sizeof(bucketSizes) / sizeof(bucketSizes[0] ), "size of bucket array wrong" );
-
-#ifdef FASTLOOKUP
-enum { LookupSizes = 65_536 + sizeof(HeapManager.Storage) }; // number of fast lookup sizes
-static unsigned char lookup[LookupSizes];				// O(1) lookup for small sizes
-#endif // FASTLOOKUP
-
-static const off_t mmapFd = -1;							// fake or actual fd for anonymous file
-#ifdef __CFA_DEBUG__
-static bool heapBoot = 0;								// detect recursion during boot
-#endif // __CFA_DEBUG__
-
-// The constructor for heapManager is called explicitly in memory_startup.
-static HeapManager heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
-
-
-#ifdef __STATISTICS__
-// Heap statistics counters.
-static unsigned int malloc_calls, malloc_0_calls;
-static unsigned long long int malloc_storage_request, malloc_storage_alloc;
-static unsigned int aalloc_calls, aalloc_0_calls;
-static unsigned long long int aalloc_storage_request, aalloc_storage_alloc;
-static unsigned int calloc_calls, calloc_0_calls;
-static unsigned long long int calloc_storage_request, calloc_storage_alloc;
-static unsigned int memalign_calls, memalign_0_calls;
-static unsigned long long int memalign_storage_request, memalign_storage_alloc;
-static unsigned int amemalign_calls, amemalign_0_calls;
-static unsigned long long int amemalign_storage_request, amemalign_storage_alloc;
-static unsigned int cmemalign_calls, cmemalign_0_calls;
-static unsigned long long int cmemalign_storage_request, cmemalign_storage_alloc;
-static unsigned int resize_calls, resize_0_calls;
-static unsigned long long int resize_storage_request, resize_storage_alloc;
-static unsigned int realloc_calls, realloc_0_calls;
-static unsigned long long int realloc_storage_request, realloc_storage_alloc;
-static unsigned int free_calls, free_null_calls;
-static unsigned long long int free_storage_request, free_storage_alloc;
-static unsigned int mmap_calls;
-static unsigned long long int mmap_storage_request, mmap_storage_alloc;
-static unsigned int munmap_calls;
-static unsigned long long int munmap_storage_request, munmap_storage_alloc;
-static unsigned int sbrk_calls;
-static unsigned long long int sbrk_storage;
-// Statistics file descriptor (changed by malloc_stats_fd).
-static int stats_fd = STDERR_FILENO;					// default stderr
-
-// Use "write" because streams may be shutdown when calls are made.
-static void printStats() {
-	char helpText[1024];
-	__cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
-								"\nHeap statistics: (storage request / allocation + header)\n"
-								"  malloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-								"  aalloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-								"  calloc    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-								"  memalign  >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-								"  amemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-								"  cmemalign >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-								"  resize    >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-								"  realloc   >0 calls %'u; 0 calls %'u; storage %'llu / %'llu bytes\n"
-								"  free      !null calls %'u; null calls %'u; storage %'llu / %'llu bytes\n"
-								"  sbrk      calls %'u; storage %'llu bytes\n"
-								"  mmap      calls %'u; storage %'llu / %'llu bytes\n"
-								"  munmap    calls %'u; storage %'llu / %'llu bytes\n",
-								malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc,
-								aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc,
-								calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc,
-								memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc,
-								amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc,
-								cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc,
-								resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc,
-								realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc,
-								free_calls, free_null_calls, free_storage_request, free_storage_alloc,
-								sbrk_calls, sbrk_storage,
-								mmap_calls, mmap_storage_request, mmap_storage_alloc,
-								munmap_calls, munmap_storage_request, munmap_storage_alloc
-		);
-} // printStats
-
-static int printStatsXML( FILE * stream ) {				// see malloc_info
-	char helpText[1024];
-	int len = snprintf( helpText, sizeof(helpText),
-						"<malloc version=\"1\">\n"
-						"<heap nr=\"0\">\n"
-						"<sizes>\n"
-						"</sizes>\n"
-						"<total type=\"malloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"aalloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"calloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"memalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"amemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"cmemalign\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"resize\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"realloc\" >0 count=\"%'u;\" 0 count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"free\" !null=\"%'u;\" 0 null=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"<total type=\"sbrk\" count=\"%'u;\" size=\"%'llu\"/> bytes\n"
-						"<total type=\"mmap\" count=\"%'u;\" size=\"%'llu / %'llu\" / > bytes\n"
-						"<total type=\"munmap\" count=\"%'u;\" size=\"%'llu / %'llu\"/> bytes\n"
-						"</malloc>",
-						malloc_calls, malloc_0_calls, malloc_storage_request, malloc_storage_alloc,
-						aalloc_calls, aalloc_0_calls, aalloc_storage_request, aalloc_storage_alloc,
-						calloc_calls, calloc_0_calls, calloc_storage_request, calloc_storage_alloc,
-						memalign_calls, memalign_0_calls, memalign_storage_request, memalign_storage_alloc,
-						amemalign_calls, amemalign_0_calls, amemalign_storage_request, amemalign_storage_alloc,
-						cmemalign_calls, cmemalign_0_calls, cmemalign_storage_request, cmemalign_storage_alloc,
-						resize_calls, resize_0_calls, resize_storage_request, resize_storage_alloc,
-						realloc_calls, realloc_0_calls, realloc_storage_request, realloc_storage_alloc,
-						free_calls, free_null_calls, free_storage_request, free_storage_alloc,
-						sbrk_calls, sbrk_storage,
-						mmap_calls, mmap_storage_request, mmap_storage_alloc,
-						munmap_calls, munmap_storage_request, munmap_storage_alloc
-		);
-	__cfaabi_bits_write( fileno( stream ), helpText, len );	// ensures all bytes written or exit
-	return len;
-} // printStatsXML
-#endif // __STATISTICS__
-
-
-// thunk problem
-size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
-	size_t l = 0, m, h = dim;
-	while ( l < h ) {
-		m = (l + h) / 2;
-		if ( (unsigned int &)(vals[m]) < key ) {		// cast away const
-			l = m + 1;
-		} else {
-			h = m;
-		} // if
-	} // while
-	return l;
-} // Bsearchl
-
-
-static inline bool setMmapStart( size_t value ) {		// true => mmapped, false => sbrk
-  if ( value < __page_size || bucketSizes[NoBucketSizes - 1] < value ) return false;
-	mmapStart = value;									// set global
-
-	// find the closest bucket size less than or equal to the mmapStart size
-	maxBucketsUsed = Bsearchl( (unsigned int)mmapStart, bucketSizes, NoBucketSizes ); // binary search
-	assert( maxBucketsUsed < NoBucketSizes );			// subscript failure ?
-	assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
-	return true;
-} // setMmapStart
-
-
-// <-------+----------------------------------------------------> bsize (bucket size)
-// |header |addr
-//==================================================================================
-//                   align/offset |
-// <-----------------<------------+-----------------------------> bsize (bucket size)
-//                   |fake-header | addr
-#define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
-#define realHeader( header ) ((HeapManager.Storage.Header *)((char *)header - header->kind.fake.offset))
-
-// <-------<<--------------------- dsize ---------------------->> bsize (bucket size)
-// |header |addr
-//==================================================================================
-//                   align/offset |
-// <------------------------------<<---------- dsize --------->>> bsize (bucket size)
-//                   |fake-header |addr
-#define dataStorage( bsize, addr, header ) (bsize - ( (char *)addr - (char *)header ))
-
-
-static inline void checkAlign( size_t alignment ) {
-	if ( alignment < libAlign() || ! is_pow2( alignment ) ) {
-		abort( "Alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, libAlign() );
-	} // if
-} // checkAlign
-
-
-static inline void checkHeader( bool check, const char name[], void * addr ) {
-	if ( unlikely( check ) ) {							// bad address ?
-		abort( "Attempt to %s storage %p with address outside the heap.\n"
-			   "Possible cause is duplicate free on same block or overwriting of memory.",
-			   name, addr );
-	} // if
-} // checkHeader
-
-
-static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & alignment ) {
-	if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
-		alignment = header->kind.fake.alignment & -2;	// remove flag from value
-		#ifdef __CFA_DEBUG__
-		checkAlign( alignment );						// check alignment
-		#endif // __CFA_DEBUG__
-		header = realHeader( header );					// backup from fake to real header
-	} else {
-		alignment = libAlign();							// => no fake header
-	} // if
-} // fakeHeader
-
-
-static inline bool headers( const char name[] __attribute__(( unused )), void * addr, HeapManager.Storage.Header *& header, HeapManager.FreeHeader *& freeElem,
-							size_t & size, size_t & alignment ) with( heapManager ) {
-	header = headerAddr( addr );
-
-  if ( unlikely( addr < heapBegin || heapEnd < addr ) ) { // mmapped ?
-		fakeHeader( header, alignment );
-		size = header->kind.real.blockSize & -3;		// mmap size
-		return true;
-	} // if
-
-	#ifdef __CFA_DEBUG__
-	checkHeader( header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
-	#endif // __CFA_DEBUG__
-
-	// header may be safe to dereference
-	fakeHeader( header, alignment );
-	#ifdef __CFA_DEBUG__
-	checkHeader( header < (HeapManager.Storage.Header *)heapBegin || (HeapManager.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
-	#endif // __CFA_DEBUG__
-
-	freeElem = (HeapManager.FreeHeader *)((size_t)header->kind.real.home & -3);
-	#ifdef __CFA_DEBUG__
-	if ( freeElem < &freeLists[0] || &freeLists[NoBucketSizes] <= freeElem ) {
-		abort( "Attempt to %s storage %p with corrupted header.\n"
-			   "Possible cause is duplicate free on same block or overwriting of header information.",
-			   name, addr );
-	} // if
-	#endif // __CFA_DEBUG__
-	size = freeElem->blockSize;
-	return false;
-} // headers
-
-// #ifdef __CFA_DEBUG__
-// #if __SIZEOF_POINTER__ == 4
-// #define MASK 0xdeadbeef
-// #else
-// #define MASK 0xdeadbeefdeadbeef
-// #endif
-// #define STRIDE size_t
-
-// static void * Memset( void * addr, STRIDE size ) {		// debug only
-// 	if ( size % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, size %zd not multiple of %zd.", size, sizeof(STRIDE) );
-// 	if ( (STRIDE)addr % sizeof(STRIDE) != 0 ) abort( "Memset() : internal error, addr %p not multiple of %zd.", addr, sizeof(STRIDE) );
-
-// 	STRIDE * end = (STRIDE *)addr + size / sizeof(STRIDE);
-// 	for ( STRIDE * p = (STRIDE *)addr; p < end; p += 1 ) *p = MASK;
-// 	return addr;
-// } // Memset
-// #endif // __CFA_DEBUG__
-
-
-#define NO_MEMORY_MSG "insufficient heap memory available for allocating %zd new bytes."
-
-static inline void * extend( size_t size ) with( heapManager ) {
-	lock( extlock __cfaabi_dbg_ctx2 );
-	ptrdiff_t rem = heapRemaining - size;
-	if ( rem < 0 ) {
-		// If the size requested is bigger than the current remaining storage, increase the size of the heap.
-
-		size_t increase = ceiling2( size > heapExpand ? size : heapExpand, __page_size );
-		// Do not call abort or strerror( errno ) as they may call malloc.
-		if ( sbrk( increase ) == (void *)-1 ) {			// failed, no memory ?
-			unlock( extlock );
-			__cfaabi_bits_print_nolock( STDERR_FILENO, NO_MEMORY_MSG, size );
-			_exit( EXIT_FAILURE );						// give up
-		} // if
-		// Make storage executable for thunks.
-		if ( mprotect( (char *)heapEnd + heapRemaining, increase, __map_prot ) ) {
-			unlock( extlock );
-			__cfaabi_bits_print_nolock( STDERR_FILENO, "extend() : internal error, mprotect failure, heapEnd:%p size:%zd, errno:%d.\n", heapEnd, increase, errno );
-			_exit( EXIT_FAILURE );
-		} // if
-		#ifdef __STATISTICS__
-		sbrk_calls += 1;
-		sbrk_storage += increase;
-		#endif // __STATISTICS__
-		#ifdef __CFA_DEBUG__
-		// Set new memory to garbage so subsequent uninitialized usages might fail.
-		memset( (char *)heapEnd + heapRemaining, '\xde', increase );
-		//Memset( (char *)heapEnd + heapRemaining, increase );
-		#endif // __CFA_DEBUG__
-		rem = heapRemaining + increase - size;
-	} // if
-
-	HeapManager.Storage * block = (HeapManager.Storage *)heapEnd;
-	heapRemaining = rem;
-	heapEnd = (char *)heapEnd + size;
-	unlock( extlock );
-	return block;
-} // extend
-
-
-static inline void * doMalloc( size_t size ) with( heapManager ) {
-	HeapManager.Storage * block;						// pointer to new block of storage
-
-	// Look up size in the size list.  Make sure the user request includes space for the header that must be allocated
-	// along with the block and is a multiple of the alignment size.
-
-  if ( unlikely( size > ULONG_MAX - sizeof(HeapManager.Storage) ) ) return 0p;
-	size_t tsize = size + sizeof(HeapManager.Storage);
-	if ( likely( tsize < mmapStart ) ) {				// small size => sbrk
-		size_t posn;
-		#ifdef FASTLOOKUP
-		if ( tsize < LookupSizes ) posn = lookup[tsize];
-		else
-		#endif // FASTLOOKUP
-			posn = Bsearchl( (unsigned int)tsize, bucketSizes, (size_t)maxBucketsUsed );
-		HeapManager.FreeHeader * freeElem = &freeLists[posn];
-		verify( freeElem <= &freeLists[maxBucketsUsed] ); // subscripting error ?
-		verify( tsize <= freeElem->blockSize );			// search failure ?
-		tsize = freeElem->blockSize;					// total space needed for request
-
-		// Spin until the lock is acquired for this particular size of block.
-
-		#if BUCKETLOCK == SPINLOCK
-		lock( freeElem->lock __cfaabi_dbg_ctx2 );
-		block = freeElem->freeList;						// remove node from stack
-		#else
-		block = pop( freeElem->freeList );
-		#endif // BUCKETLOCK
-		if ( unlikely( block == 0p ) ) {				// no free block ?
-			#if BUCKETLOCK == SPINLOCK
-			unlock( freeElem->lock );
-			#endif // BUCKETLOCK
-
-			// Freelist for that size was empty, so carve it out of the heap if there's enough left, or get some more
-			// and then carve it off.
-
-			block = (HeapManager.Storage *)extend( tsize );	// mutual exclusion on call
-		#if BUCKETLOCK == SPINLOCK
-		} else {
-			freeElem->freeList = block->header.kind.real.next;
-			unlock( freeElem->lock );
-		#endif // BUCKETLOCK
-		} // if
-
-		block->header.kind.real.home = freeElem;		// pointer back to free list of apropriate size
-	} else {											// large size => mmap
-  if ( unlikely( size > ULONG_MAX - __page_size ) ) return 0p;
-		tsize = ceiling2( tsize, __page_size );			// must be multiple of page size
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &mmap_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &mmap_storage_request, size, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &mmap_storage_alloc, tsize, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-		block = (HeapManager.Storage *)mmap( 0, tsize, __map_prot, MAP_PRIVATE | MAP_ANONYMOUS, mmapFd, 0 );
-		if ( block == (HeapManager.Storage *)MAP_FAILED ) { // failed ?
-			if ( errno == ENOMEM ) abort( NO_MEMORY_MSG, tsize ); // no memory
-			// Do not call strerror( errno ) as it may call malloc.
-			abort( "(HeapManager &)0x%p.doMalloc() : internal error, mmap failure, size:%zu errno:%d.", &heapManager, tsize, errno );
-		} //if
-		#ifdef __CFA_DEBUG__
-		// Set new memory to garbage so subsequent uninitialized usages might fail.
-		memset( block, '\xde', tsize );
-		//Memset( block, tsize );
-		#endif // __CFA_DEBUG__
-		block->header.kind.real.blockSize = tsize;		// storage size for munmap
-	} // if
-
-	block->header.kind.real.size = size;				// store allocation size
-	void * addr = &(block->data);						// adjust off header to user bytes
-	verify( ((uintptr_t)addr & (libAlign() - 1)) == 0 ); // minimum alignment ?
-
-	#ifdef __CFA_DEBUG__
-	__atomic_add_fetch( &allocUnfreed, tsize, __ATOMIC_SEQ_CST );
-	if ( traceHeap() ) {
-		enum { BufferSize = 64 };
-		char helpText[BufferSize];
-		int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", addr, size, tsize );
-		__cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
-	} // if
-	#endif // __CFA_DEBUG__
-
-	return addr;
-} // doMalloc
-
-
-static inline void doFree( void * addr ) with( heapManager ) {
-	#ifdef __CFA_DEBUG__
-	if ( unlikely( heapManager.heapBegin == 0p ) ) {
-		abort( "doFree( %p ) : internal error, called before heap is initialized.", addr );
-	} // if
-	#endif // __CFA_DEBUG__
-
-	HeapManager.Storage.Header * header;
-	HeapManager.FreeHeader * freeElem;
-	size_t size, alignment;								// not used (see realloc)
-
-	if ( headers( "free", addr, header, freeElem, size, alignment ) ) { // mmapped ?
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &munmap_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &munmap_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &munmap_storage_alloc, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-		if ( munmap( header, size ) == -1 ) {
-			abort( "Attempt to deallocate storage %p not allocated or with corrupt header.\n"
-				   "Possible cause is invalid pointer.",
-				   addr );
-		} // if
-	} else {
-		#ifdef __CFA_DEBUG__
-		// Set free memory to garbage so subsequent usages might fail.
-		memset( ((HeapManager.Storage *)header)->data, '\xde', freeElem->blockSize - sizeof( HeapManager.Storage ) );
-		//Memset( ((HeapManager.Storage *)header)->data, freeElem->blockSize - sizeof( HeapManager.Storage ) );
-		#endif // __CFA_DEBUG__
-
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &free_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &free_storage_request, header->kind.real.size, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &free_storage_alloc, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-		#if BUCKETLOCK == SPINLOCK
-		lock( freeElem->lock __cfaabi_dbg_ctx2 );		// acquire spin lock
-		header->kind.real.next = freeElem->freeList;	// push on stack
-		freeElem->freeList = (HeapManager.Storage *)header;
-		unlock( freeElem->lock );						// release spin lock
-		#else
-		push( freeElem->freeList, *(HeapManager.Storage *)header );
-		#endif // BUCKETLOCK
-	} // if
-
-	#ifdef __CFA_DEBUG__
-	__atomic_add_fetch( &allocUnfreed, -size, __ATOMIC_SEQ_CST );
-	if ( traceHeap() ) {
-		char helpText[64];
-		int len = snprintf( helpText, sizeof(helpText), "Free( %p ) size:%zu\n", addr, size );
-		__cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
-	} // if
-	#endif // __CFA_DEBUG__
-} // doFree
-
-
-size_t prtFree( HeapManager & manager ) with( manager ) {
-	size_t total = 0;
-	#ifdef __STATISTICS__
-	__cfaabi_bits_acquire();
-	__cfaabi_bits_print_nolock( STDERR_FILENO, "\nBin lists (bin size : free blocks on list)\n" );
-	#endif // __STATISTICS__
-	for ( unsigned int i = 0; i < maxBucketsUsed; i += 1 ) {
-		size_t size = freeLists[i].blockSize;
-		#ifdef __STATISTICS__
-		unsigned int N = 0;
-		#endif // __STATISTICS__
-
-		#if BUCKETLOCK == SPINLOCK
-		for ( HeapManager.Storage * p = freeLists[i].freeList; p != 0p; p = p->header.kind.real.next ) {
-		#else
-			for(;;) {
-//		for ( HeapManager.Storage * p = top( freeLists[i].freeList ); p != 0p; p = (p)`next->top ) {
-//		for ( HeapManager.Storage * p = top( freeLists[i].freeList ); p != 0p; /* p = getNext( p )->top */) {
-//			HeapManager.Storage * temp = p->header.kind.real.next.top; // FIX ME: direct assignent fails, initialization works`
-//			typeof(p) temp = (( p )`next)->top;			// FIX ME: direct assignent fails, initialization works`
-//			p = temp;
-		#endif // BUCKETLOCK
-			total += size;
-			#ifdef __STATISTICS__
-			N += 1;
-			#endif // __STATISTICS__
-		} // for
-
-		#ifdef __STATISTICS__
-		__cfaabi_bits_print_nolock( STDERR_FILENO, "%7zu, %-7u  ", size, N );
-		if ( (i + 1) % 8 == 0 ) __cfaabi_bits_print_nolock( STDERR_FILENO, "\n" );
-		#endif // __STATISTICS__
-	} // for
-	#ifdef __STATISTICS__
-	__cfaabi_bits_print_nolock( STDERR_FILENO, "\ntotal free blocks:%zu\n", total );
-	__cfaabi_bits_release();
-	#endif // __STATISTICS__
-	return (char *)heapEnd - (char *)heapBegin - total;
-} // prtFree
-
-
-static void ?{}( HeapManager & manager ) with( manager ) {
-	__page_size = sysconf( _SC_PAGESIZE );
-	__map_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
-
-	for ( unsigned int i = 0; i < NoBucketSizes; i += 1 ) { // initialize the free lists
-		freeLists[i].blockSize = bucketSizes[i];
-	} // for
-
-	#ifdef FASTLOOKUP
-	unsigned int idx = 0;
-	for ( unsigned int i = 0; i < LookupSizes; i += 1 ) {
-		if ( i > bucketSizes[idx] ) idx += 1;
-		lookup[i] = idx;
-	} // for
-	#endif // FASTLOOKUP
-
-	if ( ! setMmapStart( default_mmap_start() ) ) {
-		abort( "HeapManager : internal error, mmap start initialization failure." );
-	} // if
-	heapExpand = default_heap_expansion();
-
-	char * end = (char *)sbrk( 0 );
-	heapBegin = heapEnd = sbrk( (char *)ceiling2( (long unsigned int)end, __page_size ) - end ); // move start of heap to multiple of alignment
-} // HeapManager
-
-
-static void ^?{}( HeapManager & ) {
-	#ifdef __STATISTICS__
-	if ( traceHeapTerm() ) {
-		printStats();
-		// prtUnfreed() called in heapAppStop()
-	} // if
-	#endif // __STATISTICS__
-} // ~HeapManager
-
-
-static void memory_startup( void ) __attribute__(( constructor( STARTUP_PRIORITY_MEMORY ) ));
-void memory_startup( void ) {
-	#ifdef __CFA_DEBUG__
-	if ( heapBoot ) {									// check for recursion during system boot
-		abort( "boot() : internal error, recursively invoked during system boot." );
-	} // if
-	heapBoot = true;
-	#endif // __CFA_DEBUG__
-
-	//verify( heapManager.heapBegin != 0 );
-	//heapManager{};
-	if ( heapManager.heapBegin == 0p ) heapManager{};	// sanity check
-} // memory_startup
-
-static void memory_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_MEMORY ) ));
-void memory_shutdown( void ) {
-	^heapManager{};
-} // memory_shutdown
-
-
-static inline void * mallocNoStats( size_t size ) {		// necessary for malloc statistics
-	verify( heapManager.heapBegin != 0p );				// called before memory_startup ?
-  if ( unlikely( size ) == 0 ) return 0p;				// 0 BYTE ALLOCATION RETURNS NULL POINTER
-
-#if __SIZEOF_POINTER__ == 8
-	verify( size < ((typeof(size_t))1 << 48) );
-#endif // __SIZEOF_POINTER__ == 8
-	return doMalloc( size );
-} // mallocNoStats
-
-
-static inline void * memalignNoStats( size_t alignment, size_t size ) {
-  if ( unlikely( size ) == 0 ) return 0p;				// 0 BYTE ALLOCATION RETURNS NULL POINTER
-
-	#ifdef __CFA_DEBUG__
-	checkAlign( alignment );							// check alignment
-	#endif // __CFA_DEBUG__
-
-	// if alignment <= default alignment, do normal malloc as two headers are unnecessary
-  if ( unlikely( alignment <= libAlign() ) ) return mallocNoStats( size );
-
-	// Allocate enough storage to guarantee an address on the alignment boundary, and sufficient space before it for
-	// administrative storage. NOTE, WHILE THERE ARE 2 HEADERS, THE FIRST ONE IS IMPLICITLY CREATED BY DOMALLOC.
-	//      .-------------v-----------------v----------------v----------,
-	//      | Real Header | ... padding ... |   Fake Header  | data ... |
-	//      `-------------^-----------------^-+--------------^----------'
-	//      |<--------------------------------' offset/align |<-- alignment boundary
-
-	// subtract libAlign() because it is already the minimum alignment
-	// add sizeof(Storage) for fake header
-	char * addr = (char *)mallocNoStats( size + alignment - libAlign() + sizeof(HeapManager.Storage) );
-
-	// address in the block of the "next" alignment address
-	char * user = (char *)ceiling2( (uintptr_t)(addr + sizeof(HeapManager.Storage)), alignment );
-
-	// address of header from malloc
-	HeapManager.Storage.Header * realHeader = headerAddr( addr );
-	realHeader->kind.real.size = size;					// correct size to eliminate above alignment offset
-	// address of fake header * before* the alignment location
-	HeapManager.Storage.Header * fakeHeader = headerAddr( user );
-	// SKULLDUGGERY: insert the offset to the start of the actual storage block and remember alignment
-	fakeHeader->kind.fake.offset = (char *)fakeHeader - (char *)realHeader;
-	// SKULLDUGGERY: odd alignment imples fake header
-	fakeHeader->kind.fake.alignment = alignment | 1;
-
-	return user;
-} // memalignNoStats
-
-
-extern "C" {
-	// Allocates size bytes and returns a pointer to the allocated memory.  The contents are undefined. If size is 0,
-	// then malloc() returns a unique pointer value that can later be successfully passed to free().
-	void * malloc( size_t size ) {
-		#ifdef __STATISTICS__
-		if ( likely( size > 0 ) ) {
-			__atomic_add_fetch( &malloc_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &malloc_storage_request, size, __ATOMIC_SEQ_CST );
-		} else {
-			__atomic_add_fetch( &malloc_0_calls, 1, __ATOMIC_SEQ_CST );
-		} // if
-		#endif // __STATISTICS__
-
-		return mallocNoStats( size );
-	} // malloc
-
-
-	// Same as malloc() except size bytes is an array of dim elements each of elemSize bytes.
-	void * aalloc( size_t dim, size_t elemSize ) {
-		size_t size = dim * elemSize;
-		#ifdef __STATISTICS__
-		if ( likely( size > 0 ) ) {
-			__atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &aalloc_storage_request, size, __ATOMIC_SEQ_CST );
-		} else {
-			__atomic_add_fetch( &aalloc_0_calls, 1, __ATOMIC_SEQ_CST );
-		} // if
-		#endif // __STATISTICS__
-
-		return mallocNoStats( size );
-	} // aalloc
-
-
-	// Same as aalloc() with memory set to zero.
-	void * calloc( size_t dim, size_t elemSize ) {
-		size_t size = dim * elemSize;
-	  if ( unlikely( size ) == 0 ) {			// 0 BYTE ALLOCATION RETURNS NULL POINTER
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &calloc_0_calls, 1, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			return 0p;
-		} // if
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &calloc_storage_request, dim * elemSize, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-		char * addr = (char *)mallocNoStats( size );
-
-		HeapManager.Storage.Header * header;
-		HeapManager.FreeHeader * freeElem;
-		size_t bsize, alignment;
-
-		#ifndef __CFA_DEBUG__
-		bool mapped =
-			#endif // __CFA_DEBUG__
-			headers( "calloc", addr, header, freeElem, bsize, alignment );
-
-		#ifndef __CFA_DEBUG__
-		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
-		if ( ! mapped )
-		#endif // __CFA_DEBUG__
-			// <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined
-			// `-header`-addr                      `-size
-			memset( addr, '\0', size );					// set to zeros
-
-		header->kind.real.blockSize |= 2;				// mark as zero filled
-		return addr;
-	} // calloc
-
-
-	// Change the size of the memory block pointed to by oaddr to size bytes. The contents are undefined.  If oaddr is
-	// 0p, then the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and oaddr is
-	// not 0p, then the call is equivalent to free(oaddr). Unless oaddr is 0p, it must have been returned by an earlier
-	// call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done.
-	void * resize( void * oaddr, size_t size ) {
-		// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-	  if ( unlikely( size == 0 ) ) {					// special cases
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &resize_0_calls, 1, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			free( oaddr );
-			return 0p;
-		} // if
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-	  if ( unlikely( oaddr == 0p ) ) {
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			return mallocNoStats( size );
-		} // if
-
-		HeapManager.Storage.Header * header;
-		HeapManager.FreeHeader * freeElem;
-		size_t bsize, oalign;
-		headers( "resize", oaddr, header, freeElem, bsize, oalign );
-
-		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
-		// same size, DO NOT preserve STICKY PROPERTIES.
-		if ( oalign == libAlign() && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
-			header->kind.real.blockSize &= -2;			// no alignment and turn off 0 fill
-			header->kind.real.size = size;				// reset allocation size
-			return oaddr;
-		} // if
-
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-		// change size, DO NOT preserve STICKY PROPERTIES.
-		free( oaddr );
-		return mallocNoStats( size );					// create new area
-	} // resize
-
-
-	// Same as resize() but the contents are unchanged in the range from the start of the region up to the minimum of
-	// the old and new sizes.
-	void * realloc( void * oaddr, size_t size ) {
-		// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-	  if ( unlikely( size == 0 ) ) {					// special cases
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &realloc_0_calls, 1, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			free( oaddr );
-			return 0p;
-		} // if
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-	  if ( unlikely( oaddr == 0p ) ) {
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			return mallocNoStats( size );
-		} // if
-
-		HeapManager.Storage.Header * header;
-		HeapManager.FreeHeader * freeElem;
-		size_t bsize, oalign;
-		headers( "realloc", oaddr, header, freeElem, bsize, oalign );
-
-		size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
-		size_t osize = header->kind.real.size;			// old allocation size
-		bool ozfill = (header->kind.real.blockSize & 2); // old allocation zero filled
-	  if ( unlikely( size <= odsize ) && odsize <= size * 2 ) { // allow up to 50% wasted storage
-	  		header->kind.real.size = size;				// reset allocation size
-	  		if ( unlikely( ozfill ) && size > osize ) {	// previous request zero fill and larger ?
-	  			memset( (char *)oaddr + osize, '\0', size - osize ); // initialize added storage
-	  		} // if
-			return oaddr;
-		} // if
-
-		#ifdef __STATISTICS__
-	  	__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-		// change size and copy old content to new storage
-
-		void * naddr;
-		if ( likely( oalign == libAlign() ) ) {			// previous request not aligned ?
-			naddr = mallocNoStats( size );				// create new area
-		} else {
-			naddr = memalignNoStats( oalign, size );	// create new aligned area
-		} // if
-
-		headers( "realloc", naddr, header, freeElem, bsize, oalign );
-		memcpy( naddr, oaddr, min( osize, size ) );		// copy bytes
-		free( oaddr );
-
-		if ( unlikely( ozfill ) ) {						// previous request zero fill ?
-			header->kind.real.blockSize |= 2;			// mark new request as zero filled
-			if ( size > osize ) {						// previous request larger ?
-				memset( (char *)naddr + osize, '\0', size - osize ); // initialize added storage
-			} // if
-		} // if
-		return naddr;
-	} // realloc
-
-
-	// Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete)
-	void * memalign( size_t alignment, size_t size ) {
-		#ifdef __STATISTICS__
-		if ( likely( size > 0 ) ) {
-			__atomic_add_fetch( &memalign_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &memalign_storage_request, size, __ATOMIC_SEQ_CST );
-		} else {
-			__atomic_add_fetch( &memalign_0_calls, 1, __ATOMIC_SEQ_CST );
-		} // if
-		#endif // __STATISTICS__
-
-		return memalignNoStats( alignment, size );
-	} // memalign
-
-
-	// Same as aalloc() with memory alignment.
-	void * amemalign( size_t alignment, size_t dim, size_t elemSize ) {
-		size_t size = dim * elemSize;
-		#ifdef __STATISTICS__
-		if ( likely( size > 0 ) ) {
-			__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
-			__atomic_add_fetch( &cmemalign_storage_request, size, __ATOMIC_SEQ_CST );
-		} else {
-			__atomic_add_fetch( &cmemalign_0_calls, 1, __ATOMIC_SEQ_CST );
-		} // if
-		#endif // __STATISTICS__
-
-		return memalignNoStats( alignment, size );
-	} // amemalign
-
-
-	// Same as calloc() with memory alignment.
-	void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) {
-		size_t size = dim * elemSize;
-	  if ( unlikely( size ) == 0 ) {					// 0 BYTE ALLOCATION RETURNS NULL POINTER
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &cmemalign_0_calls, 1, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-			return 0p;
-		} // if
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &cmemalign_storage_request, dim * elemSize, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-
-		char * addr = (char *)memalignNoStats( alignment, size );
-
-		HeapManager.Storage.Header * header;
-		HeapManager.FreeHeader * freeElem;
-		size_t bsize;
-
-		#ifndef __CFA_DEBUG__
-		bool mapped =
-			#endif // __CFA_DEBUG__
-			headers( "cmemalign", addr, header, freeElem, bsize, alignment );
-
-		// Mapped storage is zero filled, but in debug mode mapped memory is scrubbed in doMalloc, so it has to be reset to zero.
-		#ifndef __CFA_DEBUG__
-		if ( ! mapped )
-		#endif // __CFA_DEBUG__
-			// <-------0000000000000000000000000000UUUUUUUUUUUUUUUUUUUUUUUUU> bsize (bucket size) U => undefined
-			// `-header`-addr                      `-size
-			memset( addr, '\0', size );					// set to zeros
-
-		header->kind.real.blockSize |= 2;				// mark as zero filled
-		return addr;
-	} // cmemalign
-
-
-	// Same as memalign(), but ISO/IEC 2011 C11 Section 7.22.2 states: the value of size shall be an integral multiple
-    // of alignment. This requirement is universally ignored.
-	void * aligned_alloc( size_t alignment, size_t size ) {
-		return memalign( alignment, size );
-	} // aligned_alloc
-
-
-	// Allocates size bytes and places the address of the allocated memory in *memptr. The address of the allocated
-	// memory shall be a multiple of alignment, which must be a power of two and a multiple of sizeof(void *). If size
-	// is 0, then posix_memalign() returns either 0p, or a unique pointer value that can later be successfully passed to
-	// free(3).
-	int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
-	  if ( alignment < libAlign() || ! is_pow2( alignment ) ) return EINVAL; // check alignment
-		* memptr = memalign( alignment, size );
-		return 0;
-	} // posix_memalign
-
-
-	// Allocates size bytes and returns a pointer to the allocated memory. The memory address shall be a multiple of the
-	// page size.  It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
-	void * valloc( size_t size ) {
-		return memalign( __page_size, size );
-	} // valloc
-
-
-	// Same as valloc but rounds size to multiple of page size.
-	void * pvalloc( size_t size ) {
-		return memalign( __page_size, ceiling2( size, __page_size ) );
-	} // pvalloc
-
-
-	// Frees the memory space pointed to by ptr, which must have been returned by a previous call to malloc(), calloc()
-	// or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is
-	// 0p, no operation is performed.
-	void free( void * addr ) {
-	  if ( unlikely( addr == 0p ) ) {					// special case
-			#ifdef __STATISTICS__
-			__atomic_add_fetch( &free_null_calls, 1, __ATOMIC_SEQ_CST );
-			#endif // __STATISTICS__
-
-			// #ifdef __CFA_DEBUG__
-			// if ( traceHeap() ) {
-			// 	#define nullmsg "Free( 0x0 ) size:0\n"
-			// 	// Do not debug print free( 0p ), as it can cause recursive entry from sprintf.
-			// 	__cfaabi_dbg_write( nullmsg, sizeof(nullmsg) - 1 );
-			// } // if
-			// #endif // __CFA_DEBUG__
-			return;
-		} // exit
-
-		doFree( addr );
-	} // free
-
-
-	// Returns the alignment of an allocation.
-	size_t malloc_alignment( void * addr ) {
-	  if ( unlikely( addr == 0p ) ) return libAlign();	// minimum alignment
-		HeapManager.Storage.Header * header = headerAddr( addr );
-		if ( (header->kind.fake.alignment & 1) == 1 ) {	// fake header ?
-			return header->kind.fake.alignment & -2;	// remove flag from value
-		} else {
-			return libAlign();							// minimum alignment
-		} // if
-	} // malloc_alignment
-
-
-	// Set the alignment for an the allocation and return previous alignment or 0 if no alignment.
-	size_t malloc_alignment_set$( void * addr, size_t alignment ) {
-	  if ( unlikely( addr == 0p ) ) return libAlign();	// minimum alignment
-		size_t ret;
-		HeapManager.Storage.Header * header = headerAddr( addr );
-		if ( (header->kind.fake.alignment & 1) == 1 ) {	// fake header ?
-			ret = header->kind.fake.alignment & -2;		// remove flag from old value
-			header->kind.fake.alignment = alignment | 1; // add flag to new value
-		} else {
-			ret = 0;									// => no alignment to change
-		} // if
-		return ret;
-	} // malloc_alignment_set$
-
-
-	// Returns true if the allocation is zero filled, e.g., allocated by calloc().
-	bool malloc_zero_fill( void * addr ) {
-	  if ( unlikely( addr == 0p ) ) return false;		// null allocation is not zero fill
-		HeapManager.Storage.Header * header = headerAddr( addr );
-		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
-			header = realHeader( header );				// backup from fake to real header
-		} // if
-		return (header->kind.real.blockSize & 2) != 0;	// zero filled ?
-	} // malloc_zero_fill
-
-	// Set allocation is zero filled and return previous zero filled.
-	bool malloc_zero_fill_set$( void * addr ) {
-	  if ( unlikely( addr == 0p ) ) return false;		// null allocation is not zero fill
-		HeapManager.Storage.Header * header = headerAddr( addr );
-		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
-			header = realHeader( header );				// backup from fake to real header
-		} // if
-		bool ret = (header->kind.real.blockSize & 2) != 0; // zero filled ?
-		header->kind.real.blockSize |= 2;				// mark as zero filled
-		return ret;
-	} // malloc_zero_fill_set$
-
-
-	// Returns original total allocation size (not bucket size) => array size is dimension * sizeif(T).
-	size_t malloc_size( void * addr ) {
-	  if ( unlikely( addr == 0p ) ) return 0;			// null allocation has zero size
-		HeapManager.Storage.Header * header = headerAddr( addr );
-		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
-			header = realHeader( header );				// backup from fake to real header
-		} // if
-		return header->kind.real.size;
-	} // malloc_size
-
-	// Set allocation size and return previous size.
-	size_t malloc_size_set$( void * addr, size_t size ) {
-	  if ( unlikely( addr == 0p ) ) return 0;			// null allocation has 0 size
-		HeapManager.Storage.Header * header = headerAddr( addr );
-		if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
-			header = realHeader( header );				// backup from fake to real header
-		} // if
-		size_t ret = header->kind.real.size;
-		header->kind.real.size = size;
-		return ret;
-	} // malloc_size_set$
-
-
-	// Returns the number of usable bytes in the block pointed to by ptr, a pointer to a block of memory allocated by
-	// malloc or a related function.
-	size_t malloc_usable_size( void * addr ) {
-	  if ( unlikely( addr == 0p ) ) return 0;			// null allocation has 0 size
-		HeapManager.Storage.Header * header;
-		HeapManager.FreeHeader * freeElem;
-		size_t bsize, alignment;
-
-		headers( "malloc_usable_size", addr, header, freeElem, bsize, alignment );
-		return dataStorage( bsize, addr, header );		// data storage in bucket
-	} // malloc_usable_size
-
-
-	// Prints (on default standard error) statistics about memory allocated by malloc and related functions.
-	void malloc_stats( void ) {
-		#ifdef __STATISTICS__
-		printStats();
-		if ( prtFree() ) prtFree( heapManager );
-		#endif // __STATISTICS__
-	} // malloc_stats
-
-
-	// Changes the file descripter where malloc_stats() writes statistics.
-	int malloc_stats_fd( int fd __attribute__(( unused )) ) {
-		#ifdef __STATISTICS__
-		int temp = stats_fd;
-		stats_fd = fd;
-		return temp;
-		#else
-		return -1;
-		#endif // __STATISTICS__
-	} // malloc_stats_fd
-
-
-	// Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument
-	// specifies the parameter to be modified, and value specifies the new value for that parameter.
-	int mallopt( int option, int value ) {
-		choose( option ) {
-		  case M_TOP_PAD:
-			heapExpand = ceiling2( value, __page_size ); return 1;
-		  case M_MMAP_THRESHOLD:
-			if ( setMmapStart( value ) ) return 1;
-			break;
-		} // switch
-		return 0;										// error, unsupported
-	} // mallopt
-
-
-	// Attempt to release free memory at the top of the heap (by calling sbrk with a suitable argument).
-	int malloc_trim( size_t ) {
-		return 0;										// => impossible to release memory
-	} // malloc_trim
-
-
-	// Exports an XML string that describes the current state of the memory-allocation implementation in the caller.
-	// The string is printed on the file stream stream.  The exported string includes information about all arenas (see
-	// malloc).
-	int malloc_info( int options, FILE * stream __attribute__(( unused )) ) {
-	  if ( options != 0 ) { errno = EINVAL; return -1; }
-		#ifdef __STATISTICS__
-		return printStatsXML( stream );
-		#else
-		return 0;										// unsupported
-		#endif // __STATISTICS__
-	} // malloc_info
-
-
-	// Records the current state of all malloc internal bookkeeping variables (but not the actual contents of the heap
-	// or the state of malloc_hook functions pointers).  The state is recorded in a system-dependent opaque data
-	// structure dynamically allocated via malloc, and a pointer to that data structure is returned as the function
-	// result.  (The caller must free this memory.)
-	void * malloc_get_state( void ) {
-		return 0p;										// unsupported
-	} // malloc_get_state
-
-
-	// Restores the state of all malloc internal bookkeeping variables to the values recorded in the opaque data
-	// structure pointed to by state.
-	int malloc_set_state( void * ) {
-		return 0;										// unsupported
-	} // malloc_set_state
-} // extern "C"
-
-
-// Must have CFA linkage to overload with C linkage realloc.
-void * resize( void * oaddr, size_t nalign, size_t size ) {
-	// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-  if ( unlikely( size == 0 ) ) {						// special cases
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &resize_0_calls, 1, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-		free( oaddr );
-		return 0p;
-	} // if
-
-	if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum
-	#ifdef __CFA_DEBUG__
-	else checkAlign( nalign );							// check alignment
-	#endif // __CFA_DEBUG__
-
-  if ( unlikely( oaddr == 0p ) ) {
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-		return memalignNoStats( nalign, size );
-	} // if
-
-	// Attempt to reuse existing alignment.
-	HeapManager.Storage.Header * header = headerAddr( oaddr );
-	bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
-	size_t oalign;
-	if ( isFakeHeader ) {
-		oalign = header->kind.fake.alignment & -2;		// old alignment
-		if ( (uintptr_t)oaddr % nalign == 0				// lucky match ?
-			 && ( oalign <= nalign						// going down
-				  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
-			) {
-			headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
-			HeapManager.FreeHeader * freeElem;
-			size_t bsize, oalign;
-			headers( "resize", oaddr, header, freeElem, bsize, oalign );
-			size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
-
-			if ( size <= odsize && odsize <= size * 2 ) { // allow 50% wasted data storage
-				headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
-
-				header->kind.real.blockSize &= -2;		// turn off 0 fill
-				header->kind.real.size = size;			// reset allocation size
-				return oaddr;
-			} // if
-		} // if
-	} else if ( ! isFakeHeader							// old real header (aligned on libAlign) ?
-				&& nalign == libAlign() ) {				// new alignment also on libAlign => no fake header needed
-		return resize( oaddr, size );					// duplicate special case checks
-	} // if
-
-	#ifdef __STATISTICS__
-	__atomic_add_fetch( &resize_storage_request, size, __ATOMIC_SEQ_CST );
-	#endif // __STATISTICS__
-
-	// change size, DO NOT preserve STICKY PROPERTIES.
-	free( oaddr );
-	return memalignNoStats( nalign, size );				// create new aligned area
-} // resize
-
-
-void * realloc( void * oaddr, size_t nalign, size_t size ) {
-	// If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
-  if ( unlikely( size == 0 ) ) {						// special cases
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &realloc_0_calls, 1, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-		free( oaddr );
-		return 0p;
-	} // if
-
-	if ( unlikely( nalign < libAlign() ) ) nalign = libAlign(); // reset alignment to minimum
-	#ifdef __CFA_DEBUG__
-	else checkAlign( nalign );							// check alignment
-	#endif // __CFA_DEBUG__
-
-  if ( unlikely( oaddr == 0p ) ) {
-		#ifdef __STATISTICS__
-		__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
-		__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
-		#endif // __STATISTICS__
-		return memalignNoStats( nalign, size );
-	} // if
-
-	// Attempt to reuse existing alignment.
-	HeapManager.Storage.Header * header = headerAddr( oaddr );
-	bool isFakeHeader = header->kind.fake.alignment & 1; // old fake header ?
-	size_t oalign;
-	if ( isFakeHeader ) {
-		oalign = header->kind.fake.alignment & -2;		// old alignment
-		if ( (uintptr_t)oaddr % nalign == 0				// lucky match ?
-			 && ( oalign <= nalign						// going down
-				  || (oalign >= nalign && oalign <= 256) ) // little alignment storage wasted ?
-			) {
-			headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
-			return realloc( oaddr, size );				// duplicate alignment and special case checks
-		} // if
-	} else if ( ! isFakeHeader							// old real header (aligned on libAlign) ?
-				&& nalign == libAlign() )				// new alignment also on libAlign => no fake header needed
-		return realloc( oaddr, size );					// duplicate alignment and special case checks
-
-	#ifdef __STATISTICS__
-	__atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
-	__atomic_add_fetch( &realloc_storage_request, size, __ATOMIC_SEQ_CST );
-	#endif // __STATISTICS__
-
-	HeapManager.FreeHeader * freeElem;
-	size_t bsize;
-	headers( "realloc", oaddr, header, freeElem, bsize, oalign );
-
-	// change size and copy old content to new storage
-
-	size_t osize = header->kind.real.size;				// old allocation size
-	bool ozfill = (header->kind.real.blockSize & 2);	// old allocation zero filled
-
-	void * naddr = memalignNoStats( nalign, size );		// create new aligned area
-
-	headers( "realloc", naddr, header, freeElem, bsize, oalign );
-	memcpy( naddr, oaddr, min( osize, size ) );			// copy bytes
-	free( oaddr );
-
-	if ( unlikely( ozfill ) ) {							// previous request zero fill ?
-		header->kind.real.blockSize |= 2;				// mark new request as zero filled
-		if ( size > osize ) {							// previous request larger ?
-			memset( (char *)naddr + osize, '\0', size - osize ); // initialize added storage
-		} // if
-	} // if
-	return naddr;
-} // realloc
-
-
-// Local Variables: //
-// tab-width: 4 //
-// compile-command: "cfa -nodebug -O2 heap.cfa" //
-// End: //
Index: libcfa/src/heap.h
===================================================================
--- libcfa/src/heap.h	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
+++ libcfa/src/heap.h	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <stdlib.h>
+#include <malloc.h>
+
+// supported mallopt options
+#ifndef M_MMAP_THRESHOLD
+#define M_MMAP_THRESHOLD (-1)
+#endif // M_MMAP_THRESHOLD
+
+#ifndef M_TOP_PAD
+#define M_TOP_PAD (-2)
+#endif // M_TOP_PAD
+
+extern "C" {
+	void * malloc( size_t size );
+	void * aalloc( size_t dim, size_t elemSize );
+	void * calloc( size_t dim, size_t elemSize );
+	void * resize( void * oaddr, size_t size );
+	void * realloc( void * oaddr, size_t size );
+	void * memalign( size_t alignment, size_t size );
+	void * amemalign( size_t align, size_t dim, size_t elemSize );
+	void * cmemalign( size_t align, size_t dim, size_t elemSize );
+	void * valloc( size_t size );
+	void * pvalloc( size_t size );
+	void free( void * addr );
+	size_t malloc_alignment( void * addr );
+	bool malloc_zero_fill( void * addr );
+	size_t malloc_size( void * addr );
+	size_t malloc_usable_size( void * addr );
+	int malloc_stats_fd( int fd );
+	int malloc_info( int options, FILE * stream );
+	int mallopt( int param_number, int value );
+	size_t malloc_expansion() __attribute__((weak));	// heap expansion size (bytes)
+	size_t malloc_mmap_start() __attribute__((weak));	// crossover allocation size from sbrk to mmap
+	size_t malloc_unfreed() __attribute__((weak));		// heap unfreed size (bytes)
+} // extern "C"
+
+#ifdef __cforall
+void * resize( void * oaddr, size_t alignment, size_t size );
+void * realloc( void * oaddr, size_t alignment, size_t size );
+#else
+extern "C" {
+	void * _X6resizeFPv_Pvmm__1( void * oaddr, size_t alignment, size_t size );
+	void * _X7reallocFPv_Pvmm__1( void * oaddr, size_t alignment, size_t size );
+}
+#endif
+
+// Local Variables: //
+// tab-width: 4 //
+// End: //
Index: libcfa/src/heap.hfa
===================================================================
--- libcfa/src/heap.hfa	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ 	(revision )
@@ -1,59 +1,0 @@
-// 
-// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
-//
-// The contents of this file are covered under the licence agreement in the
-// file "LICENCE" distributed with Cforall.
-// 
-// heap.hfa -- 
-// 
-// Author           : Peter A. Buhr
-// Created On       : Tue May 26 11:23:55 2020
-// Last Modified By : Peter A. Buhr
-// Last Modified On : Sat Aug  8 17:36:48 2020
-// Update Count     : 16
-// 
-
-#pragma once
-
-size_t default_mmap_start();							// CFA extras
-size_t default_heap_expansion();
-
-bool traceHeap();
-bool traceHeapOn();
-bool traceHeapOff();
-
-bool traceHeapTerm();
-bool traceHeapTermOn();
-bool traceHeapTermOff();
-
-bool checkFree();
-bool checkFreeOn();
-bool checkFreeOff();
-
-// supported mallopt options
-#ifndef M_MMAP_THRESHOLD
-#define M_MMAP_THRESHOLD (-1)
-#endif // M_TOP_PAD
-#ifndef M_TOP_PAD
-#define M_TOP_PAD (-2)
-#endif // M_TOP_PAD
-
-extern "C" {
-	void * aalloc( size_t dim, size_t elemSize );
-	void * resize( void * oaddr, size_t size );
-	void * amemalign( size_t align, size_t dim, size_t elemSize );
-	void * cmemalign( size_t align, size_t dim, size_t elemSize );
-	size_t malloc_alignment( void * addr );
-	bool malloc_zero_fill( void * addr );
-	size_t malloc_size( void * addr );
-	size_t malloc_usable_size( void * addr );
-	int malloc_stats_fd( int fd );
-} // extern "C"
-
-void * resize( void * oaddr, size_t nalign, size_t size );
-void * realloc( void * oaddr, size_t nalign, size_t size );
-
-// Local Variables: //
-// mode: c //
-// tab-width: 4 //
-// End: //
Index: libcfa/src/startup.cfa
===================================================================
--- libcfa/src/startup.cfa	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ libcfa/src/startup.cfa	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -27,16 +27,16 @@
 	void __cfaabi_appready_startup( void ) {
 		tzset();										// initialize time global variables
-		#ifdef __CFA_DEBUG__
+		#ifdef __CFA_DEBUG__FIXME
 		extern void heapAppStart();
 		heapAppStart();
-		#endif // __CFA_DEBUG__
+		#endif // __CFA_DEBUG__FIXME
 	} // __cfaabi_appready_startup
 
 	void __cfaabi_appready_shutdown( void ) __attribute__(( destructor( STARTUP_PRIORITY_APPREADY ) ));
 	void __cfaabi_appready_shutdown( void ) {
-		#ifdef __CFA_DEBUG__
+		#ifdef __CFA_DEBUG__FIXME
 		extern void heapAppStop();
 		heapAppStop();
-		#endif // __CFA_DEBUG__
+		#endif // __CFA_DEBUG__FIXME
 	} // __cfaabi_appready_shutdown
 
Index: libcfa/src/stdhdr/malloc.h
===================================================================
--- libcfa/src/stdhdr/malloc.h	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ libcfa/src/stdhdr/malloc.h	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -18,5 +18,5 @@
 } // extern "C"
 
-#include <heap.hfa>
+#include <heap.h>
 
 // Local Variables: //
Index: libcfa/src/stdlib.hfa
===================================================================
--- libcfa/src/stdlib.hfa	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ libcfa/src/stdlib.hfa	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -10,6 +10,6 @@
 // Created On       : Thu Jan 28 17:12:35 2016
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Thu Feb 10 18:34:58 2022
-// Update Count     : 641
+// Last Modified On : Sat Feb 12 17:22:25 2022
+// Update Count     : 643
 //
 
@@ -21,5 +21,5 @@
 
 #include <stdlib.h>										// *alloc, strto*, ato*
-#include <heap.hfa>
+#include <heap.h>
 
 
@@ -209,9 +209,9 @@
 
 	forall( TT... | { T * alloc_internal$( void *, T *, size_t, size_t, S_fill(T), TT ); } ) {
-		T * alloc_internal$( void *       , T * Realloc, size_t Align, size_t Dim, S_fill(T) Fill, T_resize Resize, TT rest) {
+		T * alloc_internal$( void *       , T *        , size_t Align, size_t Dim, S_fill(T) Fill, T_resize Resize, TT rest) {
 	        return alloc_internal$( Resize, (T*)0p, Align, Dim, Fill, rest);
 		}
 
-		T * alloc_internal$( void * Resize, T *        , size_t Align, size_t Dim, S_fill(T) Fill, S_realloc(T) Realloc, TT rest) {
+		T * alloc_internal$( void *        , T *        , size_t Align, size_t Dim, S_fill(T) Fill, S_realloc(T) Realloc, TT rest) {
 	        return alloc_internal$( (void*)0p, Realloc, Align, Dim, Fill, rest);
 		}
@@ -389,5 +389,5 @@
 // Declaration :
 //   PRNG sprng = { 1009 } - set starting seed versus random seed
-//   
+//
 // Interface :
 //   set_seed( sprng, 1009 ) - set starting seed for ALL kernel threads versus random seed
@@ -412,6 +412,6 @@
 void set_seed( PRNG & prng, uint32_t seed_ );
 static inline {
-	void ?{}( PRNG & prng ) { set_seed( prng, rdtscl() ); }	// random seed
-	void ?{}( PRNG & prng, uint32_t seed ) { set_seed( prng, seed ); } // fixed seed
+	void ?{}( PRNG & prng ) with( prng ) { callcnt = 0; set_seed( prng, rdtscl() ); } // random seed
+	void ?{}( PRNG & prng, uint32_t seed ) with( prng ) { callcnt = 0; set_seed( prng, seed ); } // fixed seed
 	uint32_t get_seed( PRNG & prng ) __attribute__(( warn_unused_result )) with( prng ) { return seed; } // get seed
 	uint32_t prng( PRNG & prng ) __attribute__(( warn_unused_result )) with( prng ) { callcnt += 1; return LCG( state ); } // [0,UINT_MAX]
Index: src/AST/Convert.hpp
===================================================================
--- src/AST/Convert.hpp	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/AST/Convert.hpp	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -20,5 +20,5 @@
 class Declaration;
 namespace ast {
-	struct TranslationUnit;
+	class TranslationUnit;
 };
 
Index: src/AST/Fwd.hpp
===================================================================
--- src/AST/Fwd.hpp	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/AST/Fwd.hpp	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -140,5 +140,5 @@
 typedef unsigned int UniqueId;
 
-struct TranslationUnit;
+class TranslationUnit;
 // TODO: Get from the TranslationUnit:
 extern ptr<Type> sizeType;
Index: src/AST/Pass.hpp
===================================================================
--- src/AST/Pass.hpp	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/AST/Pass.hpp	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -239,90 +239,32 @@
 private:
 
-	// Regular nodes
+	__pass::result1<ast::Stmt> call_accept( const ast::Stmt * );
+	__pass::result1<ast::Expr> call_accept( const ast::Expr * );
+
+	/// This has a `type` member that is the return type for the
+	/// generic call_accept if the generic call_accept is defined.
 	template< typename node_t >
-	struct result1 {
-		bool differs;
-		const node_t * value;
-
-		template< typename object_t, typename super_t, typename field_t >
-		void apply(object_t *, field_t super_t::* field);
-	};
-
-	result1<ast::Stmt> call_accept( const ast::Stmt * );
-	result1<ast::Expr> call_accept( const ast::Expr * );
+	using generic_call_accept_result =
+		std::enable_if<
+				!std::is_base_of<ast::Expr, node_t>::value &&
+				!std::is_base_of<ast::Stmt, node_t>::value
+			, __pass::result1<
+				typename std::remove_pointer< typename std::result_of<
+					decltype(&node_t::accept)(node_t*, type&) >::type >::type
+			>
+		>;
 
 	template< typename node_t >
 	auto call_accept( const node_t * node )
-		-> typename std::enable_if<
-				!std::is_base_of<ast::Expr, node_t>::value &&
-				!std::is_base_of<ast::Stmt, node_t>::value
-			, result1<
-				typename std::remove_pointer< decltype( node->accept(*this) ) >::type
-			>
-		>::type;
+		-> typename generic_call_accept_result<node_t>::type;
 
 	// requests WithStmtsToAdd directly add to this statement, as if it is a compound.
-	result1<ast::Stmt> call_accept_as_compound(const ast::Stmt *);
-
-	// Container of statements
+	__pass::result1<ast::Stmt> call_accept_as_compound(const ast::Stmt *);
+
 	template< template <class...> class container_t >
-	struct resultNstmt {
-		struct delta {
-			ptr<Stmt> nval;
-			ssize_t old_idx;
-			bool is_old;
-
-			delta(const Stmt * s, ssize_t i, bool old) : nval{s}, old_idx{i}, is_old{old} {}
-		};
-
-		bool differs;
-		container_t< delta > values;
-
-		resultNstmt() : differs(false), values{} {}
-		resultNstmt(bool diff, container_t< delta > && vals) : differs(diff), values(vals) {}
-
-		template< typename object_t, typename super_t, typename field_t >
-		void apply(object_t *, field_t super_t::* field);
-
-		template< template <class...> class incontainer_t >
-		void take_all( incontainer_t<ast::ptr<ast::Stmt>> * stmts ) {
-			if(!stmts || stmts->empty()) return;
-
-			std::transform(stmts->begin(), stmts->end(), std::back_inserter( values ), [](ast::ptr<ast::Stmt>& decl) -> delta {
-					return delta( decl.release(), -1, false );
-				});
-			stmts->clear();
-			differs = true;
-		}
-
-		template< template <class...> class incontainer_t >
-		void take_all( incontainer_t<ast::ptr<ast::Decl>> * decls ) {
-			if(!decls || decls->empty()) return;
-
-			std::transform(decls->begin(), decls->end(), std::back_inserter( values ), [](ast::ptr<ast::Decl>& decl) -> auto {
-					auto loc = decl->location;
-					auto stmt = new DeclStmt( loc, decl.release() );
-					return delta( stmt, -1, false );
-				});
-			decls->clear();
-			differs = true;
-		}
-	};
-
-	template< template <class...> class container_t >
-	resultNstmt<container_t> call_accept( const container_t< ptr<Stmt> > & );
-
-	// Container of something
+	__pass::resultNstmt<container_t> call_accept( const container_t< ptr<Stmt> > & );
+
 	template< template <class...> class container_t, typename node_t >
-	struct resultN {
-		bool differs;
-		container_t<ptr<node_t>> values;
-
-		template< typename object_t, typename super_t, typename field_t >
-		void apply(object_t *, field_t super_t::* field);
-	};
-
-	template< template <class...> class container_t, typename node_t >
-	resultN< container_t, node_t > call_accept( const container_t< ptr<node_t> > & container );
+	__pass::resultN< container_t, node_t > call_accept( const container_t< ptr<node_t> > & container );
 
 public:
Index: src/AST/Pass.impl.hpp
===================================================================
--- src/AST/Pass.impl.hpp	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/AST/Pass.impl.hpp	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -111,5 +111,4 @@
 		}
 
-
 		//------------------------------
 		/// Check if value was mutated, different for pointers and containers
@@ -125,9 +124,7 @@
 	}
 
-
-	template< typename core_t >
 	template< typename node_t >
 	template< typename object_t, typename super_t, typename field_t >
-	void ast::Pass< core_t >::result1< node_t >::apply(object_t * object, field_t super_t::* field) {
+	void __pass::result1< node_t >::apply( object_t * object, field_t super_t::* field ) {
 		object->*field = value;
 	}
@@ -136,11 +133,5 @@
 	template< typename node_t >
 	auto ast::Pass< core_t >::call_accept( const node_t * node )
-		-> typename std::enable_if<
-				!std::is_base_of<ast::Expr, node_t>::value &&
-				!std::is_base_of<ast::Stmt, node_t>::value
-			, ast::Pass< core_t >::result1<
-				typename std::remove_pointer< decltype( node->accept(*this) ) >::type
-			>
-		>::type
+		-> typename ast::Pass< core_t >::template generic_call_accept_result<node_t>::type
 	{
 		__pedantic_pass_assert( __visit_children() );
@@ -151,5 +142,5 @@
 
 		auto nval = node->accept( *this );
-		ast::Pass< core_t >::result1<
+		__pass::result1<
 			typename std::remove_pointer< decltype( node->accept(*this) ) >::type
 		> res;
@@ -160,5 +151,5 @@
 
 	template< typename core_t >
-	typename ast::Pass< core_t >::template result1<ast::Expr> ast::Pass< core_t >::call_accept( const ast::Expr * expr ) {
+	__pass::template result1<ast::Expr> ast::Pass< core_t >::call_accept( const ast::Expr * expr ) {
 		__pedantic_pass_assert( __visit_children() );
 		__pedantic_pass_assert( expr );
@@ -174,5 +165,5 @@
 
 	template< typename core_t >
-	typename ast::Pass< core_t >::template result1<ast::Stmt> ast::Pass< core_t >::call_accept( const ast::Stmt * stmt ) {
+	__pass::template result1<ast::Stmt> ast::Pass< core_t >::call_accept( const ast::Stmt * stmt ) {
 		__pedantic_pass_assert( __visit_children() );
 		__pedantic_pass_assert( stmt );
@@ -183,5 +174,5 @@
 
 	template< typename core_t >
-	typename ast::Pass< core_t >::template result1<ast::Stmt> ast::Pass< core_t >::call_accept_as_compound( const ast::Stmt * stmt ) {
+	__pass::template result1<ast::Stmt> ast::Pass< core_t >::call_accept_as_compound( const ast::Stmt * stmt ) {
 		__pedantic_pass_assert( __visit_children() );
 		__pedantic_pass_assert( stmt );
@@ -233,8 +224,7 @@
 	}
 
-	template< typename core_t >
 	template< template <class...> class container_t >
 	template< typename object_t, typename super_t, typename field_t >
-	void ast::Pass< core_t >::resultNstmt<container_t>::apply(object_t * object, field_t super_t::* field) {
+	void __pass::resultNstmt<container_t>::apply(object_t * object, field_t super_t::* field) {
 		auto & container = object->*field;
 		__pedantic_pass_assert( container.size() <= values.size() );
@@ -243,20 +233,48 @@
 
 		container_t<ptr<Stmt>> nvals;
-		for(delta & d : values) {
-			if( d.is_old ) {
+		for (delta & d : values) {
+			if ( d.is_old ) {
 				__pedantic_pass_assert( cit.idx <= d.old_idx );
 				std::advance( cit, d.old_idx - cit.idx );
 				nvals.push_back( std::move( (*cit).val) );
 			} else {
-				nvals.push_back( std::move(d.nval) );
+				nvals.push_back( std::move(d.new_val) );
 			}
 		}
 
-		object->*field = std::move(nvals);
+		container = std::move(nvals);
+	}
+
+	template< template <class...> class container_t >
+	template< template <class...> class incontainer_t >
+	void __pass::resultNstmt< container_t >::take_all( incontainer_t<ptr<Stmt>> * stmts ) {
+		if (!stmts || stmts->empty()) return;
+
+		std::transform(stmts->begin(), stmts->end(), std::back_inserter( values ),
+			[](ast::ptr<ast::Stmt>& stmt) -> delta {
+				return delta( stmt.release(), -1, false );
+			});
+		stmts->clear();
+		differs = true;
+	}
+
+	template< template<class...> class container_t >
+	template< template<class...> class incontainer_t >
+	void __pass::resultNstmt< container_t >::take_all( incontainer_t<ptr<Decl>> * decls ) {
+		if (!decls || decls->empty()) return;
+
+		std::transform(decls->begin(), decls->end(), std::back_inserter( values ),
+			[](ast::ptr<ast::Decl>& decl) -> delta {
+				auto loc = decl->location;
+				auto stmt = new DeclStmt( loc, decl.release() );
+				return delta( stmt, -1, false );
+			});
+		decls->clear();
+		differs = true;
 	}
 
 	template< typename core_t >
 	template< template <class...> class container_t >
-	typename ast::Pass< core_t >::template resultNstmt<container_t> ast::Pass< core_t >::call_accept( const container_t< ptr<Stmt> > & statements ) {
+	__pass::template resultNstmt<container_t> ast::Pass< core_t >::call_accept( const container_t< ptr<Stmt> > & statements ) {
 		__pedantic_pass_assert( __visit_children() );
 		if( statements.empty() ) return {};
@@ -285,5 +303,5 @@
 		pass_visitor_stats.avg->push(pass_visitor_stats.depth);
 
-		resultNstmt<container_t> new_kids;
+		__pass::resultNstmt<container_t> new_kids;
 		for( auto value : enumerate( statements ) ) {
 			try {
@@ -327,8 +345,7 @@
 	}
 
-	template< typename core_t >
 	template< template <class...> class container_t, typename node_t >
 	template< typename object_t, typename super_t, typename field_t >
-	void ast::Pass< core_t >::resultN<container_t, node_t>::apply(object_t * object, field_t super_t::* field) {
+	void __pass::resultN<container_t, node_t>::apply(object_t * object, field_t super_t::* field) {
 		auto & container = object->*field;
 		__pedantic_pass_assert( container.size() == values.size() );
@@ -346,5 +363,5 @@
 	template< typename core_t >
 	template< template <class...> class container_t, typename node_t >
-	typename ast::Pass< core_t >::template resultN<container_t, node_t> ast::Pass< core_t >::call_accept( const container_t< ast::ptr<node_t> > & container ) {
+	__pass::template resultN<container_t, node_t> ast::Pass< core_t >::call_accept( const container_t< ast::ptr<node_t> > & container ) {
 		__pedantic_pass_assert( __visit_children() );
 		if( container.empty() ) return {};
@@ -378,5 +395,5 @@
 		if ( ! errors.isEmpty() ) { throw errors; }
 
-		return ast::Pass< core_t >::resultN<container_t, node_t>{ mutated,  new_kids };
+		return ast::__pass::resultN<container_t, node_t>{ mutated, new_kids };
 	}
 
Index: src/AST/Pass.proto.hpp
===================================================================
--- src/AST/Pass.proto.hpp	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/AST/Pass.proto.hpp	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -23,5 +23,5 @@
 class Pass;
 
-struct TranslationUnit;
+class TranslationUnit;
 
 struct PureVisitor;
@@ -123,4 +123,50 @@
 		static constexpr bool value = std::is_void< ret_t >::value ||
 			std::is_base_of<const node_t, typename std::remove_pointer<ret_t>::type >::value;
+	};
+
+	/// The result is a single node.
+	template< typename node_t >
+	struct result1 {
+		bool differs;
+		const node_t * value;
+
+		template< typename object_t, typename super_t, typename field_t >
+		void apply( object_t *, field_t super_t::* field );
+	};
+
+	/// The result is a container of statements.
+	template< template<class...> class container_t >
+	struct resultNstmt {
+		/// The delta/change on a single node.
+		struct delta {
+			ptr<Stmt> new_val;
+			ssize_t old_idx;
+			bool is_old;
+
+			delta(const Stmt * s, ssize_t i, bool old) :
+				new_val(s), old_idx(i), is_old(old) {}
+		};
+
+		bool differs;
+		container_t< delta > values;
+
+		template< typename object_t, typename super_t, typename field_t >
+		void apply( object_t *, field_t super_t::* field );
+
+		template< template<class...> class incontainer_t >
+		void take_all( incontainer_t<ptr<Stmt>> * stmts );
+
+		template< template<class...> class incontainer_t >
+		void take_all( incontainer_t<ptr<Decl>> * decls );
+	};
+
+	/// The result is a container of nodes.
+	template< template<class...> class container_t, typename node_t >
+	struct resultN {
+		bool differs;
+		container_t<ptr<node_t>> values;
+
+		template< typename object_t, typename super_t, typename field_t >
+		void apply( object_t *, field_t super_t::* field );
 	};
 
Index: src/AST/TranslationUnit.hpp
===================================================================
--- src/AST/TranslationUnit.hpp	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/AST/TranslationUnit.hpp	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -23,5 +23,6 @@
 namespace ast {
 
-struct TranslationUnit {
+class TranslationUnit {
+public:
 	std::list< ptr< Decl > > decls;
 
Index: src/CodeGen/FixNames.h
===================================================================
--- src/CodeGen/FixNames.h	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/CodeGen/FixNames.h	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -20,5 +20,5 @@
 class Declaration;
 namespace ast {
-	struct TranslationUnit;
+	class TranslationUnit;
 }
 
Index: src/Common/CodeLocation.h
===================================================================
--- src/Common/CodeLocation.h	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/Common/CodeLocation.h	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -25,5 +25,4 @@
 	/// Create a new unset CodeLocation.
 	CodeLocation() = default;
-
 
 	/// Create a new CodeLocation with the given values.
Index: src/Common/CodeLocationTools.hpp
===================================================================
--- src/Common/CodeLocationTools.hpp	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/Common/CodeLocationTools.hpp	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -17,5 +17,5 @@
 
 namespace ast {
-	struct TranslationUnit;
+	class TranslationUnit;
 }
 
Index: src/Common/ResolvProtoDump.hpp
===================================================================
--- src/Common/ResolvProtoDump.hpp	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/Common/ResolvProtoDump.hpp	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -17,5 +17,5 @@
 
 namespace ast {
-	struct TranslationUnit;
+	class TranslationUnit;
 }
 
Index: src/Concurrency/Waitfor.cc
===================================================================
--- src/Concurrency/Waitfor.cc	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/Concurrency/Waitfor.cc	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -372,22 +372,6 @@
 			),
 			new ListInit(
-				map_range < std::list<Initializer*> > ( clause.target.arguments, [this](Expression * expr ){
-					Expression * init = new CastExpr(
-						new UntypedExpr(
-							new NameExpr( "get_monitor" ),
-							{ expr }
-						),
-						new PointerType(
-							noQualifiers,
-							new StructInstType(
-								noQualifiers,
-								decl_monitor
-							)
-						),
-						false
-					);
-
-					ResolvExpr::findSingleExpression( init, indexer );
-					return new SingleInit( init );
+				map_range < std::list<Initializer*> > ( clause.target.arguments, [](Expression * expr ){
+					return new SingleInit( expr );
 				})
 			)
Index: src/ControlStruct/MultiLevelExit.cpp
===================================================================
--- src/ControlStruct/MultiLevelExit.cpp	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/ControlStruct/MultiLevelExit.cpp	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -176,5 +176,5 @@
 	auto mutStmt = mutate( stmt );
 	// A child statement may set the break label.
-	mutStmt->kids = move( fixBlock( stmt->kids, false ) );
+	mutStmt->kids = fixBlock( stmt->kids, false );
 
 	if ( isLabeled ) {
Index: src/InitTweak/FixInit.h
===================================================================
--- src/InitTweak/FixInit.h	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/InitTweak/FixInit.h	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -21,5 +21,5 @@
 class Declaration;
 namespace ast {
-	struct TranslationUnit;
+	class TranslationUnit;
 }
 
Index: src/MakeLibCfa.h
===================================================================
--- src/MakeLibCfa.h	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/MakeLibCfa.h	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -5,5 +5,5 @@
 // file "LICENCE" distributed with Cforall.
 //
-// MakeLibCfa.h -- 
+// MakeLibCfa.h --
 //
 // Author           : Richard C. Bilson
@@ -20,5 +20,5 @@
 class Declaration;
 namespace ast {
-	struct TranslationUnit;
+	class TranslationUnit;
 }
 
Index: src/Parser/parser.yy
===================================================================
--- src/Parser/parser.yy	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/Parser/parser.yy	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -10,6 +10,6 @@
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Tue Feb  1 11:06:13 2022
-// Update Count     : 5167
+// Last Modified On : Fri Feb 11 14:26:15 2022
+// Update Count     : 5174
 //
 
@@ -1197,5 +1197,4 @@
 		{ $$ = new StatementNode( build_while( $3, maybe_build_compound( $5 ) ) ); }
 	| WHILE '(' conditional_declaration ')' statement ELSE statement // CFA
-		// { SemanticError( yylloc, "Loop default block is currently unimplemented." ); $$ = nullptr; }
 		{ $$ = new StatementNode( build_while( $3, maybe_build_compound( $5 ), $7 ) ); }
 	| DO statement WHILE '(' ')' ';'					// CFA => do while( 1 )
@@ -1204,5 +1203,4 @@
 		{ $$ = new StatementNode( build_do_while( $5, maybe_build_compound( $2 ) ) ); }
 	| DO statement WHILE '(' comma_expression ')' ELSE statement // CFA
-		// { SemanticError( yylloc, "Loop default block is currently unimplemented." ); $$ = nullptr; }
 		{ $$ = new StatementNode( build_do_while( $5, maybe_build_compound( $2 ), $8 ) ); }
 	| FOR '(' ')' statement								// CFA => for ( ;; )
@@ -1211,5 +1209,4 @@
 	  	{ $$ = new StatementNode( build_for( $3, maybe_build_compound( $5 ) ) ); }
 	| FOR '(' for_control_expression_list ')' statement ELSE statement // CFA
-		// { SemanticError( yylloc, "Loop default block is currently unimplemented." ); $$ = nullptr; }
 		{ $$ = new StatementNode( build_for( $3, maybe_build_compound( $5 ), $7 ) ); }
 	;
@@ -2729,4 +2726,15 @@
 	| ASM '(' string_literal ')' ';'					// GCC, global assembler statement
 		{ $$ = DeclarationNode::newAsmStmt( new StatementNode( build_asm( false, $3, 0 ) ) ); }
+	| EXTERN STRINGliteral
+		{
+			linkageStack.push( linkage );				// handle nested extern "C"/"Cforall"
+			linkage = LinkageSpec::update( yylloc, linkage, $2 );
+		}
+	  up external_definition down 
+		{
+			linkage = linkageStack.top();
+			linkageStack.pop();
+			$$ = $5;
+		}
 	| EXTERN STRINGliteral								// C++-style linkage specifier
 		{
Index: src/ResolvExpr/Resolver.cc
===================================================================
--- src/ResolvExpr/Resolver.cc	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/ResolvExpr/Resolver.cc	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -1112,5 +1112,5 @@
 		}
 
-		
+
 	} // anonymous namespace
 /// Establish post-resolver invariants for expressions
@@ -1158,5 +1158,5 @@
 
 	namespace {
-		
+
 
 		/// resolve `untyped` to the expression whose candidate satisfies `pred` with the
@@ -1905,6 +1905,21 @@
 
 			clause2.target.args.reserve( clause.target.args.size() );
+			const ast::StructDecl * decl_monitor = symtab.lookupStruct( "monitor$" );
 			for ( auto arg : argsCandidates.front() ) {
-				clause2.target.args.emplace_back( std::move( arg->expr ) );
+				const auto & loc = stmt->location;
+
+				ast::Expr * init = new ast::CastExpr( loc,
+					new ast::UntypedExpr( loc,
+						new ast::NameExpr( loc, "get_monitor" ),
+						{ arg->expr }
+					),
+					new ast::PointerType(
+						new ast::StructInstType(
+							decl_monitor
+						)
+					)
+				);
+
+				clause2.target.args.emplace_back( findSingleExpression( init, symtab ) );
 			}
 
@@ -2077,5 +2092,5 @@
 		if (auto functionDecl = decl.as<ast::FunctionDecl>()) {
 			// xxx - can intrinsic gen ever fail?
-			if (functionDecl->linkage == ast::Linkage::AutoGen) { 
+			if (functionDecl->linkage == ast::Linkage::AutoGen) {
 				auto mutDecl = mutate(functionDecl);
 				mutDecl->isDeleted = true;
Index: src/ResolvExpr/Resolver.h
===================================================================
--- src/ResolvExpr/Resolver.h	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ src/ResolvExpr/Resolver.h	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -35,5 +35,5 @@
 	class StmtExpr;
 	class SymbolTable;
-	struct TranslationUnit;
+	class TranslationUnit;
 	class Type;
 	class TypeEnvironment;
@@ -72,5 +72,5 @@
 	ast::ptr< ast::Init > resolveCtorInit(
 		const ast::ConstructorInit * ctorInit, const ast::SymbolTable & symtab );
-	/// Resolves a statement expression 
+	/// Resolves a statement expression
 	const ast::Expr * resolveStmtExpr(
 		const ast::StmtExpr * stmtExpr, const ast::SymbolTable & symtab );
Index: tests/PRNG.cfa
===================================================================
--- tests/PRNG.cfa	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ tests/PRNG.cfa	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -8,6 +8,6 @@
 // Created On       : Wed Dec 29 09:38:12 2021
 // Last Modified By : Peter A. Buhr
-// Last Modified On : Fri Feb 11 08:16:43 2022
-// Update Count     : 328
+// Last Modified On : Sat Feb 12 12:23:57 2022
+// Update Count     : 342
 // 
 
@@ -20,4 +20,5 @@
 #include <malloc.h>										// malloc_stats
 #include <locale.h>										// setlocale
+#include <mutex_stmt.hfa>
 
 // FIX ME: spurious characters appear in output
@@ -50,11 +51,11 @@
 	} // for
 	double std = sqrt( sum / BUCKETS );
-	sout | "trials"  | TRIALS | "buckets" | BUCKETS
-		 | "min" | min | "max" | max
-		 | "avg" | wd(0,1, avg) | "std" | wd(0,1, std) | "rstd" | wd(0,1, (avg == 0 ? 0.0 : std / avg * 100)) | "%";
+	mutex( sout ) sout | "trials"  | TRIALS | "buckets" | BUCKETS
+		| "min" | min | "max" | max
+		| "avg" | wd(0,1, avg) | "std" | wd(0,1, std) | "rstd" | wd(0,1, (avg == 0 ? 0.0 : std / avg * 100)) | "%";
 } // avgstd
 
+
 uint32_t seed = 1009;
-
 
 thread T1 {};
@@ -94,5 +95,5 @@
 	unsigned int * buckets = calloc( BUCKETS );			// too big for task stack
 	for ( TRIALS ) {
-		buckets[prng( (thread$ &)th ) % BUCKETS] += 1;	// concurrent
+		buckets[prng( th ) % BUCKETS] += 1;	// concurrent
 	} // for
 	avgstd( buckets );
Index: tests/meta/dumpable.cfa
===================================================================
--- tests/meta/dumpable.cfa	(revision a55649200b5601ebf0ca0b430984b9546e7bed94)
+++ tests/meta/dumpable.cfa	(revision 9ef96449b6fa707000709413547abf0df2ec8683)
@@ -72,6 +72,8 @@
 	}
 
-	if((buf.f_bsize * buf.f_bavail) < 536870912) {
-		serr | "Available diskspace is less than ~500Mb: " | (buf.f_bsize * buf.f_bavail);
+	uint64_t avail = buf.f_bavail;
+	avail *= buf.f_bsize;
+	if(avail < 536870912_l64u) {
+		serr | "Available diskspace is less than ~500Mb: " | avail;
 	}
 
